Compare commits

..

2 Commits

Author SHA1 Message Date
Joonas Koivunen
ca1ed3dc3b drive by typo fix 2022-11-02 21:11:05 +02:00
Joonas Koivunen
dc2554dff6 chore: remove no longer needed empty rel fix
this seems to have been fixed long enough ago.
2022-11-02 21:10:44 +02:00
292 changed files with 18741 additions and 53395 deletions

View File

@@ -14,7 +14,6 @@
!pgxn/
!proxy/
!safekeeper/
!storage_broker/
!vendor/postgres-v14/
!vendor/postgres-v15/
!workspace_hack/

View File

@@ -32,8 +32,8 @@ runs:
exit 2
fi
- name: Calculate variables
id: calculate-vars
- name: Calculate key
id: calculate-key
shell: bash -euxo pipefail {0}
run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
@@ -41,22 +41,14 @@ runs:
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number}
elif [ "${GITHUB_REF_NAME}" = "main" ]; then
elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then
# Shortcut for a special branch
key=main
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for a special branch
key=release
else
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
# Sanitize test selection to remove `/` and any other special characters
# Use printf instead of echo to avoid having `\n` at the end of the string
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
with:
@@ -82,11 +74,10 @@ runs:
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
shell: bash -euxo pipefail {0}
run: |
# Add metadata
@@ -107,7 +98,7 @@ runs:
BUILD_TYPE=${{ inputs.build_type }}
EOF
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
@@ -118,9 +109,8 @@ runs:
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
BUCKET: neon-github-public-dev
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
LOCK_TIMEOUT=300 # seconds
@@ -133,12 +123,12 @@ runs:
fi
sleep 1
done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# A double-check that exactly WE have acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
break
fi
done
@@ -147,8 +137,8 @@ runs:
if: ${{ inputs.action == 'generate' }}
id: generate-report
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
shell: bash -euxo pipefail {0}
@@ -202,13 +192,12 @@ runs:
if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
BUCKET: neon-github-public-dev
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi

View File

@@ -1,138 +0,0 @@
name: 'Create Branch'
description: 'Create Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
value: ${{ steps.change-password.outputs.dsn }}
branch_id:
description: 'Created Branch ID'
value: ${{ steps.create-branch.outputs.branch_id }}
runs:
using: "composite"
steps:
- name: Create New Branch
id: create-branch
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"branch\": {
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
},
\"endpoints\": [
{
\"type\": \"read_write\"
}
]
}")
if [ -z "${branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
exit 1
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
echo "branch_id=${branch_id}" >> $GITHUB_OUTPUT
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
echo "host=${host}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
- name: Get Role name
id: role-name
shell: bash -euxo pipefail {0}
run: |
roles=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
- name: Change Password
id: change-password
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
for i in $(seq 1 10); do
reset_password=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles/${ROLE_NAME}/reset_password" \
--request POST \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${reset_password}" ]; then
sleep 1
continue
fi
password=$(echo $reset_password | jq --raw-output '.role.password')
if [ "${password}" == "null" ]; then
sleep 1
continue
fi
echo "::add-mask::${password}"
break
done
if [ -z "${password}" ] || [ "${password}" == "null" ]; then
echo 2>&1 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
exit 1
fi
dsn="postgres://${ROLE_NAME}:${password}@${HOST}/neondb"
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}
HOST: ${{ steps.create-branch.outputs.host }}

View File

@@ -1,58 +0,0 @@
name: 'Delete Branch'
description: 'Delete Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
desctiption: 'ID of the branch to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Branch
# Do not try to delete a branch if .github/actions/neon-project-create
# or .github/actions/neon-branch-create failed before
if: ${{ inputs.project_id != '' && inputs.branch_id != '' }}
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
deleted_branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}" \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${deleted_branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $deleted_branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
exit 1
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ inputs.branch_id }}

View File

@@ -5,16 +5,12 @@ inputs:
api_key:
desctiption: 'Neon API key'
required: true
environment:
desctiption: 'dev (aka captest) or stage'
required: true
region_id:
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
required: false
outputs:
dsn:
description: 'Created Project DSN (for main database)'
@@ -26,13 +22,38 @@ outputs:
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
REGION_ID=${REGION_ID:-eu-west-1}
;;
staging)
API_HOST=console.stage.neon.tech
REGION_ID=${REGION_ID:-us-east-1}
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}
- name: Create Neon Project
id: create-neon-project
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
project=$(curl \
"https://${API_HOST}/api/v2/projects" \
"https://${API_HOST}/api/v1/projects" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
@@ -40,7 +61,7 @@ runs:
--data "{
\"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION},
\"platform_id\": \"aws\",
\"region_id\": \"${REGION_ID}\",
\"settings\": { }
}
@@ -49,14 +70,13 @@ runs:
# Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
dsn=$(echo $project | jq --raw-output '.connection_uris[0].connection_uri')
dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
project_id=$(echo $project | jq --raw-output '.project.id')
project_id=$(echo $project | jq --raw-output '.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
REGION_ID: ${{ steps.parse-input.outputs.region_id }}

View File

@@ -5,29 +5,50 @@ inputs:
api_key:
desctiption: 'Neon API key'
required: true
environment:
desctiption: 'dev (aka captest) or stage'
required: true
project_id:
desctiption: 'ID of the Project to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Neon Project
# Do not try to delete a project if .github/actions/neon-project-create failed before
if: ${{ inputs.project_id != '' }}
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}" \
--fail \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
;;
staging)
API_HOST=console.stage.neon.tech
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Delete Neon Project
shell: bash -euxo pipefail {0}
run: |
# Allow PROJECT_ID to be empty/null for cases when .github/actions/neon-project-create failed
if [ -n "${PROJECT_ID}" ]; then
curl -X "POST" \
"https://${API_HOST}/api/v1/projects/${PROJECT_ID}/delete" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

View File

@@ -55,22 +55,6 @@ runs:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
@@ -89,18 +73,23 @@ runs:
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin
COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes')
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
@@ -123,12 +112,7 @@ runs:
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n4 uses four processes to run tests via pytest-xdist
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist
EXTRA_PARAMS="--dist=loadgroup $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
@@ -163,9 +147,9 @@ runs:
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
#
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
@@ -185,12 +169,12 @@ runs:
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
# The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Create Allure report
if: success() || failure()
if: always()
uses: ./.github/actions/allure-report
with:
action: store

View File

@@ -1,3 +1,5 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version

View File

@@ -25,7 +25,6 @@ mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: neon-storage-ireland
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-stress-console.local
broker_endpoint: http://storage-broker.neon-stress.local:50051
etcd_endpoints: neon-stress-etcd.local:2379
safekeeper_enable_s3_offload: 'false'
pageserver_config_stub:
pg_distrib_dir: /usr/local

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: neon-prod-storage-ap-southeast-1
bucket_region: ap-southeast-1
console_mgmt_base_url: http://console-release.local
broker_endpoint: https://storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech:443
etcd_endpoints: etcd-0.ap-southeast-1.aws.neon.tech:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: neon-prod-storage-eu-central-1
bucket_region: eu-central-1
console_mgmt_base_url: http://console-release.local
broker_endpoint: https://storage-broker.gamma.eu-central-1.internal.aws.neon.tech:443
etcd_endpoints: etcd-0.eu-central-1.aws.neon.tech:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: neon-prod-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://console-release.local
broker_endpoint: https://storage-broker.delta.us-east-2.internal.aws.neon.tech:443
etcd_endpoints: etcd-0.us-east-2.aws.neon.tech:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -4,7 +4,7 @@ storage:
console_mgmt_base_url: http://console-release.local
bucket_name: zenith-storage-oregon
bucket_region: us-west-2
broker_endpoint: http://storage-broker.prod.local:50051
etcd_endpoints: zenith-1-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
@@ -22,10 +22,6 @@ storage:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
zenith-1-ps-4:
console_region_id: aws-us-west-2
zenith-1-ps-5:
console_region_id: aws-us-west-2
safekeepers:
hosts:

View File

@@ -1,8 +1,7 @@
#!/bin/sh
# fetch params from meta-data service
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
@@ -17,8 +16,7 @@ cat <<EOF | tee /tmp/payload
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false,
"availability_zone_id": "${AZ_ID}"
"active": false
}
EOF

View File

@@ -1,33 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://console-staging.local
broker_endpoint: https://storage-broker.zeta.eu-west-1.internal.aws.neon.build:443
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
broker_endpoint: http://storage-broker.staging.local:50051
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://console-staging.local
broker_endpoint: https://storage-broker.beta.us-east-2.internal.aws.neon.build:443
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
@@ -22,8 +22,6 @@ storage:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
safekeepers:
hosts:

View File

@@ -5,8 +5,8 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }}
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -D /storage/pageserver/data
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -5,8 +5,8 @@ After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }}
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: eu-west-1
zenith_region_slug: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.zeta.eu-west-1.internal.aws.neon.build
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.zeta.eu-west-1.internal.aws.neon.build
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,39 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
# -- Additional labels for neon-proxy-link pods
podLabels:
zenith_service: proxy
zenith_env: dev
zenith_region: us-east-2
zenith_region_slug: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.cloud.stage.neon.tech"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram-legacy
zenith_env: dev
zenith_region: us-east-2
zenith_region_slug: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.beta.us-east-2.internal.aws.neon.build
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.beta.us-east-2.internal.aws.neon.build
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: neon-stress
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker.neon-stress.local
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.gamma.eu-central-1.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.gamma.eu-central-1.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.delta.us-east-2.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.delta.us-east-2.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-release.local/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: prod
zenith_region: us-west-2
zenith_region_slug: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.eta.us-west-2.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.eta.us-west-2.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker.prod.local
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker.staging.local
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -15,6 +15,9 @@ on:
workflow_dispatch: # adds ability to run this manually
inputs:
environment:
description: 'Environment to run remote tests on (dev or staging)'
required: false
region_id:
description: 'Use a particular region. If not set the default region will be used'
required: false
@@ -34,69 +37,97 @@ concurrency:
jobs:
bench:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: "neon-staging"
# this workflow runs on self hosteed runner
# it's environment is quite different from usual guthub runner
# probably the most important difference is that it doesn't start from clean workspace each time
# e g if you install system packages they are not cleaned up since you install them directly in host machine
# not a container or something
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
POSTGRES_DISTRIB_DIR: /usr/pgsql
DEFAULT_PG_VERSION: 14
steps:
- uses: actions/checkout@v3
- name: Checkout zenith repo
uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
# so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
# there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtuealenvs
- name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused there shouldn't be any troubles with install every time
./scripts/pysync
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
environment: ${{ github.event.inputs.environment || 'staging' }}
api_key: ${{ ( github.event.inputs.environment || 'staging' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
# pgbench is installed system wide from official repo
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# via
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
# [pgdg13]
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# enabled=1
# gpgcheck=0
# EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env:
# The pgbench test runs two tests of given duration against each scale.
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on self hosted runner
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
@@ -113,24 +144,15 @@ jobs:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-new, neon-captest-prefetch, rds-postgres ]
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
runner: [ us-east-2 ]
include:
- platform: neon-captest-reuse
db_size: 10gb
runner: dev # TODO: Switch to us-east-2 after dry-bonus-223539 migration to staging
- platform: neon-captest-new
db_size: 50gb
runner: us-east-2
- platform: neon-captest-prefetch
db_size: 50gb
runner: us-east-2
- platform: rds-aurora
db_size: 50gb
runner: us-east-2
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
@@ -142,9 +164,9 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, "${{ matrix.runner }}", x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
@@ -169,9 +191,8 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
environment: ${{ github.event.inputs.environment || 'dev' }}
api_key: ${{ ( github.event.inputs.environment || 'dev' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Set up Connection String
id: set-up-connstr
@@ -184,13 +205,10 @@ jobs:
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
exit 1
;;
esac
@@ -202,11 +220,8 @@ jobs:
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -249,20 +264,21 @@ jobs:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
if: always()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_CAPTEST_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
@@ -271,226 +287,3 @@ jobs:
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.
# Run this job only when pgbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: success() || failure()
needs: [ pgbench-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
tpch-compare:
# TCP-H DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare & clickbench-compare.
# Run this job only when clickbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: success() || failure()
needs: [ clickbench-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -7,10 +7,6 @@ on:
- release
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
@@ -22,8 +18,8 @@ env:
jobs:
tag:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
@@ -49,85 +45,8 @@ jobs:
shell: bash
id: build-tag
check-codestyle-python:
runs-on: [ self-hosted, dev, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cloud:pinned
options: --init
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run isort to ensure code format
run: poetry run isort --diff --check .
- name: Run black to ensure code format
run: poetry run black --diff --check .
- name: Run flake8 to ensure code format
run: poetry run flake8 .
- name: Run mypy to check types
run: poetry run mypy .
check-codestyle-rust:
runs-on: [ self-hosted, dev, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v1-${{ runner.os }}-cargo-clippy-${{ hashFiles('rust-toolchain.toml') }}-${{ hashFiles('Cargo.lock') }}
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
- name: Run cargo clippy
run: ./run_clippy.sh
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
build-neon:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -160,10 +79,12 @@ jobs:
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
# Set some environment variables used by all the steps.
#
@@ -180,15 +101,16 @@ jobs:
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FEATURES="--features testing"
CARGO_FLAGS="--locked $CARGO_FEATURES"
CARGO_FLAGS="--locked --timings $CARGO_FEATURES"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FEATURES="--features testing,profiling"
CARGO_FLAGS="--locked --release $CARGO_FEATURES"
CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES"
fi
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
shell: bash -euxo pipefail {0}
# Don't include the ~/.cargo/registry/src directory. It contains just
# uncompressed versions of the crates in ~/.cargo/registry/cache
@@ -205,8 +127,8 @@ jobs:
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v1-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('rust-toolchain.toml') }}-${{ hashFiles('Cargo.lock') }}
v1-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('rust-toolchain.toml') }}-
v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-
- name: Cache postgres v14 build
id: cache_pg_14
@@ -225,21 +147,26 @@ jobs:
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Run cargo build
run: |
${cov_prefix} mold -run cargo build $CARGO_FLAGS --bins --tests
shell: bash -euxo pipefail {0}
- name: Run cargo test
run: |
${cov_prefix} cargo test $CARGO_FLAGS
shell: bash -euxo pipefail {0}
- name: Install rust binaries
run: |
@@ -280,9 +207,11 @@ jobs:
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
shell: bash -euxo pipefail {0}
- name: Install postgres binaries
run: cp -a pg_install /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
- name: Upload Neon artifact
uses: ./.github/actions/upload
@@ -290,13 +219,24 @@ jobs:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: /tmp/neon
- name: Prepare cargo build timing stats for storing
run: |
mkdir -p "/tmp/neon/cargo-timings/$BUILD_TYPE/"
cp -r ./target/cargo-timings/* "/tmp/neon/cargo-timings/$BUILD_TYPE/"
shell: bash -euxo pipefail {0}
- name: Upload cargo build stats
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-build-stats
path: /tmp/neon/cargo-timings/
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
regress-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -310,7 +250,7 @@ jobs:
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
fetch-depth: 2
- name: Pytest regression tests
uses: ./.github/actions/run-python-test-set
@@ -328,8 +268,34 @@ jobs:
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
upload-latest-artifacts:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests ]
if: github.ref_name == 'main'
steps:
- name: Copy Neon artifact to the latest directory
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/${{ github.run_id }}
run: |
for build_type in debug release; do
FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/artifacts/latest/${FILENAME}
done
benchmarks:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -344,7 +310,7 @@ jobs:
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
fetch-depth: 2
- name: Pytest benchmarks
uses: ./.github/actions/run-python-test-set
@@ -360,12 +326,12 @@ jobs:
# while coverage is currently collected for the debug ones
merge-allure-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests, benchmarks ]
if: ${{ !cancelled() }}
if: always()
strategy:
fail-fast: false
matrix:
@@ -390,6 +356,7 @@ jobs:
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPORT_URL: ${{ steps.create-allure-report.outputs.report-url }}
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
shell: bash -euxo pipefail {0}
run: |
curl --fail --output suites.json ${REPORT_URL%/index.html}/data/suites.json
./scripts/pysync
@@ -397,7 +364,7 @@ jobs:
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
coverage-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -422,7 +389,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v1-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('rust-toolchain.toml') }}-${{ hashFiles('Cargo.lock') }}
key: v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact
uses: ./.github/actions/download
@@ -438,6 +405,7 @@ jobs:
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
shell: bash -euxo pipefail {0}
- name: Build and upload coverage report
run: |
@@ -470,21 +438,18 @@ jobs:
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
needs: [ push-docker-hub, tag ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
# For pull requests, GH Actions set "github.sha" variable to point at a fake merge commit
# but we need to use a real sha of a latest commit in the PR's branch for the e2e job,
# to place a job run status update later.
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
# For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
@@ -510,19 +475,14 @@ jobs:
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
\"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\"
\"remote_repo\": \"${{ github.repository }}\"
}
}"
neon-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
defaults:
run:
shell: sh -eu {0}
steps:
- name: Checkout
@@ -538,12 +498,9 @@ jobs:
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
compute-tools-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
defaults:
run:
shell: sh -eu {0}
steps:
- name: Checkout
@@ -555,14 +512,30 @@ jobs:
- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
compute-node-image-v14:
runs-on: [ self-hosted, dev, x64 ]
compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
defaults:
run:
shell: sh -eu {0}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
# compute-node uses postgres 14, which is default now
# cloud repo depends on this image name, thus duplicating it
# remove compute-node when cloud repo is updated
- name: Kaniko build compute node with extensions v14 (compatibility)
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
compute-node-image-v14:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -576,14 +549,11 @@ jobs:
- name: Kaniko build compute node with extensions v14
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
compute-node-image-v15:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
defaults:
run:
shell: sh -eu {0}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -597,58 +567,18 @@ jobs:
- name: Kaniko build compute node with extensions v15
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
test-images:
needs: [ tag, neon-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
runs-on: [ self-hosted, dev, x64 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
# Regular pageserver version string looks like
# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
# Bad versions might loop like:
# Neon page server git-env:local failpoints: true, features: ["testing"]
# Ensure that we don't have bad versions.
- name: Verify image versions
shell: bash # ensure no set -e for better error messages
run: |
pageserver_version=$(docker run --rm 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
echo "Pageserver version string: $pageserver_version"
if ! echo "$pageserver_version" | grep -qv 'git-env:local' ; then
echo "Pageserver version should not be the default Dockerfile one"
exit 1
fi
if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
echo "Pageserver version should have no testing feature enabled"
exit 1
fi
- name: Verify docker-compose example
run: env REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
- name: Print logs and clean up
if: always()
run: |
docker compose -f ./docker-compose/docker-compose.yml logs || 0
docker compose -f ./docker-compose/docker-compose.yml down
promote-images:
runs-on: [ self-hosted, dev, x64 ]
needs: [ tag, test-images ]
runs-on: dev
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
fail-fast: false
matrix:
name: [ neon, compute-node-v14, compute-node-v15, compute-tools ]
# compute-node uses postgres 14, which is default now
# cloud repo depends on this image name, thus duplicating it
# remove compute-node when cloud repo is updated
name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]
steps:
- name: Promote image to latest
@@ -657,7 +587,7 @@ jobs:
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
@@ -678,6 +608,9 @@ jobs:
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
- name: Pull compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
@@ -694,6 +627,7 @@ jobs:
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
@@ -709,6 +643,9 @@ jobs:
- name: Push compute tools image to Docker Hub
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
- name: Push compute node image to Docker Hub
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
- name: Push compute node v14 image to Docker Hub
run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
@@ -725,11 +662,12 @@ jobs:
run: |
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
calculate-deploy-targets:
runs-on: [ self-hosted, dev, x64 ]
runs-on: [ self-hosted, Linux, k8s-runner ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -739,11 +677,11 @@ jobs:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "staging.neon-storage-broker", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "storage_broker_ns": "neon-stress-storage-broker", "storage_broker_config": "neon-stress.neon-storage-broker", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "production.neon-storage-broker", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
@@ -751,8 +689,8 @@ jobs:
fi
deploy:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
runs-on: [ self-hosted, Linux, k8s-runner ]
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
@@ -803,11 +741,11 @@ jobs:
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
@@ -832,6 +770,7 @@ jobs:
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
@@ -840,41 +779,9 @@ jobs:
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
deploy-pr-test-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
if: |
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-prod-new:
@@ -882,7 +789,7 @@ jobs:
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -914,12 +821,12 @@ jobs:
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_PRODUCTION_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
@@ -956,55 +863,14 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-storage-broker:
name: deploy storage broker on old staging and old prod
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
deploy-proxy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Add curl
run: apt update && apt install curl -y
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace ${{ matrix.storage_broker_ns }} --create-namespace --install --atomic -f .github/helm-values/${{ matrix.storage_broker_config }}.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
deploy-proxy-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
@@ -1016,12 +882,6 @@ jobs:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
deploy_link_proxy: true
deploy_legacy_scram_proxy: true
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -1034,62 +894,16 @@ jobs:
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-dev-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -1101,8 +915,6 @@ jobs:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
@@ -1122,48 +934,10 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
promote-compatibility-data:
runs-on: [ self-hosted, dev, x64 ]
promote-compatibility-test-snapshot:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -1171,28 +945,14 @@ jobs:
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/latest
run: |
# Update compatibility snapshot for the release
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done
# Update Neon artifact for the release (reuse already uploaded artifact)
for build_type in debug release; do
OLD_PREFIX=artifacts/${GITHUB_RUN_ID}
FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
echo 2>&1 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/${PREFIX}/${FILENAME}
done

166
.github/workflows/codestyle.yml vendored Normal file
View File

@@ -0,0 +1,166 @@
name: Check code style and build
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# XXX: both OSes have rustup
# * https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md#rust-tools
# * https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#rust-tools
# this is all we need to install our toolchain later via rust-toolchain.toml
# so don't install any toolchain explicitly.
os: [ubuntu-latest, macos-latest]
timeout-minutes: 90
name: check codestyle rust and postgres
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Check formatting
run: cargo fmt --all -- --check
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
- name: Install macOS postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison openssl
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
if: matrix.os == 'macos-latest'
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14
shell: bash -euxo pipefail {0}
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15
shell: bash -euxo pipefail {0}
- name: Build neon extensions
run: make neon-pg-ext
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v6-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --locked --all --all-targets
check-rust-dependencies:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check every project module is covered by Hakari
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
shell: bash -euxo pipefail {0}
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run isort to ensure code format
run: poetry run isort --diff --check .
- name: Run black to ensure code format
run: poetry run black --diff --check .
- name: Run flake8 to ensure code format
run: poetry run flake8 .
- name: Run mypy to check types
run: poetry run mypy .

View File

@@ -1,128 +0,0 @@
name: Check neon with extra platform builds
on:
push:
branches:
- main
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-macos-build:
timeout-minutes: 90
runs-on: macos-latest
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Cache cargo deps
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15 -j$(nproc)
- name: Build neon extensions
run: make neon-pg-ext -j$(nproc)
- name: Run cargo build
run: cargo build --all --release
- name: Check that no warnings are produced
run: ./run_clippy.sh
gather-rust-build-stats:
timeout-minutes: 90
runs-on: ubuntu-latest
env:
BUILD_TYPE: release
# build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Install Ubuntu postgres dependencies
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev protobuf-compiler
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
- name: Produce the build stats
run: cargo build --all --release --timings
- name: Upload the build stats
uses: actions/upload-artifact@v3
with:
name: neon-${{ runner.os }}-release-build-stats
path: ./target/cargo-timings/

View File

@@ -23,7 +23,6 @@ jobs:
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
@@ -52,8 +51,8 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
environment: staging
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
@@ -64,7 +63,7 @@ jobs:
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
@@ -76,6 +75,7 @@ jobs:
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

2
.gitmodules vendored
View File

@@ -1,7 +1,7 @@
[submodule "vendor/postgres-v14"]
path = vendor/postgres-v14
url = https://github.com/neondatabase/postgres.git
branch = REL_14_STABLE_neon
branch = main
[submodule "vendor/postgres-v15"]
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git

View File

@@ -1,11 +0,0 @@
/compute_tools/ @neondatabase/control-plane
/control_plane/ @neondatabase/compute @neondatabase/storage
/libs/pageserver_api/ @neondatabase/compute @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute
/libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/safekeepers
/pageserver/ @neondatabase/compute @neondatabase/storage
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

1642
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,6 @@ members = [
"pageserver",
"proxy",
"safekeeper",
"storage_broker",
"workspace_hack",
"libs/*",
]
@@ -26,10 +25,6 @@ members = [
# Besides, debug info should not affect the performance.
debug = true
# disable debug symbols for all packages except this one to decrease binaries size
[profile.release.package."*"]
debug = false
[profile.release-line-debug]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only

View File

@@ -44,7 +44,7 @@ COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
&& cachepot -s
# Build final image
@@ -67,7 +67,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
@@ -79,7 +78,7 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoint='http://storage_broker:50051'" \
-c "broker_endpoints=['http://etcd:2379']" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"

View File

@@ -200,6 +200,9 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
@@ -208,7 +211,8 @@ RUN apt update && \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -200,6 +200,9 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
@@ -208,7 +211,8 @@ RUN apt update && \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -0,0 +1,88 @@
#
# Legacy version of the Dockerfile for the compute node.
# Used by e2e CI. Building Dockerfile.compute-node will take
# unreasonable ammount of time without v2 runners.
#
# TODO: remove once cloud repo CI is moved to v2 runners.
#
# Allow specifiyng different compute-tools tag and image repo, so we are
# able to use different images
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=compute-tools
ARG TAG=latest
#
# Image with pre-built tools
#
FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps
# Only to get ready compute_ctl binary as deppendency
#
# Image with Postgres build deps
#
FROM debian:bullseye-slim AS build-deps
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev
#
# Image with built Postgres
#
FROM build-deps AS pg-build
# Add user postgres
RUN adduser postgres
RUN mkdir /pg && chown postgres:postgres /pg
# Copy source files
# version 14 is default for now
COPY ./vendor/postgres-v14 /pg/
COPY ./pgxn /pg/
# Build and install Postgres locally
RUN mkdir /pg/compute_build && cd /pg/compute_build && \
../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \
# Install main binaries and contribs
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install
# Install neon contrib
RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install
USER postgres
WORKDIR /pg
#
# Final compute node image to be exported
#
FROM debian:bullseye-slim
# libreadline-dev is required to run psql
RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute
# Copy ready Postgres binaries
COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local
# Copy binaries from compute-tools
COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl
# XXX: temporary symlink for compatibility with old control-plane
RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
# Add postgres shared objects to the search path
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -20,18 +20,18 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif
# Use -C option so that when PostgreSQL "make install" installs the
@@ -73,8 +73,7 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
+@echo "Configuring Postgres v14 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
@@ -82,8 +81,7 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
+@echo "Configuring Postgres v15 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
@@ -113,8 +111,6 @@ postgres-v14: postgres-v14-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
+@echo "Compiling libpq v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
+@echo "Compiling pageinspect v14"
@@ -127,8 +123,6 @@ postgres-v15: postgres-v15-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
+@echo "Compiling libpq v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
+@echo "Compiling pageinspect v15"

View File

@@ -2,20 +2,29 @@
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation).
## Architecture overview
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
A Neon installation consists of compute nodes and a Neon storage engine.
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
Pageserver consists of:
- Repository - Neon storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request
## Running local installation
@@ -26,12 +35,12 @@ See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more inf
* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
```bash
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -44,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison
brew install protobuf etcd openssl
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -116,23 +125,24 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2545906
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
Stopped pageserver 1 process with pid 2545906
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting neon broker at 127.0.0.1:50051
storage_broker started, pid: 2918372
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2918386
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2918437
Starting etcd broker using /usr/bin/etcd
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
@@ -220,20 +230,12 @@ CARGO_BUILD_FLAGS="--features=testing" make
## Documentation
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
Now we use README files to cover design ideas and overall architecture for each module and `rustdoc` style documentation comments. See also [/docs/](/docs/) a top-level overview of all available markdown documentation.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
See also README files in some source directories, and `rustdoc` style documentation comments.
Other resources:
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
### Postgres-specific terms
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert some a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)

View File

@@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
chrono = "0.4"
clap = "4.0"
env_logger = "0.9"
futures = "0.3.13"

View File

@@ -65,7 +65,7 @@ impl GenericOption {
let name = match self.name.as_str() {
"safekeepers" => "neon.safekeepers",
"wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
"wal_acceptor_connection_timeout" => "neon.safekeeper_connection_timeout",
"wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
it => it,
};

View File

@@ -23,9 +23,6 @@ url = "2.2.2"
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
# instead, so that recompile times are better.
pageserver_api = { path = "../libs/pageserver_api" }
postgres_connection = { path = "../libs/postgres_connection" }
safekeeper_api = { path = "../libs/safekeeper_api" }
# Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_broker = { version = "0.1", path = "../storage_broker" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -10,5 +10,5 @@ id = 1
pg_port = 5454
http_port = 7676
[broker]
listen_addr = '127.0.0.1:50051'
[etcd_broker]
broker_endpoints = ['http://127.0.0.1:2379']

View File

@@ -14,32 +14,20 @@
use std::ffi::OsStr;
use std::io::Write;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::process::{Child, Command};
use std::time::Duration;
use std::{fs, io, thread};
use anyhow::Context;
use anyhow::{anyhow, bail, Context, Result};
use nix::errno::Errno;
use nix::fcntl::{FcntlArg, FdFlag};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use utils::pid_file::{self, PidFileRead};
// These constants control the loop used to poll for process start / stop.
//
// The loop waits for at most 10 seconds, polling every 100 ms.
// Once a second, it prints a dot ("."), to give the user an indication that
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
use utils::lock_file;
const RETRIES: u32 = 15;
const RETRY_TIMEOUT_MILLIS: u64 = 500;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
@@ -51,16 +39,11 @@ pub enum InitialPidFile<'t> {
}
/// Start a background child process using the parameters given.
pub fn start_process<
F,
S: AsRef<OsStr>,
EI: IntoIterator<Item = (String, String)>, // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
>(
pub fn start_process<F, S: AsRef<OsStr>>(
process_name: &str,
datadir: &Path,
command: &Path,
args: &[S],
envs: EI,
initial_pid_file: InitialPidFile,
process_status_check: F,
) -> anyhow::Result<Child>
@@ -86,15 +69,6 @@ where
.stderr(same_file_for_stderr)
.args(args);
let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
filled_cmd.envs(envs);
let pid_file_to_check = match initial_pid_file {
InitialPidFile::Create(path) => {
pre_exec_create_pidfile(filled_cmd, path);
path
}
InitialPidFile::Expect(path) => path,
};
let mut spawned_process = filled_cmd.spawn().with_context(|| {
format!("Could not spawn {process_name}, see console output and log files for details.")
@@ -105,23 +79,44 @@ where
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
);
let pid_file_to_check = match initial_pid_file {
InitialPidFile::Create(target_pid_file_path) => {
match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
lock_file::LockCreationResult::Created { .. } => {
// We use "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
// as this CLI invocation exits, so it's a bit useless, but doesn't any harm either.
}
lock_file::LockCreationResult::AlreadyLocked { .. } => {
anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
}
lock_file::LockCreationResult::CreationFailed(e) => {
return Err(e.context(format!(
"Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
)))
}
}
None
}
InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
};
for retries in 0..RETRIES {
match process_started(pid, Some(pid_file_to_check), &process_status_check) {
match process_started(pid, pid_file_to_check, &process_status_check) {
Ok(true) => {
println!("\n{process_name} started, pid: {pid}");
return Ok(spawned_process);
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not started yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
if retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("{process_name} has not started yet, retrying ({retries})...");
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
}
Err(e) => {
println!("{process_name} failed to start: {e:#}");
@@ -132,49 +127,17 @@ where
}
}
}
println!();
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
}
/// Send SIGTERM to child process
pub fn send_stop_child_process(child: &std::process::Child) -> anyhow::Result<()> {
let pid = child.id();
match kill(
nix::unistd::Pid::from_raw(pid.try_into().unwrap()),
Signal::SIGTERM,
) {
Ok(()) => Ok(()),
Err(Errno::ESRCH) => {
println!("child process with pid {pid} does not exist");
Ok(())
}
Err(e) => anyhow::bail!("Failed to send signal to child process with pid {pid}: {e}"),
}
anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
let pid = match pid_file::read(pid_file)
.with_context(|| format!("read pid_file {pid_file:?}"))?
{
PidFileRead::NotExist => {
println!("{process_name} is already stopped: no pid file present at {pid_file:?}");
return Ok(());
}
PidFileRead::NotHeldByAnyProcess(_) => {
// Don't try to kill according to file contents beacuse the pid might have been re-used by another process.
// Don't delete the file either, it can race with new pid file creation.
// Read `pid_file` module comment for details.
println!(
"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}"
);
return Ok(());
}
PidFileRead::LockedByOtherProcess(pid) => pid,
};
// XXX the pid could become invalid (and recycled) at any time before the kill() below.
if !pid_file.exists() {
println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
return Ok(());
}
let pid = read_pidfile(pid_file)?;
// send signal
let sig = if immediate {
print!("Stopping {process_name} with pid {pid} immediately..");
Signal::SIGQUIT
@@ -186,9 +149,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
match kill(pid, sig) {
Ok(()) => (),
Err(Errno::ESRCH) => {
// Again, don't delete the pid file. The unlink can race with a new pid file being created.
println!(
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. Lucky we didn't harm anyone."
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
);
return Ok(());
}
@@ -196,23 +158,21 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
// Wait until process is gone
for retries in 0..RETRIES {
for _ in 0..RETRIES {
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
if let Err(e) = fs::remove_file(pid_file) {
if e.kind() != io::ErrorKind::NotFound {
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
}
}
return Ok(());
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not stopped yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1))
}
Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -220,28 +180,24 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
}
}
println!();
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
// If RUST_BACKTRACE is set, pass it through. But if it's not set, default
// to RUST_BACKTRACE=1.
let backtrace_setting = std::env::var_os("RUST_BACKTRACE");
let backtrace_setting = backtrace_setting
.as_deref()
.unwrap_or_else(|| OsStr::new("1"));
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", backtrace_setting);
// Pass through these environment variables to the command
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
filled_cmd
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
filled_cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
filled_cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
@@ -257,69 +213,6 @@ fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
cmd
}
/// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
/// will remain held until the cmd exits.
fn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command
where
P: Into<PathBuf>,
{
let path: PathBuf = path.into();
// SAFETY
// pre_exec is marked unsafe because it runs between fork and exec.
// Why is that dangerous in various ways?
// Long answer: https://github.com/rust-lang/rust/issues/39575
// Short answer: in a multi-threaded program, other threads may have
// been inside of critical sections at the time of fork. In the
// original process, that was allright, assuming they protected
// the critical sections appropriately, e.g., through locks.
// Fork adds another process to the mix that
// 1. Has a single thread T
// 2. In an exact copy of the address space at the time of fork.
// A variety of problems scan occur now:
// 1. T tries to grab a lock that was locked at the time of fork.
// It will wait forever since in its address space, the lock
// is in state 'taken' but the thread that would unlock it is
// not there.
// 2. A rust object that represented some external resource in the
// parent now got implicitly copied by the the fork, even though
// the object's type is not `Copy`. The parent program may use
// non-copyability as way to enforce unique ownership of an
// external resource in the typesystem. The fork breaks that
// assumption, as now both parent and child process have an
// owned instance of the object that represents the same
// underlying resource.
// While these seem like niche problems, (1) in particular is
// highly relevant. For example, `malloc()` may grab a mutex internally,
// and so, if we forked while another thread was mallocing' and our
// pre_exec closure allocates as well, it will block on the malloc
// mutex forever
//
// The proper solution is to only use C library functions that are marked
// "async-signal-safe": https://man7.org/linux/man-pages/man7/signal-safety.7.html
//
// With this specific pre_exec() closure, the non-error path doesn't allocate.
// The error path uses `anyhow`, and hence does allocate.
// We take our chances there, hoping that any potential disaster is constrained
// to the child process (e.g., malloc has no state ourside of the child process).
// Last, `expect` prints to stderr, and stdio is not async-signal-safe.
// Again, we take our chances, making the same assumptions as for malloc.
unsafe {
cmd.pre_exec(move || {
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
// remains locked after exec.
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
.expect("remove FD_CLOEXEC");
// Don't run drop(file), it would close the file before we actually exec.
std::mem::forget(file);
Ok(())
});
}
cmd
}
fn process_started<F>(
pid: Pid,
pid_file_to_check: Option<&Path>,
@@ -330,11 +223,14 @@ where
{
match status_check() {
Ok(true) => match pid_file_to_check {
Some(pid_file_path) => match pid_file::read(pid_file_path)? {
PidFileRead::NotExist => Ok(false),
PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
},
Some(pid_file_path) => {
if pid_file_path.exists() {
let pid_in_file = read_pidfile(pid_file_path)?;
Ok(pid_in_file == pid)
} else {
Ok(false)
}
}
None => Ok(true),
},
Ok(false) => Ok(false),
@@ -342,6 +238,21 @@ where
}
}
/// Read a PID file
///
/// We expect a file that contains a single integer.
fn read_pidfile(pidfile: &Path) -> Result<Pid> {
let pid_str = fs::read_to_string(pidfile)
.with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
let pid: i32 = pid_str
.parse()
.map_err(|_| anyhow!("failed to parse pidfile {pidfile:?}"))?;
if pid < 1 {
bail!("pidfile {pidfile:?} contained bad value '{pid}'");
}
Ok(Pid::from_raw(pid))
}
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
match kill(pid, None) {
// Process exists, keep waiting

View File

@@ -8,10 +8,10 @@
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env::LocalEnv;
use control_plane::local_env::{EtcdBroker, LocalEnv};
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
use control_plane::{broker, local_env};
use control_plane::{etcd, local_env};
use pageserver_api::models::TimelineInfo;
use pageserver_api::{
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
@@ -22,10 +22,9 @@ use safekeeper_api::{
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
};
use std::collections::{BTreeSet, HashMap};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::process::exit;
use std::str::FromStr;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use utils::{
auth::{Claims, Scope},
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
@@ -42,12 +41,13 @@ project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: &str = "14";
fn default_conf() -> String {
fn default_conf(etcd_binary_path: &Path) -> String {
format!(
r#"
# Default built-in configuration, defined in main.rs
[broker]
listen_addr = '{DEFAULT_BROKER_ADDR}'
[etcd_broker]
broker_endpoints = ['http://localhost:2379']
etcd_binary_path = '{etcd_binary_path}'
[pageserver]
id = {DEFAULT_PAGESERVER_ID}
@@ -60,6 +60,7 @@ id = {DEFAULT_SAFEKEEPER_ID}
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
"#,
etcd_binary_path = etcd_binary_path.display(),
pageserver_auth_type = AuthType::Trust,
)
}
@@ -297,7 +298,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
})?
} else {
// Built-in default config
default_conf()
default_conf(&EtcdBroker::locate_etcd()?)
};
let pg_version = init_match
@@ -323,7 +324,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
pg_version,
)
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
eprintln!("pageserver init failed: {e}");
exit(1);
});
@@ -806,14 +807,14 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
broker::start_broker_process(env)?;
etcd::start_etcd_process(env)?;
let pageserver = PageServerNode::from_env(env);
// Postgres nodes are not started automatically
if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
eprintln!("pageserver start failed: {e}");
try_stop_storage_broker_process(env);
try_stop_etcd_process(env);
exit(1);
}
@@ -821,7 +822,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start() {
eprintln!("safekeeper '{}' start failed: {e}", safekeeper.id);
try_stop_storage_broker_process(env);
try_stop_etcd_process(env);
exit(1);
}
}
@@ -853,14 +854,14 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
}
}
try_stop_storage_broker_process(env);
try_stop_etcd_process(env);
Ok(())
}
fn try_stop_storage_broker_process(env: &local_env::LocalEnv) {
if let Err(e) = broker::stop_broker_process(env) {
eprintln!("neon broker stop failed: {e}");
fn try_stop_etcd_process(env: &local_env::LocalEnv) {
if let Err(e) = etcd::stop_etcd_process(env) {
eprintln!("etcd stop failed: {e}");
}
}

View File

@@ -1,48 +0,0 @@
use anyhow::Context;
use std::path::PathBuf;
use crate::{background_process, local_env};
pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let broker = &env.broker;
let listen_addr = &broker.listen_addr;
print!("Starting neon broker at {}", listen_addr);
let args = [format!("--listen-addr={listen_addr}")];
let client = reqwest::blocking::Client::new();
background_process::start_process(
"storage_broker",
&env.base_data_dir,
&env.storage_broker_bin(),
&args,
[],
background_process::InitialPidFile::Create(&storage_broker_pid_file_path(env)),
|| {
let url = broker.client_url();
let status_url = url.join("status").with_context(|| {
format!("Failed to append /status path to broker endpoint {url}",)
})?;
let request = client
.get(status_url)
.build()
.with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
match client.execute(request) {
Ok(resp) => Ok(resp.status().is_success()),
Err(_) => Ok(false),
}
},
)
.context("Failed to spawn storage_broker subprocess")?;
Ok(())
}
pub fn stop_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
background_process::stop_process(true, "storage_broker", &storage_broker_pid_file_path(env))
}
fn storage_broker_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
env.base_data_dir.join("storage_broker.pid")
}

View File

@@ -322,9 +322,6 @@ impl PostgresNode {
conf.append("shared_preload_libraries", "neon");
conf.append_line("");
conf.append("neon.pageserver_connstring", &pageserver_connstr);
if let AuthType::NeonJWT = auth_type {
conf.append("neon.safekeeper_token_env", "$ZENITH_AUTH_TOKEN");
}
conf.append("neon.tenant_id", &self.tenant_id.to_string());
conf.append("neon.timeline_id", &self.timeline_id.to_string());
if let Some(lsn) = self.lsn {
@@ -346,7 +343,7 @@ impl PostgresNode {
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "15MB");
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {

View File

@@ -0,0 +1,57 @@
use url::Url;
#[derive(Debug)]
pub struct PgConnectionConfig {
url: Url,
}
impl PgConnectionConfig {
pub fn host(&self) -> &str {
self.url.host_str().expect("BUG: no host")
}
pub fn port(&self) -> u16 {
self.url.port().expect("BUG: no port")
}
/// Return a `<host>:<port>` string.
pub fn raw_address(&self) -> String {
format!("{}:{}", self.host(), self.port())
}
/// Connect using postgres protocol with TLS disabled.
pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
postgres::Client::connect(self.url.as_str(), postgres::NoTls)
}
}
impl std::str::FromStr for PgConnectionConfig {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut url: Url = s.parse()?;
match url.scheme() {
"postgres" | "postgresql" => {}
other => anyhow::bail!("invalid scheme: {other}"),
}
// It's not a valid connection url if host is unavailable.
if url.host().is_none() {
anyhow::bail!(url::ParseError::EmptyHost);
}
// E.g. `postgres:bar`.
if url.cannot_be_a_base() {
anyhow::bail!("URL cannot be a base");
}
// Set the default PG port if it's missing.
if url.port().is_none() {
url.set_port(Some(5432))
.expect("BUG: couldn't set the default port");
}
Ok(Self { url })
}
}

77
control_plane/src/etcd.rs Normal file
View File

@@ -0,0 +1,77 @@
use std::{fs, path::PathBuf};
use anyhow::Context;
use crate::{background_process, local_env};
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_broker = &env.etcd_broker;
println!(
"Starting etcd broker using {:?}",
etcd_broker.etcd_binary_path
);
let etcd_data_dir = env.base_data_dir.join("etcd");
fs::create_dir_all(&etcd_data_dir)
.with_context(|| format!("Failed to create etcd data dir {etcd_data_dir:?}"))?;
let client_urls = etcd_broker.comma_separated_endpoints();
let args = [
format!("--data-dir={}", etcd_data_dir.display()),
format!("--listen-client-urls={client_urls}"),
format!("--advertise-client-urls={client_urls}"),
// Set --quota-backend-bytes to keep the etcd virtual memory
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
// etcd doesn't compact (vacuum) with default settings,
// enable it to prevent space exhaustion.
"--auto-compaction-mode=revision".to_string(),
"--auto-compaction-retention=1".to_string(),
];
let pid_file_path = etcd_pid_file_path(env);
let client = reqwest::blocking::Client::new();
background_process::start_process(
"etcd",
&etcd_data_dir,
&etcd_broker.etcd_binary_path,
&args,
background_process::InitialPidFile::Create(&pid_file_path),
|| {
for broker_endpoint in &etcd_broker.broker_endpoints {
let request = broker_endpoint
.join("health")
.with_context(|| {
format!(
"Failed to append /health path to broker endopint {}",
broker_endpoint
)
})
.and_then(|url| {
client.get(&url.to_string()).build().with_context(|| {
format!("Failed to construct request to etcd endpoint {url}")
})
})?;
if client.execute(request).is_ok() {
return Ok(true);
}
}
Ok(false)
},
)
.context("Failed to spawn etcd subprocess")?;
Ok(())
}
pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
background_process::stop_process(true, "etcd", &etcd_pid_file_path(env))
}
fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
env.base_data_dir.join("etcd.pid")
}

View File

@@ -8,8 +8,9 @@
//
mod background_process;
pub mod broker;
pub mod compute;
pub mod connection;
pub mod etcd;
pub mod local_env;
pub mod pageserver;
pub mod postgresql_conf;

View File

@@ -4,16 +4,12 @@
//! script which will use local paths.
use anyhow::{bail, ensure, Context};
use reqwest::Url;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::net::IpAddr;
use std::net::Ipv4Addr;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use utils::{
@@ -66,7 +62,7 @@ pub struct LocalEnv {
#[serde(default)]
pub private_key_path: PathBuf,
pub broker: NeonBroker,
pub etcd_broker: EtcdBroker,
pub pageserver: PageServerConf,
@@ -82,26 +78,67 @@ pub struct LocalEnv {
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
/// Broker config for cluster internal communication.
/// Etcd broker config for cluster internal communication.
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonBroker {
/// Broker listen address for storage nodes coordination, e.g. '127.0.0.1:50051'.
pub listen_addr: SocketAddr,
pub struct EtcdBroker {
/// A prefix to all to any key when pushing/polling etcd from a node.
#[serde(default)]
pub broker_etcd_prefix: Option<String>,
/// Broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
#[serde(default)]
#[serde_as(as = "Vec<DisplayFromStr>")]
pub broker_endpoints: Vec<Url>,
/// Etcd binary path to use.
#[serde(default)]
pub etcd_binary_path: PathBuf,
}
// Dummy Default impl to satisfy Deserialize derive.
impl Default for NeonBroker {
fn default() -> Self {
NeonBroker {
listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0),
}
impl EtcdBroker {
pub fn locate_etcd() -> anyhow::Result<PathBuf> {
let which_output = Command::new("which")
.arg("etcd")
.output()
.context("Failed to run 'which etcd' command")?;
let stdout = String::from_utf8_lossy(&which_output.stdout);
ensure!(
which_output.status.success(),
"'which etcd' invocation failed. Status: {}, stdout: {stdout}, stderr: {}",
which_output.status,
String::from_utf8_lossy(&which_output.stderr)
);
let etcd_path = PathBuf::from(stdout.trim());
ensure!(
etcd_path.is_file(),
"'which etcd' invocation was successful, but the path it returned is not a file or does not exist: {}",
etcd_path.display()
);
Ok(etcd_path)
}
}
impl NeonBroker {
pub fn client_url(&self) -> Url {
Url::parse(&format!("http://{}", self.listen_addr)).expect("failed to construct url")
pub fn comma_separated_endpoints(&self) -> String {
self.broker_endpoints
.iter()
.map(|url| {
// URL by default adds a '/' path at the end, which is not what etcd CLI wants.
let url_string = url.as_str();
if url_string.ends_with('/') {
&url_string[0..url_string.len() - 1]
} else {
url_string
}
})
.fold(String::new(), |mut comma_separated_urls, url| {
if !comma_separated_urls.is_empty() {
comma_separated_urls.push(',');
}
comma_separated_urls.push_str(url);
comma_separated_urls
})
}
}
@@ -197,10 +234,6 @@ impl LocalEnv {
self.neon_distrib_dir.join("safekeeper")
}
pub fn storage_broker_bin(&self) -> PathBuf {
self.neon_distrib_dir.join("storage_broker")
}
pub fn pg_data_dirs_path(&self) -> PathBuf {
self.base_data_dir.join("pgdatadirs").join("tenants")
}
@@ -478,8 +511,8 @@ mod tests {
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
);
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
let string_to_replace = "broker_endpoints = ['http://127.0.0.1:2379']";
let spoiled_url_str = "broker_endpoints = ['!@$XOXO%^&']";
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
assert!(
spoiled_url_toml.contains(spoiled_url_str),

View File

@@ -1,20 +1,19 @@
use std::collections::HashMap;
use std::fs::File;
use std::fs::{self, File};
use std::io::{BufReader, Write};
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::process::Child;
use std::{io, result};
use anyhow::{bail, ensure, Context};
use crate::connection::PgConnectionConfig;
use anyhow::{bail, Context};
use pageserver_api::models::{
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
};
use postgres_connection::{parse_host_port, PgConnectionConfig};
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::auth::{Claims, Scope};
use utils::{
http::error::HttpErrorBody,
id::{TenantId, TimelineId},
@@ -78,26 +77,37 @@ pub struct PageServerNode {
impl PageServerNode {
pub fn from_env(env: &LocalEnv) -> PageServerNode {
let (host, port) = parse_host_port(&env.pageserver.listen_pg_addr)
.expect("Unable to parse listen_pg_addr");
let port = port.unwrap_or(5432);
let password = if env.pageserver.auth_type == AuthType::NeonJWT {
Some(env.pageserver.auth_token.clone())
&env.pageserver.auth_token
} else {
None
""
};
Self {
pg_connection_config: PgConnectionConfig::new_host_port(host, port)
.set_password(password),
pg_connection_config: Self::pageserver_connection_config(
password,
&env.pageserver.listen_pg_addr,
),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://{}/v1", env.pageserver.listen_http_addr),
}
}
// pageserver conf overrides defined by neon_local configuration.
fn neon_local_overrides(&self) -> Vec<String> {
/// Construct libpq connection string for connecting to the pageserver.
fn pageserver_connection_config(password: &str, listen_addr: &str) -> PgConnectionConfig {
format!("postgresql://no_user:{password}@{listen_addr}/no_db")
.parse()
.unwrap()
}
pub fn initialize(
&self,
create_tenant: Option<TenantId>,
initial_timeline_id: Option<TimelineId>,
config_overrides: &[&str],
pg_version: u32,
) -> anyhow::Result<TimelineId> {
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let pg_distrib_dir_param = format!(
@@ -112,32 +122,41 @@ impl PageServerNode {
);
let listen_pg_addr_param =
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());
let broker_endpoints_param = format!(
"broker_endpoints=[{}]",
self.env
.etcd_broker
.broker_endpoints
.iter()
.map(|url| format!("'{url}'"))
.collect::<Vec<_>>()
.join(",")
);
let broker_etcd_prefix_param = self
.env
.etcd_broker
.broker_etcd_prefix
.as_ref()
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
let mut overrides = vec![
id,
pg_distrib_dir_param,
authg_type_param,
listen_http_addr_param,
listen_pg_addr_param,
broker_endpoint_param,
];
let mut init_config_overrides = config_overrides.to_vec();
init_config_overrides.push(&id);
init_config_overrides.push(&pg_distrib_dir_param);
init_config_overrides.push(&authg_type_param);
init_config_overrides.push(&listen_http_addr_param);
init_config_overrides.push(&listen_pg_addr_param);
init_config_overrides.push(&broker_endpoints_param);
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
init_config_overrides.push(broker_etcd_prefix_param);
}
if self.env.pageserver.auth_type != AuthType::Trust {
overrides.push("auth_validation_public_key_path='auth_public_key.pem'".to_owned());
init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
}
overrides
}
pub fn initialize(
&self,
create_tenant: Option<TenantId>,
initial_timeline_id: Option<TimelineId>,
config_overrides: &[&str],
pg_version: u32,
) -> anyhow::Result<TimelineId> {
let mut pageserver_process = self
.start_node(config_overrides, &self.env.base_data_dir, true)
.start_node(&init_config_overrides, &self.env.base_data_dir, true)
.with_context(|| {
format!(
"Failed to start a process for pageserver {}",
@@ -154,21 +173,29 @@ impl PageServerNode {
}
Err(e) => eprintln!("{e:#}"),
}
background_process::send_stop_child_process(&pageserver_process)?;
let exit_code = pageserver_process.wait()?;
ensure!(
exit_code.success(),
format!(
"pageserver init failed with exit code {:?}",
exit_code.code()
)
);
println!(
"Stopped pageserver {} process with pid {}",
self.env.pageserver.id,
pageserver_process.id(),
);
match pageserver_process.kill() {
Err(e) => {
eprintln!(
"Failed to stop pageserver {} process with pid {}: {e:#}",
self.env.pageserver.id,
pageserver_process.id(),
)
}
Ok(()) => {
println!(
"Stopped pageserver {} process with pid {}",
self.env.pageserver.id,
pageserver_process.id(),
);
// cleanup after pageserver startup, since we do not call regular `stop_process` during init
let pid_file = self.pid_file();
if let Err(e) = fs::remove_file(&pid_file) {
if e.kind() != io::ErrorKind::NotFound {
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
}
}
}
}
init_result
}
@@ -210,10 +237,7 @@ impl PageServerNode {
datadir: &Path,
update_config: bool,
) -> anyhow::Result<Child> {
let mut overrides = self.neon_local_overrides();
overrides.extend(config_overrides.iter().map(|&c| c.to_owned()));
print!(
println!(
"Starting pageserver at '{}' in '{}'",
self.pg_connection_config.raw_address(),
datadir.display()
@@ -231,25 +255,15 @@ impl PageServerNode {
args.push("--update-config");
}
for config_override in &overrides {
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
let envs = if self.env.pageserver.auth_type != AuthType::Trust {
// Generate a token to connect from the pageserver to a safekeeper
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
vec![("ZENITH_AUTH_TOKEN".to_owned(), token)]
} else {
vec![]
};
background_process::start_process(
"pageserver",
datadir,
&self.env.pageserver_bin(),
&args,
envs,
background_process::InitialPidFile::Expect(&self.pid_file()),
|| match self.check_status() {
Ok(()) => Ok(true),
@@ -348,11 +362,6 @@ impl PageServerNode {
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
trace_read_requests: settings
.remove("trace_read_requests")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -415,11 +424,6 @@ impl PageServerNode {
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
trace_read_requests: settings
.get("trace_read_requests")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
})
.send()?
.error_from_body()?;

View File

@@ -5,12 +5,12 @@ use std::sync::Arc;
use std::{io, result};
use anyhow::Context;
use postgres_connection::PgConnectionConfig;
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::{http::error::HttpErrorBody, id::NodeId};
use crate::connection::PgConnectionConfig;
use crate::pageserver::PageServerNode;
use crate::{
background_process,
@@ -86,7 +86,10 @@ impl SafekeeperNode {
/// Construct libpq connection string for connecting to this safekeeper.
fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
PgConnectionConfig::new_host_port(url::Host::parse("127.0.0.1").unwrap(), port)
// TODO safekeeper authentication not implemented yet
format!("postgresql://no_user@127.0.0.1:{port}/no_db")
.parse()
.unwrap()
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
@@ -131,8 +134,13 @@ impl SafekeeperNode {
args.push("--no-sync");
}
let broker_endpoint = format!("{}", self.env.broker.client_url());
args.extend(["--broker-endpoint", &broker_endpoint]);
let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
if !comma_separated_endpoints.is_empty() {
args.extend(["--broker-endpoints", &comma_separated_endpoints]);
}
if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
args.extend(["--broker-etcd-prefix", prefix]);
}
let mut backup_threads = String::new();
if let Some(threads) = self.conf.backup_threads {
@@ -161,7 +169,6 @@ impl SafekeeperNode {
&datadir,
&self.env.safekeeper_bin(),
&args,
[],
background_process::InitialPidFile::Expect(&self.pid_file()),
|| match self.check_status() {
Ok(()) => Ok(true),

View File

@@ -1,13 +0,0 @@
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG COMPUTE_IMAGE=compute-node-v14
ARG TAG=latest
FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
USER root
RUN apt-get update && \
apt-get install -y curl \
jq \
netcat
USER postgres

View File

@@ -1,8 +1,29 @@
version: '3'
services:
etcd:
image: quay.io/coreos/etcd:v3.5.4
ports:
- 2379:2379
- 2380:2380
environment:
# This signifficantly speeds up etcd and we anyway don't data persistency there.
ETCD_UNSAFE_NO_FSYNC: "1"
command:
- "etcd"
- "--auto-compaction-mode=revision"
- "--auto-compaction-retention=1"
- "--name=etcd-cluster"
- "--initial-cluster-state=new"
- "--initial-cluster-token=etcd-cluster-1"
- "--initial-cluster=etcd-cluster=http://etcd:2380"
- "--initial-advertise-peer-urls=http://etcd:2380"
- "--advertise-client-urls=http://etcd:2379"
- "--listen-client-urls=http://0.0.0.0:2379"
- "--listen-peer-urls=http://0.0.0.0:2380"
- "--quota-backend-bytes=134217728" # 128 MB
minio:
restart: always
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
ports:
- 9000:9000
@@ -20,7 +41,7 @@ services:
entrypoint:
- "/bin/sh"
- "-c"
command:
command:
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
echo 'Waiting to start minio...' && sleep 1;
done;
@@ -30,10 +51,9 @@ services:
- minio
pageserver:
restart: always
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
image: neondatabase/neon:${TAG:-latest}
environment:
- BROKER_ENDPOINT='http://storage_broker:50051'
- BROKER_ENDPOINT='http://etcd:2379'
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
@@ -45,7 +65,7 @@ services:
- "-c"
command:
- "/usr/local/bin/pageserver -D /data/.neon/
-c \"broker_endpoint=$$BROKER_ENDPOINT\"
-c \"broker_endpoints=[$$BROKER_ENDPOINT]\"
-c \"listen_pg_addr='0.0.0.0:6400'\"
-c \"listen_http_addr='0.0.0.0:9898'\"
-c \"remote_storage={endpoint='http://minio:9000',
@@ -53,16 +73,15 @@ services:
bucket_region='eu-north-1',
prefix_in_bucket='/pageserver/'}\""
depends_on:
- storage_broker
- etcd
- minio_create_buckets
safekeeper1:
restart: always
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
- SAFEKEEPER_ID=1
- BROKER_ENDPOINT=http://storage_broker:50051
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
@@ -76,23 +95,22 @@ services:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoint=$$BROKER_ENDPOINT
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- storage_broker
- etcd
- minio_create_buckets
safekeeper2:
restart: always
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
- SAFEKEEPER_ID=2
- BROKER_ENDPOINT=http://storage_broker:50051
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
@@ -106,23 +124,22 @@ services:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoint=$$BROKER_ENDPOINT
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- storage_broker
- etcd
- minio_create_buckets
safekeeper3:
restart: always
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
- SAFEKEEPER_ID=3
- BROKER_ENDPOINT=http://storage_broker:50051
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
@@ -136,41 +153,29 @@ services:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoint=$$BROKER_ENDPOINT
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- storage_broker
- etcd
- minio_create_buckets
storage_broker:
restart: always
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
ports:
- 50051:50051
command:
- "storage_broker"
- "--listen-addr=0.0.0.0:50051"
compute:
restart: always
build:
context: ./compute_wrapper/
context: ./image/compute
args:
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}
- TAG=${TAG:-latest}
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
- http_proxy=$http_proxy
- https_proxy=$https_proxy
environment:
- PG_VERSION=${PG_VERSION:-14}
#- RUST_BACKTRACE=1
# Mount the test files directly, for faster editing cycle.
volumes:
- ./compute_wrapper/var/db/postgres/specs/:/var/db/postgres/specs/
- ./compute_wrapper/shell/:/shell/
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
- ./compute/shell/:/shell/
ports:
- 55433:55433 # pg protocol handler
- 3080:3080 # http endpoints

View File

@@ -1,60 +0,0 @@
#!/bin/bash
# A basic test to ensure Docker images are built correctly.
# Build a wrapper around the compute, start all services and runs a simple SQL query.
# Repeats the process for all currenly supported Postgres versions.
# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file
# Their defaults point at DockerHub `neondatabase/neon:latest` image.`,
# to verify custom image builds (e.g pre-published ones).
# XXX: Current does not work on M1 macs due to x86_64 Docker images compiled only, and no seccomp support in M1 Docker emulation layer.
set -eux -o pipefail
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
COMPOSE_FILE=$SCRIPT_DIR/docker-compose.yml
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
cleanup() {
echo "show container information"
docker ps
docker compose -f $COMPOSE_FILE logs
echo "stop containers..."
docker compose -f $COMPOSE_FILE down
}
echo "clean up containers if exists"
cleanup
for pg_version in 14 15; do
echo "start containers (pg_version=$pg_version)."
PG_VERSION=$pg_version docker compose -f $COMPOSE_FILE up --build -d
echo "wait until the compute is ready. timeout after 60s. "
cnt=0
while sleep 1; do
# check timeout
cnt=`expr $cnt + 1`
if [ $cnt -gt 60 ]; then
echo "timeout before the compute is ready."
cleanup
exit 1
fi
# check if the compute is ready
set +o pipefail
result=`docker compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
set -o pipefail
if [ $result -eq 1 ]; then
echo "OK. The compute is ready to connect."
echo "execute simple queries."
docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
cleanup
break
fi
done
done

View File

@@ -0,0 +1,10 @@
ARG COMPUTE_IMAGE=compute-node-v14:latest
FROM neondatabase/${COMPUTE_IMAGE}
USER root
RUN apt-get update && \
apt-get install -y curl \
jq \
netcat
USER postgres

View File

@@ -37,7 +37,7 @@
- [Source view](./sourcetree.md)
- [docker.md](./docker.md) — Docker images and building pipeline.
- [Error handling and logging](./error-handling.md)
- [Error handling and logging]()
- [Testing]()
- [Unit testing]()
- [Integration testing]()

View File

@@ -1,154 +1,30 @@
## Authentication
### Overview
We use JWT tokens in communication between almost all components (compute, pageserver, safekeeper, CLI) regardless of the protocol used (HTTP/PostgreSQL).
storage_broker currently has no authentication.
Authentication is optional and is disabled by default for easier debugging.
It is used in some tests, though.
Note that we do not cover authentication with `pg.neon.tech` here.
For HTTP connections we use the Bearer authentication scheme.
For PostgreSQL connections we expect the token to be passed as a password.
There is a caveat for `psql`: it silently truncates passwords to 100 symbols, so to correctly pass JWT via `psql` you have to either use `PGPASSWORD` environment variable, or store password in `psql`'s config file.
Current token scopes are described in `utils::auth::Scope`.
There are no expiration or rotation schemes.
_TODO_: some scopes allow both access to server management API and to the data.
These probably should be split into multiple scopes.
Tokens should not occur in logs.
They may sometimes occur in configuration files, although this is discouraged
because configs may be parsed and dumped into logs.
#### Tokens generation and validation
JWT tokens are signed using a private key.
Compute/pageserver/safekeeper use the private key's public counterpart to validate JWT tokens.
These components should not have access to the private key and may only get tokens from their configuration or external clients.
The key pair is generated once for an installation of compute/pageserver/safekeeper, e.g. by `neon_local init`.
There is currently no way to rotate the key without bringing down all components.
### CLI
CLI generates a key pair during call to `neon_local init` with the following commands:
Current state of authentication includes usage of JWT tokens in communication between compute and pageserver and between CLI and pageserver. JWT token is signed using RSA keys. CLI generates a key pair during call to `neon_local init`. Using following openssl commands:
```bash
openssl genrsa -out auth_private_key.pem 2048
openssl rsa -in auth_private_key.pem -pubout -outform PEM -out auth_public_key.pem
openssl genrsa -out private_key.pem 2048
openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
```
Configuration files for all components point to `public_key.pem` for JWT validation.
However, authentication is disabled by default.
There is no way to automatically enable it everywhere, you have to configure each component individually.
CLI also generates signed token and saves it in the config for later access to pageserver. Now authentication is optional. Pageserver has two variables in config: `auth_validation_public_key_path` and `auth_type`, so when auth type present and set to `NeonJWT` pageserver will require authentication for connections. Actual JWT is passed in password field of connection string. There is a caveat for psql, it silently truncates passwords to 100 symbols, so to correctly pass JWT via psql you have to either use PGPASSWORD environment variable, or store password in psql config file.
CLI also generates signed token (full access to Pageserver) and saves it in
the CLI's `config` file under `pageserver.auth_token`.
Note that pageserver's config does not have any similar parameter.
CLI is the only component which accesses that token.
Technically it could generate it from the private key on each run,
but it does not do that for some reason (_TODO_).
Currently there is no authentication between compute and safekeepers, because this communication layer is under heavy refactoring. After this refactoring support for authentication will be added there too. Now safekeeper supports "hardcoded" token passed via environment variable to be able to use callmemaybe command in pageserver.
### Compute
#### Overview
Compute is a per-timeline PostgreSQL instance, so it should not have
any access to data of other tenants.
All tokens used by a compute are restricted to a specific tenant.
There is no auth isolation from other timelines of the same tenant,
but a non-rogue client never accesses another timeline even by an accident:
timeline IDs are random and hard to guess.
Compute uses token passed via environment variable to communicate to pageserver and in the future to the safekeeper too.
#### Incoming connections
All incoming connections are from PostgreSQL clients.
Their authentication is just plain PostgreSQL authentication and out of scope for this document.
JWT authentication now supports two scopes: tenant and pageserverapi. Tenant scope is intended for use in tenant related api calls, e.g. create_branch. Compute launched for particular tenant also uses this scope. Scope pageserver api is intended to be used by console to manage pageserver. For now we have only one management operation - create tenant.
There is no administrative API except those provided by PostgreSQL.
#### Outgoing connections
Compute connects to Pageserver for getting pages.
The connection string is configured by the `neon.pageserver_connstring` PostgreSQL GUC, e.g. `postgresql://no_user:$ZENITH_AUTH_TOKEN@localhost:15028`.
The environment variable inside the connection string is substituted with
the JWT token.
Compute connects to Safekeepers to write and commit data.
The token is the same for all safekeepers.
It's stored in an environment variable, whose name is configured
by the `neon.safekeeper_token_env` PostgreSQL GUC.
If the GUC is unset, no token is passed.
Note that both tokens can be (and typically are) the same;
the scope is the tenant and the token is usually passed through the
`$ZENITH_AUTH_TOKEN` environment variable.
### Pageserver
#### Overview
Pageserver keeps track of multiple tenants, each having multiple timelines.
For each timeline, it connects to the corresponding Safekeeper.
Information about "corresponding Safekeeper" is published by Safekeepers
in the storage_broker, but they do not publish access tokens, otherwise what is
the point of authentication.
Pageserver keeps a connection to some set of Safekeepers, which
may or may not correspond to active Computes.
Hence, we cannot obtain a per-timeline access token from a Compute.
E.g. if the timeline's Compute terminates before all WAL is
consumed by the Pageserver, the Pageserver continues consuming WAL.
Pageserver replicas' authentication is the same as the main's.
#### Incoming connections
Pageserver listens for connections from computes.
Each compute should present a token valid for the timeline's tenant.
Pageserver also has HTTP API: some parts are per-tenant,
some parts are server-wide, these are different scopes.
The `auth_type` configuration variable in Pageserver's config may have
either of three values:
* `Trust` removes all authentication. The outdated `MD5` value does likewise
* `NeonJWT` enables JWT validation.
Tokens are validated using the public key which lies in a PEM file
specified in the `auth_validation_public_key_path` config.
#### Outgoing connections
Pageserver makes a connection to a Safekeeper for each active timeline.
As Pageserver may want to access any timeline it has on the disk,
it is given a blanket JWT token to access any data on any Safekeeper.
This token is passed through an environment variable called `ZENITH_AUTH_TOKEN`
(non-configurable as of writing this text).
A better way _may be_ to store JWT token for each timeline next to it,
but may be not.
### Safekeeper
#### Overview
Safekeeper keeps track of multiple tenants, each having multiple timelines.
#### Incoming connections
Safekeeper accepts connections from Compute/Pageserver, each
connection corresponds to a specific timeline and requires
a corresponding JWT token.
Safekeeper also has HTTP API: some parts are per-tenant,
some parts are server-wide, these are different scopes.
The `auth-validation-public-key-path` command line options controls
the authentication mode:
* If the option is missing, there is no authentication or JWT token validation.
* If the option is present, it should be a path to the public key PEM file used for JWT token validation.
#### Outgoing connections
No connections are initiated by a Safekeeper.
### In the source code
Tests do not use authentication by default.
If you need it, you can enable it by configuring the test's environment:
Examples for token generation in python:
```python
neon_env_builder.auth_enabled = True
# generate pageserverapi token
management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algorithm="RS256")
# generate tenant token
tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
```
You will have to generate tokens if you want to access components inside the test directly,
use `AuthKeys.generate_*_token` methods for that.
If you create a new scope, please create a new method to prevent mistypes in scope's name.
Utility functions to work with jwts in rust are located in libs/utils/src/auth.rs

View File

@@ -23,9 +23,9 @@ We build all images after a successful `release` tests run and push automaticall
You can see a [docker compose](https://docs.docker.com/compose/) example to create a neon cluster in [/docker-compose/docker-compose.yml](/docker-compose/docker-compose.yml). It creates the following conatainers.
- etcd x 1
- pageserver x 1
- safekeeper x 3
- storage_broker x 1
- compute x 1
- MinIO x 1 # This is Amazon S3 compatible object storage
@@ -41,7 +41,7 @@ $ cd docker-compose/docker-compose.yml
$ docker-compose down # remove the conainers if exists
$ PG_VERSION=15 TAG=2221 docker-compose up --build -d # You can specify the postgres and image version
Creating network "dockercompose_default" with the default driver
Creating docker-compose_storage_broker_1 ... done
Creating dockercompose_etcd3_1 ...
(...omit...)
```

View File

@@ -1,198 +0,0 @@
# Error handling and logging
## Logging errors
The principle is that errors are logged when they are handled. If you
just propagate an error to the caller in a function, you don't need to
log it; the caller will. But if you consume an error in a function,
you *must* log it (if it needs to be logged at all).
For example:
```rust
fn read_motd_file() -> std::io::Result<String> {
let mut f = File::open("/etc/motd")?;
let mut result = String::new();
f.read_to_string(&mut result)?;
result
}
```
Opening or reading the file could fail, but there is no need to log
the error here. The function merely propagates the error to the
caller, and it is up to the caller to log the error or propagate it
further, if the failure is not expected. But if, for example, it is
normal that the "/etc/motd" file doesn't exist, the caller can choose
to silently ignore the error, or log it as an INFO or DEBUG level
message:
```rust
fn get_message_of_the_day() -> String {
// Get the motd from /etc/motd, or return the default proverb
match read_motd_file() {
Ok(motd) => motd,
Err(err) => {
// It's normal that /etc/motd doesn't exist, but if we fail to
// read it for some other reason, that's unexpected. The message
// of the day isn't very important though, so we just WARN and
// continue with the default in any case.
if err.kind() != std::io::ErrorKind::NotFound {
tracing::warn!("could not read \"/etc/motd\": {err:?}");
}
"An old error is always more popular than a new truth. - German proverb"
}
}
}
```
## Error types
We use the `anyhow` crate widely. It contains many convenient macros
like `bail!` and `ensure!` to construct and return errors, and to
propagate many kinds of low-level errors, wrapped in `anyhow::Error`.
A downside of `anyhow::Error` is that the caller cannot distinguish
between different error cases. Most errors are propagated all the way
to the mgmt API handler function, or the main loop that handles a
connection with the compute node, and they are all handled the same
way: the error is logged and returned to the client as an HTTP or
libpq error.
But in some cases, we need to distinguish between errors and handle
them differently. For example, attaching a tenant to the pageserver
could fail either because the tenant has already been attached, or
because we could not load its metadata from cloud storage. The first
case is more or less expected. The console sends the Attach request to
the pageserver, and the pageserver completes the operation, but the
network connection might be lost before the console receives the
response. The console will retry the operation in that case, but the
tenant has already been attached. It is important that the pagserver
responds with the HTTP 403 Already Exists error in that case, rather
than a generic HTTP 500 Internal Server Error.
If you need to distinguish between different kinds of errors, create a
new `Error` type. The `thiserror` crate is useful for that. But in
most cases `anyhow::Error` is good enough.
## Panics
Depending on where a panic happens, it can cause the whole pageserver
or safekeeper to restart, or just a single tenant. In either case,
that is pretty bad and causes an outage. Avoid panics. Never use
`unwrap()` or other calls that might panic, to verify inputs from the
network or from disk.
It is acceptable to use functions that might panic, like `unwrap()`, if
it is obvious that it cannot panic. For example, if you have just
checked that a variable is not None, it is OK to call `unwrap()` on it,
but it is still preferable to use `expect("reason")` instead to explain
why the function cannot fail.
`assert!` and `panic!` are reserved for checking clear invariants and
very obvious "can't happen" cases. When in doubt, use anyhow `ensure!`
or `bail!` instead.
## Error levels
`tracing::Level` doesn't provide very clear guidelines on what the
different levels mean, or when to use which level. Here is how we use
them:
### Error
Examples:
- could not open file "foobar"
- invalid tenant id
Errors are not expected to happen during normal operation. Incorrect
inputs from client can cause ERRORs. For example, if a client tries to
call a mgmt API that doesn't exist, or if a compute node sends passes
an LSN that has already been garbage collected away.
These should *not* happen during normal operations. "Normal
operations" is not a very precise concept. But for example, disk
errors are not expected to happen when the system is working, so those
count as Errors. However, if a TCP connection to a compute node is
lost, that is not considered an Error, because it doesn't affect the
pageserver's or safekeeper's operation in any way, and happens fairly
frequently when compute nodes are shut down, or are killed abruptly
because of errors in the compute.
**Errors are monitored, and always need human investigation to determine
the cause.**
Whether something should be logged at ERROR, WARNING or INFO level can
depend on the callers and clients. For example, it might be unexpected
and a sign of a serious issue if the console calls the
"timeline_detail" mgmt API for a timeline that doesn't exist. ERROR
would be appropriate in that case. But if the console routinely calls
the API after deleting a timeline, to check if the deletion has
completed, then it would be totally normal and an INFO or DEBUG level
message would be more appropriate. If a message is logged as an ERROR,
but it in fact happens frequently in production and never requires any
action, it should probably be demoted to an INFO level message.
### Warn
Examples:
- could not remove temporary file "foobar.temp"
- unrecognized file "foobar" in timeline directory
Warnings are similar to Errors, in that they should not happen
when the system is operating normally. The difference between Error and
Warning is that an Error means that the operation failed, whereas Warning
means that something unexpected happened, but the operation continued anyway.
For example, if deleting a file fails because the file already didn't exist,
it should be logged as Warning.
> **Note:** The python regression tests, under `test_regress`, check the
> pageserver log after each test for any ERROR and WARN lines. If there are
> any ERRORs or WARNs that have not been explicitly listed in the test as
> allowed, the test is marked a failed. This is to catch unexpected errors
> e.g. in background operations, that don't cause immediate misbehaviour in
> the tested functionality.
### Info
Info level is used to log useful information when the system is
operating normally. Info level is appropriate e.g. for logging state
changes, background operations, and network connections.
Examples:
- "system is shutting down"
- "tenant was created"
- "retrying S3 upload"
### Debug & Trace
Debug and Trace level messages are not printed to the log in our normal
production configuration, but could be enabled for a specific server or
tenant, to aid debugging. (Although we don't actually have that
capability as of this writing).
## Context
We use logging "spans" to hold context information about the current
operation. Almost every operation happens on a particular tenant and
timeline, so we enter a span with the "tenant_id" and "timeline_id"
very early when processing an incoming API request, for example. All
background operations should also run in a span containing at least
those two fields, and any other parameters or information that might
be useful when debugging an error that might happen when performing
the operation.
TODO: Spans are not captured in the Error when it is created, but when
the error is logged. It would be more useful to capture them at Error
creation. We should consider using `tracing_error::SpanTrace` to do
that.
## Error message style
PostgreSQL has a style guide for writing error messages:
https://www.postgresql.org/docs/current/error-style-guide.html
Follow that guide when writing error messages in the PostgreSQL
extension. We don't follow it strictly in the pageserver and
safekeeper, but the advice in the PostgreSQL style guide is generally
good, and you can't go wrong by following it.

View File

@@ -1,201 +0,0 @@
# Context
In Neon, one host runs a lot of compute nodes. Most of the compute
nodes are fairly small, 1-2 vCPUs and 1-2 GB of memory. But some
compute nodes can be larger, and we also want to autoscale from small
to large, and back again, without shutting down the compute.
+----------------------------+
| Host |
| +------------------------+ |
| | Container VM | | +------------+
| | +--------------------+ | | | |
| | | Postgres | | | | Pageserver |
| | | | | | | |
| | | ################ | | | | |
| | | # shared_ # | | | +------------+
| | | # buffers # | | |
| | | ################ | | |
| | | | | |
| | +--------------------+ | |
| +------------------------+ |
+----------------------------+
Each Postgres runs in a qemu VM, which in turn runs in a kubernetes
pod. Kubernetes manages the placement of these Postgres compute nodes
to hosts.
# Problem
PostgreSQL normally relies heavily on the kernel page cache for
performance. PostgreSQL has its own buffer cache, configured by the
shared_buffers setting, but the usual advice is to set shared_buffers
to around 10-20% of the overall system RAM available, leaving the rest
for the kernel page cache. However with Neon, the I/O operations don't
go through the kernel filesystem layer, so we bypass the kernel page
cache and rely solely on the Postgres shared buffer cache for caching
in the compute node.
Because we don't make use of the kernel page cache, we have to either
set shared_buffers larger than you would with normal PostgreSQL, or
you send a lot more I/O requests to the pageserver than you otherwise
would. However in PostgreSQL, shared_buffers setting cannot be changed
while the server is running.
Furthermore, we have fast local SSDs available in the compute hosts
that we could also utilize for caching.
# Solution 1: Scale shared buffers
This solution consists of:
- Core PostgreSQL changes to allow changing shared_buffers on the fly
- New code to orchestrate changing the memory size of the VM, and tell
PostgreSQL to change the shared_buffers setting accordingly.
- New code to Postgres that measures current shared buffer cache usage
to determine what the "cache pressure" is, i.e. how useful it would
be to have a larger shared buffer cache. This could be in an
extension.
- A new governor in the host that chooses which VM to allocate
how much memory.
Picture:
+----------------------------+
| Host |
| +------------------------+ |
| | Container VM | | +------------+
| | +--------------------+ | | | |
| | | Postgres | | | | Pageserver |
| | | | | | | |
| | | ################ | | | | |
| | | # shared_ # | | | +------------+
| | | # buffers # | | |
| | | # # | | |
| | | . | . | | |
| | | . | . | | |
| | | . V . | | |
| | | ################ | | |
| | | | | |
| | +--------------------+ | |
| +------------------------+ |
+----------------------------+
Pros:
- best possible performance for the cached data
Cons:
- Scales only memory, cannot take advantage of local SSDs in host machine
- Needs explicit operations to scale. Won't dynamicaly share resources
between tenants, we'll need to start a resizing process to change
the allocations.
- Needs patches to core PostgreSQL
# Alternative 2: Local filesystem cache
Add code to Postgres Neon extension to use a local file on disk for
caching. When a page is evicted from Postgres buffer cache, write it
to the local file, and read it back if it's requested again. Rely on
kernel page cache to keep the most hot part of that file in memory.
Like in solution 1, need a governor in the host to allocate the local
disk for each VM, and orchestration to scale it up and down.
+----------------------------+
| Host |
| +------------------------+ |
| | Container VM | | +------------+
| | +--------------------+ | | | |
| | | Postgres | | | | Pageserver |
| | | | | | | |
| | | ################ | | | | |
| | | # shared_ # | | | +------------+
| | | # buffers # | | |
| | | ################ | | |
| | | | | |
| | | ################ | | |
| | | # Local FS # | | |
| | | # cache # | | |
| | | # # | | |
| | | ################ | | |
| | | | | |
| | +--------------------+ | |
| +------------------------+ |
+----------------------------+
Pros:
- No PostgreSQL core changes required
- Automatically takes advantage of local SSDs, not just memory
Cons:
- Needs explicit operations to change the size of the cache file
- Need support for live migration of the filesystem
Question:
How is the page cache shared between the host kernel and the VMs? Does
each VM maintain their own page cache? I think that depends on the
filesystem and qemu driver we choose. If we use a raw block device and
let the VM manage it, I believe the VM will maintain the page
cache. But if we use a driver to access the host filesystem directly,
or use something like NFS, I'm not sure.
# Alternative 3: Host cache
Implement a new host cache process that intercepts all the I/O
requests from all VMs running on the host, and manages a local cache.
Postgres can communicate with the host cache using TCP, or via custom
virtio driver.
+----------------------------+
| Host |
| |
| ###################### |
| # # |
| # shared host cache # |
| # # |
| # # |
| # # |
| # # |
| ###################### |
| |
| +------------------------+ |
| | Container VM | | +------------+
| | +--------------------+ | | | |
| | | Postgres | | | | Pageserver |
| | | | | | | |
| | | ################ | | | | |
| | | # shared_ # | | | +------------+
| | | # buffers # | | |
| | | ################ | | |
| | | | | |
| | +--------------------+ | |
| +------------------------+ |
+----------------------------+
Pros:
- dynamic sharing between all tenants (one cache and eviction policy for all)
- No PostgreSQL core changes required
- Takes advantage of local SSDs, not just memory
Cons:
- Whole new component to write
One way to achieve this would be to collocate the pageserver on the
host itself. That would eliminate the network roundtrip between
Postgres and the pageserver, effectively making the pageserver itself
be the host shared cache.

View File

@@ -10,6 +10,7 @@ the values in the config file, if any are specified for the same key and get int
```toml
# Initial configuration file created by 'pageserver --init'
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
@@ -24,12 +25,13 @@ max_file_descriptors = '100'
# initial superuser role name to use when creating a new tenant
initial_superuser_name = 'cloud_admin'
broker_endpoint = 'http://127.0.0.1:50051'
broker_etcd_prefix = 'neon'
broker_endpoints = ['some://etcd']
# [remote_storage]
```
The config above shows default values for all basic pageserver settings, besides `broker_endpoint`: that one has to be set by the user,
The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
see the corresponding section below.
Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.
@@ -48,10 +50,16 @@ Example: `${PAGESERVER_BIN} -c "checkpoint_timeout = '10 m'" -c "remote_storage=
Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.
#### broker_endpoint
#### broker_endpoints
A storage broker endpoint to connect and pull the information from. Default is
`'http://127.0.0.1:50051'`.
A list of endpoints (etcd currently) to connect and pull the information from.
Mandatory, does not have a default, since requires etcd to be started as a separate process,
and its connection url should be specified separately.
#### broker_etcd_prefix
A prefix to add for every etcd key used, to separate one group of related instances from another, in the same cluster.
Default is `neon`.
#### checkpoint_distance

View File

@@ -2,11 +2,6 @@
Below you will find a brief overview of each subdir in the source tree in alphabetical order.
`storage_broker`:
Neon storage broker, providing messaging between safekeepers and pageservers.
[storage_broker.md](./storage_broker.md)
`/control_plane`:
Local control plane.
@@ -45,9 +40,9 @@ and create new databases and accounts (control plane API in our case).
Integration tests, written in Python using the `pytest` framework.
`/vendor/postgres-v14` and `/vendor/postgres-v15`:
`/vendor/postgres-v14`:
PostgreSQL source tree per version, with the modifications needed for Neon.
PostgreSQL source tree, with the modifications needed for Neon.
`/pgxn/neon`:
@@ -88,16 +83,6 @@ A subject for future modularization.
`/libs/metrics`:
Helpers for exposing Prometheus metrics from the server.
### Adding dependencies
When you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.
```bash
cargo hakari generate
cargo hakari manage-deps
```
If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
## Using Python
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
so manual installation of dependencies is not recommended.

View File

@@ -1,27 +0,0 @@
# Storage broker
Storage broker targets two issues:
- Allowing safekeepers and pageservers learn which nodes also hold their
timelines, and timeline statuses there.
- Avoiding O(n^2) connections between storage nodes while doing so.
This is used
- By pageservers to determine the most advanced and alive safekeeper to pull WAL from.
- By safekeepers to synchronize on the timeline: advance
`remote_consistent_lsn`, `backup_lsn`, choose who offloads WAL to s3.
Technically, it is a simple stateless pub-sub message broker based on tonic
(grpc) making multiplexing easy. Since it is stateless, fault tolerance can be
provided by k8s; there is no built in replication support, though it is not hard
to add.
Currently, the only message is `SafekeeperTimelineInfo`. Each safekeeper, for
each active timeline, once in a while pushes timeline status to the broker.
Other nodes subscribe and receive this info, using it per above.
Broker serves /metrics on the same port as grpc service.
grpcurl can be used to check which values are currently being pushed:
```
grpcurl -proto broker/proto/broker.proto -d '{"all":{}}' -plaintext localhost:50051 storage_broker.BrokerService/SubscribeSafekeeperInfo
```

View File

@@ -0,0 +1,18 @@
[package]
name = "etcd_broker"
version = "0.1.0"
edition = "2021"
[dependencies]
etcd-client = "0.9.0"
regex = "1.4.5"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
once_cell = "1.13.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
tokio = "1"
tracing = "0.1"
thiserror = "1"

209
libs/etcd_broker/src/lib.rs Normal file
View File

@@ -0,0 +1,209 @@
//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
//! Intended to connect services to each other, not to store their data.
/// All broker keys, that are used when dealing with etcd.
pub mod subscription_key;
/// All broker values, possible to use when dealing with etcd.
pub mod subscription_value;
use std::str::FromStr;
use serde::de::DeserializeOwned;
use subscription_key::SubscriptionKey;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use crate::subscription_key::SubscriptionFullKey;
pub use etcd_client::*;
/// Default value to use for prefixing to all etcd keys with.
/// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
/// A way to control the data retrieval from a certain subscription.
pub struct BrokerSubscription<V> {
/// An unbounded channel to fetch the relevant etcd updates from.
pub value_updates: mpsc::UnboundedReceiver<BrokerUpdate<V>>,
key: SubscriptionKey,
/// A subscription task handle, to allow waiting on it for the task to complete.
/// Both the updates channel and the handle require `&mut`, so it's better to keep
/// both `pub` to allow using both in the same structures without borrow checker complaining.
pub watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
impl<V> BrokerSubscription<V> {
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
pub async fn cancel(mut self) -> Result<(), BrokerError> {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
)
})?;
match (&mut self.watcher_handle).await {
Ok(res) => res,
Err(e) => {
if e.is_cancelled() {
// don't error on the tasks that are cancelled already
Ok(())
} else {
Err(BrokerError::InternalError(format!(
"Panicked during broker subscription task, kind: {:?}, error: {e}",
self.key
)))
}
}
}
}
}
impl<V> Drop for BrokerSubscription<V> {
fn drop(&mut self) {
// we poll data from etcd into the channel in the same struct, so if the whole struct gets dropped,
// no more data is used by the receiver and it's safe to cancel and drop the whole etcd subscription task.
self.watcher_handle.abort();
}
}
/// An update from the etcd broker.
pub struct BrokerUpdate<V> {
/// Etcd generation version, the bigger the more actual the data is.
pub etcd_version: i64,
/// Etcd key for the corresponding value, parsed from the broker KV.
pub key: SubscriptionFullKey,
/// Current etcd value, parsed from the broker KV.
pub value: V,
}
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd key: {0}")]
KeyNotParsed(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// Creates a background task to poll etcd for timeline updates from safekeepers.
/// Stops and returns `Err` on any error during etcd communication.
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
/// exiting normally in such cases.
/// Etcd values are parsed as json fukes into a type, specified in the generic patameter.
pub async fn subscribe_for_json_values<V>(
client: &mut Client,
key: SubscriptionKey,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: DeserializeOwned + Send + 'static,
{
subscribe_for_values(client, key, |_, value_str| {
match serde_json::from_str::<V>(value_str) {
Ok(value) => Some(value),
Err(e) => {
error!("Failed to parse value str '{value_str}': {e}");
None
}
}
})
.await
}
/// Same as [`subscribe_for_json_values`], but allows to specify a custom parser of a etcd value string.
pub async fn subscribe_for_values<P, V>(
client: &mut Client,
key: SubscriptionKey,
value_parser: P,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: Send + 'static,
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
{
info!("Subscribing to broker value updates, key: {key:?}");
let subscription_key = key.clone();
let (watcher, mut stream) = client
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
.await
.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to init the watch for subscription {key:?}"),
)
})?;
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
let watcher_handle = tokio::spawn(async move {
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
)))? {
if resp.canceled() {
info!("Watch for timeline updates subscription was canceled, exiting");
break;
}
let events = resp.events();
debug!("Processing {} events", events.len());
for event in events {
if EventType::Put == event.event_type() {
if let Some(new_etcd_kv) = event.kv() {
match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
Ok(Some((key, value))) => if let Err(e) = value_updates_sender.send(BrokerUpdate {
etcd_version: new_etcd_kv.version(),
key,
value,
}) {
info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
break;
},
Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
};
}
}
}
}
Ok(())
}.instrument(info_span!("etcd_broker")));
Ok(BrokerSubscription {
key: subscription_key,
value_updates: value_updates_receiver,
watcher_handle,
watcher,
})
}
fn parse_etcd_kv<P, V>(
kv: &KeyValue,
value_parser: &P,
cluster_prefix: &str,
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
where
P: Fn(SubscriptionFullKey, &str) -> Option<V>,
{
let key_str = kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
})?;
let value_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
})?;
if !key_str.starts_with(cluster_prefix) {
return Err(BrokerError::KeyNotParsed(format!(
"KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
)));
}
let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
})?;
Ok(value_parser(key, value_str).map(|value| (key, value)))
}

View File

@@ -0,0 +1,310 @@
//! Etcd broker keys, used in the project and shared between instances.
//! The keys are split into two categories:
//!
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
//! Always returned from etcd in this form, always start with the user key provided.
//!
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
//! Full key always starts with the user input one, due to etcd subscription properties.
use std::{fmt::Display, str::FromStr};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use utils::id::{NodeId, TenantId, TenantTimelineId};
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SubscriptionKey {
/// Generic cluster prefix, allowing to use the same etcd instance by multiple logic groups.
pub cluster_prefix: String,
/// The subscription kind.
pub kind: SubscriptionKind,
}
/// All currently possible key kinds of a etcd broker subscription.
/// Etcd works so, that every key that starts with the subbscription key given is considered matching and
/// returned as part of the subscrption.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubscriptionKind {
/// Get every update in etcd.
All,
/// Get etcd updates for any timeiline of a certain tenant, affected by any operation from any node kind.
TenantTimelines(TenantId),
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
Timeline(TenantTimelineId),
/// Get etcd timeline updates, specific to a certain node kind.
Node(TenantTimelineId, NodeKind),
/// Get etcd timeline updates for a certain operation on specific nodes.
Operation(TenantTimelineId, NodeKind, OperationKind),
}
/// All kinds of nodes, able to write into etcd.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
Safekeeper,
Pageserver,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperationKind {
Safekeeper(SkOperationKind),
}
/// Current operations, running inside the safekeeper node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkOperationKind {
TimelineInfo,
WalBackup,
}
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
.expect("wrong subscription full etcd key regex")
});
/// Full key, received from etcd during any of the component's work.
/// No other etcd keys are considered during system's work.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SubscriptionFullKey {
pub id: TenantTimelineId,
pub node_kind: NodeKind,
pub operation: OperationKind,
pub node_id: NodeId,
}
impl SubscriptionKey {
/// Subscribes for all etcd updates.
pub fn all(cluster_prefix: String) -> Self {
SubscriptionKey {
cluster_prefix,
kind: SubscriptionKind::All,
}
}
/// Subscribes to a given timeline info updates from safekeepers.
pub fn sk_timeline_info(cluster_prefix: String, timeline: TenantTimelineId) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(
timeline,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
),
}
}
/// Subscribes to all timeine updates during specific operations, running on the corresponding nodes.
pub fn operation(
cluster_prefix: String,
timeline: TenantTimelineId,
node_kind: NodeKind,
operation: OperationKind,
) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
let cluster_prefix = &self.cluster_prefix;
match self.kind {
SubscriptionKind::All => cluster_prefix.to_string(),
SubscriptionKind::TenantTimelines(tenant_id) => {
format!("{cluster_prefix}/{tenant_id}")
}
SubscriptionKind::Timeline(id) => {
format!("{cluster_prefix}/{id}")
}
SubscriptionKind::Node(id, node_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}")
}
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
}
}
}
}
impl Display for OperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OperationKind::Safekeeper(o) => o.fmt(f),
}
}
}
impl FromStr for OperationKind {
type Err = String;
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
match operation_kind_str {
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
}
}
}
impl Display for SubscriptionFullKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
id,
node_kind,
operation,
node_id,
} = self;
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
}
}
impl FromStr for SubscriptionFullKey {
type Err = String;
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
Some(captures) => captures,
None => {
return Err(format!(
"Subscription kind str does not match a subscription full key regex {}",
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
));
}
};
Ok(Self {
id: TenantTimelineId::new(
parse_capture(&key_captures, 1)?,
parse_capture(&key_captures, 2)?,
),
node_kind: parse_capture(&key_captures, 3)?,
operation: parse_capture(&key_captures, 4)?,
node_id: NodeId(parse_capture(&key_captures, 5)?),
})
}
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}
impl Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Safekeeper => write!(f, "safekeeper"),
Self::Pageserver => write!(f, "pageserver"),
}
}
}
impl FromStr for NodeKind {
type Err = String;
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
match node_kind_str {
"safekeeper" => Ok(Self::Safekeeper),
"pageserver" => Ok(Self::Pageserver),
_ => Err(format!("Invalid node kind: {node_kind_str}")),
}
}
}
impl Display for SkOperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TimelineInfo => write!(f, "timeline_info"),
Self::WalBackup => write!(f, "wal_backup"),
}
}
}
impl FromStr for SkOperationKind {
type Err = String;
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
match operation_str {
"timeline_info" => Ok(Self::TimelineInfo),
"wal_backup" => Ok(Self::WalBackup),
_ => Err(format!("Invalid operation: {operation_str}")),
}
}
}
#[cfg(test)]
mod tests {
use utils::id::TimelineId;
use super::*;
#[test]
fn full_cluster_key_parsing() {
let prefix = "neon";
let node_kind = NodeKind::Safekeeper;
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
let tenant_id = TenantId::generate();
let timeline_id = TimelineId::generate();
let id = TenantTimelineId::new(tenant_id, timeline_id);
let node_id = NodeId(1);
let timeline_subscription_keys = [
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::All,
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::TenantTimelines(tenant_id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Timeline(id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Node(id, node_kind),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
},
];
let full_key_string = format!(
"{}/{node_id}",
timeline_subscription_keys.last().unwrap().watch_key()
);
for key in timeline_subscription_keys {
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with any of the keys, keys, but {key:?} did not match");
}
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
});
assert_eq!(
full_key,
SubscriptionFullKey {
id,
node_kind,
operation: operation_kind,
node_id
}
)
}
}

View File

@@ -0,0 +1,38 @@
//! Module for the values to put into etcd.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub local_start_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}

View File

@@ -9,7 +9,6 @@ serde_with = "2.0"
const_format = "0.2.21"
anyhow = { version = "1.0", features = ["backtrace"] }
bytes = "1.0.1"
byteorder = "1.4.3"
utils = { path = "../utils" }
postgres_ffi = { path = "../postgres_ffi" }

View File

@@ -1,6 +1,5 @@
use std::num::NonZeroU64;
use byteorder::{BigEndian, ReadBytesExt};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::{
@@ -10,42 +9,24 @@ use utils::{
use crate::reltag::RelTag;
use anyhow::bail;
use bytes::{BufMut, Bytes, BytesMut};
use bytes::{Buf, BufMut, Bytes, BytesMut};
/// A state of a tenant in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TenantState {
// This tenant is being loaded from local disk
Loading,
// This tenant is being downloaded from cloud storage.
Attaching,
/// Tenant is fully operational
Active,
/// A tenant is recognized by pageserver, but it is being detached or the
/// system is being shut down.
Stopping,
/// A tenant is recognized by the pageserver, but can no longer be used for
/// any operations, because it failed to be activated.
/// Tenant is fully operational, its background jobs might be running or not.
Active { background_jobs_running: bool },
/// A tenant is recognized by pageserver, but not yet ready to operate:
/// e.g. not present locally and being downloaded or being read into memory from the file system.
Paused,
/// A tenant is recognized by the pageserver, but no longer used for any operations, as failed to get activated.
Broken,
}
impl TenantState {
pub fn has_in_progress_downloads(&self) -> bool {
match self {
Self::Loading => true,
Self::Attaching => true,
Self::Active => false,
Self::Stopping => false,
Self::Broken => false,
}
}
}
/// A state of a timeline in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TimelineState {
/// Timeline is fully operational. If the containing Tenant is Active, the timeline's
/// background jobs are running otherwise they will be launched when the tenant is activated.
/// Timeline is fully operational, its background jobs are running.
Active,
/// A timeline is recognized by pageserver, but not yet ready to operate.
/// The status indicates, that the timeline could eventually go back to Active automatically:
@@ -53,9 +34,8 @@ pub enum TimelineState {
Suspended,
/// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to
/// automatically become Active after certain events: only a management call can change this status.
Stopping,
/// A timeline is recognized by the pageserver, but can no longer be used for
/// any operations, because it failed to be activated.
Paused,
/// A timeline is recognized by the pageserver, but no longer used for any operations, as failed to get activated.
Broken,
}
@@ -92,7 +72,6 @@ pub struct TenantCreateRequest {
pub walreceiver_connect_timeout: Option<String>,
pub lagging_wal_timeout: Option<String>,
pub max_lsn_wal_lag: Option<NonZeroU64>,
pub trace_read_requests: Option<bool>,
}
#[serde_as]
@@ -132,7 +111,6 @@ pub struct TenantConfigRequest {
pub walreceiver_connect_timeout: Option<String>,
pub lagging_wal_timeout: Option<String>,
pub max_lsn_wal_lag: Option<NonZeroU64>,
pub trace_read_requests: Option<bool>,
}
impl TenantConfigRequest {
@@ -151,7 +129,6 @@ impl TenantConfigRequest {
walreceiver_connect_timeout: None,
lagging_wal_timeout: None,
max_lsn_wal_lag: None,
trace_read_requests: None,
}
}
}
@@ -187,8 +164,6 @@ pub struct TimelineInfo {
pub latest_gc_cutoff_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub disk_consistent_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn: Lsn,
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
pub current_logical_size_non_incremental: Option<u64>,
@@ -201,6 +176,10 @@ pub struct TimelineInfo {
pub last_received_msg_ts: Option<u128>,
pub pg_version: u32,
#[serde_as(as = "Option<DisplayFromStr>")]
pub remote_consistent_lsn: Option<Lsn>,
pub awaits_download: bool,
pub state: TimelineState,
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
@@ -246,7 +225,6 @@ pub struct TimelineGcRequest {
}
// Wrapped in libpq CopyData
#[derive(PartialEq, Eq)]
pub enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),
Nblocks(PagestreamNblocksRequest),
@@ -263,21 +241,21 @@ pub enum PagestreamBeMessage {
DbSize(PagestreamDbSizeResponse),
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug)]
pub struct PagestreamExistsRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug)]
pub struct PagestreamNblocksRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug)]
pub struct PagestreamGetPageRequest {
pub latest: bool,
pub lsn: Lsn,
@@ -285,7 +263,7 @@ pub struct PagestreamGetPageRequest {
pub blkno: u32,
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug)]
pub struct PagestreamDbSizeRequest {
pub latest: bool,
pub lsn: Lsn,
@@ -318,98 +296,52 @@ pub struct PagestreamDbSizeResponse {
}
impl PagestreamFeMessage {
pub fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();
match self {
Self::Exists(req) => {
bytes.put_u8(0);
bytes.put_u8(if req.latest { 1 } else { 0 });
bytes.put_u64(req.lsn.0);
bytes.put_u32(req.rel.spcnode);
bytes.put_u32(req.rel.dbnode);
bytes.put_u32(req.rel.relnode);
bytes.put_u8(req.rel.forknum);
}
Self::Nblocks(req) => {
bytes.put_u8(1);
bytes.put_u8(if req.latest { 1 } else { 0 });
bytes.put_u64(req.lsn.0);
bytes.put_u32(req.rel.spcnode);
bytes.put_u32(req.rel.dbnode);
bytes.put_u32(req.rel.relnode);
bytes.put_u8(req.rel.forknum);
}
Self::GetPage(req) => {
bytes.put_u8(2);
bytes.put_u8(if req.latest { 1 } else { 0 });
bytes.put_u64(req.lsn.0);
bytes.put_u32(req.rel.spcnode);
bytes.put_u32(req.rel.dbnode);
bytes.put_u32(req.rel.relnode);
bytes.put_u8(req.rel.forknum);
bytes.put_u32(req.blkno);
}
Self::DbSize(req) => {
bytes.put_u8(3);
bytes.put_u8(if req.latest { 1 } else { 0 });
bytes.put_u64(req.lsn.0);
bytes.put_u32(req.dbnode);
}
}
bytes.into()
}
pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
pub fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
// TODO these gets can fail
// these correspond to the NeonMessageTag enum in pagestore_client.h
//
// TODO: consider using protobuf or serde bincode for less error prone
// serialization.
let msg_tag = body.read_u8()?;
let msg_tag = body.get_u8();
match msg_tag {
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
latest: body.read_u8()? != 0,
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.read_u32::<BigEndian>()?,
dbnode: body.read_u32::<BigEndian>()?,
relnode: body.read_u32::<BigEndian>()?,
forknum: body.read_u8()?,
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
latest: body.read_u8()? != 0,
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.read_u32::<BigEndian>()?,
dbnode: body.read_u32::<BigEndian>()?,
relnode: body.read_u32::<BigEndian>()?,
forknum: body.read_u8()?,
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
latest: body.read_u8()? != 0,
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.read_u32::<BigEndian>()?,
dbnode: body.read_u32::<BigEndian>()?,
relnode: body.read_u32::<BigEndian>()?,
forknum: body.read_u8()?,
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
blkno: body.read_u32::<BigEndian>()?,
blkno: body.get_u32(),
})),
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
latest: body.read_u8()? != 0,
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
dbnode: body.read_u32::<BigEndian>()?,
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
dbnode: body.get_u32(),
})),
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
}
}
}
@@ -448,58 +380,3 @@ impl PagestreamBeMessage {
bytes.into()
}
}
#[cfg(test)]
mod tests {
use bytes::Buf;
use super::*;
#[test]
fn test_pagestream() {
// Test serialization/deserialization of PagestreamFeMessage
let messages = vec![
PagestreamFeMessage::Exists(PagestreamExistsRequest {
latest: true,
lsn: Lsn(4),
rel: RelTag {
forknum: 1,
spcnode: 2,
dbnode: 3,
relnode: 4,
},
}),
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
latest: false,
lsn: Lsn(4),
rel: RelTag {
forknum: 1,
spcnode: 2,
dbnode: 3,
relnode: 4,
},
}),
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
latest: true,
lsn: Lsn(4),
rel: RelTag {
forknum: 1,
spcnode: 2,
dbnode: 3,
relnode: 4,
},
blkno: 7,
}),
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
latest: true,
lsn: Lsn(4),
dbnode: 7,
}),
];
for msg in messages {
let bytes = msg.serialize();
let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
assert!(msg == reconstructed);
}
}
}

View File

@@ -1,17 +0,0 @@
[package]
name = "postgres_connection"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
itertools = "0.10.3"
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
once_cell = "1.13.0"

View File

@@ -1,253 +0,0 @@
use anyhow::{bail, Context};
use itertools::Itertools;
use std::borrow::Cow;
use std::fmt;
use url::Host;
/// Parses a string of format either `host:port` or `host` into a corresponding pair.
/// The `host` part should be a correct `url::Host`, while `port` (if present) should be
/// a valid decimal u16 of digits only.
pub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {
let (host, port) = match host_port.as_ref().rsplit_once(':') {
Some((host, port)) => (
host,
// +80 is a valid u16, but not a valid port
if port.chars().all(|c| c.is_ascii_digit()) {
Some(port.parse::<u16>().context("Unable to parse port")?)
} else {
bail!("Port contains a non-ascii-digit")
},
),
None => (host_port.as_ref(), None), // No colons, no port specified
};
let host = Host::parse(host).context("Unable to parse host")?;
Ok((host, port))
}
#[cfg(test)]
mod tests_parse_host_port {
use crate::parse_host_port;
use url::Host;
#[test]
fn test_normal() {
let (host, port) = parse_host_port("hello:123").unwrap();
assert_eq!(host, Host::Domain("hello".to_owned()));
assert_eq!(port, Some(123));
}
#[test]
fn test_no_port() {
let (host, port) = parse_host_port("hello").unwrap();
assert_eq!(host, Host::Domain("hello".to_owned()));
assert_eq!(port, None);
}
#[test]
fn test_ipv6() {
let (host, port) = parse_host_port("[::1]:123").unwrap();
assert_eq!(host, Host::<String>::Ipv6(std::net::Ipv6Addr::LOCALHOST));
assert_eq!(port, Some(123));
}
#[test]
fn test_invalid_host() {
assert!(parse_host_port("hello world").is_err());
}
#[test]
fn test_invalid_port() {
assert!(parse_host_port("hello:+80").is_err());
}
}
#[derive(Clone)]
pub struct PgConnectionConfig {
host: Host,
port: u16,
password: Option<String>,
options: Vec<String>,
}
/// A simplified PostgreSQL connection configuration. Supports only a subset of possible
/// settings for simplicity. A password getter or `to_connection_string` methods are not
/// added by design to avoid accidentally leaking password through logging, command line
/// arguments to a child process, or likewise.
impl PgConnectionConfig {
pub fn new_host_port(host: Host, port: u16) -> Self {
PgConnectionConfig {
host,
port,
password: None,
options: vec![],
}
}
pub fn host(&self) -> &Host {
&self.host
}
pub fn port(&self) -> u16 {
self.port
}
pub fn set_host(mut self, h: Host) -> Self {
self.host = h;
self
}
pub fn set_port(mut self, p: u16) -> Self {
self.port = p;
self
}
pub fn set_password(mut self, s: Option<String>) -> Self {
self.password = s;
self
}
pub fn extend_options<I: IntoIterator<Item = S>, S: Into<String>>(mut self, i: I) -> Self {
self.options.extend(i.into_iter().map(|s| s.into()));
self
}
/// Return a `<host>:<port>` string.
pub fn raw_address(&self) -> String {
format!("{}:{}", self.host(), self.port())
}
/// Build a client library-specific connection configuration.
/// Used for testing and when we need to add some obscure configuration
/// elements at the last moment.
pub fn to_tokio_postgres_config(&self) -> tokio_postgres::Config {
// Use `tokio_postgres::Config` instead of `postgres::Config` because
// the former supports more options to fiddle with later.
let mut config = tokio_postgres::Config::new();
config.host(&self.host().to_string()).port(self.port);
if let Some(password) = &self.password {
config.password(password);
}
if !self.options.is_empty() {
// These options are command-line options and should be escaped before being passed
// as an 'options' connection string parameter, see
// https://www.postgresql.org/docs/15/libpq-connect.html#LIBPQ-CONNECT-OPTIONS
//
// They will be space-separated, so each space inside an option should be escaped,
// and all backslashes should be escaped before that. Although we don't expect options
// with spaces at the moment, they're supported by PostgreSQL. Hence we support them
// in this typesafe interface.
//
// We use `Cow` to avoid allocations in the best case (no escaping). A fully imperative
// solution would require 1-2 allocations in the worst case as well, but it's harder to
// implement and this function is hardly a bottleneck. The function is only called around
// establishing a new connection.
#[allow(unstable_name_collisions)]
config.options(
&self
.options
.iter()
.map(|s| {
if s.contains(['\\', ' ']) {
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
} else {
Cow::Borrowed(s.as_str())
}
})
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
.collect::<String>(),
);
}
config
}
/// Connect using postgres protocol with TLS disabled.
pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
postgres::Config::from(self.to_tokio_postgres_config()).connect(postgres::NoTls)
}
}
impl fmt::Debug for PgConnectionConfig {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// We want `password: Some(REDACTED-STRING)`, not `password: Some("REDACTED-STRING")`
// so even if the password is `REDACTED-STRING` (quite unlikely) there is no confusion.
// Hence `format_args!()`, it returns a "safe" string which is not escaped by `Debug`.
f.debug_struct("PgConnectionConfig")
.field("host", &self.host)
.field("port", &self.port)
.field(
"password",
&self
.password
.as_ref()
.map(|_| format_args!("REDACTED-STRING")),
)
.finish()
}
}
#[cfg(test)]
mod tests_pg_connection_config {
use crate::PgConnectionConfig;
use once_cell::sync::Lazy;
use url::Host;
static STUB_HOST: Lazy<Host> = Lazy::new(|| Host::Domain("stub.host.example".to_owned()));
#[test]
fn test_no_password() {
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123);
assert_eq!(cfg.host(), &*STUB_HOST);
assert_eq!(cfg.port(), 123);
assert_eq!(cfg.raw_address(), "stub.host.example:123");
assert_eq!(
format!("{:?}", cfg),
"PgConnectionConfig { host: Domain(\"stub.host.example\"), port: 123, password: None }"
);
}
#[test]
fn test_ipv6() {
// May be a special case because hostname contains a colon.
let cfg = PgConnectionConfig::new_host_port(Host::parse("[::1]").unwrap(), 123);
assert_eq!(
cfg.host(),
&Host::<String>::Ipv6(std::net::Ipv6Addr::LOCALHOST)
);
assert_eq!(cfg.port(), 123);
assert_eq!(cfg.raw_address(), "[::1]:123");
assert_eq!(
format!("{:?}", cfg),
"PgConnectionConfig { host: Ipv6(::1), port: 123, password: None }"
);
}
#[test]
fn test_with_password() {
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123)
.set_password(Some("password".to_owned()));
assert_eq!(cfg.host(), &*STUB_HOST);
assert_eq!(cfg.port(), 123);
assert_eq!(cfg.raw_address(), "stub.host.example:123");
assert_eq!(
format!("{:?}", cfg),
"PgConnectionConfig { host: Domain(\"stub.host.example\"), port: 123, password: Some(REDACTED-STRING) }"
);
}
#[test]
fn test_with_options() {
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([
"hello",
"world",
"with space",
"and \\ backslashes",
]);
assert_eq!(cfg.host(), &*STUB_HOST);
assert_eq!(cfg.port(), 123);
assert_eq!(cfg.raw_address(), "stub.host.example:123");
assert_eq!(
cfg.to_tokio_postgres_config().get_options(),
Some("hello world with\\ space and\\ \\\\\\ backslashes")
);
}
}

View File

@@ -163,27 +163,6 @@ pub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {
pg[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());
}
// This is port of function with the same name from freespace.c.
// The only difference is that it does not have "level" parameter because XLogRecordPageWithFreeSpace
// always call it with level=FSM_BOTTOM_LEVEL
pub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber {
let mut leafno = addr;
const FSM_TREE_DEPTH: u32 = if pg_constants::SLOTS_PER_FSM_PAGE >= 1626 {
3
} else {
4
};
/* Count upper level nodes required to address the leaf page */
let mut pages: BlockNumber = 0;
for _l in 0..FSM_TREE_DEPTH {
pages += leafno + 1;
leafno /= pg_constants::SLOTS_PER_FSM_PAGE;
}
/* Turn the page count into 0-based block number */
pages - 1
}
pub mod waldecoder {
use crate::{v14, v15};

View File

@@ -197,16 +197,6 @@ pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
pub const XLP_LONG_HEADER: u16 = 0x0002;
/* From fsm_internals.h */
const FSM_NODES_PER_PAGE: usize = BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA - 4;
const FSM_NON_LEAF_NODES_PER_PAGE: usize = BLCKSZ as usize / 2 - 1;
const FSM_LEAF_NODES_PER_PAGE: usize = FSM_NODES_PER_PAGE - FSM_NON_LEAF_NODES_PER_PAGE;
pub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;
/* From visibilitymap.c */
pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
(BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA) as u32 * (8 / 2); // MAPSIZE * (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 22] = [

View File

@@ -1,6 +1,7 @@
use anyhow::*;
use core::time::Duration;
use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
@@ -25,13 +26,15 @@ pub struct PostgresServer {
client_config: postgres::Config,
}
pub static REQUIRED_POSTGRES_CONFIG: [&str; 4] = [
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
];
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
vec![
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
]
});
impl Conf {
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {

View File

@@ -1,16 +0,0 @@
[package]
name = "pq_proto"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0"
bytes = "1.0.1"
pin-project-lite = "0.2.7"
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
rand = "0.8.3"
serde = { version = "1.0", features = ["derive"] }
tokio = { version = "1.17", features = ["macros"] }
tracing = "0.1"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -9,11 +9,8 @@ async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
utils = { version = "0.1", path = "../utils" }
once_cell = "1.13.0"
aws-smithy-http = "0.51.0"
aws-types = "0.51.0"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
hyper = { version = "0.14", features = ["stream"] }
rusoto_core = "0.48"
rusoto_s3 = "0.48"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }

View File

@@ -10,7 +10,7 @@ mod s3_bucket;
use std::{
collections::HashMap,
fmt::Debug,
fmt::{Debug, Display},
num::{NonZeroU32, NonZeroUsize},
ops::Deref,
path::{Path, PathBuf},
@@ -41,27 +41,44 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
/// Path on the remote storage, relative to some inner prefix.
/// The prefix is an implementation detail, that allows representing local paths
/// as the remote ones, stripping the local storage prefix away.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RemotePath(PathBuf);
impl RemotePath {
pub fn new(relative_path: &Path) -> anyhow::Result<Self> {
anyhow::ensure!(
relative_path.is_relative(),
"Path {relative_path:?} is not relative"
);
Ok(Self(relative_path.to_path_buf()))
}
pub fn with_base(&self, base_path: &Path) -> PathBuf {
base_path.join(&self.0)
}
#[derive(Clone, PartialEq, Eq)]
pub struct RemoteObjectId(String);
///
/// A key that refers to an object in remote storage. It works much like a Path,
/// but it's a separate datatype so that you don't accidentally mix local paths
/// and remote keys.
///
impl RemoteObjectId {
// Needed to retrieve last component for RemoteObjectId.
// In other words a file name
/// Turn a/b/c or a/b/c/ into c
pub fn object_name(&self) -> Option<&str> {
self.0.file_name().and_then(|os_str| os_str.to_str())
// corner case, char::to_string is not const, thats why this is more verbose than it needs to be
// see https://github.com/rust-lang/rust/issues/88674
if self.0.len() == 1 && self.0.chars().next().unwrap() == REMOTE_STORAGE_PREFIX_SEPARATOR {
return None;
}
if self.0.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
self.0.rsplit(REMOTE_STORAGE_PREFIX_SEPARATOR).nth(1)
} else {
self.0
.rsplit_once(REMOTE_STORAGE_PREFIX_SEPARATOR)
.map(|(_, last)| last)
}
}
}
impl Debug for RemoteObjectId {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
Debug::fmt(&self.0, fmt)
}
}
impl Display for RemoteObjectId {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&self.0, fmt)
}
}
@@ -70,40 +87,49 @@ impl RemotePath {
/// providing basic CRUD operations for storage files.
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync + 'static {
/// Attempts to derive the storage path out of the local path, if the latter is correct.
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId>;
/// Gets the download path of the given storage file.
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf>;
/// Lists all items the storage has right now.
async fn list(&self) -> anyhow::Result<Vec<RemotePath>>;
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
/// so this method doesnt need to.
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>>;
/// Streams the local file contents into remote into the remote storage entry.
async fn upload(
&self,
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
// S3 PUT request requires the content length to be specified,
// otherwise it starts to fail with the concurrent connection count increasing.
data_size_bytes: usize,
to: &RemotePath,
from_size_bytes: usize,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()>;
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError>;
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError>;
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()>;
/// Downcast to LocalFs implementation. For tests.
fn as_local(&self) -> Option<&LocalFs> {
@@ -142,7 +168,7 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e}"),
}
}
}
@@ -152,35 +178,34 @@ impl std::error::Error for DownloadError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
#[derive(Clone)]
pub enum GenericRemoteStorage {
LocalFs(LocalFs),
AwsS3(Arc<S3Bucket>),
}
pub struct GenericRemoteStorage(Arc<dyn RemoteStorage>);
impl Deref for GenericRemoteStorage {
type Target = dyn RemoteStorage;
fn deref(&self) -> &Self::Target {
match self {
GenericRemoteStorage::LocalFs(local_fs) => local_fs,
GenericRemoteStorage::AwsS3(s3_bucket) => s3_bucket.as_ref(),
}
self.0.as_ref()
}
}
impl GenericRemoteStorage {
pub fn new(storage: impl RemoteStorage) -> Self {
Self(Arc::new(storage))
}
pub fn from_config(
working_directory: PathBuf,
storage_config: &RemoteStorageConfig,
) -> anyhow::Result<GenericRemoteStorage> {
Ok(match &storage_config.storage {
RemoteStorageKind::LocalFs(root) => {
info!("Using fs root '{}' as a remote storage", root.display());
GenericRemoteStorage::LocalFs(LocalFs::new(root.clone())?)
GenericRemoteStorage::new(LocalFs::new(root.clone(), working_directory)?)
}
RemoteStorageKind::AwsS3(s3_config) => {
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
GenericRemoteStorage::AwsS3(Arc::new(S3Bucket::new(s3_config)?))
GenericRemoteStorage::new(S3Bucket::new(s3_config, working_directory)?)
}
})
}
@@ -194,12 +219,23 @@ impl GenericRemoteStorage {
&self,
from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
from_size_bytes: usize,
to: &RemotePath,
from_path: &Path,
) -> anyhow::Result<()> {
self.upload(from, from_size_bytes, to, None)
let target_storage_path = self.remote_object_id(from_path).with_context(|| {
format!(
"Failed to get the storage path for source local path '{}'",
from_path.display()
)
})?;
self.upload(from, from_size_bytes, &target_storage_path, None)
.await
.with_context(|| {
format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
format!(
"Failed to upload from '{}' to storage path '{:?}'",
from_path.display(),
target_storage_path
)
})
}
@@ -208,11 +244,24 @@ impl GenericRemoteStorage {
pub async fn download_storage_object(
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
to_path: &Path,
) -> Result<Download, DownloadError> {
let remote_object_path = self
.remote_object_id(to_path)
.with_context(|| {
format!(
"Failed to get the storage path for target local path '{}'",
to_path.display()
)
})
.map_err(DownloadError::BadInput)?;
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end).await,
None => self.download(from).await,
Some((start, end)) => {
self.download_byte_range(&remote_object_path, start, end)
.await
}
None => self.download(&remote_object_path).await,
}
}
}
@@ -222,6 +271,23 @@ impl GenericRemoteStorage {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StorageMetadata(HashMap<String, String>);
fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
if prefix == path {
anyhow::bail!(
"Prefix and the path are equal, cannot strip: '{}'",
prefix.display()
)
} else {
path.strip_prefix(prefix).with_context(|| {
format!(
"Path '{}' is not prefixed with '{}'",
path.display(),
prefix.display(),
)
})
}
}
/// External backup storage configuration, enough for creating a client for that storage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteStorageConfig {
@@ -365,24 +431,21 @@ mod tests {
use super::*;
#[test]
fn test_object_name() {
let k = RemotePath::new(Path::new("a/b/c")).unwrap();
fn object_name() {
let k = RemoteObjectId("a/b/c".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = RemotePath::new(Path::new("a/b/c/")).unwrap();
let k = RemoteObjectId("a/b/c/".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = RemotePath::new(Path::new("a/")).unwrap();
let k = RemoteObjectId("a/".to_owned());
assert_eq!(k.object_name(), Some("a"));
// XXX is it impossible to have an empty key?
let k = RemotePath::new(Path::new("")).unwrap();
let k = RemoteObjectId("".to_owned());
assert_eq!(k.object_name(), None);
let k = RemoteObjectId("/".to_owned());
assert_eq!(k.object_name(), None);
}
#[test]
fn rempte_path_cannot_be_created_from_absolute_ones() {
let err = RemotePath::new(Path::new("/")).expect_err("Should fail on absolute paths");
assert_eq!(err.to_string(), "Path \"/\" is not relative");
}
}

View File

@@ -5,7 +5,6 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
path::{Path, PathBuf},
pin::Pin,
@@ -19,33 +18,60 @@ use tokio::{
use tracing::*;
use utils::crashsafe::path_with_suffix_extension;
use crate::{Download, DownloadError, RemotePath};
use crate::{Download, DownloadError, RemoteObjectId};
use super::{RemoteStorage, StorageMetadata};
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
#[derive(Debug, Clone)]
/// Convert a Path in the remote storage into a RemoteObjectId
fn remote_object_id_from_path(path: &Path) -> anyhow::Result<RemoteObjectId> {
Ok(RemoteObjectId(
path.to_str()
.ok_or_else(|| anyhow::anyhow!("unexpected characters found in path"))?
.to_string(),
))
}
pub struct LocalFs {
working_directory: PathBuf,
storage_root: PathBuf,
}
impl LocalFs {
/// Attempts to create local FS storage, along with its root directory.
/// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).
pub fn new(mut storage_root: PathBuf) -> anyhow::Result<Self> {
if !storage_root.exists() {
std::fs::create_dir_all(&storage_root).with_context(|| {
format!("Failed to create all directories in the given root path {storage_root:?}")
})?;
}
if !storage_root.is_absolute() {
storage_root = storage_root.canonicalize().with_context(|| {
format!("Failed to represent path {storage_root:?} as an absolute path")
pub fn new(root: PathBuf, working_directory: PathBuf) -> anyhow::Result<Self> {
if !root.exists() {
std::fs::create_dir_all(&root).with_context(|| {
format!(
"Failed to create all directories in the given root path '{}'",
root.display(),
)
})?;
}
Ok(Self {
working_directory,
storage_root: root,
})
}
Ok(Self { storage_root })
///
/// Get the absolute path in the local filesystem to given remote object.
///
/// This is public so that it can be used in tests. Should not be used elsewhere.
///
pub fn resolve_in_storage(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
let path = PathBuf::from(&remote_object_id.0);
if path.is_relative() {
Ok(self.storage_root.join(path))
} else if path.starts_with(&self.storage_root) {
Ok(path)
} else {
bail!(
"Path '{}' does not belong to the current storage",
path.display()
)
}
}
async fn read_storage_metadata(
@@ -77,48 +103,45 @@ impl LocalFs {
#[async_trait::async_trait]
impl RemoteStorage for LocalFs {
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
Ok(get_all_files(&self.storage_root, true)
.await?
.into_iter()
.map(|path| {
path.strip_prefix(&self.storage_root)
.context("Failed to strip storage root prefix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
)
})
.collect())
/// Convert a "local" path into a "remote path"
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
let path = self.storage_root.join(
strip_path_prefix(&self.working_directory, local_path)
.context("local path does not belong to this storage")?,
);
remote_object_id_from_path(&path)
}
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
let storage_path = PathBuf::from(&remote_object_id.0);
let relative_path = strip_path_prefix(&self.storage_root, &storage_path)
.context("local path does not belong to this storage")?;
Ok(self.working_directory.join(relative_path))
}
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
get_all_files(&self.storage_root, true).await
}
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
None => Cow::Borrowed(&self.storage_root),
Some(prefix) => Path::new(&prefix.0),
None => &self.storage_root,
};
Ok(get_all_files(path.as_ref(), false)
.await?
.into_iter()
.map(|path| {
path.strip_prefix(&self.storage_root)
.context("Failed to strip preifix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
)
})
.collect())
get_all_files(path, false).await
}
async fn upload(
&self,
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
data_size_bytes: usize,
to: &RemotePath,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from_size_bytes: usize,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()> {
let target_file_path = to.with_base(&self.storage_root);
let target_file_path = self.resolve_in_storage(to)?;
create_target_directory(&target_file_path).await?;
// We need this dance with sort of durable rename (without fsyncs)
// to prevent partial uploads. This was really hit when pageserver shutdown
@@ -139,8 +162,8 @@ impl RemoteStorage for LocalFs {
})?,
);
let from_size_bytes = data_size_bytes as u64;
let mut buffer_to_read = data.take(from_size_bytes);
let from_size_bytes = from_size_bytes as u64;
let mut buffer_to_read = from.take(from_size_bytes);
let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
.await
@@ -197,22 +220,27 @@ impl RemoteStorage for LocalFs {
Ok(())
}
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.open(&file_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&target_path)
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
Ok(Download {
@@ -226,7 +254,7 @@ impl RemoteStorage for LocalFs {
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
@@ -238,15 +266,20 @@ impl RemoteStorage for LocalFs {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let target_path = from.with_base(&self.storage_root);
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.open(&file_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
@@ -256,7 +289,7 @@ impl RemoteStorage for LocalFs {
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&target_path)
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
@@ -275,12 +308,15 @@ impl RemoteStorage for LocalFs {
}
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let file_path = path.with_base(&self.storage_root);
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()> {
let file_path = self.resolve_in_storage(path)?;
if file_path.exists() && file_path.is_file() {
Ok(fs::remove_file(file_path).await?)
} else {
bail!("File {file_path:?} either does not exist or is not a file")
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
@@ -296,7 +332,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
fn get_all_files<'a, P>(
directory_path: P,
recursive: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<RemoteObjectId>>> + Send + Sync + 'a>>
where
P: AsRef<Path> + Send + Sync + 'a,
{
@@ -310,20 +346,20 @@ where
let file_type = dir_entry.file_type().await?;
let entry_path = dir_entry.path();
if file_type.is_symlink() {
debug!("{entry_path:?} us a symlink, skipping")
debug!("{:?} us a symlink, skipping", entry_path)
} else if file_type.is_dir() {
if recursive {
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
} else {
paths.push(entry_path)
paths.push(remote_object_id_from_path(&dir_entry.path())?)
}
} else {
paths.push(entry_path);
paths.push(remote_object_id_from_path(&dir_entry.path())?);
}
}
Ok(paths)
} else {
bail!("Path {directory_path:?} is not a directory")
bail!("Path '{}' is not a directory", directory_path.display())
}
} else {
Ok(Vec::new())
@@ -358,6 +394,173 @@ fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
}
}
#[cfg(test)]
mod pure_tests {
use tempfile::tempdir;
use super::*;
#[test]
fn storage_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root: storage_root.clone(),
};
let local_path = workdir
.join("timelines")
.join("some_timeline")
.join("file_name");
let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);
let actual_path = PathBuf::from(
storage
.remote_object_id(&local_path)
.expect("Matching path should map to storage path normally")
.0,
);
assert_eq!(
expected_path,
actual_path,
"File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
);
Ok(())
}
#[test]
fn storage_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
match storage.remote_object_id(mismatching_path) {
Ok(wrong_path) => panic!(
"Expected path '{}' to error, but got storage path: {:?}",
mismatching_path.display(),
wrong_path,
),
Err(e) => format!("{:?}", e),
}
}
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root,
};
let error_string = storage_path_error(&storage, &workdir);
assert!(error_string.contains("does not belong to this storage"));
assert!(error_string.contains(workdir.to_str().unwrap()));
let mismatching_path_str = "/something/else";
let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
assert!(
error_message.contains(mismatching_path_str),
"Error should mention wrong path"
);
assert!(
error_message.contains(workdir.to_str().unwrap()),
"Error should mention server workdir"
);
assert!(error_message.contains("does not belong to this storage"));
Ok(())
}
#[test]
fn local_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root: storage_root.clone(),
};
let name = "not a metadata";
let local_path = workdir.join("timelines").join("some_timeline").join(name);
assert_eq!(
local_path,
storage
.local_path(&remote_object_id_from_path(
&storage_root.join(local_path.strip_prefix(&workdir)?)
)?)
.expect("For a valid input, valid local path should be parsed"),
"Should be able to parse metadata out of the correctly named remote delta file"
);
let local_metadata_path = workdir
.join("timelines")
.join("some_timeline")
.join("metadata");
let remote_metadata_path = storage.remote_object_id(&local_metadata_path)?;
assert_eq!(
local_metadata_path,
storage
.local_path(&remote_metadata_path)
.expect("For a valid input, valid local path should be parsed"),
"Should be able to parse metadata out of the correctly named remote metadata file"
);
Ok(())
}
#[test]
fn local_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn local_path_error(storage: &LocalFs, storage_path: &RemoteObjectId) -> String {
match storage.local_path(storage_path) {
Ok(wrong_path) => panic!(
"Expected local path input {:?} to cause an error, but got file path: {:?}",
storage_path, wrong_path,
),
Err(e) => format!("{:?}", e),
}
}
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: tempdir()?.path().to_owned(),
storage_root,
};
let totally_wrong_path = "wrong_wrong_wrong";
let error_message =
local_path_error(&storage, &RemoteObjectId(totally_wrong_path.to_string()));
assert!(error_message.contains(totally_wrong_path));
Ok(())
}
#[test]
fn download_destination_matches_original_path() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let original_path = workdir
.join("timelines")
.join("some_timeline")
.join("some name");
let storage_root = PathBuf::from("somewhere").join("else");
let dummy_storage = LocalFs {
working_directory: workdir,
storage_root,
};
let storage_path = dummy_storage.remote_object_id(&original_path)?;
let download_destination = dummy_storage.local_path(&storage_path)?;
assert_eq!(
original_path, download_destination,
"'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path"
);
Ok(())
}
}
#[cfg(test)]
mod fs_tests {
use super::*;
@@ -369,7 +572,7 @@ mod fs_tests {
storage: &LocalFs,
#[allow(clippy::ptr_arg)]
// have to use &PathBuf due to `storage.local_path` parameter requirements
remote_storage_path: &RemotePath,
remote_storage_path: &RemoteObjectId,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let mut download = storage
@@ -392,16 +595,41 @@ mod fs_tests {
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let target_path_1 = upload_dummy_file(&storage, "upload_1", None).await?;
let (file, size) = create_file_for_upload(
&storage.working_directory.join("whatever"),
"whatever_contents",
)
.await?;
let target_path = "/somewhere/else";
match storage
.upload(
Box::new(file),
size,
&RemoteObjectId(target_path.to_string()),
None,
)
.await
{
Ok(()) => panic!("Should not allow storing files with wrong target path"),
Err(e) => {
let message = format!("{:?}", e);
assert!(message.contains(target_path));
assert!(message.contains("does not belong to the current storage"));
}
}
assert!(storage.list().await?.is_empty());
let target_path_1 = upload_dummy_file(&workdir, &storage, "upload_1", None).await?;
assert_eq!(
storage.list().await?,
vec![target_path_1.clone()],
"Should list a single file after first upload"
);
let target_path_2 = upload_dummy_file(&storage, "upload_2", None).await?;
let target_path_2 = upload_dummy_file(&workdir, &storage, "upload_2", None).await?;
assert_eq!(
list_files_sorted(&storage).await?,
vec![target_path_1.clone(), target_path_2.clone()],
@@ -415,7 +643,7 @@ mod fs_tests {
async fn upload_file_negatives() -> anyhow::Result<()> {
let storage = create_storage()?;
let id = RemotePath::new(Path::new("dummy"))?;
let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?;
let content = std::io::Cursor::new(b"12345");
// Check that you get an error if the size parameter doesn't match the actual
@@ -440,14 +668,16 @@ mod fs_tests {
}
fn create_storage() -> anyhow::Result<LocalFs> {
LocalFs::new(tempdir()?.path().to_owned())
LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
}
#[tokio::test]
async fn download_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
assert_eq!(
@@ -457,7 +687,7 @@ mod fs_tests {
);
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Path::new(non_existing_path))?).await {
match storage.download(&RemoteObjectId(non_existing_path.to_string())).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -466,9 +696,11 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
@@ -534,9 +766,11 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_negative() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
@@ -578,9 +812,11 @@ mod fs_tests {
#[tokio::test]
async fn delete_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
storage.delete(&upload_target).await?;
assert!(storage.list().await?.is_empty());
@@ -590,8 +826,7 @@ mod fs_tests {
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
let expected_path = upload_target.with_base(&storage.storage_root);
assert!(error_string.contains(expected_path.to_str().unwrap()));
assert!(error_string.contains(&upload_target.0));
}
}
Ok(())
@@ -599,6 +834,8 @@ mod fs_tests {
#[tokio::test]
async fn file_with_metadata() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let metadata = StorageMetadata(HashMap::from([
@@ -606,7 +843,7 @@ mod fs_tests {
("two".to_string(), "2".to_string()),
]));
let upload_target =
upload_dummy_file(&storage, upload_name, Some(metadata.clone())).await?;
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
@@ -646,32 +883,23 @@ mod fs_tests {
}
async fn upload_dummy_file(
workdir: &Path,
storage: &LocalFs,
name: &str,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<RemotePath> {
let from_path = storage
.storage_root
.join("timelines")
.join("some_timeline")
.join(name);
) -> anyhow::Result<RemoteObjectId> {
let timeline_path = workdir.join("timelines").join("some_timeline");
let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
let remote_object_id = RemoteObjectId(storage_path.to_str().unwrap().to_string());
let from_path = storage.working_directory.join(name);
let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
let relative_path = from_path
.strip_prefix(&storage.storage_root)
.context("Failed to strip storage root prefix")
.and_then(RemotePath::new)
.with_context(|| {
format!(
"Failed to resolve remote part of path {:?} for base {:?}",
from_path, storage.storage_root
)
})?;
storage
.upload(Box::new(file), size, &relative_path, metadata)
.upload(Box::new(file), size, &remote_object_id, metadata)
.await?;
Ok(relative_path)
remote_object_id_from_path(&storage_path)
}
async fn create_file_for_upload(
@@ -696,7 +924,7 @@ mod fs_tests {
format!("contents for {name}")
}
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemoteObjectId>> {
let mut files = storage.list().await?;
files.sort_by(|a, b| a.0.cmp(&b.0));
Ok(files)

View File

@@ -4,34 +4,27 @@
//! allowing multiple api users to independently work with the same S3 bucket, if
//! their bucket prefixes are both specified and different.
use std::env::var;
use std::sync::Arc;
use std::time::Duration;
use std::path::{Path, PathBuf};
use anyhow::Context;
use aws_config::{
environment::credentials::EnvironmentVariableCredentialsProvider, imds,
imds::credentials::ImdsCredentialsProvider, meta::credentials::provide_credentials_fn,
use rusoto_core::{
credential::{InstanceMetadataProvider, StaticProvider},
HttpClient, Region, RusotoError,
};
use aws_sdk_s3::{
config::Config,
error::{GetObjectError, GetObjectErrorKind},
types::{ByteStream, SdkError},
Client, Endpoint, Region,
use rusoto_s3::{
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
S3Client, StreamingBody, S3,
};
use aws_smithy_http::body::SdkBody;
use aws_types::credentials::{CredentialsError, ProvideCredentials};
use hyper::Body;
use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use super::StorageMetadata;
use crate::{
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
const DEFAULT_IMDS_TIMEOUT: Duration = Duration::from_secs(10);
use super::StorageMetadata;
pub(super) mod metrics {
use metrics::{register_int_counter_vec, IntCounterVec};
@@ -98,9 +91,32 @@ pub(super) mod metrics {
}
}
fn download_destination(
id: &RemoteObjectId,
workdir: &Path,
prefix_to_strip: Option<&str>,
) -> PathBuf {
let path_without_prefix = match prefix_to_strip {
Some(prefix) => id.0.strip_prefix(prefix).unwrap_or_else(|| {
panic!(
"Could not strip prefix '{}' from S3 object key '{}'",
prefix, id.0
)
}),
None => &id.0,
};
workdir.join(
path_without_prefix
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
.collect::<PathBuf>(),
)
}
/// AWS S3 storage.
pub struct S3Bucket {
client: Client,
workdir: PathBuf,
client: S3Client,
bucket_name: String,
prefix_in_bucket: Option<String>,
// Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
@@ -109,53 +125,50 @@ pub struct S3Bucket {
concurrency_limiter: Semaphore,
}
#[derive(Default)]
struct GetObjectRequest {
bucket: String,
key: String,
range: Option<String>,
}
impl S3Bucket {
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result<Self> {
debug!(
"Creating s3 remote storage for S3 bucket {}",
aws_config.bucket_name
);
let mut config_builder = Config::builder()
.region(Region::new(aws_config.bucket_region.clone()))
.credentials_provider(provide_credentials_fn(|| async {
match var("AWS_ACCESS_KEY_ID").is_ok() && var("AWS_SECRET_ACCESS_KEY").is_ok() {
true => {
EnvironmentVariableCredentialsProvider::new()
.provide_credentials()
.await
}
false => {
let imds_client = imds::Client::builder()
.connect_timeout(DEFAULT_IMDS_TIMEOUT)
.read_timeout(DEFAULT_IMDS_TIMEOUT)
.build()
.await
.map_err(CredentialsError::unhandled)?;
ImdsCredentialsProvider::builder()
.imds_client(imds_client)
.build()
.provide_credentials()
.await
}
}
}));
let region = match aws_config.endpoint.clone() {
Some(custom_endpoint) => Region::Custom {
name: aws_config.bucket_region.clone(),
endpoint: custom_endpoint,
},
None => aws_config
.bucket_region
.parse::<Region>()
.context("Failed to parse the s3 region from config")?,
};
let request_dispatcher = HttpClient::new().context("Failed to create S3 http client")?;
if let Some(custom_endpoint) = aws_config.endpoint.clone() {
let endpoint = Endpoint::immutable(
custom_endpoint
.parse()
.expect("Failed to parse S3 custom endpoint"),
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
// session token is used when authorizing through sso
// which is typically the case when testing locally on developer machine
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
let client = if access_key_id.is_none() && secret_access_key.is_none() {
debug!("Using IAM-based AWS access");
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
} else {
debug!(
"Using credentials-based AWS access. Session token is set: {}",
session_token.is_some()
);
config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
}
let client = Client::from_conf(config_builder.build());
S3Client::new_with(
request_dispatcher,
StaticProvider::new(
access_key_id.unwrap_or_default(),
secret_access_key.unwrap_or_default(),
session_token,
None,
),
region,
)
};
let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
let mut prefix = prefix;
@@ -169,41 +182,16 @@ impl S3Bucket {
}
prefix
});
Ok(Self {
client,
workdir,
bucket_name: aws_config.bucket_name.clone(),
prefix_in_bucket,
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
})
}
fn s3_object_to_relative_path(&self, key: &str) -> RemotePath {
let relative_path =
match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) {
Some(stripped) => stripped,
// we rely on AWS to return properly prefixed paths
// for requests with a certain prefix
None => panic!(
"Key {} does not start with bucket prefix {:?}",
key, self.prefix_in_bucket
),
};
RemotePath(
relative_path
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
.collect(),
)
}
fn relative_path_to_s3_object(&self, path: &RemotePath) -> String {
let mut full_path = self.prefix_in_bucket.clone().unwrap_or_default();
for segment in path.0.iter() {
full_path.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
full_path.push_str(segment.to_str().unwrap_or_default());
}
full_path
}
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let _guard = self
.concurrency_limiter
@@ -214,33 +202,20 @@ impl S3Bucket {
metrics::inc_get_object();
let get_object = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range)
.send()
.await;
match get_object {
Ok(object_output) => {
let metadata = object_output.metadata().cloned().map(StorageMetadata);
Ok(Download {
metadata,
download_stream: Box::pin(io::BufReader::new(
object_output.body.into_async_read(),
)),
})
}
Err(SdkError::ServiceError {
err:
GetObjectError {
kind: GetObjectErrorKind::NoSuchKey(..),
..
},
..
}) => Err(DownloadError::NotFound),
match self.client.get_object(request).await {
Ok(object_output) => match object_output.body {
None => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Got no body for the S3 object given"
)))
}
Some(body) => Ok(Download {
metadata: object_output.metadata.map(StorageMetadata),
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
}),
},
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
@@ -253,7 +228,25 @@ impl S3Bucket {
#[async_trait::async_trait]
impl RemoteStorage for S3Bucket {
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
let relative_path = strip_path_prefix(&self.workdir, local_path)?;
let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
for segment in relative_path {
key.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
key.push_str(&segment.to_string_lossy());
}
Ok(RemoteObjectId(key))
}
fn local_path(&self, storage_path: &RemoteObjectId) -> anyhow::Result<PathBuf> {
Ok(download_destination(
storage_path,
&self.workdir,
self.prefix_in_bucket.as_deref(),
))
}
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
let mut document_keys = Vec::new();
let mut continuation_token = None;
@@ -268,11 +261,12 @@ impl RemoteStorage for S3Bucket {
let fetch_response = self
.client
.list_objects_v2()
.bucket(self.bucket_name.clone())
.set_prefix(self.prefix_in_bucket.clone())
.set_continuation_token(continuation_token)
.send()
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: self.prefix_in_bucket.clone(),
continuation_token,
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
@@ -283,7 +277,7 @@ impl RemoteStorage for S3Bucket {
.contents
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
.filter_map(|o| Some(RemoteObjectId(o.key?))),
);
match fetch_response.continuation_token {
@@ -297,10 +291,13 @@ impl RemoteStorage for S3Bucket {
/// See the doc for `RemoteStorage::list_prefixes`
/// Note: it wont include empty "directories"
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>> {
// get the passed prefix or if it is not set use prefix_in_bucket value
let list_prefix = prefix
.map(|p| self.relative_path_to_s3_object(p))
.map(|p| p.0.clone())
.or_else(|| self.prefix_in_bucket.clone())
.map(|mut p| {
// required to end with a separator
@@ -325,12 +322,13 @@ impl RemoteStorage for S3Bucket {
let fetch_response = self
.client
.list_objects_v2()
.bucket(self.bucket_name.clone())
.set_prefix(list_prefix.clone())
.set_continuation_token(continuation_token)
.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
.send()
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: list_prefix.clone(),
continuation_token,
delimiter: Some(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
@@ -342,7 +340,7 @@ impl RemoteStorage for S3Bucket {
.common_prefixes
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
.filter_map(|o| Some(RemoteObjectId(o.prefix?))),
);
match fetch_response.continuation_token {
@@ -358,7 +356,7 @@ impl RemoteStorage for S3Bucket {
&self,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from_size_bytes: usize,
to: &RemotePath,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()> {
let _guard = self
@@ -368,18 +366,17 @@ impl RemoteStorage for S3Bucket {
.context("Concurrency limiter semaphore got closed during S3 upload")?;
metrics::inc_put_object();
let body = Body::wrap_stream(ReaderStream::new(from));
let bytes_stream = ByteStream::new(SdkBody::from(body));
self.client
.put_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(to))
.set_metadata(metadata.map(|m| m.0))
.content_length(from_size_bytes.try_into()?)
.body(bytes_stream)
.send()
.put_object(PutObjectRequest {
body: Some(StreamingBody::new_with_size(
ReaderStream::new(from),
from_size_bytes,
)),
bucket: self.bucket_name.clone(),
key: to.0.to_owned(),
metadata: metadata.map(|m| m.0),
..PutObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_put_object_fail();
@@ -388,10 +385,10 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
key: from.0.to_owned(),
..GetObjectRequest::default()
})
.await
@@ -399,7 +396,7 @@ impl RemoteStorage for S3Bucket {
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
@@ -407,19 +404,20 @@ impl RemoteStorage for S3Bucket {
// and needs both ends to be exclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
None => format!("bytes={}-", start_inclusive),
});
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
key: from.0.to_owned(),
range,
..GetObjectRequest::default()
})
.await
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
async fn delete(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<()> {
let _guard = self
.concurrency_limiter
.acquire()
@@ -429,10 +427,11 @@ impl RemoteStorage for S3Bucket {
metrics::inc_delete_object();
self.client
.delete_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(path))
.send()
.delete_object(DeleteObjectRequest {
bucket: self.bucket_name.clone(),
key: remote_object_id.0.to_owned(),
..DeleteObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_delete_object_fail();
@@ -441,3 +440,181 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
}
#[cfg(test)]
mod tests {
use tempfile::tempdir;
use super::*;
#[test]
fn test_download_destination() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let local_path = workdir.join("one").join("two").join("test_name");
let relative_path = local_path.strip_prefix(&workdir)?;
let key = RemoteObjectId(format!(
"{}{}",
REMOTE_STORAGE_PREFIX_SEPARATOR,
relative_path
.iter()
.map(|segment| segment.to_str().unwrap())
.collect::<Vec<_>>()
.join(&REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
));
assert_eq!(
local_path,
download_destination(&key, &workdir, None),
"Download destination should consist of s3 path joined with the workdir prefix"
);
Ok(())
}
#[test]
fn storage_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let segment_1 = "matching";
let segment_2 = "file";
let local_path = &workdir.join(segment_1).join(segment_2);
let storage = dummy_storage(workdir);
let expected_key = RemoteObjectId(format!(
"{}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_1}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_2}",
storage.prefix_in_bucket.as_deref().unwrap_or_default(),
));
let actual_key = storage
.remote_object_id(local_path)
.expect("Matching path should map to S3 path normally");
assert_eq!(
expected_key,
actual_key,
"S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
);
Ok(())
}
#[test]
fn storage_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn storage_path_error(storage: &S3Bucket, mismatching_path: &Path) -> String {
match storage.remote_object_id(mismatching_path) {
Ok(wrong_key) => panic!(
"Expected path '{}' to error, but got S3 key: {:?}",
mismatching_path.display(),
wrong_key,
),
Err(e) => e.to_string(),
}
}
let workdir = tempdir()?.path().to_owned();
let storage = dummy_storage(workdir.clone());
let error_message = storage_path_error(&storage, &workdir);
assert!(
error_message.contains("Prefix and the path are equal"),
"Message '{}' does not contain the required string",
error_message
);
let mismatching_path = PathBuf::from("somewhere").join("else");
let error_message = storage_path_error(&storage, &mismatching_path);
assert!(
error_message.contains(mismatching_path.to_str().unwrap()),
"Error should mention wrong path"
);
assert!(
error_message.contains(workdir.to_str().unwrap()),
"Error should mention server workdir"
);
assert!(
error_message.contains("is not prefixed with"),
"Message '{}' does not contain a required string",
error_message
);
Ok(())
}
#[test]
fn local_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = dummy_storage(workdir.clone());
let timeline_dir = workdir.join("timelines").join("test_timeline");
let relative_timeline_path = timeline_dir.strip_prefix(&workdir)?;
let s3_key = create_s3_key(
&relative_timeline_path.join("not a metadata"),
storage.prefix_in_bucket.as_deref(),
);
assert_eq!(
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
storage
.local_path(&s3_key)
.expect("For a valid input, valid S3 info should be parsed"),
"Should be able to parse metadata out of the correctly named remote delta file"
);
let s3_key = create_s3_key(
&relative_timeline_path.join("metadata"),
storage.prefix_in_bucket.as_deref(),
);
assert_eq!(
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
storage
.local_path(&s3_key)
.expect("For a valid input, valid S3 info should be parsed"),
"Should be able to parse metadata out of the correctly named remote metadata file"
);
Ok(())
}
#[test]
fn download_destination_matches_original_path() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let original_path = workdir
.join("timelines")
.join("some_timeline")
.join("some name");
let dummy_storage = dummy_storage(workdir);
let key = dummy_storage.remote_object_id(&original_path)?;
let download_destination = dummy_storage.local_path(&key)?;
assert_eq!(
original_path, download_destination,
"'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
);
Ok(())
}
fn dummy_storage(workdir: PathBuf) -> S3Bucket {
S3Bucket {
workdir,
client: S3Client::new("us-east-1".parse().unwrap()),
bucket_name: "dummy-bucket".to_string(),
prefix_in_bucket: Some("dummy_prefix/".to_string()),
concurrency_limiter: Semaphore::new(1),
}
}
fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> RemoteObjectId {
RemoteObjectId(relative_file_path.iter().fold(
prefix.unwrap_or_default().to_string(),
|mut path_string, segment| {
path_string.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
path_string.push_str(segment.to_str().unwrap());
path_string
},
))
}
}

View File

@@ -22,40 +22,3 @@ pub struct TimelineCreateRequest {
// If not passed, it is assigned to the beginning of commit_lsn segment.
pub local_start_lsn: Option<Lsn>,
}
fn lsn_invalid() -> Lsn {
Lsn::INVALID
}
/// Data about safekeeper's timeline, mirrors broker.proto.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub flush_lsn: Lsn,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub commit_lsn: Lsn,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub backup_lsn: Lsn,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub remote_consistent_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub peer_horizon_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")]
pub local_start_lsn: Lsn,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}

View File

@@ -1,3 +0,0 @@
*.dot
*.png
*.svg

View File

@@ -1,8 +0,0 @@
[package]
name = "tenant_size_model"
version = "0.1.0"
edition = "2021"
publish = false
[dependencies]
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -1,13 +0,0 @@
all: 1.svg 2.svg 3.svg 4.svg 1.png 2.png 3.png 4.png
../../target/debug/tenant_size_model: Cargo.toml src/main.rs src/lib.rs
cargo build --bin tenant_size_model
%.svg: %.dot
dot -Tsvg $< > $@
%.png: %.dot
dot -Tpng $< > $@
%.dot: ../../target/debug/tenant_size_model
../../target/debug/tenant_size_model $* > $@

View File

@@ -1,7 +0,0 @@
# Logical size + WAL pricing
This is a simulator to calculate the tenant size in different scenarios,
using the "Logical size + WAL" method. Makefile produces diagrams used in a
private presentation:
https://docs.google.com/presentation/d/1OapE4k11xmcwMh7I7YvNWGC63yCRLh6udO9bXZ-fZmo/edit?usp=sharing

View File

@@ -1,382 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
/// Pricing model or history size builder.
///
/// Maintains knowledge of the branches and their modifications. Generic over the branch name key
/// type.
pub struct Storage<K: 'static> {
segments: Vec<Segment>,
/// Mapping from the branch name to the index of a segment describing it's latest state.
branches: HashMap<K, usize>,
}
/// Snapshot of a branch.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment {
/// Previous segment index into ['Storage::segments`], if any.
parent: Option<usize>,
/// Description of how did we get to this state.
///
/// Mainly used in the original scenarios 1..=4 with insert, delete and update. Not used when
/// modifying a branch directly.
pub op: Cow<'static, str>,
/// LSN before this state
start_lsn: u64,
/// LSN at this state
pub end_lsn: u64,
/// Logical size before this state
start_size: u64,
/// Logical size at this state. Can be None in the last Segment of a branch.
pub end_size: Option<u64>,
/// Indices to [`Storage::segments`]
///
/// FIXME: this could be an Option<usize>
children_after: Vec<usize>,
/// Determined by `retention_period` given to [`Storage::calculate`]
pub needed: bool,
}
//
//
//
//
// *-g--*---D--->
// /
// /
// / *---b----*-B--->
// / /
// / /
// -----*--e---*-----f----* C
// E \
// \
// *--a---*---A-->
//
// If A and B need to be retained, is it cheaper to store
// snapshot at C+a+b, or snapshots at A and B ?
//
// If D also needs to be retained, which is cheaper:
//
// 1. E+g+e+f+a+b
// 2. D+C+a+b
// 3. D+A+B
/// [`Segment`] which has had it's size calculated.
pub struct SegmentSize {
pub seg_id: usize,
pub method: SegmentMethod,
this_size: u64,
pub children: Vec<SegmentSize>,
}
impl SegmentSize {
fn total(&self) -> u64 {
self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
}
pub fn total_children(&self) -> u64 {
if self.method == SnapshotAfter {
self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
} else {
self.children.iter().fold(0, |acc, x| acc + x.total())
}
}
}
/// Different methods to retain history from a particular state
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SegmentMethod {
SnapshotAfter,
Wal,
WalNeeded,
Skipped,
}
use SegmentMethod::*;
impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
/// Creates a new storage with the given default branch name.
pub fn new(initial_branch: K) -> Storage<K> {
let init_segment = Segment {
op: "".into(),
needed: false,
parent: None,
start_lsn: 0,
end_lsn: 0,
start_size: 0,
end_size: Some(0),
children_after: Vec::new(),
};
Storage {
segments: vec![init_segment],
branches: HashMap::from([(initial_branch, 0)]),
}
}
/// Advances the branch with a new point, at given LSN.
pub fn insert_point<Q: ?Sized>(
&mut self,
branch: &Q,
op: Cow<'static, str>,
lsn: u64,
size: Option<u64>,
) where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
let lastseg_id = *self.branches.get(branch).unwrap();
let newseg_id = self.segments.len();
let lastseg = &mut self.segments[lastseg_id];
assert!(lsn > lastseg.end_lsn);
let newseg = Segment {
op,
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lsn,
start_size: lastseg.end_size.unwrap(),
end_size: size,
children_after: Vec::new(),
needed: false,
};
lastseg.children_after.push(newseg_id);
self.segments.push(newseg);
*self.branches.get_mut(branch).expect("read already") = newseg_id;
}
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
pub fn modify_branch<Q: ?Sized>(
&mut self,
branch: &Q,
op: Cow<'static, str>,
lsn_bytes: u64,
size_bytes: i64,
) where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
let lastseg_id = *self.branches.get(branch).unwrap();
let newseg_id = self.segments.len();
let lastseg = &mut self.segments[lastseg_id];
let newseg = Segment {
op,
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lastseg.end_lsn + lsn_bytes,
start_size: lastseg.end_size.unwrap(),
end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
children_after: Vec::new(),
needed: false,
};
lastseg.children_after.push(newseg_id);
self.segments.push(newseg);
*self.branches.get_mut(branch).expect("read already") = newseg_id;
}
pub fn insert<Q: ?Sized>(&mut self, branch: &Q, bytes: u64)
where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
self.modify_branch(branch, "insert".into(), bytes, bytes as i64);
}
pub fn update<Q: ?Sized>(&mut self, branch: &Q, bytes: u64)
where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
self.modify_branch(branch, "update".into(), bytes, 0i64);
}
pub fn delete<Q: ?Sized>(&mut self, branch: &Q, bytes: u64)
where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
self.modify_branch(branch, "delete".into(), bytes, -(bytes as i64));
}
/// Panics if the parent branch cannot be found.
pub fn branch<Q: ?Sized>(&mut self, parent: &Q, name: K)
where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
// Find the right segment
let branchseg_id = *self
.branches
.get(parent)
.expect("should had found the parent by key");
let _branchseg = &mut self.segments[branchseg_id];
// Create branch name for it
self.branches.insert(name, branchseg_id);
}
pub fn calculate(&mut self, retention_period: u64) -> SegmentSize {
// Phase 1: Mark all the segments that need to be retained
for (_branch, &last_seg_id) in self.branches.iter() {
let last_seg = &self.segments[last_seg_id];
let cutoff_lsn = last_seg.start_lsn.saturating_sub(retention_period);
let mut seg_id = last_seg_id;
loop {
let seg = &mut self.segments[seg_id];
if seg.end_lsn < cutoff_lsn {
break;
}
seg.needed = true;
if let Some(prev_seg_id) = seg.parent {
seg_id = prev_seg_id;
} else {
break;
}
}
}
// Phase 2: For each oldest segment in a chain that needs to be retained,
// calculate if we should store snapshot or WAL
self.size_from_snapshot_later(0)
}
fn size_from_wal(&self, seg_id: usize) -> SegmentSize {
let seg = &self.segments[seg_id];
let this_size = seg.end_lsn - seg.start_lsn;
let mut children = Vec::new();
// try both ways
for &child_id in seg.children_after.iter() {
// try each child both ways
let child = &self.segments[child_id];
let p1 = self.size_from_wal(child_id);
let p = if !child.needed {
let p2 = self.size_from_snapshot_later(child_id);
if p1.total() < p2.total() {
p1
} else {
p2
}
} else {
p1
};
children.push(p);
}
SegmentSize {
seg_id,
method: if seg.needed { WalNeeded } else { Wal },
this_size,
children,
}
}
fn size_from_snapshot_later(&self, seg_id: usize) -> SegmentSize {
// If this is needed, then it's time to do the snapshot and continue
// with wal method.
let seg = &self.segments[seg_id];
//eprintln!("snap: seg{}: {} needed: {}", seg_id, seg.children_after.len(), seg.needed);
if seg.needed {
let mut children = Vec::new();
for &child_id in seg.children_after.iter() {
// try each child both ways
let child = &self.segments[child_id];
let p1 = self.size_from_wal(child_id);
let p = if !child.needed {
let p2 = self.size_from_snapshot_later(child_id);
if p1.total() < p2.total() {
p1
} else {
p2
}
} else {
p1
};
children.push(p);
}
SegmentSize {
seg_id,
method: WalNeeded,
this_size: seg.start_size,
children,
}
} else {
// If any of the direct children are "needed", need to be able to reconstruct here
let mut children_needed = false;
for &child in seg.children_after.iter() {
let seg = &self.segments[child];
if seg.needed {
children_needed = true;
break;
}
}
let method1 = if !children_needed {
let mut children = Vec::new();
for child in seg.children_after.iter() {
children.push(self.size_from_snapshot_later(*child));
}
Some(SegmentSize {
seg_id,
method: Skipped,
this_size: 0,
children,
})
} else {
None
};
// If this a junction, consider snapshotting here
let method2 = if children_needed || seg.children_after.len() >= 2 {
let mut children = Vec::new();
for child in seg.children_after.iter() {
children.push(self.size_from_wal(*child));
}
Some(SegmentSize {
seg_id,
method: SnapshotAfter,
this_size: seg.end_size.unwrap(),
children,
})
} else {
None
};
match (method1, method2) {
(None, None) => panic!(),
(Some(method), None) => method,
(None, Some(method)) => method,
(Some(method1), Some(method2)) => {
if method1.total() < method2.total() {
method1
} else {
method2
}
}
}
}
}
pub fn into_segments(self) -> Vec<Segment> {
self.segments
}
}

View File

@@ -1,268 +0,0 @@
//! Tenant size model testing ground.
//!
//! Has a number of scenarios and a `main` for invoking these by number, calculating the history
//! size, outputs graphviz graph. Makefile in directory shows how to use graphviz to turn scenarios
//! into pngs.
use tenant_size_model::{Segment, SegmentSize, Storage};
// Main branch only. Some updates on it.
fn scenario_1() -> (Vec<Segment>, SegmentSize) {
// Create main branch
let mut storage = Storage::new("main");
// Bulk load 5 GB of data to it
storage.insert("main", 5_000);
// Stream of updates
for _ in 0..5 {
storage.update("main", 1_000);
}
let size = storage.calculate(1000);
(storage.into_segments(), size)
}
// Main branch only. Some updates on it.
fn scenario_2() -> (Vec<Segment>, SegmentSize) {
// Create main branch
let mut storage = Storage::new("main");
// Bulk load 5 GB of data to it
storage.insert("main", 5_000);
// Stream of updates
for _ in 0..5 {
storage.update("main", 1_000);
}
// Branch
storage.branch("main", "child");
storage.update("child", 1_000);
// More updates on parent
storage.update("main", 1_000);
let size = storage.calculate(1000);
(storage.into_segments(), size)
}
// Like 2, but more updates on main
fn scenario_3() -> (Vec<Segment>, SegmentSize) {
// Create main branch
let mut storage = Storage::new("main");
// Bulk load 5 GB of data to it
storage.insert("main", 5_000);
// Stream of updates
for _ in 0..5 {
storage.update("main", 1_000);
}
// Branch
storage.branch("main", "child");
storage.update("child", 1_000);
// More updates on parent
for _ in 0..5 {
storage.update("main", 1_000);
}
let size = storage.calculate(1000);
(storage.into_segments(), size)
}
// Diverged branches
fn scenario_4() -> (Vec<Segment>, SegmentSize) {
// Create main branch
let mut storage = Storage::new("main");
// Bulk load 5 GB of data to it
storage.insert("main", 5_000);
// Stream of updates
for _ in 0..5 {
storage.update("main", 1_000);
}
// Branch
storage.branch("main", "child");
storage.update("child", 1_000);
// More updates on parent
for _ in 0..8 {
storage.update("main", 1_000);
}
let size = storage.calculate(1000);
(storage.into_segments(), size)
}
fn scenario_5() -> (Vec<Segment>, SegmentSize) {
let mut storage = Storage::new("a");
storage.insert("a", 5000);
storage.branch("a", "b");
storage.update("b", 4000);
storage.update("a", 2000);
storage.branch("a", "c");
storage.insert("c", 4000);
storage.insert("a", 2000);
let size = storage.calculate(5000);
(storage.into_segments(), size)
}
fn scenario_6() -> (Vec<Segment>, SegmentSize) {
use std::borrow::Cow;
const NO_OP: Cow<'static, str> = Cow::Borrowed("");
let branches = [
Some(0x7ff1edab8182025f15ae33482edb590a_u128),
Some(0xb1719e044db05401a05a2ed588a3ad3f),
Some(0xb68d6691c895ad0a70809470020929ef),
];
// compared to other scenarios, this one uses bytes instead of kB
let mut storage = Storage::new(None);
storage.branch(&None, branches[0]); // at 0
storage.modify_branch(&branches[0], NO_OP, 108951064, 43696128); // at 108951064
storage.branch(&branches[0], branches[1]); // at 108951064
storage.modify_branch(&branches[1], NO_OP, 15560408, -1851392); // at 124511472
storage.modify_branch(&branches[0], NO_OP, 174464360, -1531904); // at 283415424
storage.branch(&branches[0], branches[2]); // at 283415424
storage.modify_branch(&branches[2], NO_OP, 15906192, 8192); // at 299321616
storage.modify_branch(&branches[0], NO_OP, 18909976, 32768); // at 302325400
let size = storage.calculate(100_000);
(storage.into_segments(), size)
}
fn main() {
let args: Vec<String> = std::env::args().collect();
let scenario = if args.len() < 2 { "1" } else { &args[1] };
let (segments, size) = match scenario {
"1" => scenario_1(),
"2" => scenario_2(),
"3" => scenario_3(),
"4" => scenario_4(),
"5" => scenario_5(),
"6" => scenario_6(),
other => {
eprintln!("invalid scenario {}", other);
std::process::exit(1);
}
};
graphviz_tree(&segments, &size);
}
fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
use tenant_size_model::SegmentMethod::*;
let seg_id = node.seg_id;
let seg = segments.get(seg_id).unwrap();
let lsn = seg.end_lsn;
let size = seg.end_size.unwrap_or(0);
let method = node.method;
println!(" {{");
println!(" node [width=0.1 height=0.1 shape=oval]");
let tenant_size = node.total_children();
let penwidth = if seg.needed { 6 } else { 3 };
let x = match method {
SnapshotAfter =>
format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" style=filled penwidth={penwidth}"),
Wal =>
format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
WalNeeded =>
format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
Skipped =>
format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"gray\" penwidth={penwidth}"),
};
println!(" \"seg{seg_id}\" [{x}]");
println!(" }}");
// Recurse. Much of the data is actually on the edge
for child in node.children.iter() {
let child_id = child.seg_id;
graphviz_recurse(segments, child);
let edge_color = match child.method {
SnapshotAfter => "gray",
Wal => "black",
WalNeeded => "black",
Skipped => "gray",
};
println!(" {{");
println!(" edge [] ");
print!(" \"seg{seg_id}\" -> \"seg{child_id}\" [");
print!("color={edge_color}");
if child.method == WalNeeded {
print!(" penwidth=6");
}
if child.method == Wal {
print!(" penwidth=3");
}
let next = segments.get(child_id).unwrap();
if next.op.is_empty() {
print!(
" label=\"{} / {}\"",
next.end_lsn - seg.end_lsn,
(next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
);
} else {
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
}
println!("]");
println!(" }}");
}
}
fn graphviz_tree(segments: &[Segment], tree: &SegmentSize) {
println!("digraph G {{");
println!(" fontname=\"Helvetica,Arial,sans-serif\"");
println!(" node [fontname=\"Helvetica,Arial,sans-serif\"]");
println!(" edge [fontname=\"Helvetica,Arial,sans-serif\"]");
println!(" graph [center=1 rankdir=LR]");
println!(" edge [dir=none]");
graphviz_recurse(segments, tree);
println!("}}");
}
#[test]
fn scenarios_return_same_size() {
type ScenarioFn = fn() -> (Vec<Segment>, SegmentSize);
let truths: &[(u32, ScenarioFn, _)] = &[
(line!(), scenario_1, 8000),
(line!(), scenario_2, 9000),
(line!(), scenario_3, 13000),
(line!(), scenario_4, 16000),
(line!(), scenario_5, 17000),
(line!(), scenario_6, 333_792_000),
];
for (line, scenario, expected) in truths {
let (_, size) = scenario();
assert_eq!(*expected, size.total_children(), "scenario on line {line}");
}
}

Some files were not shown because too many files have changed in this diff Show More