Compare commits


10 Commits

Author  SHA1  Message  Date
Konstantin Knizhnik  7f67f65d92  Use access counter for file cache pages  2022-11-04 12:55:21 +02:00
Konstantin Knizhnik  81527300ef  Use access counter for file cache pages  2022-11-04 12:22:47 +02:00
Konstantin Knizhnik  ba46de96eb  Cache reconstructed pages on disk  2022-11-03 19:57:27 +02:00
Konstantin Knizhnik  e8dec662e6  Cache reconstructed pages on disk  2022-11-03 19:01:03 +02:00
Konstantin Knizhnik  ebf1972ea4  Make clippy happy  2022-10-31 23:00:54 +03:00
Joonas Koivunen  35890bb293  suggested refactoring to err out awaiting  2022-10-31 22:32:37 +03:00
Konstantin Knizhnik  8769fef1a5  Fix style  2022-10-31 19:59:34 +03:00
Konstantin Knizhnik  7452f91d5a  Replace Arc<Bytes> with Bytes because Bytes maintains its own reference counter  2022-10-31 19:43:38 +03:00
Konstantin Knizhnik  6a50e1f76a  Add image layer cache implementation  2022-10-29 14:29:59 +03:00
Konstantin Knizhnik  0c78aa7589  Implement page image cache with invalidation mechanism  2022-10-28 22:06:54 +03:00
549 changed files with 32980 additions and 94364 deletions


@@ -4,7 +4,7 @@
hakari-package = "workspace_hack" hakari-package = "workspace_hack"
# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above. # Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
dep-format-version = "4" dep-format-version = "2"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver. # Hakari works much better with the new feature resolver.


@@ -14,11 +14,8 @@
!pgxn/ !pgxn/
!proxy/ !proxy/
!safekeeper/ !safekeeper/
!storage_broker/
!trace/
!vendor/postgres-v14/ !vendor/postgres-v14/
!vendor/postgres-v15/ !vendor/postgres-v15/
!workspace_hack/ !workspace_hack/
!neon_local/ !neon_local/
!scripts/ninstall.sh !scripts/ninstall.sh
!vm-cgconfig.conf


@@ -10,12 +10,11 @@
<!-- List everything that should be done **before** release, any issues / setting changes / etc --> <!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release ### Checklist after release
- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them).
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files)) - [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel - [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true) - [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some) - [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1) - [ ] Check [cloud SLO dashboard](https://observer.zenith.tech/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time) - [ ] Check [compute startup metrics dashboard](https://observer.zenith.tech/d/5OkYJEmVz/compute-startup-time)
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc --> <!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->


@@ -15,47 +15,25 @@ outputs:
report-url: report-url:
description: 'Allure report URL' description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }} value: ${{ steps.generate-report.outputs.report-url }}
report-json-url:
description: 'Allure report JSON URL'
value: ${{ steps.generate-report.outputs.report-json-url }}
runs: runs:
using: "composite" using: "composite"
steps: steps:
# We're using some of env variables quite offen, so let's set them once.
#
# It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
#
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
#
- name: Set common environment variables
shell: bash -euxo pipefail {0}
run: |
echo "BUILD_TYPE=${BUILD_TYPE}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
env:
BUILD_TYPE: ${{ inputs.build_type }}
BUCKET: neon-github-public-dev
TEST_OUTPUT: /tmp/test_output
- name: Validate input parameters - name: Validate input parameters
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1 exit 1
fi fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo >&2 "inputs.test_selection must be set for 'store' action" echo 2>&1 "inputs.test_selection must be set for 'store' action"
exit 2 exit 2
fi fi
- name: Calculate variables - name: Calculate key
id: calculate-vars id: calculate-key
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
@@ -63,22 +41,14 @@ runs:
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number} key=pr-${pr_number}
elif [ "${GITHUB_REF_NAME}" = "main" ]; then elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then
# Shortcut for a special branch # Shortcut for a special branch
key=main key=main
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for a special branch
key=release
else else
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-") key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
fi fi
echo "KEY=${key}" >> $GITHUB_OUTPUT echo "KEY=${key}" >> $GITHUB_OUTPUT
# Sanitize test selection to remove `/` and any other special characters
# Use printf instead of echo to avoid having `\n` at the end of the string
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3 - uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }} if: ${{ inputs.action == 'generate' }}
with: with:
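
The tr -c "[:alnum:]._-" "-" calls above (for the branch-derived key and for the sanitized test selection) replace every character outside the allowed set with a dash, so the result is safe to embed in an S3 prefix. A quick illustration with made-up inputs; printf is used instead of echo so the trailing newline is not turned into a dash:

    printf 'feature/page cache' | tr -c "[:alnum:]._-" "-"    # -> feature-page-cache
    printf 'test_runner/regress' | tr -c "[:alnum:]._-" "-"   # -> test_runner-regress
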
@@ -98,15 +68,16 @@ runs:
rm -f ${ALLURE_ZIP} rm -f ${ALLURE_ZIP}
fi fi
env: env:
ALLURE_VERSION: 2.21.0 ALLURE_VERSION: 2.19.0
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464
- name: Upload Allure results - name: Upload Allure results
if: ${{ inputs.action == 'store' }} if: ${{ inputs.action == 'store' }}
env: env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
# Add metadata # Add metadata
@@ -124,22 +95,22 @@ runs:
EOF EOF
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
TEST_SELECTION=${{ inputs.test_selection }} TEST_SELECTION=${{ inputs.test_selection }}
BUILD_TYPE=${BUILD_TYPE} BUILD_TYPE=${{ inputs.build_type }}
EOF EOF
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst" ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0 ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd . tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}" aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
# Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this # Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
- name: Acquire Allure lock - name: Acquire Allure lock
if: ${{ inputs.action == 'generate' }} if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
env: env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} BUCKET: neon-github-public-dev
run: | run: |
LOCK_TIMEOUT=300 # seconds LOCK_TIMEOUT=300 # seconds
@@ -152,12 +123,12 @@ runs:
fi fi
sleep 1 sleep 1
done done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}" aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# A double-check that exactly WE have acquired the lock # A double-check that exactly WE have acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
break break
fi fi
done done
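
The "Acquire Allure lock" step above is the improvised lock the comment mentions: write your own identity to a well-known S3 key, read it back, and proceed only if you still see your own value; the release step later deletes the key only if it still owns it. A stripped-down sketch of that acquire/verify/release pattern, with placeholder bucket and key names rather than the workflow's real ones:

    #!/usr/bin/env bash
    set -euo pipefail

    BUCKET=example-bucket                    # placeholder
    LOCK_FILE=reports/example-key/lock.txt   # placeholder
    OWNER="${GITHUB_RUN_ID:-local}-${GITHUB_RUN_ATTEMPT:-1}"

    acquire_lock() {
        for _ in $(seq 1 300); do
            # wait until nobody else holds the lock
            if ! aws s3 ls "s3://${BUCKET}/${LOCK_FILE}" > /dev/null 2>&1; then
                echo "${OWNER}" > lock.txt
                aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
                # double-check that we, and not a concurrent run, won the race
                aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" lock.txt
                if [ "$(cat lock.txt)" = "${OWNER}" ]; then
                    return 0
                fi
            fi
            sleep 1
        done
        return 1
    }

    release_lock() {
        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" lock.txt || return 0
        # delete only if we still own it
        if [ "$(cat lock.txt)" = "${OWNER}" ]; then
            aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
        fi
    }

This is best-effort rather than a true mutex, but it is enough to stop two concurrent report generations for the same key from clobbering each other.
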
@@ -166,8 +137,10 @@ runs:
if: ${{ inputs.action == 'generate' }} if: ${{ inputs.action == 'generate' }}
id: generate-report id: generate-report
env: env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }} RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
# Get previously uploaded data for this run # Get previously uploaded data for this run
@@ -203,42 +176,36 @@ runs:
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect # Generate redirect
cat <<EOF > ${TEST_OUTPUT}/allure/index.html cat <<EOF > ./index.html
<!DOCTYPE html> <!DOCTYPE html>
<meta charset="utf-8"> <meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title> <title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}"> <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF EOF
aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html" aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY} echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
- name: Release Allure lock - name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }} if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
env: env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }} BUCKET: neon-github-public-dev
run: | run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0 aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}" aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi fi
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
rm -rf ${TEST_OUTPUT}/allure
- uses: actions/github-script@v6 - uses: actions/github-script@v6
if: ${{ inputs.action == 'generate' && always() }} if: ${{ inputs.action == 'generate' && always() }}
env: env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }} REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
BUILD_TYPE: ${{ inputs.build_type }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }} SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with: with:
script: | script: |


@@ -37,7 +37,7 @@ runs:
echo 'SKIPPED=true' >> $GITHUB_OUTPUT echo 'SKIPPED=true' >> $GITHUB_OUTPUT
exit 0 exit 0
else else
echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1 exit 1
fi fi
fi fi
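
The two spellings that sit side by side in these hunks are not equivalent: echo >&2 "msg" writes the message to stderr, whereas echo 2>&1 "msg" only redirects echo's own (unused) stderr to stdout, so the message still goes to stdout. A quick way to observe the difference:

    # stdout is piped into wc -c, so only stderr output reaches the terminal directly
    echo >&2  "goes to stderr" | wc -c   # terminal shows the message; wc counts 0 bytes
    echo 2>&1 "goes to stdout" | wc -c   # terminal shows nothing extra; wc counts 15 bytes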


@@ -1,138 +0,0 @@
name: 'Create Branch'
description: 'Create Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
value: ${{ steps.change-password.outputs.dsn }}
branch_id:
description: 'Created Branch ID'
value: ${{ steps.create-branch.outputs.branch_id }}
runs:
using: "composite"
steps:
- name: Create New Branch
id: create-branch
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"branch\": {
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
},
\"endpoints\": [
{
\"type\": \"read_write\"
}
]
}")
if [ -z "${branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo >&2 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
exit 1
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
echo "branch_id=${branch_id}" >> $GITHUB_OUTPUT
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
echo "host=${host}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
- name: Get Role name
id: role-name
shell: bash -euxo pipefail {0}
run: |
roles=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
- name: Change Password
id: change-password
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
for i in $(seq 1 10); do
reset_password=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles/${ROLE_NAME}/reset_password" \
--request POST \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${reset_password}" ]; then
sleep 1
continue
fi
password=$(echo $reset_password | jq --raw-output '.role.password')
if [ "${password}" == "null" ]; then
sleep 1
continue
fi
echo "::add-mask::${password}"
break
done
if [ -z "${password}" ] || [ "${password}" == "null" ]; then
echo >&2 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
exit 1
fi
dsn="postgres://${ROLE_NAME}:${password}@${HOST}/neondb"
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}
HOST: ${{ steps.create-branch.outputs.host }}
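
The Create New Branch and Change Password steps of this removed action share the same retry shape: call the API, treat an empty response or a null jq field as a transient failure, sleep, and give up after ten attempts. A generic sketch of that pattern as a helper; the URL, jq filter, and API_KEY handling are illustrative rather than the action's exact code:

    # retry_json URL JQ_FILTER -- print the first non-null value of JQ_FILTER, or fail
    retry_json() {
        local url=$1 filter=$2 response="" value=""
        for _ in $(seq 1 10); do
            response=$(curl --silent "${url}" \
                --header "Accept: application/json" \
                --header "Authorization: Bearer ${API_KEY}")
            if [ -n "${response}" ]; then
                value=$(echo "${response}" | jq --raw-output "${filter}")
                if [ -n "${value}" ] && [ "${value}" != "null" ]; then
                    echo "${value}"
                    return 0
                fi
            fi
            sleep 1
        done
        echo >&2 "Giving up after 10 attempts, the latest response was: ${response}"
        return 1
    }

    # hypothetical usage, loosely mirroring the branch-creation step above:
    # branch_id=$(retry_json "https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}" '.branch.id')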


@@ -1,58 +0,0 @@
name: 'Delete Branch'
description: 'Delete Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
desctiption: 'ID of the branch to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Branch
# Do not try to delete a branch if .github/actions/neon-project-create
# or .github/actions/neon-branch-create failed before
if: ${{ inputs.project_id != '' && inputs.branch_id != '' }}
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
deleted_branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}" \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${deleted_branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $deleted_branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo >&2 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
exit 1
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ inputs.branch_id }}


@@ -5,22 +5,12 @@ inputs:
api_key: api_key:
desctiption: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
environment:
desctiption: 'dev (aka captest) or stage'
required: true
region_id: region_id:
desctiption: 'Region ID, if not set the project will be created in the default region' desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2 required: false
postgres_version:
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
provisioner:
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units:
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]'
outputs: outputs:
dsn: dsn:
description: 'Created Project DSN (for main database)' description: 'Created Project DSN (for main database)'
@@ -32,17 +22,38 @@ outputs:
runs: runs:
using: "composite" using: "composite"
steps: steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
REGION_ID=${REGION_ID:-eu-west-1}
;;
staging)
API_HOST=console.stage.neon.tech
REGION_ID=${REGION_ID:-us-east-1}
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}
- name: Create Neon Project - name: Create Neon Project
id: create-neon-project id: create-neon-project
# A shell without `set -x` to not to expose password/dsn in logs # A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0} shell: bash -euo pipefail {0}
run: | run: |
if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
fi
project=$(curl \ project=$(curl \
"https://${API_HOST}/api/v2/projects" \ "https://${API_HOST}/api/v1/projects" \
--fail \ --fail \
--header "Accept: application/json" \ --header "Accept: application/json" \
--header "Content-Type: application/json" \ --header "Content-Type: application/json" \
@@ -50,11 +61,8 @@ runs:
--data "{ --data "{
\"project\": { \"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\", \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION}, \"platform_id\": \"aws\",
\"region_id\": \"${REGION_ID}\", \"region_id\": \"${REGION_ID}\",
\"provisioner\": \"${PROVISIONER}\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": { } \"settings\": { }
} }
}") }")
@@ -62,19 +70,13 @@ runs:
# Mask password # Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')" echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
dsn=$(echo $project | jq --raw-output '.connection_uris[0].connection_uri') dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
echo "::add-mask::${dsn}" echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT echo "dsn=${dsn}" >> $GITHUB_OUTPUT
project_id=$(echo $project | jq --raw-output '.project.id') project_id=$(echo $project | jq --raw-output '.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "Project ${project_id} has been created"
env: env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }} API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }} API_HOST: ${{ steps.parse-input.outputs.api_host }}
POSTGRES_VERSION: ${{ inputs.postgres_version }} REGION_ID: ${{ steps.parse-input.outputs.region_id }}
PROVISIONER: ${{ inputs.provisioner }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}


@@ -5,31 +5,50 @@ inputs:
api_key: api_key:
desctiption: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
environment:
desctiption: 'dev (aka captest) or stage'
required: true
project_id: project_id:
desctiption: 'ID of the Project to delete' desctiption: 'ID of the Project to delete'
required: true required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs: runs:
using: "composite" using: "composite"
steps: steps:
- name: Delete Neon Project - name: Parse Input
# Do not try to delete a project if .github/actions/neon-project-create failed before id: parse-input
if: ${{ inputs.project_id != '' }}
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
curl \ case "${ENVIRONMENT}" in
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}" \ dev)
--fail \ API_HOST=console.dev.neon.tech
--request DELETE \ ;;
--header "Accept: application/json" \ staging)
--header "Content-Type: application/json" \ API_HOST=console.stage.neon.tech
--header "Authorization: Bearer ${API_KEY}" ;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "Project ${PROJECT_ID} has been deleted" echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Delete Neon Project
shell: bash -euxo pipefail {0}
run: |
# Allow PROJECT_ID to be empty/null for cases when .github/actions/neon-project-create failed
if [ -n "${PROJECT_ID}" ]; then
curl -X "POST" \
"https://${API_HOST}/api/v1/projects/${PROJECT_ID}/delete" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
fi
env: env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }} API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }} PROJECT_ID: ${{ inputs.project_id }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}


@@ -44,10 +44,6 @@ inputs:
description: 'Secret access key' description: 'Secret access key'
required: false required: false
default: '' default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
required: false
default: 'false'
runs: runs:
using: "composite" using: "composite"
@@ -59,22 +55,6 @@ runs:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Checkout - name: Checkout
if: inputs.needs_postgres_source == 'true' if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3 uses: actions/checkout@v3
@@ -93,19 +73,22 @@ runs:
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: ./scripts/pysync run: ./scripts/pysync
- name: Download compatibility snapshot for Postgres 14
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Run pytest - name: Run pytest
env: env:
NEON_BIN: /tmp/neon/bin NEON_BIN: /tmp/neon/bin
COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin
COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }} BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }} AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }} AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14 COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage') ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
run: | run: |
# PLATFORM will be embedded in the perf test report # PLATFORM will be embedded in the perf test report
@@ -128,12 +111,7 @@ runs:
exit 1 exit 1
fi fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n16 uses sixteen processes to run tests via pytest-xdist EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist
EXTRA_PARAMS="--dist=loadgroup $EXTRA_PARAMS"
fi fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
@@ -148,13 +126,6 @@ runs:
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS" EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run) cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then elif [[ "${{ inputs.build_type }}" == "release" ]]; then
@@ -175,9 +146,9 @@ runs:
# --verbose prints name of each test (helpful when there are # --verbose prints name of each test (helpful when there are
# multiple tests in one file) # multiple tests in one file)
# -rA prints summary in the end # -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running # -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests # in parallel and logs are mixed between different tests
#
mkdir -p $TEST_OUTPUT/allure/results mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \ "${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \ --junitxml=$TEST_OUTPUT/junit.xml \
@@ -197,12 +168,12 @@ runs:
uses: ./.github/actions/upload uses: ./.github/actions/upload
with: with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }} name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test # The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/ path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
prefix: latest prefix: latest
- name: Create Allure report - name: Create Allure report
if: ${{ !cancelled() }} if: always()
uses: ./.github/actions/allure-report uses: ./.github/actions/allure-report
with: with:
action: store action: store


@@ -23,7 +23,7 @@ runs:
mkdir -p $(dirname $ARCHIVE) mkdir -p $(dirname $ARCHIVE)
if [ -f ${ARCHIVE} ]; then if [ -f ${ARCHIVE} ]; then
echo >&2 "File ${ARCHIVE} already exist. Something went wrong before" echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before"
exit 1 exit 1
fi fi
@@ -33,10 +33,10 @@ runs:
elif [ -f ${SOURCE} ]; then elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE} time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo >&2 "${SOURCE} does not exist" echo 2>&1 "${SOURCE} does not exist"
exit 2 exit 2
else else
echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it" echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3 exit 3
fi fi


@@ -1,3 +1,5 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz neon_install.tar.gz
.neon_current_version .neon_current_version


@@ -3,6 +3,7 @@
localhost_warning = False localhost_warning = False
host_key_checking = False host_key_checking = False
timeout = 30 timeout = 30
collections_paths = ./collections
[ssh_connection] [ssh_connection]
ssh_args = -F ./ansible.ssh.cfg ssh_args = -F ./ansible.ssh.cfg
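
The new collections_paths = ./collections entry points Ansible at a repo-local collections directory instead of the system-wide install locations. A plausible way such a directory gets populated; the collection name is only an example (community.aws is the collection that ships the aws_ssm connection plugin used by the inventories below):

    # install a collection into the repo-local path that ansible.cfg now points at
    ansible-galaxy collection install community.aws -p ./collections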


@@ -91,15 +91,6 @@
tags: tags:
- pageserver - pageserver
# used in `pageserver.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
- name: upload systemd service definition - name: upload systemd service definition
ansible.builtin.template: ansible.builtin.template:
src: systemd/pageserver.service src: systemd/pageserver.service
@@ -126,8 +117,7 @@
shell: shell:
cmd: | cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/pageservers/$INSTANCE_ID
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
tags: tags:
- pageserver - pageserver
@@ -162,15 +152,6 @@
tags: tags:
- safekeeper - safekeeper
# used in `safekeeper.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
# in the future safekeepers should discover pageservers byself # in the future safekeepers should discover pageservers byself
# but currently use first pageserver that was discovered # but currently use first pageserver that was discovered
- name: set first pageserver var for safekeepers - name: set first pageserver var for safekeepers
@@ -205,7 +186,6 @@
shell: shell:
cmd: | cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/safekeepers/$INSTANCE_ID
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
tags: tags:
- safekeeper - safekeeper


@@ -25,7 +25,6 @@ mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/ docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/ docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/ docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/ docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/ docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/ docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
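
This packaging script copies the storage binaries out of a built Docker image into a neon_install/ tree; the ${ID} it references is a container created from that image earlier in the script, outside this hunk. A hypothetical sketch of the overall pattern, with an illustrative image name:

    # create a throwaway container from the image, copy artifacts out, then clean up
    ID=$(docker create neondatabase/neon:latest)   # image name is an assumption
    mkdir -p neon_install/bin/
    docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
    docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
    docker rm -f ${ID}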

.github/ansible/neon-stress.hosts.yaml (new file, 31 lines)

@@ -0,0 +1,31 @@
storage:
vars:
bucket_name: neon-storage-ireland
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-stress-console.local
etcd_endpoints: neon-stress-etcd.local:2379
safekeeper_enable_s3_offload: 'false'
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: neon-stress/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
neon-stress-ps-1:
console_region_id: aws-eu-west-1
neon-stress-ps-2:
console_region_id: aws-eu-west-1
safekeepers:
hosts:
neon-stress-sk-1:
console_region_id: aws-eu-west-1
neon-stress-sk-2:
console_region_id: aws-eu-west-1
neon-stress-sk-3:
console_region_id: aws-eu-west-1


@@ -1,48 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-ap-southeast-1
bucket_region: ap-southeast-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: ap-southeast-1
ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
console_region_id: aws-ap-southeast-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-064de8ea28bdb495b
pageserver-1.ap-southeast-1.aws.neon.tech:
ansible_host: i-0b180defcaeeb6b93
safekeepers:
hosts:
safekeeper-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-0d6f1dc5161eef894
safekeeper-2.ap-southeast-1.aws.neon.tech:
ansible_host: i-04fb63634e4679eb9
safekeeper-3.ap-southeast-1.aws.neon.tech:
ansible_host: i-05481f3bc88cfc2d4


@@ -1,50 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-eu-central-1
bucket_region: eu-central-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.tech:
ansible_host: i-0cd8d316ecbb715be
pageserver-1.eu-central-1.aws.neon.tech:
ansible_host: i-090044ed3d383fef0
pageserver-2.eu-central-1.aws.neon.tech:
ansible_host: i-033584edf3f4b6742
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.tech:
ansible_host: i-0b238612d2318a050
safekeeper-1.eu-central-1.aws.neon.tech:
ansible_host: i-07b9c45e5c2637cd4
safekeeper-2.eu-central-1.aws.neon.tech:
ansible_host: i-020257302c3c93d88


@@ -1,51 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.tech:
ansible_host: i-062227ba7f119eb8c
pageserver-1.us-east-2.aws.neon.tech:
ansible_host: i-0b3ec0afab5968938
pageserver-2.us-east-2.aws.neon.tech:
ansible_host: i-0d7a1c4325e71421d
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.tech:
ansible_host: i-0e94224750c57d346
safekeeper-1.us-east-2.aws.neon.tech:
ansible_host: i-06d113fb73bfddeb0
safekeeper-2.us-east-2.aws.neon.tech:
ansible_host: i-09f66c8e04afff2e8


@@ -1,53 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-west-2
bucket_region: us-west-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-west-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
console_region_id: aws-us-west-2-new
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-west-2.aws.neon.tech:
ansible_host: i-0d9f6dfae0e1c780d
pageserver-1.us-west-2.aws.neon.tech:
ansible_host: i-0c834be1dddba8b3f
pageserver-2.us-west-2.aws.neon.tech:
ansible_host: i-051642d372c0a4f32
pageserver-3.us-west-2.aws.neon.tech:
ansible_host: i-00c3844beb9ad1c6b
safekeepers:
hosts:
safekeeper-0.us-west-2.aws.neon.tech:
ansible_host: i-00719d8a74986fda6
safekeeper-1.us-west-2.aws.neon.tech:
ansible_host: i-074682f9d3c712e7c
safekeeper-2.us-west-2.aws.neon.tech:
ansible_host: i-042b7efb1729d7966

.github/ansible/production.hosts.yaml (new file, 33 lines)

@@ -0,0 +1,33 @@
---
storage:
vars:
console_mgmt_base_url: http://console-release.local
bucket_name: zenith-storage-oregon
bucket_region: us-west-2
etcd_endpoints: zenith-1-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: prod-1/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-1-ps-2:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
safekeepers:
hosts:
zenith-1-sk-1:
console_region_id: aws-us-west-2
zenith-1-sk-2:
console_region_id: aws-us-west-2
zenith-1-sk-3:
console_region_id: aws-us-west-2


@@ -1,10 +1,7 @@
#!/bin/sh #!/bin/sh
# fetch params from meta-data service # get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
# store fqdn hostname in var # store fqdn hostname in var
HOST=$(hostname -f) HOST=$(hostname -f)
@@ -19,10 +16,7 @@ cat <<EOF | tee /tmp/payload
"instance_id": "${INSTANCE_ID}", "instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}", "http_host": "${HOST}",
"http_port": 9898, "http_port": 9898,
"active": false, "active": false
"availability_zone_id": "${AZ_ID}",
"disk_size": ${DISK_SIZE},
"instance_type": "${INSTANCE_TYPE}"
} }
EOF EOF
@@ -30,7 +24,7 @@ EOF
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now # not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id') ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
# init pageserver # init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data


@@ -25,7 +25,7 @@ EOF
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now # not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id') ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
# init safekeeper # init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi fi


@@ -1,2 +1,3 @@
ansible_connection: aws_ssm ansible_connection: aws_ssm
ansible_aws_ssm_bucket_name: neon-dev-bucket
ansible_python_interpreter: /usr/bin/python3 ansible_python_interpreter: /usr/bin/python3


@@ -1,46 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

.github/ansible/staging.hosts.yaml (new file, 34 lines)

@@ -0,0 +1,34 @@
storage:
vars:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: us-stage/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-us-stage-ps-2:
console_region_id: aws-us-east-1
zenith-us-stage-ps-3:
console_region_id: aws-us-east-1
zenith-us-stage-ps-4:
console_region_id: aws-us-east-1
safekeepers:
hosts:
zenith-us-stage-sk-4:
console_region_id: aws-us-east-1
zenith-us-stage-sk-5:
console_region_id: aws-us-east-1
zenith-us-stage-sk-6:
console_region_id: aws-us-east-1


@@ -2,22 +2,10 @@ storage:
vars: vars:
bucket_name: neon-staging-storage-us-east-2 bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2 bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.build console_mgmt_base_url: http://console-staging.local
broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051 etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
pageserver_config_stub: pageserver_config_stub:
pg_distrib_dir: /usr/local pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage: remote_storage:
bucket_name: "{{ bucket_name }}" bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}" bucket_region: "{{ bucket_region }}"
@@ -26,23 +14,13 @@ storage:
hostname_suffix: "" hostname_suffix: ""
remote_user: ssm-user remote_user: ssm-user
ansible_aws_ssm_region: us-east-2 ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
console_region_id: aws-us-east-2 console_region_id: aws-us-east-2
sentry_environment: staging
children: children:
pageservers: pageservers:
hosts: hosts:
pageserver-0.us-east-2.aws.neon.build: pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691 ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
pageserver-2.us-east-2.aws.neon.build:
ansible_host: i-01e31cdf7e970586a
pageserver-3.us-east-2.aws.neon.build:
ansible_host: i-0602a0291365ef7cc
pageserver-99.us-east-2.aws.neon.build:
ansible_host: i-0c39491109bb88824
safekeepers: safekeepers:
hosts: hosts:
@@ -52,5 +30,3 @@ storage:
ansible_host: i-0171efc3604a7b907 ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build: safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce ansible_host: i-0de0b03a51676a6ce
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5


@@ -5,8 +5,8 @@ After=network.target auditd.service
[Service] [Service]
Type=simple Type=simple
User=pageserver User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }} Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed KillMode=mixed
KillSignal=SIGINT KillSignal=SIGINT


@@ -5,8 +5,8 @@ After=network.target auditd.service
[Service] [Service]
Type=simple Type=simple
User=safekeeper User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }} Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }} ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecReload=/bin/kill -HUP $MAINPID ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed KillMode=mixed
KillSignal=SIGINT KillSignal=SIGINT


@@ -1,75 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"


@@ -1,67 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
domain: "pg.neon.build"
sentryEnvironment: "staging"
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy-link pods
podLabels:
neon_service: proxy
neon_env: dev
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.cloud.stage.neon.tech"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram-legacy
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,40 +1,20 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
-authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
-neon_service: proxy-scram
-neon_env: dev
-neon_region: us-east-2
+zenith_service: proxy-scram
+zenith_env: dev
+zenith_region: us-east-2
zenith_region_slug: us-east-2
exposedService:
annotations:
@@ -42,7 +22,6 @@ exposedService:
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
@@ -50,28 +29,3 @@ exposedService:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"


@@ -0,0 +1,26 @@
fullnameOverride: "neon-stress-proxy-scram"
settings:
authBackend: "console"
authEndpoint: "http://neon-stress-console.local/management/api/v2"
domain: "*.stress.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack


@@ -0,0 +1,35 @@
fullnameOverride: "neon-stress-proxy"
settings:
authBackend: "link"
authEndpoint: "https://console.dev.neon.tech/authenticate_proxy_request/"
uri: "https://console.dev.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-stress-proxy.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.dev.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack


@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.ap-southeast-1.aws.neon.tech"
extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: ap-southeast-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"


@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.eu-central-1.aws.neon.tech"
extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"


@@ -1,58 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
domain: "pg.neon.tech"
sentryEnvironment: "production"
# -- Additional labels for zenith-proxy pods
podLabels:
neon_service: proxy
neon_env: production
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-2.aws.neon.tech"
extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"


@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.cloud.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"


@@ -0,0 +1,24 @@
settings:
authBackend: "console"
authEndpoint: "http://console-release.local/management/api/v2"
domain: "*.cloud.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack


@@ -0,0 +1,33 @@
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: proxy-release.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack


@@ -0,0 +1,31 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.cloud.stage.neon.tech"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: cloud.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

.github/helm-values/staging.proxy.yaml

@@ -0,0 +1,31 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack


@@ -1,14 +0,0 @@
## Describe your changes
## Issue ticket number and link
## Checklist before requesting a review
- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
## Checklist before merging
- [ ] Do not forget to reformat commit message to not include the above checklist


@@ -15,10 +15,12 @@ on:
workflow_dispatch: # adds ability to run this manually
inputs:
environment:
description: 'Environment to run remote tests on (dev or staging)'
required: false
region_id:
description: 'Use a particular region. If not set the default region will be used'
required: false
default: 'aws-us-east-2'
save_perf_report:
type: boolean
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
@@ -30,74 +32,102 @@ defaults:
concurrency: concurrency:
# Allow only one workflow per any non-`main` branch. # Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
bench: bench:
env: # this workflow runs on self hosteed runner
TEST_PG_BENCH_DURATIONS_MATRIX: "300" # it's environment is quite different from usual guthub runner
TEST_PG_BENCH_SCALES_MATRIX: "10,100" # probably the most important difference is that it doesn't start from clean workspace each time
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install # e g if you install system packages they are not cleaned up since you install them directly in host machine
DEFAULT_PG_VERSION: 14 # not a container or something
TEST_OUTPUT: /tmp/test_output # See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
BUILD_TYPE: remote runs-on: [self-hosted, zenith-benchmarker]
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ] env:
container: POSTGRES_DISTRIB_DIR: /usr/pgsql
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned DEFAULT_PG_VERSION: 14
options: --init
steps: steps:
- uses: actions/checkout@v3 - name: Checkout zenith repo
uses: actions/checkout@v3
- name: Download Neon artifact # actions/setup-python@v2 is not working correctly on self-hosted runners
uses: ./.github/actions/download # see https://github.com/actions/setup-python/issues/162
with: # and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
name: neon-${{ runner.os }}-release-artifact # so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
path: /tmp/neon/ # there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtuealenvs
prefix: latest - name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused there shouldn't be any troubles with install every time
./scripts/pysync
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
- name: Create Neon Project - name: Create Neon Project
id: create-neon-project id: create-neon-project
uses: ./.github/actions/neon-project-create uses: ./.github/actions/neon-project-create
with: with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} environment: ${{ github.event.inputs.environment || 'staging' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }} api_key: ${{ ( github.event.inputs.environment || 'staging' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run benchmark - name: Run benchmark
uses: ./.github/actions/run-python-test-set # pgbench is installed system wide from official repo
with: # https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
build_type: ${{ env.BUILD_TYPE }} # via
test_selection: performance # sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
run_in_parallel: false # [pgdg13]
save_perf_report: ${{ env.SAVE_PERF_REPORT }} # name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# Set --sparse-ordering option of pytest-order plugin # baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# to ensure tests are running in order of appears in the file. # enabled=1
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests # gpgcheck=0
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py # EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env: env:
# The pgbench test runs two tests of given duration against each scale.
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on self hosted runner
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Delete Neon Project - name: Delete Neon Project
if: ${{ always() }} if: ${{ always() }}
uses: ./.github/actions/neon-project-delete uses: ./.github/actions/neon-project-delete
with: with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }} project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }} api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel - name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }} if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
@@ -107,65 +137,22 @@ jobs:
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
generate-matrices:
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:
# - neon-captest-new: Freshly created project (1 CU)
# - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neon-captest-reuse: Reusing existing project
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
runs-on: ubuntu-latest
outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
steps:
- name: Generate matrix for pgbench benchmark
id: pgbench-compare-matrix
run: |
matrix='{
"platform": [
"neon-captest-new",
"neon-captest-reuse"
],
"db_size": [ "10gb" ],
"include": [
{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" }
]
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "platform": "rds-aurora", "db_size": "50gb"}]')
fi
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
- name: Generate matrix for OLAP benchmarks
id: olap-compare-matrix
run: |
matrix='{
"platform": [
"neon-captest-reuse"
]
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
pgbench-compare: pgbench-compare:
needs: [ generate-matrices ]
strategy: strategy:
fail-fast: false fail-fast: false
matrix: ${{fromJson(needs.generate-matrices.outputs.pgbench-compare-matrix)}} matrix:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
db_size: 50gb
- platform: neon-captest-prefetch
db_size: 50gb
- platform: rds-aurora
db_size: 50gb
env: env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m" TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
@@ -174,12 +161,12 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
-SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
+SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
-runs-on: [ self-hosted, us-east-2, x64 ]
+runs-on: dev
container:
-image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
+image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
@@ -200,14 +187,12 @@ jobs:
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project - name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform) if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
id: create-neon-project id: create-neon-project
uses: ./.github/actions/neon-project-create uses: ./.github/actions/neon-project-create
with: with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} environment: ${{ github.event.inputs.environment || 'dev' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }} api_key: ${{ ( github.event.inputs.environment || 'dev' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
- name: Set up Connection String - name: Set up Connection String
id: set-up-connstr id: set-up-connstr
@@ -216,17 +201,14 @@ jobs:
neon-captest-reuse) neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;; ;;
neon-captest-new | neon-captest-freetier) neon-captest-new | neon-captest-prefetch)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }} CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;; ;;
rds-aurora) rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;; ;;
*) *)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'" echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
exit 1 exit 1
;; ;;
esac esac
@@ -235,6 +217,14 @@ jobs:
psql ${CONNSTR} -c "SELECT version();" psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Benchmark init - name: Benchmark init
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
@@ -274,20 +264,21 @@ jobs:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report - name: Create Allure report
if: ${{ !cancelled() }} if: always()
uses: ./.github/actions/allure-report uses: ./.github/actions/allure-report
with: with:
action: generate action: generate
build_type: ${{ env.BUILD_TYPE }} build_type: ${{ env.BUILD_TYPE }}
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_CAPTEST_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
@@ -296,286 +287,3 @@ jobs:
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.
# Run this job only when pgbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
tpch-compare:
# TCP-H DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare & clickbench-compare.
# Run this job only when clickbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Run user examples
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

File diff suppressed because it is too large.

.github/workflows/codestyle.yml

@@ -0,0 +1,166 @@
name: Check code style and build
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# XXX: both OSes have rustup
# * https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md#rust-tools
# * https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#rust-tools
# this is all we need to install our toolchain later via rust-toolchain.toml
# so don't install any toolchain explicitly.
os: [ubuntu-latest, macos-latest]
timeout-minutes: 90
name: check codestyle rust and postgres
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Check formatting
run: cargo fmt --all -- --check
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
- name: Install macOS postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison openssl
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
if: matrix.os == 'macos-latest'
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14
shell: bash -euxo pipefail {0}
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15
shell: bash -euxo pipefail {0}
- name: Build neon extensions
run: make neon-pg-ext
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --locked --all --all-targets
check-rust-dependencies:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check every project module is covered by Hakari
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
shell: bash -euxo pipefail {0}
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run isort to ensure code format
run: poetry run isort --diff --check .
- name: Run black to ensure code format
run: poetry run black --diff --check .
- name: Run flake8 to ensure code format
run: poetry run flake8 .
- name: Run mypy to check types
run: poetry run mypy .


@@ -1,179 +0,0 @@
name: Neon Deploy dev
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'main'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
concurrency:
group: deploy-dev
cancel-in-progress: false
jobs:
deploy-storage-new:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
options: --user root --privileged
if: inputs.deployStorage
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1, us-east-2 ]
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
- name: Cleanup ansible folder
run: rm -rf ~/.ansible
deploy-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
deploy_link_proxy: true
deploy_legacy_scram_proxy: true
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-storage-broker-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployStorageBroker
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache


@@ -1,167 +0,0 @@
name: Neon Deploy prod
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'release'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
disclamerAcknowledged:
description: 'I confirm that there is an emergency and I can not use regular release workflow'
required: true
type: boolean
default: false
concurrency:
group: deploy-prod
cancel-in-progress: false
jobs:
deploy-prod-new:
runs-on: prod
container:
image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
options: --user root --privileged
if: inputs.deployStorage && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployProxy && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
deploy_link_proxy: true
deploy_legacy_scram_proxy: false
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
deploy_link_proxy: false
deploy_legacy_scram_proxy: true
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s


@@ -1,154 +0,0 @@
name: Check neon with extra platform builds
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-macos-build:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')
timeout-minutes: 90
runs-on: macos-latest
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Cache cargo deps
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15 -j$(nproc)
- name: Build neon extensions
run: make neon-pg-ext -j$(nproc)
- name: Run cargo build
run: cargo build --all --release
- name: Check that no warnings are produced
run: ./run_clippy.sh
gather-rust-build-stats:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats')
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# builds with incremental compilation produce partial results,
# so do not attempt to cache this build; also disable incremental compilation
CARGO_INCREMENTAL: 0
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
- name: Produce the build stats
run: cargo build --all --release --timings
- name: Upload the build stats
id: upload-stats
env:
BUCKET: neon-github-public-dev
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Publish build stats report
uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Build stats (release)`,
})


@@ -14,7 +14,7 @@ on:
 concurrency:
 # Allow only one workflow per any non-`main` branch.
-group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
+group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
 cancel-in-progress: true
 jobs:
@@ -23,7 +23,6 @@ jobs:
 runs-on: [ ubuntu-latest ]
 env:
-DEFAULT_PG_VERSION: 14
 TEST_OUTPUT: /tmp/test_output
 steps:
@@ -52,8 +51,8 @@ jobs:
 id: create-neon-project
 uses: ./.github/actions/neon-project-create
 with:
+environment: staging
 api_key: ${{ secrets.NEON_STAGING_API_KEY }}
-postgres_version: ${{ env.DEFAULT_PG_VERSION }}
 - name: Run pytest
 env:
@@ -64,7 +63,7 @@ jobs:
 run: |
 # Test framework expects we have psql binary;
 # but since we don't really need it in this test, let's mock it
-mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
+mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
 ./scripts/pytest \
 --junitxml=$TEST_OUTPUT/junit.xml \
 --tb=short \
@@ -76,6 +75,7 @@ jobs:
 if: ${{ always() }}
 uses: ./.github/actions/neon-project-delete
 with:
+environment: staging
 project_id: ${{ steps.create-neon-project.outputs.project_id }}
 api_key: ${{ secrets.NEON_STAGING_API_KEY }}


@@ -1,33 +0,0 @@
name: Create Release Branch
on:
schedule:
- cron: '0 10 * * 2'
jobs:
create_release_branch:
runs-on: [ubuntu-latest]
steps:
- name: Check out code
uses: actions/checkout@v3
with:
ref: main
- name: Get current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Create release branch
run: git checkout -b releases/${{ steps.date.outputs.date }}
- name: Push new branch
run: git push origin releases/${{ steps.date.outputs.date }}
- name: Create pull request into release
uses: thomaseizinger/create-pull-request@e3972219c86a56550fb70708d96800d8e24ba862 # 1.3.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
head: releases/${{ steps.date.outputs.date }}
base: release
title: Release ${{ steps.date.outputs.date }}

.gitmodules vendored

@@ -1,7 +1,7 @@
 [submodule "vendor/postgres-v14"]
 path = vendor/postgres-v14
 url = https://github.com/neondatabase/postgres.git
-branch = REL_14_STABLE_neon
+branch = main
 [submodule "vendor/postgres-v15"]
 path = vendor/postgres-v15
 url = https://github.com/neondatabase/postgres.git


@@ -1,11 +0,0 @@
/compute_tools/ @neondatabase/control-plane
/control_plane/ @neondatabase/compute @neondatabase/storage
/libs/pageserver_api/ @neondatabase/compute @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute
/libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/safekeepers
/pageserver/ @neondatabase/compute @neondatabase/storage
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

Cargo.lock generated

File diff suppressed because it is too large


@@ -1,3 +1,14 @@
+# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
+# build work with older cargo versions.
+#
+# We have this because as of this writing, the latest cargo Debian package
+# that's available is 1.56. (Confusingly, the Debian package version number
+# is 0.57, whereas 'cargo --version' says 1.56.)
+#
+# See https://tracker.debian.org/pkg/cargo for the current status of the
+# package. When that gets updated, we can remove this.
+cargo-features = ["named-profiles"]
 [workspace]
 members = [
 "compute_tools",
@@ -5,176 +16,15 @@ members = [
 "pageserver",
 "proxy",
 "safekeeper",
-"storage_broker",
 "workspace_hack",
-"trace",
 "libs/*",
 ]
-[workspace.package]
-edition = "2021"
-license = "Apache-2.0"
-## All dependency versions, used in the project
-[workspace.dependencies]
-anyhow = { version = "1.0", features = ["backtrace"] }
-async-stream = "0.3"
-async-trait = "0.1"
-atty = "0.2.14"
-aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "0.21.0"
-aws-smithy-http = "0.51.0"
-aws-types = "0.55"
-base64 = "0.13.0"
-bincode = "1.3"
-bindgen = "0.65"
-bstr = "1.0"
-byteorder = "1.4"
-bytes = "1.0"
-chrono = { version = "0.4", default-features = false, features = ["clock"] }
-clap = { version = "4.0", features = ["derive"] }
-close_fds = "0.3.2"
-comfy-table = "6.1"
-const_format = "0.2"
-crc32c = "0.6"
-crossbeam-utils = "0.8.5"
-either = "1.8"
-enum-map = "2.4.2"
-enumset = "1.0.12"
-fail = "0.5.0"
-fs2 = "0.4.3"
-futures = "0.3"
-futures-core = "0.3"
-futures-util = "0.3"
-git-version = "0.3"
-hashbrown = "0.13"
-hashlink = "0.8.1"
-hex = "0.4"
-hex-literal = "0.4"
-hmac = "0.12.1"
-hostname = "0.3.1"
-humantime = "2.1"
-humantime-serde = "1.1.1"
-hyper = { version = "0.14", features = ["http2", "tcp", "runtime", "http1"]}
-hyper-tungstenite = "0.9"
-itertools = "0.10"
-jsonwebtoken = "8"
-libc = "0.2"
-md5 = "0.7.0"
-memoffset = "0.8"
-nix = "0.26"
-notify = "5.0.0"
-num_cpus = "1.15"
-num-traits = "0.2.15"
-once_cell = "1.13"
-opentelemetry = "0.18.0"
-opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
-opentelemetry-semantic-conventions = "0.10.0"
-parking_lot = "0.12"
-pin-project-lite = "0.2"
-prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
-prost = "0.11"
-rand = "0.8"
-regex = "1.4"
-reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
-reqwest-middleware = "0.2.0"
-routerify = "3"
-rpds = "0.13"
-rustls = "0.20"
-rustls-pemfile = "1"
-rustls-split = "0.3"
-scopeguard = "1.1"
-sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1"
-serde_with = "2.0"
-sha2 = "0.10.2"
-signal-hook = "0.3"
-socket2 = "0.5"
-strum = "0.24"
-strum_macros = "0.24"
-svg_fmt = "0.4.1"
-sync_wrapper = "0.1.2"
-tar = "0.4"
-test-context = "0.1"
-thiserror = "1.0"
-tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
-tokio = { version = "1.17", features = ["macros"] }
-tokio-io-timeout = "1.2.0"
-tokio-postgres-rustls = "0.9.0"
-tokio-rustls = "0.23"
-tokio-stream = "0.1"
-tokio-util = { version = "0.7", features = ["io"] }
-toml = "0.7"
-toml_edit = "0.19"
-tonic = {version = "0.9", features = ["tls", "tls-roots"]}
-tracing = "0.1"
-tracing-opentelemetry = "0.18.0"
-tracing-subscriber = { version = "0.3", features = ["env-filter"] }
-url = "2.2"
-uuid = { version = "1.2", features = ["v4", "serde"] }
-walkdir = "2.3.2"
-webpki-roots = "0.23"
-x509-parser = "0.15"
-percent-encoding = "1.0"
-## TODO replace this with tracing
-env_logger = "0.10"
-log = "0.4"
-## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
-## Other git libraries
-heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
-## Local libraries
-compute_api = { version = "0.1", path = "./libs/compute_api/" }
-consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
-metrics = { version = "0.1", path = "./libs/metrics/" }
-pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
-postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
-postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
-postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
-pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
-remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
-safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
-storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
-tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
-tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
-utils = { version = "0.1", path = "./libs/utils/" }
-## Common library dependency
-workspace_hack = { version = "0.1", path = "./workspace_hack/" }
-## Build dependencies
-criterion = "0.4"
-rcgen = "0.10"
-rstest = "0.17"
-tempfile = "3.4"
-tonic-build = "0.9"
-# This is only needed for proxy's tests.
-# TODO: we should probably fork `tokio-postgres-rustls` instead.
-[patch.crates-io]
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-################# Binary contents sections
 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
 debug = true
-# disable debug symbols for all packages except this one to decrease binaries size
-[profile.release.package."*"]
-debug = false
 [profile.release-line-debug]
 inherits = "release"
 debug = 1 # true = 2 = all symbols, 1 = line only
@@ -226,3 +76,9 @@ inherits = "release"
 debug = false # true = 2 = all symbols, 1 = line only
 opt-level = "z"
 lto = true
+# This is only needed for proxy's tests.
+# TODO: we should probably fork `tokio-postgres-rustls` instead.
+[patch.crates-io]
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }


@@ -39,12 +39,12 @@ ARG CACHEPOT_BUCKET=neon-github-dev
 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
-COPY --chown=nonroot . .
+COPY . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
+&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
 && cachepot -s
 # Build final image
@@ -67,7 +67,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
@@ -79,7 +78,7 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
 RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
 && /usr/local/bin/pageserver -D /data/.neon/ --init \
 -c "id=1234" \
--c "broker_endpoint='http://storage_broker:50051'" \
+-c "broker_endpoints=['http://etcd:2379']" \
 -c "pg_distrib_dir='/usr/local/'" \
 -c "listen_pg_addr='0.0.0.0:6400'" \
 -c "listen_http_addr='0.0.0.0:9898'"


@@ -1,600 +0,0 @@
ARG PG_VERSION
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${PG_VERSION}" != "v14" ]; then \
# zstd is available only from PG15
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
fi && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
# Enable some of contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
protobuf-c-compiler xsltproc
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* /
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && \
cd build && \
cmake .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "unit-pg-build"
# compile unit extension
#
#########################################################################################
FROM build-deps AS unit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
# We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is built. So we need to adjust the path.
# This one-liner removes pgsql/ part of the path.
# NOTE: Other extensions that rely on MODULEDIR variable after building phase will need the same fix.
find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control
#########################################################################################
#
# Layer "vector-pg-build"
# compile pgvector extension
#
#########################################################################################
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
#########################################################################################
#
# Layer "pgjwt-pg-build"
# compile pgjwt extension
#
#########################################################################################
FROM build-deps AS pgjwt-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
#########################################################################################
#
# Layer "hypopg-pg-build"
# compile hypopg extension
#
#########################################################################################
FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "pg-hashids-pg-build"
# compile pg_hashids extension
#
#########################################################################################
FROM build-deps AS pg-hashids-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
#########################################################################################
#
# Layer "rum-pg-build"
# compile rum extension
#
#########################################################################################
FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
#########################################################################################
#
# Layer "pgtap-pg-build"
# compile pgTAP extension
#
#########################################################################################
FROM build-deps AS pgtap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
#########################################################################################
#
# Layer "ip4r-pg-build"
# compile ip4r extension
#
#########################################################################################
FROM build-deps AS ip4r-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \
echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
#########################################################################################
#
# Layer "prefix-pg-build"
# compile Prefix extension
#
#########################################################################################
FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \
echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
#########################################################################################
#
# Layer "hll-pg-build"
# compile hll extension
#
#########################################################################################
FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \
echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
#########################################################################################
#
# Layer "plpgsql-check-pg-build"
# compile plpgsql_check extension
#
#########################################################################################
FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \
echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
#########################################################################################
#
# Layer "timescaledb-pg-build"
# compile timescaledb extension
#
#########################################################################################
FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN apt-get update && \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
cd build && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control
#########################################################################################
#
# Layer "pg-hint-plan-pg-build"
# compile pg_hint_plan extension
#
#########################################################################################
FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
;; \
"v15") \
export PG_HINT_PLAN_VERSION=15_1_5_0 \
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgx` deps
#
#########################################################################################
FROM build-deps AS rust-extensions-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y curl libclang-dev cmake && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
cargo install --locked --version 0.7.3 cargo-pgx && \
/bin/bash -c 'cargo pgx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layer "pg-jsonschema-pg-build"
# Compile "pg_jsonschema" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-jsonschema-pg-build
# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023
# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
#########################################################################################
#
# Layer "pg-graphql-pg-build"
# Compile "pg_graphql" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-graphql-pg-build
# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch)
# Currently, bumping pgx to >= 0.7.2 causes "call to unsafe function" compilation errors in
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is based on the
# same 1.1 version we've used before.
RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \
cargo pgx install --release && \
# superuser = true is needed to enable the extension because it uses the untrusted C language
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
#########################################################################################
#
# Layer "pg-tiktoken-build"
# Compile "pg_tiktoken" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-tiktoken-pg-build
# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /sfcgal/* /
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# liblz4-1 for lz4
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
# libzstd1 for zstd
RUN apt update && \
apt install --no-install-recommends -y \
gdb \
locales \
libicu67 \
liblz4-1 \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 \
libsfcgal1 \
libxml2 \
libxslt1.1 \
libzstd1 \
procps && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

218
Dockerfile.compute-node-v14 Normal file
View File

@@ -0,0 +1,218 @@
#
# This file is identical to the Dockerfile.compute-node-v15 file
# except for the version of Postgres that is built.
#
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v14 postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install a newer gold version manually, since the debian-testing binutils package updates the
# libc version, which in turn breaks other extensions built against the non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
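# Quick sanity check for the workaround above (illustrative, not part of the build):
#   ld.gold --version | head -n1   # should mention binutils 2.38 after the install above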
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
cd h3-4.0.1 && \
mkdir build && \
cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
tar xvzf h3-pg.tgz && \
cd h3-pg-4.0.1 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove now-useless PGXS src infrastructure
RUN rm -r /usr/local/pgsql/lib/pgxs/src
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

218
Dockerfile.compute-node-v15 Normal file
View File

@@ -0,0 +1,218 @@
#
# This file is identical to the Dockerfile.compute-node-v14 file
# except for the version of Postgres that is built.
#
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v15 postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install a newer gold version manually, since the debian-testing binutils package updates the
# libc version, which in turn breaks other extensions built against the non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
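# Quick sanity check for the workaround above (illustrative, not part of the build):
#   ld.gold --version | head -n1   # should mention binutils 2.38 after the install above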
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
cd h3-4.0.1 && \
mkdir build && \
cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
tar xvzf h3-pg.tgz && \
cd h3-pg-4.0.1 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove now-useless PGXS src infrastructure
RUN rm -r /usr/local/pgsql/lib/pgxs/src
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -0,0 +1,88 @@
#
# Legacy version of the Dockerfile for the compute node.
# Used by e2e CI. Building Dockerfile.compute-node will take
# an unreasonable amount of time without v2 runners.
#
# TODO: remove once cloud repo CI is moved to v2 runners.
#
# Allow specifying a different compute-tools tag and image repo, so we are
# able to use different images
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=compute-tools
ARG TAG=latest
#
# Image with pre-built tools
#
FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps
# Used only to pull in the prebuilt compute_ctl binary as a dependency
#
# Image with Postgres build deps
#
FROM debian:bullseye-slim AS build-deps
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev
#
# Image with built Postgres
#
FROM build-deps AS pg-build
# Add user postgres
RUN adduser postgres
RUN mkdir /pg && chown postgres:postgres /pg
# Copy source files
# version 14 is default for now
COPY ./vendor/postgres-v14 /pg/
COPY ./pgxn /pg/
# Build and install Postgres locally
RUN mkdir /pg/compute_build && cd /pg/compute_build && \
../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \
# Install main binaries and contribs
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install
# Install neon contrib
RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install
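# Optional sanity check of the local prefix installed above (illustrative, not part of the build):
#   /pg/compute_build/postgres_bin/bin/pg_config --version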
USER postgres
WORKDIR /pg
#
# Final compute node image to be exported
#
FROM debian:bullseye-slim
# libreadline-dev is required to run psql
RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute
# Copy ready Postgres binaries
COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local
# Copy binaries from compute-tools
COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl
# XXX: temporary symlink for compatibility with old control-plane
RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
# Add postgres shared objects to the search path
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,70 +0,0 @@
# Note: this file *mostly* just builds on Dockerfile.compute-node
ARG SRC_IMAGE
ARG VM_INFORMANT_VERSION=v0.1.14
# on libcgroup update, make sure to check bootstrap.sh for changes
ARG LIBCGROUP_VERSION=v2.0.3
# Pull VM informant, to copy from later
FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ARG LIBCGROUP_VERSION
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
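# Note: the vm-informant needs cgroup v2 on the host; a quick check (illustrative, not part
# of this build) is:
#   stat -fc %T /sys/fs/cgroup   # prints "cgroup2fs" on a cgroup v2 system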
# Combine, starting from non-VM compute node image.
FROM $SRC_IMAGE as base
# Temporarily set user back to root so we can run adduser, set inittab
USER root
RUN adduser vm-informant --disabled-password --no-create-home
RUN set -e \
&& rm -f /etc/inittab \
&& touch /etc/inittab
RUN set -e \
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
&& CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \
&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
USER postgres
ADD vm-cgconfig.conf /etc/cgconfig.conf
COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]

221
Makefile
View File

@@ -20,18 +20,18 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options) $(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif endif
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin) endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable # macOS with brew-installed openssl requires explicit paths
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3) # It can be configured with OPENSSL_PREFIX variable
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib UNAME_S := $(shell uname -s)
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure ifeq ($(UNAME_S),Darwin)
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/: PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif endif
# Use -C option so that when PostgreSQL "make install" installs the # Use -C option so that when PostgreSQL "make install" installs the
@@ -61,129 +61,130 @@ all: neon postgres neon-pg-ext
# #
# The 'postgres_ffi' depends on the Postgres headers. # The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon .PHONY: neon
neon: postgres-headers neon: postgres-v14-headers postgres-v15-headers
+@echo "Compiling Neon" +@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts ### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor # The rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now. # to avoid the duplication in the future, but it's tolerable for now.
# #
$(POSTGRES_INSTALL_DIR)/build/%/config.status: $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
+@echo "Configuring Postgres $* build" +@echo "Configuring Postgres v14 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \ (cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \ $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \ $(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log) --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
$(POSTGRES_INSTALL_DIR)/build/v15/config.status:
+@echo "Configuring Postgres v15 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
# nicer alias to run 'configure' # nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration. .PHONY: postgres-v14-configure
# I'm not sure why it wouldn't work, but this is the only place (apart from postgres-v14-configure: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used. .PHONY: postgres-v15-configure
.PHONY: postgres-configure-v15 postgres-v15-configure: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include # Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-headers-% .PHONY: postgres-v14-headers
postgres-headers-%: postgres-configure-% postgres-v14-headers: postgres-v14-configure
+@echo "Installing PostgreSQL $* headers" +@echo "Installing PostgreSQL v14 headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/include MAKELEVEL=0 install
.PHONY: postgres-v15-headers
postgres-v15-headers: postgres-v15-configure
+@echo "Installing PostgreSQL v15 headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL # Compile and install PostgreSQL
.PHONY: postgres-% .PHONY: postgres-v14
postgres-%: postgres-configure-% \ postgres-v14: postgres-v14-configure \
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers` postgres-v14-headers # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL $*" +@echo "Compiling PostgreSQL v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
+@echo "Compiling libpq $*" +@echo "Compiling libpq v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+@echo "Compiling pg_prewarm $*" +@echo "Compiling pg_buffercache v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
+@echo "Compiling pg_buffercache $*" +@echo "Compiling pageinspect v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect install
+@echo "Compiling pageinspect $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
.PHONY: postgres-clean-% .PHONY: postgres-v15
postgres-clean-%: postgres-v15: postgres-v15-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean postgres-v15-headers # to prevent `make install` conflicts with neon's `postgres-headers`
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean +@echo "Compiling PostgreSQL v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean +@echo "Compiling libpq v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
.PHONY: neon-pg-ext-% +@echo "Compiling pg_buffercache v15"
neon-pg-ext-%: postgres-% $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
+@echo "Compiling neon $*" +@echo "Compiling pageinspect v15"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$* $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect install
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-ext-clean-%
neon-pg-ext-clean-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
.PHONY: neon-pg-ext
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15
# shorthand to build all Postgres versions # shorthand to build all Postgres versions
.PHONY: postgres postgres: postgres-v14 postgres-v15
postgres: \
postgres-v14 \
postgres-v15
.PHONY: postgres-headers .PHONY: postgres-v14-clean
postgres-headers: \ postgres-v14-clean:
postgres-headers-v14 \ $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 clean
postgres-headers-v15 $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq clean
.PHONY: postgres-clean .PHONY: postgres-v15-clean
postgres-clean: \ postgres-v15-clean:
postgres-clean-v14 \ $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 clean
postgres-clean-v15 $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq clean
neon-pg-ext-v14: postgres-v14
+@echo "Compiling neon v14"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v14
(cd $(POSTGRES_INSTALL_DIR)/build/neon-v14 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+@echo "Compiling neon_test_utils" v14
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14
(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
neon-pg-ext-v15: postgres-v15
+@echo "Compiling neon v15"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v15
(cd $(POSTGRES_INSTALL_DIR)/build/neon-v15 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+@echo "Compiling neon_test_utils" v15
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15
(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
.PHONY: neon-pg-ext-clean
$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon clean
$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils clean
neon-pg-ext: neon-pg-ext-v14 neon-pg-ext-v15
postgres-headers: postgres-v14-headers postgres-v15-headers
postgres-clean: postgres-v14-clean postgres-v15-clean
# This doesn't remove the effects of 'configure'. # This doesn't remove the effects of 'configure'.
.PHONY: clean .PHONY: clean
clean: postgres-clean neon-pg-ext-clean clean:
cd $(POSTGRES_INSTALL_DIR)/build/v14 && $(MAKE) clean
cd $(POSTGRES_INSTALL_DIR)/build/v15 && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean $(CARGO_CMD_PREFIX) cargo clean
cd pgxn/neon && $(MAKE) clean
cd pgxn/neon_test_utils && $(MAKE) clean
# This removes everything # This removes everything
.PHONY: distclean .PHONY: distclean

107
README.md
View File

@@ -2,20 +2,29 @@
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes. Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start ## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions. [Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation). Alternatively, compile and run the project [locally](#running-local-installation).
## Architecture overview ## Architecture overview
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine. A Neon installation consists of compute nodes and a Neon storage engine.
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
The Neon storage engine consists of two major components: The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes. - Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage. - WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information. Pageserver consists of:
- Repository - Neon storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request
## Running local installation ## Running local installation
@@ -26,21 +35,13 @@ See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more inf
* On Ubuntu or Debian, this set of packages should be sufficient to build the code: * On Ubuntu or Debian, this set of packages should be sufficient to build the code:
```bash ```bash
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
``` ```
* On Fedora, these packages are needed: * On Fedora, these packages are needed:
```bash ```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \ dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \ libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
protobuf-devel
``` ```
* On Arch based systems, these packages are needed:
```bash
pacman -S base-devel readline zlib libseccomp openssl clang \
postgresql-libs cmake postgresql protobuf
```
Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
2. [Install Rust](https://www.rust-lang.org/tools/install) 2. [Install Rust](https://www.rust-lang.org/tools/install)
``` ```
@@ -48,14 +49,11 @@ Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your di
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
``` ```
#### Installing dependencies on macOS (12.3.1) #### Installing dependencies on OSX (12.3.1)
1. Install XCode and dependencies 1. Install XCode and dependencies
``` ```
xcode-select --install xcode-select --install
brew install protobuf openssl flex bison brew install protobuf etcd openssl
# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
``` ```
2. [Install Rust](https://www.rust-lang.org/tools/install) 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -93,10 +91,9 @@ cd neon
# The preferred and default is to make a debug build. This will create a # The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build, # demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`nproc` -s" # use "BUILD_TYPE=release make -j`nproc`"
# Remove -s for the verbose build log
make -j`nproc` -s make -j`nproc`
``` ```
#### Building on OSX #### Building on OSX
@@ -110,17 +107,16 @@ cd neon
# The preferred and default is to make a debug build. This will create a # The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build, # demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s" # use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"
# Remove -s for the verbose build log
make -j`sysctl -n hw.logicalcpu` -s make -j`sysctl -n hw.logicalcpu`
``` ```
#### Dependency installation notes #### Dependency installation notes
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively. To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory. Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.
#### Running neon database #### Running neon database
@@ -129,33 +125,31 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data # Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script # Later that would be responsibility of a package install script
> ./target/debug/neon_local init > ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'. Starting pageserver at '127.0.0.1:64000' in '.neon'
# start pageserver, safekeeper, and broker for their intercommunication Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!
# start pageserver and safekeeper
> ./target/debug/neon_local start > ./target/debug/neon_local start
Starting neon broker at 127.0.0.1:50051 Starting etcd broker using /usr/bin/etcd
storage_broker started, pid: 2918372 Starting pageserver at '127.0.0.1:64000' in '.neon'
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2918386
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2918437
# create initial tenant and use it as a default for every future neon_local invocation Pageserver started
> ./target/debug/neon_local tenant create --set-default Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver Safekeeper started
Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
# start postgres compute node # start postgres compute node
> ./target/debug/neon_local endpoint start main > ./target/debug/neon_local pg start main
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ... Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances # check list of running postgres instances
> ./target/debug/neon_local endpoint list > ./target/debug/neon_local pg list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
``` ```
2. Now, it is possible to connect to postgres and run some queries: 2. Now, it is possible to connect to postgres and run some queries:
@@ -184,14 +178,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601] (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
# start postgres on that branch # start postgres on that branch
> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check > ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ... Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances # check the new list of running postgres instances
> ./target/debug/neon_local endpoint list > ./target/debug/neon_local pg list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
@@ -229,27 +223,22 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
```sh ```sh
git clone --recursive https://github.com/neondatabase/neon.git git clone --recursive https://github.com/neondatabase/neon.git
# either:
CARGO_BUILD_FLAGS="--features=testing" make CARGO_BUILD_FLAGS="--features=testing" make
# or:
make debug
./scripts/pytest ./scripts/pytest
``` ```
## Documentation ## Documentation
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation. Now we use README files to cover design ideas and overall architecture for each module and `rustdoc` style documentation comments. See also [/docs/](/docs/) a top-level overview of all available markdown documentation.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout. - [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open` To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
See also README files in some source directories, and `rustdoc` style documentation comments.
Other resources:
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
### Postgres-specific terms ### Postgres-specific terms
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used. Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on the 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral: you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push or pull
the repository, the 'datadirs' are not included. (They are like git working trees.)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
@@ -1,32 +1,23 @@
[package] [package]
name = "compute_tools" name = "compute_tools"
version = "0.1.0" version = "0.1.0"
edition.workspace = true edition = "2021"
license.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true anyhow = "1.0"
chrono.workspace = true chrono = "0.4"
clap.workspace = true clap = "4.0"
futures.workspace = true env_logger = "0.9"
hyper = { workspace = true, features = ["full"] } futures = "0.3.13"
notify.workspace = true hyper = { version = "0.14", features = ["full"] }
num_cpus.workspace = true log = { version = "0.4", features = ["std", "serde"] }
opentelemetry.workspace = true notify = "5.0.0"
postgres.workspace = true postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
regex.workspace = true regex = "1"
serde.workspace = true serde = { version = "1.0", features = ["derive"] }
serde_json.workspace = true serde_json = "1"
tar.workspace = true tar = "0.4"
reqwest = { workspace = true, features = ["json"] } tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tokio-postgres.workspace = true url = "2.2.2"
tracing.workspace = true workspace_hack = { version = "0.1", path = "../workspace_hack" }
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
url.workspace = true
compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true
@@ -19,10 +19,6 @@ Also `compute_ctl` spawns two separate service threads:
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
last activity requests. last activity requests.
If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
downscaling and (eventually) will request immediate upscaling under resource pressure.
Usage example: Usage example:
```sh ```sh
compute_ctl -D /var/db/postgres/compute \ compute_ctl -D /var/db/postgres/compute \
@@ -18,10 +18,6 @@
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the //! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
//! last activity requests. //! last activity requests.
//! //!
//! If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
//! compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
//! downscaling and (eventually) will request immediate upscaling under resource pressure.
//!
//! Usage example: //! Usage example:
//! ```sh //! ```sh
//! compute_ctl -D /var/db/postgres/compute \ //! compute_ctl -D /var/db/postgres/compute \
@@ -34,27 +30,26 @@ use std::fs::File;
use std::panic; use std::panic;
use std::path::Path; use std::path::Path;
use std::process::exit; use std::process::exit;
use std::sync::{mpsc, Arc, Condvar, Mutex}; use std::sync::{Arc, RwLock};
use std::{thread, time::Duration}; use std::{thread, time::Duration};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use chrono::Utc; use chrono::Utc;
use clap::Arg; use clap::Arg;
use tracing::{error, info}; use log::{error, info};
use url::Url;
use compute_api::responses::ComputeStatus; use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_tools::configurator::launch_configurator;
use compute_tools::http::api::launch_http_server; use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*; use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor; use compute_tools::monitor::launch_monitor;
use compute_tools::params::*; use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*; use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> { fn main() -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; // TODO: re-use `utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
let matches = cli().get_matches(); let matches = cli().get_matches();
@@ -64,183 +59,85 @@ fn main() -> Result<()> {
let connstr = matches let connstr = matches
.get_one::<String>("connstr") .get_one::<String>("connstr")
.expect("Postgres connection string is required"); .expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec"); let spec = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path"); let spec_path = matches.get_one::<String>("spec-path");
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
// Try to use just 'postgres' if no path is provided // Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap(); let pgbin = matches.get_one::<String>("pgbin").unwrap();
let mut spec = None; let spec: ComputeSpec = match spec {
let mut live_config_allowed = false;
match spec_json {
// First, try to get cluster spec from the cli argument // First, try to get cluster spec from the cli argument
Some(json) => { Some(json) => serde_json::from_str(json)?,
spec = Some(serde_json::from_str(json)?);
}
None => { None => {
// Second, try to read it from the file if path is provided // Second, try to read it from the file if path is provided
if let Some(sp) = spec_path { if let Some(sp) = spec_path {
let path = Path::new(sp); let path = Path::new(sp);
let file = File::open(path)?; let file = File::open(path)?;
spec = Some(serde_json::from_reader(file)?); serde_json::from_reader(file)?
} else if let Some(id) = compute_id {
if let Some(cp_base) = control_plane_uri {
live_config_allowed = true;
if let Ok(s) = get_spec_from_control_plane(cp_base, id) {
spec = Some(s);
}
} else {
panic!("must specify both --control-plane-uri and --compute-id or none");
}
} else { } else {
panic!( panic!("cluster spec should be provided via --spec or --spec-path argument");
"compute spec should be provided by one of the following ways: \
--spec OR --spec-path OR --control-plane-uri and --compute-id"
);
} }
} }
}; };
let mut new_state = ComputeState::new(); let pageserver_connstr = spec
let spec_set; .cluster
if let Some(spec) = spec { .settings
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?; .find("neon.pageserver_connstring")
new_state.pspec = Some(pspec); .expect("pageserver connstr should be provided");
spec_set = true; let tenant = spec
} else { .cluster
spec_set = false; .settings
} .find("neon.tenant_id")
let compute_node = ComputeNode { .expect("tenant id should be provided");
let timeline = spec
.cluster
.settings
.find("neon.timeline_id")
.expect("tenant id should be provided");
let compute_state = ComputeNode {
start_time: Utc::now(), start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?, connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
pgdata: pgdata.to_string(), pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(), pgbin: pgbin.to_string(),
live_config_allowed, spec,
state: Mutex::new(new_state), tenant,
state_changed: Condvar::new(), timeline,
pageserver_connstr,
metrics: ComputeMetrics::new(),
state: RwLock::new(ComputeState::new()),
}; };
let compute = Arc::new(compute_node); let compute = Arc::new(compute_state);
// Launch http service first, so we were able to serve control-plane // Launch service threads first, so we were able to serve availability
// requests, while configuration is still in progress. // requests, while configuration is still in progress.
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread"); let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
if !spec_set {
// No spec provided, hang waiting for it.
info!("no compute spec provided, waiting");
let mut state = compute.state.lock().unwrap();
while state.status != ComputeStatus::ConfigurationPending {
state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::ConfigurationPending {
info!("got spec, continue configuration");
// Spec is already set by the http server handler.
break;
}
}
}
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
let pspec = state.pspec.as_ref().expect("spec must be set");
let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
drop(state);
// Extract OpenTelemetry context for the startup actions from the spec, and
// attach it to the current tracing context.
//
// This is used to propagate the context for the 'start_compute' operation
// from the neon control plane. This allows linking together the wider
// 'start_compute' operation that creates the compute container, with the
// startup actions here within the container.
//
// Switch to the startup context here, and exit it once the startup has
// completed and Postgres is up and running.
//
// NOTE: This is supposed to only cover the *startup* actions. Once
// postgres is configured and up-and-running, we exit this span. Any other
// actions that are performed on incoming HTTP requests, for example, are
// performed in separate spans.
let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
Some(TraceContextPropagator::new().extract(carrier).attach())
} else {
None
};
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread"); let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
let _configurator_handle =
launch_configurator(&compute).expect("cannot launch configurator thread");
// Start Postgres // Run compute (Postgres) and hang waiting on it.
let mut delay_exit = false; match compute.prepare_and_run() {
let mut exit_code = None; Ok(ec) => {
let pg = match compute.start_compute() { let code = ec.code().unwrap_or(1);
Ok(pg) => Some(pg), info!("Postgres exited with code {}, shutting down", code);
Err(err) => { exit(code)
error!("could not start the compute node: {:?}", err); }
let mut state = compute.state.lock().unwrap(); Err(error) => {
state.error = Some(format!("{:?}", err)); error!("could not start the compute node: {:?}", error);
let mut state = compute.state.write().unwrap();
state.error = Some(format!("{:?}", error));
state.status = ComputeStatus::Failed; state.status = ComputeStatus::Failed;
drop(state); drop(state);
delay_exit = true;
None // Keep serving HTTP requests, so the cloud control plane was able to
// get the actual error.
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
info!("shutting down");
Err(error)
} }
};
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
if let Some(mut pg) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
info!("Postgres exited with code {}, shutting down", ecode);
exit_code = ecode.code()
} }
if let Err(err) = compute.check_for_core_dumps() {
error!("error while checking for core dumps: {err:?}");
}
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example:
// - https://github.com/open-telemetry/opentelemetry-rust/issues/868
// - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636
//
// Yet, we want computes to shut down fast enough, as we may need a new one
// for the same timeline ASAP. So wait no longer than 2s for the shutdown to
// complete, then just error out and exit the main thread.
info!("shutting down tracing");
let (sender, receiver) = mpsc::channel();
let _ = thread::spawn(move || {
tracing_utils::shutdown_tracing();
sender.send(()).ok()
});
let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000));
if shutdown_res.is_err() {
error!("timed out while shutting down tracing, exiting anyway");
}
info!("shutting down");
exit(exit_code.unwrap_or(1))
} }
fn cli() -> clap::Command { fn cli() -> clap::Command {
@@ -281,18 +178,6 @@ fn cli() -> clap::Command {
.long("spec-path") .long("spec-path")
.value_name("SPEC_PATH"), .value_name("SPEC_PATH"),
) )
.arg(
Arg::new("compute-id")
.short('i')
.long("compute-id")
.value_name("COMPUTE_ID"),
)
.arg(
Arg::new("control-plane-uri")
.short('p')
.long("control-plane-uri")
.value_name("CONTROL_PLANE_API_BASE_URI"),
)
} }
#[test] #[test]
@@ -1,28 +1,11 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
use tokio_postgres::NoTls; use tokio_postgres::NoTls;
use tracing::{error, instrument};
use crate::compute::ComputeNode; use crate::compute::ComputeNode;
/// Update timestamp in a row in a special service table to check pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
/// that we can actually write some data in this particular timeline.
/// Create table if it's missing.
#[instrument(skip_all)]
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
// Connect to the database.
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
// The connection object performs the actual communication with the database,
// so spawn it off to run on its own.
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let query = " let query = "
CREATE TABLE IF NOT EXISTS health_check ( CREATE TABLE IF NOT EXISTS health_check (
id serial primary key, id serial primary key,
@@ -31,15 +14,30 @@ pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
INSERT INTO health_check VALUES (1, now()) INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE ON CONFLICT (id) DO UPDATE
SET updated_at = now();"; SET updated_at = now();";
let result = client.simple_query(query)?;
let result = client.simple_query(query).await?; if result.len() < 2 {
return Err(anyhow::format_err!("executed {} queries", result.len()));
if result.len() != 2 { }
return Err(anyhow::format_err!( Ok(())
"expected 2 query results, but got {}", }
result.len()
)); pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let result = client
.simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
.await?;
if result.len() != 1 {
return Err(anyhow!("statement can't be executed"));
} }
Ok(()) Ok(())
} }
@@ -17,21 +17,17 @@
use std::fs; use std::fs;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use std::path::Path; use std::path::Path;
use std::process::{Command, Stdio}; use std::process::{Command, ExitStatus, Stdio};
use std::str::FromStr; use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Condvar, Mutex}; use std::sync::RwLock;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use log::info;
use postgres::{Client, NoTls}; use postgres::{Client, NoTls};
use tokio_postgres; use serde::{Serialize, Serializer};
use tracing::{info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::ComputeSpec;
use crate::checker::create_writablity_check_data;
use crate::config; use crate::config;
use crate::pg_helpers::*; use crate::pg_helpers::*;
use crate::spec::*; use crate::spec::*;
@@ -43,45 +39,40 @@ pub struct ComputeNode {
pub connstr: url::Url, pub connstr: url::Url,
pub pgdata: String, pub pgdata: String,
pub pgbin: String, pub pgbin: String,
/// We should only allow live re- / configuration of the compute node if pub spec: ComputeSpec,
/// it uses 'pull model', i.e. it can go to control-plane and fetch pub tenant: String,
/// the latest configuration. Otherwise, there could be a case: pub timeline: String,
/// - we start compute with some spec provided as argument pub pageserver_connstr: String,
/// - we push new spec and it does reconfiguration pub metrics: ComputeMetrics,
/// - but then something happens and compute pod / VM is destroyed, /// Volatile part of the `ComputeNode` so should be used under `RwLock`
/// so k8s controller starts it again with the **old** spec /// to allow HTTP API server to serve status requests, while configuration
/// and the same for empty computes: /// is in progress.
/// - we started compute without any spec pub state: RwLock<ComputeState>,
/// - we push spec and it does configuration
/// - but then it is restarted without any spec again
pub live_config_allowed: bool,
/// Volatile part of the `ComputeNode`, which should be used under `Mutex`.
/// To allow HTTP API server to serving status requests, while configuration
/// is in progress, lock should be held only for short periods of time to do
/// read/write, not the whole configuration process.
pub state: Mutex<ComputeState>,
/// `Condvar` to allow notifying waiters about state changes.
pub state_changed: Condvar,
} }
#[derive(Clone, Debug)] fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
x.to_rfc3339().serialize(s)
}
#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ComputeState { pub struct ComputeState {
pub status: ComputeStatus, pub status: ComputeStatus,
/// Timestamp of the last Postgres activity /// Timestamp of the last Postgres activity
#[serde(serialize_with = "rfc3339_serialize")]
pub last_active: DateTime<Utc>, pub last_active: DateTime<Utc>,
pub error: Option<String>, pub error: Option<String>,
pub pspec: Option<ParsedSpec>,
pub metrics: ComputeMetrics,
} }
impl ComputeState { impl ComputeState {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
status: ComputeStatus::Empty, status: ComputeStatus::Init,
last_active: Utc::now(), last_active: Utc::now(),
error: None, error: None,
pspec: None,
metrics: ComputeMetrics::default(),
} }
} }
} }
@@ -92,58 +83,46 @@ impl Default for ComputeState {
} }
} }
#[derive(Clone, Debug)] #[derive(Serialize, Clone, Copy, PartialEq, Eq)]
pub struct ParsedSpec { #[serde(rename_all = "snake_case")]
pub spec: ComputeSpec, pub enum ComputeStatus {
pub tenant_id: TenantId, Init,
pub timeline_id: TimelineId, Running,
pub pageserver_connstr: String, Failed,
pub storage_auth_token: Option<String>,
} }
impl TryFrom<ComputeSpec> for ParsedSpec { #[derive(Serialize)]
type Error = String; pub struct ComputeMetrics {
fn try_from(spec: ComputeSpec) -> Result<Self, String> { pub sync_safekeepers_ms: AtomicU64,
let pageserver_connstr = spec pub basebackup_ms: AtomicU64,
.cluster pub config_ms: AtomicU64,
.settings pub total_startup_ms: AtomicU64,
.find("neon.pageserver_connstring") }
.ok_or("pageserver connstr should be provided")?;
let storage_auth_token = spec.storage_auth_token.clone();
let tenant_id: TenantId = spec
.cluster
.settings
.find("neon.tenant_id")
.ok_or("tenant id should be provided")
.map(|s| TenantId::from_str(&s))?
.or(Err("invalid tenant id"))?;
let timeline_id: TimelineId = spec
.cluster
.settings
.find("neon.timeline_id")
.ok_or("timeline id should be provided")
.map(|s| TimelineId::from_str(&s))?
.or(Err("invalid timeline id"))?;
Ok(ParsedSpec { impl ComputeMetrics {
spec, pub fn new() -> Self {
pageserver_connstr, Self {
storage_auth_token, sync_safekeepers_ms: AtomicU64::new(0),
tenant_id, basebackup_ms: AtomicU64::new(0),
timeline_id, config_ms: AtomicU64::new(0),
}) total_startup_ms: AtomicU64::new(0),
}
}
}
impl Default for ComputeMetrics {
fn default() -> Self {
Self::new()
} }
} }
impl ComputeNode { impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) { pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap(); self.state.write().unwrap().status = status;
state.status = status;
self.state_changed.notify_all();
} }
pub fn get_status(&self) -> ComputeStatus { pub fn get_status(&self) -> ComputeStatus {
self.state.lock().unwrap().status self.state.read().unwrap().status
} }
// Remove `pgdata` directory and create it again with right permissions. // Remove `pgdata` directory and create it again with right permissions.
@@ -159,26 +138,13 @@ impl ComputeNode {
// Get basebackup from the libpq connection to pageserver using `connstr` and // Get basebackup from the libpq connection to pageserver using `connstr` and
// unarchive it to `pgdata` directory overriding all its previous content. // unarchive it to `pgdata` directory overriding all its previous content.
#[instrument(skip(self, compute_state))] fn get_basebackup(&self, lsn: &str) -> Result<()> {
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Utc::now(); let start_time = Utc::now();
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?; let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
if let Some(storage_auth_token) = &spec.storage_auth_token {
info!("Got storage auth token from spec file");
config.password(storage_auth_token);
} else {
info!("Storage auth token not set");
}
let mut client = config.connect(NoTls)?;
let basebackup_cmd = match lsn { let basebackup_cmd = match lsn {
Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute "0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
_ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn), _ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
}; };
let copyreader = client.copy_out(basebackup_cmd.as_str())?; let copyreader = client.copy_out(basebackup_cmd.as_str())?;
@@ -191,28 +157,26 @@ impl ComputeNode {
ar.set_ignore_zeros(true); ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata)?; ar.unpack(&self.pgdata)?;
self.state.lock().unwrap().metrics.basebackup_ms = Utc::now() self.metrics.basebackup_ms.store(
.signed_duration_since(start_time) Utc::now()
.to_std() .signed_duration_since(start_time)
.unwrap() .to_std()
.as_millis() as u64; .unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
Ok(()) Ok(())
} }
// Run `postgres` in a special mode with `--sync-safekeepers` argument // Run `postgres` in a special mode with `--sync-safekeepers` argument
// and return the reported LSN back to the caller. // and return the reported LSN back to the caller.
#[instrument(skip(self, storage_auth_token))] fn sync_safekeepers(&self) -> Result<String> {
fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
let start_time = Utc::now(); let start_time = Utc::now();
let sync_handle = Command::new(&self.pgbin) let sync_handle = Command::new(&self.pgbin)
.args(["--sync-safekeepers"]) .args(&["--sync-safekeepers"])
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
.envs(if let Some(storage_auth_token) = &storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.stdout(Stdio::piped()) .stdout(Stdio::piped())
.spawn() .spawn()
.expect("postgres --sync-safekeepers failed to start"); .expect("postgres --sync-safekeepers failed to start");
@@ -233,42 +197,44 @@ impl ComputeNode {
); );
} }
self.state.lock().unwrap().metrics.sync_safekeepers_ms = Utc::now() self.metrics.sync_safekeepers_ms.store(
.signed_duration_since(start_time) Utc::now()
.to_std() .signed_duration_since(start_time)
.unwrap() .to_std()
.as_millis() as u64; .unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?; let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
Ok(lsn) Ok(lsn)
} }
/// Do all the preparations like PGDATA directory creation, configuration, /// Do all the preparations like PGDATA directory creation, configuration,
/// safekeepers sync, basebackup, etc. /// safekeepers sync, basebackup, etc.
#[instrument(skip(self, compute_state))] pub fn prepare_pgdata(&self) -> Result<()> {
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { let spec = &self.spec;
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let pgdata_path = Path::new(&self.pgdata); let pgdata_path = Path::new(&self.pgdata);
// Remove/create an empty pgdata directory and put configuration there. // Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?; self.create_pgdata()?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?; config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
info!("starting safekeepers syncing"); info!("starting safekeepers syncing");
let lsn = self let lsn = self
.sync_safekeepers(pspec.storage_auth_token.clone()) .sync_safekeepers()
.with_context(|| "failed to sync safekeepers")?; .with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", lsn); info!("safekeepers synced at LSN {}", lsn);
info!( info!(
"getting basebackup@{} from pageserver {}", "getting basebackup@{} from pageserver {}",
lsn, &pspec.pageserver_connstr lsn, &self.pageserver_connstr
); );
self.get_basebackup(compute_state, lsn).with_context(|| { self.get_basebackup(&lsn).with_context(|| {
format!( format!(
"failed to get basebackup@{} from pageserver {}", "failed to get basebackup@{} from pageserver {}",
lsn, &pspec.pageserver_connstr lsn, &self.pageserver_connstr
) )
})?; })?;
@@ -280,36 +246,23 @@ impl ComputeNode {
/// Start Postgres as a child process and manage DBs/roles. /// Start Postgres as a child process and manage DBs/roles.
/// After that this will hang waiting on the postmaster process to exit. /// After that this will hang waiting on the postmaster process to exit.
#[instrument(skip(self))] pub fn run(&self) -> Result<ExitStatus> {
pub fn start_postgres( let start_time = Utc::now();
&self,
storage_auth_token: Option<String>,
) -> Result<std::process::Child> {
let pgdata_path = Path::new(&self.pgdata); let pgdata_path = Path::new(&self.pgdata);
// Run postgres as a child process. // Run postgres as a child process.
let mut pg = Command::new(&self.pgbin) let mut pg = Command::new(&self.pgbin)
.args(["-D", &self.pgdata]) .args(&["-D", &self.pgdata])
.envs(if let Some(storage_auth_token) = &storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.spawn() .spawn()
.expect("cannot start postgres process"); .expect("cannot start postgres process");
wait_for_postgres(&mut pg, pgdata_path)?; wait_for_postgres(&mut pg, pgdata_path)?;
Ok(pg)
}
/// Do initial configuration of the already started Postgres.
#[instrument(skip(self, compute_state))]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
// If connection fails, // If connection fails,
// it may be the old node with `zenith_admin` superuser. // it may be the old node with `zenith_admin` superuser.
// //
// In this case we need to connect with old `zenith_admin` name // In this case we need to connect with old `zenith_admin`name
// and create new user. We cannot simply rename connected user, // and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges. // but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) { let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
@@ -335,206 +288,59 @@ impl ComputeNode {
Ok(client) => client, Ok(client) => client,
}; };
// Proceed with post-startup configuration. Note, that order of operations is important. handle_roles(&self.spec, &mut client)?;
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; handle_databases(&self.spec, &mut client)?;
handle_roles(spec, &mut client)?; handle_role_deletions(self, &mut client)?;
handle_databases(spec, &mut client)?; handle_grants(self, &mut client)?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?; create_writablity_check_data(&mut client)?;
handle_grants(spec, self.connstr.as_str(), &mut client)?;
handle_extensions(spec, &mut client)?;
// 'Close' connection // 'Close' connection
drop(client); drop(client);
info!(
"finished configuration of compute for project {}",
spec.cluster.cluster_id
);
Ok(())
}
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
// have opened connection to Postgres and superuser access.
#[instrument(skip(self, client))]
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
client.simple_query("SELECT pg_reload_conf()")?;
Ok(())
}
/// Similar to `apply_config()`, but does a bit different sequence of operations,
/// as it's used to reconfigure a previously started and configured Postgres node.
#[instrument(skip(self))]
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
// Write new config
let pgdata_path = Path::new(&self.pgdata);
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
self.pg_reload_conf(&mut client)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
handle_extensions(&spec, &mut client)?;
// 'Close' connection
drop(client);
let unknown_op = "unknown".to_string();
let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
info!(
"finished reconfiguration of compute node for operation {}",
op_id
);
Ok(())
}
#[instrument(skip(self))]
pub fn start_compute(&self) -> Result<std::process::Child> {
let compute_state = self.state.lock().unwrap().clone();
let spec = compute_state.pspec.as_ref().expect("spec must be set");
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
spec.spec.cluster.cluster_id,
spec.spec.operation_uuid.as_deref().unwrap_or("None"),
spec.tenant_id,
spec.timeline_id,
);
self.prepare_pgdata(&compute_state)?;
let start_time = Utc::now();
let pg = self.start_postgres(spec.storage_auth_token.clone())?;
self.apply_config(&compute_state)?;
let startup_end_time = Utc::now(); let startup_end_time = Utc::now();
{
let mut state = self.state.lock().unwrap(); self.metrics.config_ms.store(
state.metrics.config_ms = startup_end_time startup_end_time
.signed_duration_since(start_time) .signed_duration_since(start_time)
.to_std() .to_std()
.unwrap() .unwrap()
.as_millis() as u64; .as_millis() as u64,
state.metrics.total_startup_ms = startup_end_time Ordering::Relaxed,
);
self.metrics.total_startup_ms.store(
startup_end_time
.signed_duration_since(self.start_time) .signed_duration_since(self.start_time)
.to_std() .to_std()
.unwrap() .unwrap()
.as_millis() as u64; .as_millis() as u64,
} Ordering::Relaxed,
);
self.set_status(ComputeStatus::Running); self.set_status(ComputeStatus::Running);
Ok(pg) info!(
"finished configuration of compute for project {}",
self.spec.cluster.cluster_id
);
// Wait for child Postgres process basically forever. In this state Ctrl+C
// will propagate to Postgres and it will be shut down as well.
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
Ok(ecode)
} }
// Look for core dumps and collect backtraces. pub fn prepare_and_run(&self) -> Result<ExitStatus> {
// info!(
// EKS worker nodes have following core dump settings: "starting compute for project {}, operation {}, tenant {}, timeline {}",
// /proc/sys/kernel/core_pattern -> core self.spec.cluster.cluster_id,
// /proc/sys/kernel/core_uses_pid -> 1 self.spec.operation_uuid.as_ref().unwrap(),
// ulimit -c -> unlimited self.tenant,
// which results in core dumps being written to postgres data directory as core.<pid>. self.timeline,
// );
// Use that as a default location and pattern, except macos where core dumps are written
// to /cores/ directory by default.
pub fn check_for_core_dumps(&self) -> Result<()> {
let core_dump_dir = match std::env::consts::OS {
"macos" => Path::new("/cores/"),
_ => Path::new(&self.pgdata),
};
// Collect core dump paths if any self.prepare_pgdata()?;
info!("checking for core dumps in {}", core_dump_dir.display()); self.run()
let files = fs::read_dir(core_dump_dir)?;
let cores = files.filter_map(|entry| {
let entry = entry.ok()?;
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
Some(entry.path())
});
// Print backtrace for each core dump
for core_path in cores {
warn!(
"core dump found: {}, collecting backtrace",
core_path.display()
);
// Try first with gdb
let backtrace = Command::new("gdb")
.args(["--batch", "-q", "-ex", "bt", &self.pgbin])
.arg(&core_path)
.output();
// Try lldb if no gdb is found -- that is handy for local testing on macOS
let backtrace = match backtrace {
Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {
warn!("cannot find gdb, trying lldb");
Command::new("lldb")
.arg("-c")
.arg(&core_path)
.args(["--batch", "-o", "bt all", "-o", "quit"])
.output()
}
_ => backtrace,
}?;
warn!(
"core dump backtrace: {}",
String::from_utf8_lossy(&backtrace.stdout)
);
warn!(
"debugger stderr: {}",
String::from_utf8_lossy(&backtrace.stderr)
);
}
Ok(())
}
/// Select `pg_stat_statements` data and return it as a stringified JSON
pub async fn collect_insights(&self) -> String {
let mut result_rows: Vec<String> = Vec::new();
let connect_result = tokio_postgres::connect(self.connstr.as_str(), NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
let result = client
.simple_query(
"SELECT
row_to_json(pg_stat_statements)
FROM
pg_stat_statements
WHERE
userid != 'cloud_admin'::regrole::oid
ORDER BY
(mean_exec_time + mean_plan_time) DESC
LIMIT 100",
)
.await;
if let Ok(raw_rows) = result {
for message in raw_rows.iter() {
if let postgres::SimpleQueryMessage::Row(row) = message {
if let Some(json) = row.get(0) {
result_rows.push(json.to_string());
}
}
}
format!("{{\"pg_stat_statements\": [{}]}}", result_rows.join(","))
} else {
"{{\"pg_stat_statements\": []}}".to_string()
}
} }
} }
@@ -6,7 +6,7 @@ use std::path::Path;
use anyhow::Result; use anyhow::Result;
use crate::pg_helpers::PgOptionsSerialize; use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::ComputeSpec; use crate::spec::ComputeSpec;
/// Check that `line` is inside a text file and put it there if it is not. /// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist. /// Create file if it doesn't exist.
@@ -1,54 +0,0 @@
use std::sync::Arc;
use std::thread;
use anyhow::Result;
use tracing::{error, info, instrument};
use compute_api::responses::ComputeStatus;
use crate::compute::ComputeNode;
#[instrument(skip(compute))]
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
info!("waiting for reconfiguration requests");
loop {
let state = compute.state.lock().unwrap();
let mut state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state);
let mut new_status = ComputeStatus::Failed;
if let Err(e) = compute.reconfigure() {
error!("could not configure compute node: {}", e);
} else {
new_status = ComputeStatus::Running;
info!("compute node configured");
}
// XXX: used to test that API is blocking
// std::thread::sleep(std::time::Duration::from_millis(10000));
compute.set_status(new_status);
} else if state.status == ComputeStatus::Failed {
info!("compute node is now in Failed state, exiting");
break;
} else {
info!("woken up for compute status: {:?}, sleeping", state.status);
}
}
}
pub fn launch_configurator(compute: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let compute = Arc::clone(compute);
Ok(thread::Builder::new()
.name("compute-configurator".into())
.spawn(move || {
configurator_main_loop(&compute);
info!("configurator thread is exited");
})?)
}
@@ -3,120 +3,65 @@ use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::thread; use std::thread;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use anyhow::Result; use anyhow::Result;
use hyper::service::{make_service_fn, service_fn}; use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode}; use hyper::{Body, Method, Request, Response, Server, StatusCode};
use num_cpus; use log::{error, info};
use serde_json; use serde_json;
use tokio::task;
use tracing::{error, info};
use tracing_utils::http::OtelName;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { use crate::compute::{ComputeNode, ComputeStatus};
ComputeStatusResponse {
tenant: state
.pspec
.as_ref()
.map(|pspec| pspec.tenant_id.to_string()),
timeline: state
.pspec
.as_ref()
.map(|pspec| pspec.timeline_id.to_string()),
status: state.status,
last_active: state.last_active,
error: state.error.clone(),
}
}
// Service function to handle all available routes. // Service function to handle all available routes.
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> { async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
//
// NOTE: The URI path is currently included in traces. That's OK because
// it doesn't contain any variable parts or sensitive information. But
// please keep that in mind if you change the routing here.
//
match (req.method(), req.uri().path()) { match (req.method(), req.uri().path()) {
// Timestamp of the last Postgres activity in the plain text.
// DEPRECATED in favour of /status
(&Method::GET, "/last_activity") => {
info!("serving /last_active GET request");
let state = compute.state.read().unwrap();
// Use RFC3339 format for consistency.
Response::new(Body::from(state.last_active.to_rfc3339()))
}
// Has compute setup process finished? -> true/false.
// DEPRECATED in favour of /status
(&Method::GET, "/ready") => {
info!("serving /ready GET request");
let status = compute.get_status();
Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
}
// Serialized compute state. // Serialized compute state.
(&Method::GET, "/status") => { (&Method::GET, "/status") => {
info!("serving /status GET request"); info!("serving /status GET request");
let state = compute.state.lock().unwrap(); let state = compute.state.read().unwrap();
let status_response = status_response_from_state(&state); Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
} }
// Startup metrics in JSON format. Keep /metrics reserved for a possible // Startup metrics in JSON format. Keep /metrics reserved for a possible
// future use for Prometheus metrics format. // future use for Prometheus metrics format.
(&Method::GET, "/metrics.json") => { (&Method::GET, "/metrics.json") => {
info!("serving /metrics.json GET request"); info!("serving /metrics.json GET request");
let metrics = compute.state.lock().unwrap().metrics.clone(); Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
} }
// Collect Postgres current usage insights // DEPRECATED, use POST instead
(&Method::GET, "/insights") => { (&Method::GET, "/check_writability") => {
info!("serving /insights GET request"); info!("serving /check_writability GET request");
let status = compute.get_status(); let res = crate::checker::check_writability(&compute).await;
if status != ComputeStatus::Running { match res {
let msg = format!("compute is not running, current status: {:?}", status); Ok(_) => Response::new(Body::from("true")),
error!(msg); Err(e) => Response::new(Body::from(e.to_string())),
return Response::new(Body::from(msg));
} }
let insights = compute.collect_insights().await;
Response::new(Body::from(insights))
} }
(&Method::POST, "/check_writability") => { (&Method::POST, "/check_writability") => {
info!("serving /check_writability POST request"); info!("serving /check_writability POST request");
let status = compute.get_status(); let res = crate::checker::check_writability(&compute).await;
if status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for check_writability request: {:?}",
status
);
error!(msg);
return Response::new(Body::from(msg));
}
let res = crate::checker::check_writability(compute).await;
match res { match res {
Ok(_) => Response::new(Body::from("true")), Ok(_) => Response::new(Body::from("true")),
Err(e) => { Err(e) => Response::new(Body::from(e.to_string())),
error!("check_writability failed: {}", e);
Response::new(Body::from(e.to_string()))
}
}
}
(&Method::GET, "/info") => {
let num_cpus = num_cpus::get_physical();
info!("serving /info GET request. num_cpus: {}", num_cpus);
Response::new(Body::from(
serde_json::json!({
"num_cpus": num_cpus,
})
.to_string(),
))
}
// Accept spec in JSON format and request compute configuration. If
// anything goes wrong after we set the compute status to `ConfigurationPending`
// and update compute state with new spec, we basically leave compute
// in the potentially wrong state. That said, it's control-plane's
// responsibility to watch compute state after reconfiguration request
// and to clean restart in case of errors.
(&Method::POST, "/configure") => {
info!("serving /configure POST request");
match handle_configure_request(req, compute).await {
Ok(msg) => Response::new(Body::from(msg)),
Err((msg, code)) => {
error!("error handling /configure request: {msg}");
render_json_error(&msg, code)
}
} }
} }
@@ -129,94 +74,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
} }
} }
async fn handle_configure_request(
req: Request<Body>,
compute: &Arc<ComputeNode>,
) -> Result<String, (String, StatusCode)> {
if !compute.live_config_allowed {
return Err((
"live configuration is not allowed for this compute node".to_string(),
StatusCode::PRECONDITION_FAILED,
));
}
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// below, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
}
}
fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
let error = GenericAPIError {
error: e.to_string(),
};
Response::builder()
.status(status)
.body(Body::from(serde_json::to_string(&error).unwrap()))
.unwrap()
}
// Main Hyper HTTP server function that runs it and blocks waiting on it forever. // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main] #[tokio::main]
async fn serve(state: Arc<ComputeNode>) { async fn serve(state: Arc<ComputeNode>) {
@@ -227,19 +84,7 @@ async fn serve(state: Arc<ComputeNode>) {
async move { async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| { Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone(); let state = state.clone();
async move { async move { Ok::<_, Infallible>(routes(req, state).await) }
Ok::<_, Infallible>(
// NOTE: We include the URI path in the string. It
// doesn't contain any variable parts or sensitive
// information in this API.
tracing_utils::http::tracing_handler(
req,
|req| routes(req, &state),
OtelName::UriPath,
)
.await,
)
}
})) }))
} }
}); });
@@ -10,12 +10,12 @@ paths:
/status: /status:
get: get:
tags: tags:
- Info - "info"
summary: Get compute node internal status. summary: Get compute node internal status
description: "" description: ""
operationId: getComputeStatus operationId: getComputeStatus
responses: responses:
200: "200":
description: ComputeState description: ComputeState
content: content:
application/json: application/json:
@@ -25,120 +25,85 @@ paths:
/metrics.json: /metrics.json:
get: get:
tags: tags:
- Info - "info"
summary: Get compute node startup metrics in JSON format. summary: Get compute node startup metrics in JSON format
description: "" description: ""
operationId: getComputeMetricsJSON operationId: getComputeMetricsJSON
responses: responses:
200: "200":
description: ComputeMetrics description: ComputeMetrics
content: content:
application/json: application/json:
schema: schema:
$ref: "#/components/schemas/ComputeMetrics" $ref: "#/components/schemas/ComputeMetrics"
/insights: /ready:
get: get:
deprecated: true
tags: tags:
- Info - "info"
summary: Get current compute insights in JSON format. summary: Check whether compute startup process finished successfully
description: |
Note, that this doesn't include any historical data.
operationId: getComputeInsights
responses:
200:
description: Compute insights
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeInsights"
/info:
get:
tags:
- Info
summary: Get info about the compute pod / VM.
description: "" description: ""
operationId: getInfo operationId: computeIsReady
responses: responses:
200: "200":
description: Info description: Compute is ready ('true') or not ('false')
content: content:
application/json: text/plain:
schema: schema:
$ref: "#/components/schemas/Info" type: string
example: "true"
/last_activity:
get:
deprecated: true
tags:
- "info"
summary: Get timestamp of the last compute activity
description: ""
operationId: getLastComputeActivityTS
responses:
"200":
description: Timestamp of the last compute activity
content:
text/plain:
schema:
type: string
example: "2022-10-12T07:20:50.52Z"
/check_writability: /check_writability:
post: get:
deprecated: true
tags: tags:
- Check - "check"
summary: Check that we can write new data on this compute. summary: Check that we can write new data on this compute
description: "" description: ""
operationId: checkComputeWritability operationId: checkComputeWritabilityDeprecated
responses: responses:
200: "200":
description: Check result description: Check result
content: content:
text/plain: text/plain:
schema: schema:
type: string type: string
description: Error text or 'true' if check passed. description: Error text or 'true' if check passed
example: "true" example: "true"
/configure:
post: post:
tags: tags:
- Configure - "check"
summary: Perform compute node configuration. summary: Check that we can write new data on this compute
description: | description: ""
This is a blocking API endpoint, i.e. it blocks waiting until operationId: checkComputeWritability
the compute has finished configuration and is in `Running` state.
Optional non-blocking mode could be added later.
operationId: configureCompute
requestBody:
description: Configuration request.
required: true
content:
application/json:
schema:
type: object
required:
- spec
properties:
spec:
# XXX: I don't want to explain current spec in the OpenAPI format,
# as it could be changed really soon. Consider doing it later.
type: object
responses: responses:
200: "200":
description: Compute configuration finished. description: Check result
content: content:
application/json: text/plain:
schema: schema:
$ref: "#/components/schemas/ComputeState" type: string
400: description: Error text or 'true' if check passed
description: Provided spec is invalid. example: "true"
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
412:
description: |
It's not possible to do live-configuration of the compute.
It's either in the wrong state, or the compute doesn't use the pull
mode of configuration.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: |
Compute configuration request was processed, but error
occurred. Compute will likely shutdown soon.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
components: components:
securitySchemes: securitySchemes:
@@ -150,7 +115,7 @@ components:
schemas: schemas:
ComputeMetrics: ComputeMetrics:
type: object type: object
description: Compute startup metrics. description: Compute startup metrics
required: required:
- sync_safekeepers_ms - sync_safekeepers_ms
- basebackup_ms - basebackup_ms
@@ -166,15 +131,6 @@ components:
total_startup_ms: total_startup_ms:
type: integer type: integer
Info:
type: object
description: Information about VM/Pod.
required:
- num_cpus
properties:
num_cpus:
type: integer
ComputeState: ComputeState:
type: object type: object
required: required:
@@ -185,29 +141,11 @@ components:
$ref: '#/components/schemas/ComputeStatus' $ref: '#/components/schemas/ComputeStatus'
last_active: last_active:
type: string type: string
description: The last detected compute activity timestamp in UTC and RFC3339 format. description: The last detected compute activity timestamp in UTC and RFC3339 format
example: "2022-10-12T07:20:50.52Z" example: "2022-10-12T07:20:50.52Z"
error: error:
type: string type: string
description: Text of the error during compute startup, if any. description: Text of the error during compute startup, if any
example: ""
tenant:
type: string
description: Identifier of the current tenant served by compute node, if any.
example: c9269c359e9a199fad1ea0981246a78f
timeline:
type: string
description: Identifier of the current timeline served by compute node, if any.
example: ece7de74d4b8cbe5433a68ce4d1b97b4
ComputeInsights:
type: object
properties:
pg_stat_statements:
description: Contains raw output from pg_stat_statements in JSON format.
type: array
items:
type: object
ComputeStatus: ComputeStatus:
type: string type: string
@@ -215,19 +153,6 @@ components:
- init - init
- failed - failed
- running - running
example: running
#
# Errors
#
GenericError:
type: object
required:
- error
properties:
error:
type: string
security: security:
- JWT: [] - JWT: []
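The /configure endpoint documented in this spec is a blocking call: it only returns once the compute has finished configuration or failed. A minimal client sketch, assuming blocking reqwest and a compute_ctl listener on localhost:3080 (both assumptions); the route, request body shape and status codes are taken from the spec above:

    // Hedged sketch of a /configure client; port 3080 and the helper name are
    // assumptions, the {"spec": ...} body and status codes come from the spec.
    use anyhow::bail;
    use reqwest::blocking::Client;

    fn configure_compute(spec: serde_json::Value) -> anyhow::Result<()> {
        let resp = Client::new()
            .post("http://localhost:3080/configure")
            .json(&serde_json::json!({ "spec": spec }))
            .send()?;
        match resp.status().as_u16() {
            200 => Ok(()), // compute reached the `running` status
            400 => bail!("invalid spec: {}", resp.text()?),
            412 => bail!("compute is not in a state that allows live configuration"),
            _ => bail!("compute configuration failed: {}", resp.text()?),
        }
    }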

View File

@@ -4,7 +4,6 @@
//! //!
pub mod checker; pub mod checker;
pub mod config; pub mod config;
pub mod configurator;
pub mod http; pub mod http;
#[macro_use] #[macro_use]
pub mod logger; pub mod logger;

View File

@@ -1,37 +1,43 @@
use tracing_opentelemetry::OpenTelemetryLayer; use std::io::Write;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::*;
/// Initialize logging to stderr, and OpenTelemetry tracing and exporter. use anyhow::Result;
/// use chrono::Utc;
/// Logging is configured using either `default_log_level` or use env_logger::{Builder, Env};
macro_rules! info_println {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
println!($($tts)*);
}
}
}
macro_rules! info_print {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
print!($($tts)*);
}
}
}
/// Initialize `env_logger` using either `default_level` or
/// `RUST_LOG` environment variable as default log level. /// `RUST_LOG` environment variable as default log level.
/// pub fn init_logger(default_level: &str) -> Result<()> {
/// OpenTelemetry is configured with OTLP/HTTP exporter. It picks up let env = Env::default().filter_or("RUST_LOG", default_level);
/// configuration from environment variables. For example, to change the destination,
/// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See
/// `tracing-utils` package description.
///
pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
// Initialize Logging
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
let fmt_layer = tracing_subscriber::fmt::layer() Builder::from_env(env)
.with_target(false) .format(|buf, record| {
.with_writer(std::io::stderr); let thread_handle = std::thread::current();
writeln!(
// Initialize OpenTelemetry buf,
let otlp_layer = "{} [{}] {}: {}",
tracing_utils::init_tracing_without_runtime("compute_ctl").map(OpenTelemetryLayer::new); Utc::now().format("%Y-%m-%d %H:%M:%S%.3f %Z"),
thread_handle.name().unwrap_or("main"),
// Put it all together record.level(),
tracing_subscriber::registry() record.args()
.with(env_filter) )
.with(otlp_layer) })
.with(fmt_layer)
.init(); .init();
tracing::info!("logging and tracing started");
Ok(()) Ok(())
} }
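A sketch of how the tracing-based initializer above might be wired into compute_ctl's main(); the module path and call site are assumptions, while the function name, the RUST_LOG fallback and the OTLP exporter behaviour come from the code itself:

    // Hedged sketch only: the `compute_tools::logger` path is assumed from the
    // `pub mod logger` declaration elsewhere in this crate.
    fn main() -> anyhow::Result<()> {
        // Uses "info" unless RUST_LOG overrides it; if OTEL_EXPORTER_OTLP_ENDPOINT
        // (e.g. http://jaeger:4318) is set, spans are also exported over OTLP/HTTP.
        compute_tools::logger::init_tracing_and_logging("info")?;
        tracing::info!("compute_ctl starting");
        Ok(())
    }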

View File

@@ -3,8 +3,8 @@ use std::{thread, time};
use anyhow::Result; use anyhow::Result;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use log::{debug, info};
use postgres::{Client, NoTls}; use postgres::{Client, NoTls};
use tracing::{debug, info};
use crate::compute::ComputeNode; use crate::compute::ComputeNode;
@@ -46,22 +46,16 @@ fn watch_compute_activity(compute: &ComputeNode) {
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors? AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
&[], &[],
); );
let mut last_active = compute.state.lock().unwrap().last_active; let mut last_active = compute.state.read().unwrap().last_active;
if let Ok(backs) = backends { if let Ok(backs) = backends {
let mut idle_backs: Vec<DateTime<Utc>> = vec![]; let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() { for b in backs.into_iter() {
let state: String = match b.try_get("state") { let state: String = b.get("state");
Ok(state) => state, let change: String = b.get("state_change");
Err(_) => continue,
};
if state == "idle" { if state == "idle" {
let change: String = match b.try_get("state_change") {
Ok(state_change) => state_change,
Err(_) => continue,
};
let change = DateTime::parse_from_rfc3339(&change); let change = DateTime::parse_from_rfc3339(&change);
match change { match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)), Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
@@ -80,14 +74,16 @@ fn watch_compute_activity(compute: &ComputeNode) {
} }
} }
// Get idle backend `state_change` with the max timestamp. // Sort idle backend `state_change` timestamps. The last one corresponds
if let Some(last) = idle_backs.iter().max() { // to the last activity.
idle_backs.sort();
if let Some(last) = idle_backs.last() {
last_active = *last; last_active = *last;
} }
} }
// Update the last activity in the shared state if we got a more recent one. // Update the last activity in the shared state if we got a more recent one.
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.write().unwrap();
if last_active > state.last_active { if last_active > state.last_active {
state.last_active = last_active; state.last_active = last_active;
debug!("set the last compute activity time to: {}", last_active); debug!("set the last compute activity time to: {}", last_active);

View File

@@ -1,9 +1,3 @@
pub const DEFAULT_LOG_LEVEL: &str = "info"; pub const DEFAULT_LOG_LEVEL: &str = "info";
// From Postgres docs: pub const DEFAULT_CONNSTRING: &str = "host=localhost user=postgres";
// To ease transition from the md5 method to the newer SCRAM method, if md5 is specified
// as a method in pg_hba.conf but the user's password on the server is encrypted for SCRAM
// (see below), then SCRAM-based authentication will automatically be chosen instead.
// https://www.postgresql.org/docs/15/auth-password.html
//
// So it's safe to set md5 here, as `control-plane` uses SCRAM for all roles anyway. pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";
pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5"; pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";
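A rough sketch of how this rule might be appended to pg_hba.conf (the job of update_pg_hba in spec.rs); the file handling below is an assumption, only the constant itself comes from this file:

    // Hedged sketch; the real update_pg_hba may rewrite the file differently.
    use std::io::Write;
    use std::path::Path;

    fn append_hba_rule(pgdata: &Path) -> anyhow::Result<()> {
        let mut file = std::fs::OpenOptions::new()
            .append(true)
            .open(pgdata.join("pg_hba.conf"))?;
        // md5 is safe here: Postgres transparently switches to SCRAM when the
        // stored password is SCRAM-encrypted.
        writeln!(file, "{}", PG_HBA_ALL_MD5)?;
        Ok(())
    }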

View File

@@ -10,34 +10,48 @@ use std::time::{Duration, Instant};
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use notify::{RecursiveMode, Watcher}; use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction}; use postgres::{Client, Transaction};
use tracing::{debug, instrument}; use serde::Deserialize;
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
/// Escape a string for including it in a SQL literal /// Rust representation of Postgres role info with only those fields
fn escape_literal(s: &str) -> String { /// that matter for us.
s.replace('\'', "''").replace('\\', "\\\\") #[derive(Clone, Deserialize)]
pub struct Role {
pub name: PgIdent,
pub encrypted_password: Option<String>,
pub options: GenericOptions,
} }
/// Escape a string so that it can be used in postgresql.conf. /// Rust representation of Postgres database info with only those fields
/// Same as escape_literal, currently. /// that matter for us.
fn escape_conf_value(s: &str) -> String { #[derive(Clone, Deserialize)]
s.replace('\'', "''").replace('\\', "\\\\") pub struct Database {
pub name: PgIdent,
pub owner: PgIdent,
pub options: GenericOptions,
} }
trait GenericOptionExt { /// Common type representing both SQL statement params with or without value,
fn to_pg_option(&self) -> String; /// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
fn to_pg_setting(&self) -> String; /// options like `wal_level = logical`.
#[derive(Clone, Deserialize)]
pub struct GenericOption {
pub name: String,
pub value: Option<String>,
pub vartype: String,
} }
impl GenericOptionExt for GenericOption { /// Optional collection of `GenericOption`'s. Type alias allows us to
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
impl GenericOption {
/// Represent `GenericOption` as SQL statement parameter. /// Represent `GenericOption` as SQL statement parameter.
fn to_pg_option(&self) -> String { pub fn to_pg_option(&self) -> String {
if let Some(val) = &self.value { if let Some(val) = &self.value {
match self.vartype.as_ref() { match self.vartype.as_ref() {
"string" => format!("{} '{}'", self.name, escape_literal(val)), "string" => format!("{} '{}'", self.name, val),
_ => format!("{} {}", self.name, val), _ => format!("{} {}", self.name, val),
} }
} else { } else {
@@ -46,11 +60,18 @@ impl GenericOptionExt for GenericOption {
} }
/// Represent `GenericOption` as configuration option. /// Represent `GenericOption` as configuration option.
fn to_pg_setting(&self) -> String { pub fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value { if let Some(val) = &self.value {
let name = match self.name.as_str() {
"safekeepers" => "neon.safekeepers",
"wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
"wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
it => it,
};
match self.vartype.as_ref() { match self.vartype.as_ref() {
"string" => format!("{} = '{}'", self.name, escape_conf_value(val)), "string" => format!("{} = '{}'", name, val),
_ => format!("{} = {}", self.name, val), _ => format!("{} = {}", name, val),
} }
} else { } else {
self.name.to_owned() self.name.to_owned()
@@ -85,7 +106,6 @@ impl PgOptionsSerialize for GenericOptions {
.map(|op| op.to_pg_setting()) .map(|op| op.to_pg_setting())
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join("\n") .join("\n")
+ "\n" // newline after last setting
} else { } else {
"".to_string() "".to_string()
} }
@@ -99,22 +119,25 @@ pub trait GenericOptionsSearch {
impl GenericOptionsSearch for GenericOptions { impl GenericOptionsSearch for GenericOptions {
/// Lookup option by name /// Lookup option by name
fn find(&self, name: &str) -> Option<String> { fn find(&self, name: &str) -> Option<String> {
let ops = self.as_ref()?; match &self {
let op = ops.iter().find(|s| s.name == name)?; Some(ops) => {
op.value.clone() let op = ops.iter().find(|s| s.name == name);
match op {
Some(op) => op.value.clone(),
None => None,
}
}
None => None,
}
} }
} }
pub trait RoleExt { impl Role {
fn to_pg_options(&self) -> String;
}
impl RoleExt for Role {
/// Serialize a list of role parameters into a Postgres-acceptable /// Serialize a list of role parameters into a Postgres-acceptable
/// string of arguments. /// string of arguments.
fn to_pg_options(&self) -> String { pub fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane. // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in Rails.
// For now, we do not use generic `options` for roles. Once used, add // For now we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here. // `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string(); let mut params: String = "LOGIN".to_string();
@@ -137,17 +160,13 @@ impl RoleExt for Role {
} }
} }
pub trait DatabaseExt { impl Database {
fn to_pg_options(&self) -> String;
}
impl DatabaseExt for Database {
/// Serialize a list of database parameters into a Postgres-acceptable /// Serialize a list of database parameters into a Postgres-acceptable
/// string of arguments. /// string of arguments.
/// NB: `TEMPLATE` is actually also an identifier, but so far we only need /// NB: `TEMPLATE` is actually also an identifier, but so far we only need
/// to use `template0` and `template1`, so it is not a problem. Yet in the future /// to use `template0` and `template1`, so it is not a problem. Yet in the future
/// it may require a proper quoting too. /// it may require a proper quoting too.
fn to_pg_options(&self) -> String { pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options(); let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.pg_quote()) write!(params, " OWNER {}", &self.owner.pg_quote())
.expect("String is documented to not to error during write operations"); .expect("String is documented to not to error during write operations");
@@ -156,6 +175,10 @@ impl DatabaseExt for Database {
} }
} }
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
pub type PgIdent = String;
/// Generic trait used to provide quoting / encoding for strings used in the /// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL. /// Postgres SQL queries and DATABASE_URL.
pub trait Escaping { pub trait Escaping {
@@ -209,7 +232,6 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
/// Wait for Postgres to become ready to accept connections. It's ready to /// Wait for Postgres to become ready to accept connections. It's ready to
/// accept connections when the state-field in `pgdata/postmaster.pid` says /// accept connections when the state-field in `pgdata/postmaster.pid` says
/// 'ready'. /// 'ready'.
#[instrument(skip(pg))]
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> { pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid"); let pid_path = pgdata.join("postmaster.pid");
@@ -268,18 +290,18 @@ pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
} }
let res = rx.recv_timeout(Duration::from_millis(100)); let res = rx.recv_timeout(Duration::from_millis(100));
debug!("woken up by notify: {res:?}"); log::debug!("woken up by notify: {res:?}");
// If there are multiple events in the channel already, we only need to be // If there are multiple events in the channel already, we only need to be
// check once. Swallow the extra events before we go ahead to check the // check once. Swallow the extra events before we go ahead to check the
// pid file. // pid file.
while let Ok(res) = rx.try_recv() { while let Ok(res) = rx.try_recv() {
debug!("swallowing extra event: {res:?}"); log::debug!("swallowing extra event: {res:?}");
} }
// Check that we can open pid file first. // Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) { if let Ok(file) = File::open(&pid_path) {
if !postmaster_pid_seen { if !postmaster_pid_seen {
debug!("postmaster.pid appeared"); log::debug!("postmaster.pid appeared");
watcher watcher
.unwatch(pgdata) .unwatch(pgdata)
.expect("Failed to remove pgdata dir watch"); .expect("Failed to remove pgdata dir watch");
@@ -295,7 +317,7 @@ pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
// Pid file could be there and we could read it, but it could be empty, for example. // Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line { if let Some(Ok(line)) = last_line {
let status = line.trim(); let status = line.trim();
debug!("last line of postmaster.pid: {status:?}"); log::debug!("last line of postmaster.pid: {status:?}");
// Now Postgres is ready to accept connections // Now Postgres is ready to accept connections
if status == "ready" { if status == "ready" {
@@ -311,7 +333,7 @@ pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
} }
} }
tracing::info!("PostgreSQL is now running, continuing to configure it"); log::info!("PostgreSQL is now running, continuing to configure it");
Ok(()) Ok(())
} }

View File

@@ -1,45 +1,52 @@
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use anyhow::{anyhow, bail, Result}; use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::config::Config; use postgres::config::Config;
use postgres::{Client, NoTls}; use postgres::{Client, NoTls};
use tracing::{info, info_span, instrument, span_enabled, warn, Level}; use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config; use crate::config;
use crate::params::PG_HBA_ALL_MD5; use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*; use crate::pg_helpers::*;
use compute_api::responses::ControlPlaneSpecResponse; /// Cluster spec or configuration represented as an optional number of
use compute_api::spec::{ComputeSpec, Database, PgIdent, Role}; /// delta operations + final cluster state description.
#[derive(Clone, Deserialize)]
pub struct ComputeSpec {
pub format_version: f32,
pub timestamp: String,
pub operation_uuid: Option<String>,
/// Expected cluster state at the end of transition process.
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
}
/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT` /// Cluster state seen from the perspective of the external tools
/// env variable is set, it will be used for authorization. /// like Rails web console.
pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result<ComputeSpec> { #[derive(Clone, Deserialize)]
let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec"); pub struct Cluster {
let jwt: String = match std::env::var("NEON_CONSOLE_JWT") { pub cluster_id: String,
Ok(v) => v, pub name: String,
Err(_) => "".to_string(), pub state: Option<String>,
}; pub roles: Vec<Role>,
info!("getting spec from control plane: {}", cp_uri); pub databases: Vec<Database>,
pub settings: GenericOptions,
}
// TODO: check the response. We should distinguish cases when it's /// Single cluster state changing operation that could not be represented as
// - network error, then retry /// a static `Cluster` structure. For example:
// - no spec for compute yet, then wait /// - DROP DATABASE
// - compute id is unknown or any other error, then bail out /// - DROP ROLE
let resp: ControlPlaneSpecResponse = reqwest::blocking::Client::new() /// - ALTER ROLE name RENAME TO new_name
.get(cp_uri) /// - ALTER DATABASE name RENAME TO new_name
.header("Authorization", jwt) #[derive(Clone, Deserialize)]
.send() pub struct DeltaOp {
.map_err(|e| anyhow!("could not send spec request to control plane: {}", e))? pub action: String,
.json() pub name: PgIdent,
.map_err(|e| anyhow!("could not get compute spec from control plane: {}", e))?; pub new_name: Option<PgIdent>,
if let Some(spec) = resp.spec {
Ok(spec)
} else {
bail!("could not get compute spec from control plane")
}
} }
/// It takes cluster specification and does the following: /// It takes cluster specification and does the following:
@@ -72,25 +79,23 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
/// Given a cluster spec json and open transaction it handles roles creation, /// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update. /// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> { pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?; let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?; let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
// Print a list of existing Postgres roles (only in debug mode) // Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) { info!("postgres roles:");
info!("postgres roles:"); for r in &existing_roles {
for r in &existing_roles { info_println!(
info!( "{} - {}:{}",
" - {}:{}", " ".repeat(27 + 5),
r.name, r.name,
if r.encrypted_password.is_some() { if r.encrypted_password.is_some() {
"[FILTERED]" "[FILTERED]"
} else { } else {
"(null)" "(null)"
} }
); );
}
} }
// Process delta operations first // Process delta operations first
@@ -131,80 +136,58 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec roles:"); info!("cluster spec roles:");
for role in &spec.cluster.roles { for role in &spec.cluster.roles {
let name = &role.name; let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
enum RoleAction { info_print!(
None, "{} - {}:{}",
Update, " ".repeat(27 + 5),
Create, name,
} if role.encrypted_password.is_some() {
let action = if let Some(r) = pg_role {
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
RoleAction::Update
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5' prefix first if any)
//
// This is a backward compatibility hack, which comes from the times when we were using
// md5 for everyone and hashes were stored in the console db without md5 prefix. So when
// role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix,
// but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix.
// Here is the only place so far where we compare hashes, so it seems to be the best candidate
// to place this compatibility layer.
let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") {
stripped
} else {
pg_pwd
};
if pg_pwd != *role.encrypted_password.as_ref().unwrap() {
RoleAction::Update
} else {
RoleAction::None
}
} else {
RoleAction::None
}
} else {
RoleAction::Create
};
match action {
RoleAction::None => {}
RoleAction::Update => {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
RoleAction::Create => {
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
info!("role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
}
}
if span_enabled!(Level::INFO) {
let pwd = if role.encrypted_password.is_some() {
"[FILTERED]" "[FILTERED]"
} else { } else {
"(null)" "(null)"
}; }
let action_str = match action { );
RoleAction::None => "",
RoleAction::Create => " -> create", // XXX: with a limited number of roles it is fine, but consider making it a HashMap
RoleAction::Update => " -> update", let pg_role = existing_roles.iter().find(|r| r.name == *name);
};
info!(" - {}:{}{}", name, pwd, action_str); if let Some(r) = pg_role {
let mut update_role = false;
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
update_role = true;
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5:' prefix first)
update_role = pg_pwd[3..] != *role.encrypted_password.as_ref().unwrap();
}
if update_role {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
info_print!(" -> update");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
} else {
info!("role name: '{}'", &name);
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
info!("role create query: '{}'", &query);
info_print!(" -> create");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
} }
info_print!("\n");
} }
xact.commit()?; xact.commit()?;
@@ -213,32 +196,23 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
} }
/// Reassign all dependent objects and delete requested roles. /// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)] pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations { if let Some(ops) = &spec.delta_operations {
// First, reassign all dependent objects to db owners.
info!("reassigning dependent objects of to-be-deleted roles"); info!("reassigning dependent objects of to-be-deleted roles");
// Fetch existing roles. We could've exported and used `existing_roles` from
// `handle_roles()`, but we only make this list there before creating new roles.
// Which is probably fine as we never create to-be-deleted roles, but that'd
// just look a bit untidy. Anyway, the entire `pg_roles` should be in shared
// buffers already, so this shouldn't be a big deal.
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
xact.commit()?;
for op in ops { for op in ops {
// Check that role is still present in Postgres, as this could be a if op.action == "delete_role" {
// restart with the same spec after role deletion. reassign_owned_objects(node, &op.name)?;
if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
reassign_owned_objects(spec, connstr, &op.name)?;
} }
} }
}
// Second, proceed with role deletions. // Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions"); info!("processing role deletions");
let mut xact = client.transaction()?;
for op in ops { for op in ops {
// We do not check whether the role exists or not, // We do not check whether the role exists or not,
// Postgres will take care of it for us // Postgres will take care of it for us
@@ -249,17 +223,16 @@ pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Cli
xact.execute(query.as_str(), &[])?; xact.execute(query.as_str(), &[])?;
} }
} }
xact.commit()?;
} }
Ok(()) Ok(())
} }
// Reassign all owned objects in all databases to the owner of the database. // Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> { fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &spec.cluster.databases { for db in &node.spec.cluster.databases {
if db.owner != *role_name { if db.owner != *role_name {
let mut conf = Config::from_str(connstr)?; let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name); conf.dbname(&db.name);
let mut client = conf.connect(NoTls)?; let mut client = conf.connect(NoTls)?;
@@ -290,16 +263,13 @@ fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level /// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks, /// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us idempotency. /// which together provide us idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?; let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only in debug mode) // Print a list of existing Postgres databases (only in debug mode)
if span_enabled!(Level::INFO) { info!("postgres databases:");
info!("postgres databases:"); for r in &existing_dbs {
for r in &existing_dbs { info_println!("{} - {}:{}", " ".repeat(27 + 5), r.name, r.owner);
info!(" {}:{}", r.name, r.owner);
}
} }
// Process delta operations first // Process delta operations first
@@ -342,15 +312,12 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
for db in &spec.cluster.databases { for db in &spec.cluster.databases {
let name = &db.name; let name = &db.name;
info_print!("{} - {}:{}", " ".repeat(27 + 5), db.name, db.owner);
// XXX: with a limited number of databases it is fine, but consider making it a HashMap // XXX: with a limited number of databases it is fine, but consider making it a HashMap
let pg_db = existing_dbs.iter().find(|r| r.name == *name); let pg_db = existing_dbs.iter().find(|r| r.name == *name);
enum DatabaseAction { if let Some(r) = pg_db {
None,
Update,
Create,
}
let action = if let Some(r) = pg_db {
// XXX: db owner name is returned as quoted string from Postgres, // XXX: db owner name is returned as quoted string from Postgres,
// when quoting is needed. // when quoting is needed.
let new_owner = if r.owner.starts_with('"') { let new_owner = if r.owner.starts_with('"') {
@@ -360,42 +327,24 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
}; };
if new_owner != r.owner { if new_owner != r.owner {
// Update the owner
DatabaseAction::Update
} else {
DatabaseAction::None
}
} else {
DatabaseAction::Create
};
match action {
DatabaseAction::None => {}
DatabaseAction::Update => {
let query: String = format!( let query: String = format!(
"ALTER DATABASE {} OWNER TO {}", "ALTER DATABASE {} OWNER TO {}",
name.pg_quote(), name.pg_quote(),
db.owner.pg_quote() db.owner.pg_quote()
); );
let _guard = info_span!("executing", query).entered(); info_print!(" -> update");
client.execute(query.as_str(), &[])?;
}
DatabaseAction::Create => {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
query.push_str(&db.to_pg_options());
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
};
if span_enabled!(Level::INFO) { client.execute(query.as_str(), &[])?;
let action_str = match action { }
DatabaseAction::None => "", } else {
DatabaseAction::Create => " -> create", let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
DatabaseAction::Update => " -> update", info_print!(" -> create");
};
info!(" - {}:{}{}", db.name, db.owner, action_str); query.push_str(&db.to_pg_options());
client.execute(query.as_str(), &[])?;
} }
info_print!("\n");
} }
Ok(()) Ok(())
@@ -403,8 +352,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants /// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example. /// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)] pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { let spec = &node.spec;
info!("cluster spec grants:"); info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database // We now have a separate `web_access` role to connect to the database
@@ -436,8 +386,8 @@ pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) ->
// Do some per-database access adjustments. We'd better do this at db creation time, // Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically. // atomically.
for db in &spec.cluster.databases { for db in &node.spec.cluster.databases {
let mut conf = Config::from_str(connstr)?; let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name); conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?; let mut db_client = conf.connect(NoTls)?;
@@ -503,18 +453,3 @@ pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) ->
Ok(()) Ok(())
} }
/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
// Create extension only if this compute really needs it
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
info!("creating system extensions with query: {}", query);
client.simple_query(query)?;
}
}
Ok(())
}
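A sketch of how get_spec_from_control_plane from this file might be driven at startup. The NEON_CONTROL_PLANE_URI and NEON_COMPUTE_ID variable names are made up for illustration; only NEON_CONSOLE_JWT (read inside the helper) and the function signature come from the code above:

    // Hedged pull-mode startup sketch; env variable names are placeholders.
    fn load_spec() -> anyhow::Result<ComputeSpec> {
        let base_uri = std::env::var("NEON_CONTROL_PLANE_URI")?;
        let compute_id = std::env::var("NEON_COMPUTE_ID")?;
        // Blocks until the control plane answers (or the request fails);
        // callers are expected to retry on network errors.
        get_spec_from_control_plane(&base_uri, &compute_id)
    }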

View File

@@ -178,11 +178,6 @@
"name": "neon.pageserver_connstring", "name": "neon.pageserver_connstring",
"value": "host=127.0.0.1 port=6400", "value": "host=127.0.0.1 port=6400",
"vartype": "string" "vartype": "string"
},
{
"name": "test.escaping",
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
"vartype": "string"
} }
] ]
}, },

View File

@@ -1,13 +1,14 @@
#[cfg(test)] #[cfg(test)]
mod pg_helpers_tests { mod pg_helpers_tests {
use std::fs::File; use std::fs::File;
use compute_api::spec::{ComputeSpec, GenericOption, GenericOptions, PgIdent};
use compute_tools::pg_helpers::*; use compute_tools::pg_helpers::*;
use compute_tools::spec::ComputeSpec;
#[test] #[test]
fn params_serialize() { fn params_serialize() {
let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap(); let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap(); let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!( assert_eq!(
@@ -22,35 +23,12 @@ mod pg_helpers_tests {
#[test] #[test]
fn settings_serialize() { fn settings_serialize() {
let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap(); let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap(); let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!( assert_eq!(
spec.cluster.settings.as_pg_settings(), spec.cluster.settings.as_pg_settings(),
r#"fsync = off "fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
wal_level = replica
hot_standby = on
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on
shared_buffers = 32768
port = 55432
max_connections = 100
max_wal_senders = 10
listen_addresses = '0.0.0.0'
wal_sender_timeout = 0
password_encryption = md5
maintenance_work_mem = 65536
max_parallel_workers = 8
max_worker_processes = 8
neon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'
max_replication_slots = 10
neon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'
shared_preload_libraries = 'neon'
synchronous_standby_names = 'walproposer'
neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
"#
); );
} }
@@ -60,33 +38,4 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\""); assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
} }
#[test]
fn generic_options_search() {
let generic_options: GenericOptions = Some(vec![
GenericOption {
name: "present_value".into(),
value: Some("value".into()),
vartype: "string".into(),
},
GenericOption {
name: "missed_value".into(),
value: None,
vartype: "int".into(),
},
]);
assert_eq!(generic_options.find("present_value"), Some("value".into()));
assert_eq!(generic_options.find("missed_value"), None);
assert_eq!(generic_options.find("invalid_value"), None);
let empty_generic_options: GenericOptions = Some(vec![]);
assert_eq!(empty_generic_options.find("present_value"), None);
assert_eq!(empty_generic_options.find("missed_value"), None);
assert_eq!(empty_generic_options.find("invalid_value"), None);
let none_generic_options: GenericOptions = None;
assert_eq!(none_generic_options.find("present_value"), None);
assert_eq!(none_generic_options.find("missed_value"), None);
assert_eq!(none_generic_options.find("invalid_value"), None);
}
} }

View File

@@ -1,33 +1,27 @@
[package] [package]
name = "control_plane" name = "control_plane"
version = "0.1.0" version = "0.1.0"
edition.workspace = true edition = "2021"
license.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true clap = "4.0"
clap.workspace = true comfy-table = "6.1"
comfy-table.workspace = true git-version = "0.3.5"
git-version.workspace = true tar = "0.4.38"
nix.workspace = true postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
once_cell.workspace = true serde = { version = "1.0", features = ["derive"] }
postgres.workspace = true serde_with = "2.0"
regex.workspace = true toml = "0.5"
reqwest = { workspace = true, features = ["blocking", "json"] } once_cell = "1.13.0"
serde.workspace = true regex = "1"
serde_json.workspace = true anyhow = "1.0"
serde_with.workspace = true thiserror = "1"
tar.workspace = true nix = "0.25"
thiserror.workspace = true reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
toml.workspace = true
url.workspace = true
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api # Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
# instead, so that recompile times are better. # instead, so that recompile times are better.
pageserver_api.workspace = true pageserver_api = { path = "../libs/pageserver_api" }
postgres_backend.workspace = true safekeeper_api = { path = "../libs/safekeeper_api" }
safekeeper_api.workspace = true utils = { path = "../libs/utils" }
postgres_connection.workspace = true workspace_hack = { version = "0.1", path = "../workspace_hack" }
storage_broker.workspace = true
utils.workspace = true
workspace_hack.workspace = true

View File

@@ -2,8 +2,7 @@
[pageserver] [pageserver]
listen_pg_addr = '127.0.0.1:64000' listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898' listen_http_addr = '127.0.0.1:9898'
pg_auth_type = 'Trust' auth_type = 'Trust'
http_auth_type = 'Trust'
[[safekeepers]] [[safekeepers]]
id = 1 id = 1

View File

@@ -3,13 +3,12 @@
[pageserver] [pageserver]
listen_pg_addr = '127.0.0.1:64000' listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898' listen_http_addr = '127.0.0.1:9898'
pg_auth_type = 'Trust' auth_type = 'Trust'
http_auth_type = 'Trust'
[[safekeepers]] [[safekeepers]]
id = 1 id = 1
pg_port = 5454 pg_port = 5454
http_port = 7676 http_port = 7676
[broker] [etcd_broker]
listen_addr = '127.0.0.1:50051' broker_endpoints = ['http://127.0.0.1:2379']

View File

@@ -1,337 +0,0 @@
//! Spawns and kills background processes that are needed by Neon CLI.
//! Applies common set-up such as log and pid files (if needed) to every process.
//!
//! Neon CLI does not run in background, so it needs to store the information about
//! spawned processes, which it does in this module.
//! We do that by storing the pid of the process in the "${process_name}.pid" file.
//! The pid file can be created by the process itself
//! (Neon storage binaries do that and also ensure that a lock is taken onto that file)
//! or we create such file after starting the process
//! (non-Neon binaries don't necessarily follow our pidfile conventions).
//! The pid stored in the file is later used to stop the service.
//!
//! See [`lock_file`] module for more info.
use std::ffi::OsStr;
use std::io::Write;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::{Path, PathBuf};
use std::process::{Child, Command};
use std::time::Duration;
use std::{fs, io, thread};
use anyhow::Context;
use nix::errno::Errno;
use nix::fcntl::{FcntlArg, FdFlag};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use utils::pid_file::{self, PidFileRead};
// These constants control the loop used to poll for process start / stop.
//
// The loop waits for at most 10 seconds, polling every 100 ms.
// Once a second, it prints a dot ("."), to give the user an indication that
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
pub enum InitialPidFile<'t> {
/// Create a pidfile, to allow future CLI invocations to manipulate the process.
Create(&'t Path),
/// The process will create the pidfile itself, so we need to wait for that event.
Expect(&'t Path),
}
/// Start a background child process using the parameters given.
pub fn start_process<F, AI, A, EI>(
process_name: &str,
datadir: &Path,
command: &Path,
args: AI,
envs: EI,
initial_pid_file: InitialPidFile,
process_status_check: F,
) -> anyhow::Result<Child>
where
F: Fn() -> anyhow::Result<bool>,
AI: IntoIterator<Item = A>,
A: AsRef<OsStr>,
// Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
EI: IntoIterator<Item = (String, String)>,
{
let log_path = datadir.join(format!("{process_name}.log"));
let process_log_file = fs::OpenOptions::new()
.create(true)
.write(true)
.append(true)
.open(&log_path)
.with_context(|| {
format!("Could not open {process_name} log file {log_path:?} for writing")
})?;
let same_file_for_stderr = process_log_file.try_clone().with_context(|| {
format!("Could not reuse {process_name} log file {log_path:?} for writing stderr")
})?;
let mut command = Command::new(command);
let background_command = command
.stdout(process_log_file)
.stderr(same_file_for_stderr)
.args(args);
let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
filled_cmd.envs(envs);
let pid_file_to_check = match initial_pid_file {
InitialPidFile::Create(path) => {
pre_exec_create_pidfile(filled_cmd, path);
path
}
InitialPidFile::Expect(path) => path,
};
let mut spawned_process = filled_cmd.spawn().with_context(|| {
format!("Could not spawn {process_name}, see console output and log files for details.")
})?;
let pid = spawned_process.id();
let pid = Pid::from_raw(
i32::try_from(pid)
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
);
for retries in 0..RETRIES {
match process_started(pid, Some(pid_file_to_check), &process_status_check) {
Ok(true) => {
println!("\n{process_name} started, pid: {pid}");
return Ok(spawned_process);
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not started yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
}
Err(e) => {
println!("{process_name} failed to start: {e:#}");
if let Err(e) = spawned_process.kill() {
println!("Could not stop {process_name} subprocess: {e:#}")
};
return Err(e);
}
}
}
println!();
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
let pid = match pid_file::read(pid_file)
.with_context(|| format!("read pid_file {pid_file:?}"))?
{
PidFileRead::NotExist => {
println!("{process_name} is already stopped: no pid file present at {pid_file:?}");
return Ok(());
}
PidFileRead::NotHeldByAnyProcess(_) => {
// Don't try to kill according to file contents because the pid might have been re-used by another process.
// Don't delete the file either, it can race with new pid file creation.
// Read `pid_file` module comment for details.
println!(
"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}"
);
return Ok(());
}
PidFileRead::LockedByOtherProcess(pid) => pid,
};
// XXX the pid could become invalid (and recycled) at any time before the kill() below.
// send signal
let sig = if immediate {
print!("Stopping {process_name} with pid {pid} immediately..");
Signal::SIGQUIT
} else {
print!("Stopping {process_name} with pid {pid} gracefully..");
Signal::SIGTERM
};
io::stdout().flush().unwrap();
match kill(pid, sig) {
Ok(()) => (),
Err(Errno::ESRCH) => {
// Again, don't delete the pid file. The unlink can race with a new pid file being created.
println!(
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. Lucky we didn't harm anyone."
);
return Ok(());
}
Err(e) => anyhow::bail!("Failed to send signal to {process_name} with pid {pid}: {e}"),
}
// Wait until process is gone
for retries in 0..RETRIES {
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
return Ok(());
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to stop. Keep waiting, but
// print a message
print!("\n{process_name} has not stopped yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
}
Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}");
return Err(e);
}
}
}
println!();
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
// If RUST_BACKTRACE is set, pass it through. But if it's not set, default
// to RUST_BACKTRACE=1.
let backtrace_setting = std::env::var_os("RUST_BACKTRACE");
let backtrace_setting = backtrace_setting
.as_deref()
.unwrap_or_else(|| OsStr::new("1"));
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", backtrace_setting);
// Pass through these environment variables to the command
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
}
filled_cmd
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}
}
cmd
}
/// Add a `pre_exec` to the cmd that, in between fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
/// will remain held until the cmd exits.
fn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command
where
P: Into<PathBuf>,
{
let path: PathBuf = path.into();
// SAFETY
// pre_exec is marked unsafe because it runs between fork and exec.
// In what ways is that dangerous?
// Long answer: https://github.com/rust-lang/rust/issues/39575
// Short answer: in a multi-threaded program, other threads may have
// been inside of critical sections at the time of fork. In the
// original process, that was all right, assuming they protected
// the critical sections appropriately, e.g., through locks.
// Fork adds another process to the mix that
// 1. Has a single thread T
// 2. In an exact copy of the address space at the time of fork.
// A variety of problems can occur now:
// 1. T tries to grab a lock that was locked at the time of fork.
// It will wait forever since in its address space, the lock
// is in state 'taken' but the thread that would unlock it is
// not there.
// 2. A rust object that represented some external resource in the
// parent now got implicitly copied by the fork, even though
// the object's type is not `Copy`. The parent program may use
// non-copyability as a way to enforce unique ownership of an
// external resource in the typesystem. The fork breaks that
// assumption, as now both parent and child process have an
// owned instance of the object that represents the same
// underlying resource.
// While these seem like niche problems, (1) in particular is
// highly relevant. For example, `malloc()` may grab a mutex internally,
// and so, if we forked while another thread was malloc'ing and our
// pre_exec closure allocates as well, it will block on the malloc
// mutex forever
//
// The proper solution is to only use C library functions that are marked
// "async-signal-safe": https://man7.org/linux/man-pages/man7/signal-safety.7.html
//
// With this specific pre_exec() closure, the non-error path doesn't allocate.
// The error path uses `anyhow`, and hence does allocate.
// We take our chances there, hoping that any potential disaster is constrained
// to the child process (e.g., malloc has no state outside of the child process).
// Last, `expect` prints to stderr, and stdio is not async-signal-safe.
// Again, we take our chances, making the same assumptions as for malloc.
unsafe {
cmd.pre_exec(move || {
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
// remains locked after exec.
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
.expect("remove FD_CLOEXEC");
// Don't run drop(file), it would close the file before we actually exec.
std::mem::forget(file);
Ok(())
});
}
cmd
}
fn process_started<F>(
pid: Pid,
pid_file_to_check: Option<&Path>,
status_check: &F,
) -> anyhow::Result<bool>
where
F: Fn() -> anyhow::Result<bool>,
{
match status_check() {
Ok(true) => match pid_file_to_check {
Some(pid_file_path) => match pid_file::read(pid_file_path)? {
PidFileRead::NotExist => Ok(false),
PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
},
None => Ok(true),
},
Ok(false) => Ok(false),
Err(e) => anyhow::bail!("process failed to start: {e}"),
}
}
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
match kill(pid, None) {
// Process exists, keep waiting
Ok(_) => Ok(false),
// Process not found, we're done
Err(Errno::ESRCH) => Ok(true),
Err(err) => anyhow::bail!("Failed to send signal to process with pid {pid}: {err}"),
}
}

View File

@@ -7,30 +7,29 @@
//! //!
use anyhow::{anyhow, bail, Context, Result}; use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use control_plane::endpoint::ComputeControlPlane; use control_plane::compute::ComputeControlPlane;
use control_plane::local_env::LocalEnv; use control_plane::local_env::{EtcdBroker, LocalEnv};
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode; use control_plane::safekeeper::SafekeeperNode;
use control_plane::{broker, local_env}; use control_plane::storage::PageServerNode;
use control_plane::{etcd, local_env};
use pageserver_api::models::TimelineInfo; use pageserver_api::models::TimelineInfo;
use pageserver_api::{ use pageserver_api::{
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR, DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR, DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
}; };
use postgres_backend::AuthType;
use safekeeper_api::{ use safekeeper_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT, DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
}; };
use std::collections::{BTreeSet, HashMap}; use std::collections::{BTreeSet, HashMap};
use std::path::PathBuf; use std::path::{Path, PathBuf};
use std::process::exit; use std::process::exit;
use std::str::FromStr; use std::str::FromStr;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use utils::{ use utils::{
auth::{Claims, Scope}, auth::{Claims, Scope},
id::{NodeId, TenantId, TenantTimelineId, TimelineId}, id::{NodeId, TenantId, TenantTimelineId, TimelineId},
lsn::Lsn, lsn::Lsn,
postgres_backend::AuthType,
project_git_version, project_git_version,
}; };
@@ -42,26 +41,27 @@ project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: &str = "14"; const DEFAULT_PG_VERSION: &str = "14";
fn default_conf() -> String { fn default_conf(etcd_binary_path: &Path) -> String {
format!( format!(
r#" r#"
# Default built-in configuration, defined in main.rs # Default built-in configuration, defined in main.rs
[broker] [etcd_broker]
listen_addr = '{DEFAULT_BROKER_ADDR}' broker_endpoints = ['http://localhost:2379']
etcd_binary_path = '{etcd_binary_path}'
[pageserver] [pageserver]
id = {DEFAULT_PAGESERVER_ID} id = {DEFAULT_PAGESERVER_ID}
listen_pg_addr = '{DEFAULT_PAGESERVER_PG_ADDR}' listen_pg_addr = '{DEFAULT_PAGESERVER_PG_ADDR}'
listen_http_addr = '{DEFAULT_PAGESERVER_HTTP_ADDR}' listen_http_addr = '{DEFAULT_PAGESERVER_HTTP_ADDR}'
pg_auth_type = '{trust_auth}' auth_type = '{pageserver_auth_type}'
http_auth_type = '{trust_auth}'
[[safekeepers]] [[safekeepers]]
id = {DEFAULT_SAFEKEEPER_ID} id = {DEFAULT_SAFEKEEPER_ID}
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT} pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT} http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
"#, "#,
trust_auth = AuthType::Trust, etcd_binary_path = etcd_binary_path.display(),
pageserver_auth_type = AuthType::Trust,
) )
} }
@@ -106,9 +106,8 @@ fn main() -> Result<()> {
"start" => handle_start_all(sub_args, &env), "start" => handle_start_all(sub_args, &env),
"stop" => handle_stop_all(sub_args, &env), "stop" => handle_stop_all(sub_args, &env),
"pageserver" => handle_pageserver(sub_args, &env), "pageserver" => handle_pageserver(sub_args, &env),
"pg" => handle_pg(sub_args, &env),
"safekeeper" => handle_safekeeper(sub_args, &env), "safekeeper" => handle_safekeeper(sub_args, &env),
"endpoint" => handle_endpoint(sub_args, &env),
"pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
_ => bail!("unexpected subcommand {sub_name}"), _ => bail!("unexpected subcommand {sub_name}"),
}; };
@@ -265,7 +264,7 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
} else if let Some(default_id) = env.default_tenant_id { } else if let Some(default_id) = env.default_tenant_id {
Ok(default_id) Ok(default_id)
} else { } else {
anyhow::bail!("No tenant id. Use --tenant-id, or set a default tenant"); bail!("No tenant id. Use --tenant-id, or set 'default_tenant_id' in the config file");
} }
} }
@@ -286,6 +285,8 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
} }
fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> { fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let initial_timeline_id_arg = parse_timeline_id(init_match)?;
// Create config file // Create config file
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") { let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
// load and parse the file // load and parse the file
@@ -297,7 +298,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
})? })?
} else { } else {
// Built-in default config // Built-in default config
default_conf() default_conf(&EtcdBroker::locate_etcd()?)
}; };
let pg_version = init_match let pg_version = init_match
@@ -309,16 +310,30 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?; LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
env.init(pg_version) env.init(pg_version)
.context("Failed to initialize neon repository")?; .context("Failed to initialize neon repository")?;
let initial_tenant_id = env
.default_tenant_id
.expect("default_tenant_id should be generated by the `env.init()` call above");
// Initialize pageserver, create initial tenant and timeline. // Initialize pageserver, create initial tenant and timeline.
let pageserver = PageServerNode::from_env(&env); let pageserver = PageServerNode::from_env(&env);
pageserver let initial_timeline_id = pageserver
.initialize(&pageserver_config_overrides(init_match)) .initialize(
Some(initial_tenant_id),
initial_timeline_id_arg,
&pageserver_config_overrides(init_match),
pg_version,
)
.unwrap_or_else(|e| { .unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}"); eprintln!("pageserver init failed: {e}");
exit(1); exit(1);
}); });
env.register_branch_mapping(
DEFAULT_BRANCH_NAME.to_owned(),
initial_tenant_id,
initial_timeline_id,
)?;
Ok(env) Ok(env)
} }
@@ -327,7 +342,7 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
.get_many::<String>("pageserver-config-override") .get_many::<String>("pageserver-config-override")
.into_iter() .into_iter()
.flatten() .flatten()
.map(String::as_str) .map(|s| s.as_str())
.collect() .collect()
} }
@@ -374,17 +389,6 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
println!( println!(
"Created an initial timeline '{new_timeline_id}' at Lsn {last_record_lsn} for tenant: {new_tenant_id}", "Created an initial timeline '{new_timeline_id}' at Lsn {last_record_lsn} for tenant: {new_tenant_id}",
); );
if create_match.get_flag("set-default") {
println!("Setting tenant {new_tenant_id} as a default one");
env.default_tenant_id = Some(new_tenant_id);
}
}
Some(("set-default", set_default_match)) => {
let tenant_id =
parse_tenant_id(set_default_match)?.context("No tenant id specified")?;
println!("Setting tenant {tenant_id} as a default one");
env.default_tenant_id = Some(tenant_id);
} }
Some(("config", create_match)) => { Some(("config", create_match)) => {
let tenant_id = get_tenant_id(create_match, env)?; let tenant_id = get_tenant_id(create_match, env)?;
@@ -471,10 +475,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let mut cplane = ComputeControlPlane::load(env.clone())?; let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ..."); println!("Importing timeline into pageserver ...");
pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?; pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
println!("Creating node for imported timeline ...");
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?; env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
println!("Creating endpoint for imported timeline ..."); cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?;
cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?;
println!("Done"); println!("Done");
} }
Some(("branch", branch_match)) => { Some(("branch", branch_match)) => {
@@ -522,10 +526,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
Ok(()) Ok(())
} }
fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match ep_match.subcommand() { let (sub_name, sub_args) = match pg_match.subcommand() {
Some(ep_subcommand_data) => ep_subcommand_data, Some(pg_subcommand_data) => pg_subcommand_data,
None => bail!("no endpoint subcommand provided"), None => bail!("no pg subcommand provided"),
}; };
let mut cplane = ComputeControlPlane::load(env.clone())?; let mut cplane = ComputeControlPlane::load(env.clone())?;
@@ -546,8 +550,8 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
table.load_preset(comfy_table::presets::NOTHING); table.load_preset(comfy_table::presets::NOTHING);
table.set_header([ table.set_header(&[
"ENDPOINT", "NODE",
"ADDRESS", "ADDRESS",
"TIMELINE", "TIMELINE",
"BRANCH NAME", "BRANCH NAME",
@@ -555,39 +559,39 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
"STATUS", "STATUS",
]); ]);
for (endpoint_id, endpoint) in cplane for ((_, node_name), node) in cplane
.endpoints .nodes
.iter() .iter()
.filter(|(_, endpoint)| endpoint.tenant_id == tenant_id) .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
{ {
let lsn_str = match endpoint.lsn { let lsn_str = match node.lsn {
None => { None => {
// -> primary endpoint // -> primary node
// Use the LSN at the end of the timeline. // Use the LSN at the end of the timeline.
timeline_infos timeline_infos
.get(&endpoint.timeline_id) .get(&node.timeline_id)
.map(|bi| bi.last_record_lsn.to_string()) .map(|bi| bi.last_record_lsn.to_string())
.unwrap_or_else(|| "?".to_string()) .unwrap_or_else(|| "?".to_string())
} }
Some(lsn) => { Some(lsn) => {
// -> read-only endpoint // -> read-only node
// Use the endpoint's LSN. // Use the node's LSN.
lsn.to_string() lsn.to_string()
} }
}; };
let branch_name = timeline_name_mappings let branch_name = timeline_name_mappings
.get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id)) .get(&TenantTimelineId::new(tenant_id, node.timeline_id))
.map(|name| name.as_str()) .map(|name| name.as_str())
.unwrap_or("?"); .unwrap_or("?");
table.add_row([ table.add_row(&[
endpoint_id.as_str(), node_name.as_str(),
&endpoint.address.to_string(), &node.address.to_string(),
&endpoint.timeline_id.to_string(), &node.timeline_id.to_string(),
branch_name, branch_name,
lsn_str.as_str(), lsn_str.as_str(),
endpoint.status(), node.status(),
]); ]);
} }
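
The listing code above is plain comfy_table usage; a minimal self-contained example in the same style (the NOTHING preset gives the borderless layout the list subcommand prints). The column values here are made up for illustration:

use comfy_table::Table;

fn main() {
    let mut table = Table::new();
    table.load_preset(comfy_table::presets::NOTHING);
    table.set_header(["ENDPOINT", "ADDRESS", "TIMELINE", "STATUS"]);
    table.add_row([
        "ep-main",
        "127.0.0.1:55432",
        "de200bd42b49cc1814412c7e592dd6e9",
        "running",
    ]);
    println!("{table}");
}
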
@@ -598,10 +602,10 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.get_one::<String>("branch-name") .get_one::<String>("branch-name")
.map(|s| s.as_str()) .map(|s| s.as_str())
.unwrap_or(DEFAULT_BRANCH_NAME); .unwrap_or(DEFAULT_BRANCH_NAME);
let endpoint_id = sub_args let node_name = sub_args
.get_one::<String>("endpoint_id") .get_one::<String>("node")
.map(String::to_string) .map(|node_name| node_name.to_string())
.unwrap_or_else(|| format!("ep-{branch_name}")); .unwrap_or_else(|| format!("{branch_name}_node"));
let lsn = sub_args let lsn = sub_args
.get_one::<String>("lsn") .get_one::<String>("lsn")
@@ -619,17 +623,17 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.copied() .copied()
.context("Failed to parse postgres version from the argument string")?; .context("Failed to parse postgres version from the argument string")?;
cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?; cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
} }
"start" => { "start" => {
let port: Option<u16> = sub_args.get_one::<u16>("port").copied(); let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let endpoint_id = sub_args let node_name = sub_args
.get_one::<String>("endpoint_id") .get_one::<String>("node")
.ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; .ok_or_else(|| anyhow!("No node name was provided to start"))?;
let endpoint = cplane.endpoints.get(endpoint_id.as_str()); let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) { let auth_token = if matches!(env.pageserver.auth_type, AuthType::NeonJWT) {
let claims = Claims::new(Some(tenant_id), Scope::Tenant); let claims = Claims::new(Some(tenant_id), Scope::Tenant);
Some(env.generate_auth_token(&claims)?) Some(env.generate_auth_token(&claims)?)
@@ -637,9 +641,9 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
None None
}; };
if let Some(endpoint) = endpoint { if let Some(node) = node {
println!("Starting existing endpoint {endpoint_id}..."); println!("Starting existing postgres {node_name}...");
endpoint.start(&auth_token)?; node.start(&auth_token)?;
} else { } else {
let branch_name = sub_args let branch_name = sub_args
.get_one::<String>("branch-name") .get_one::<String>("branch-name")
@@ -664,33 +668,27 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
// start --port X // start --port X
// stop // stop
// start <-- will also use port X even without explicit port argument // start <-- will also use port X even without explicit port argument
println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
let ep = cplane.new_endpoint( let node =
tenant_id, cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
endpoint_id, node.start(&auth_token)?;
timeline_id,
lsn,
port,
pg_version,
)?;
ep.start(&auth_token)?;
} }
} }
"stop" => { "stop" => {
let endpoint_id = sub_args let node_name = sub_args
.get_one::<String>("endpoint_id") .get_one::<String>("node")
.ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?; .ok_or_else(|| anyhow!("No node name was provided to stop"))?;
let destroy = sub_args.get_flag("destroy"); let destroy = sub_args.get_flag("destroy");
let endpoint = cplane let node = cplane
.endpoints .nodes
.get(endpoint_id.as_str()) .get(&(tenant_id, node_name.to_string()))
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?; .with_context(|| format!("postgres {node_name} is not found"))?;
endpoint.stop(destroy)?; node.stop(destroy)?;
} }
_ => bail!("Unexpected endpoint subcommand '{sub_name}'"), _ => bail!("Unexpected pg subcommand '{sub_name}'"),
} }
Ok(()) Ok(())
@@ -750,7 +748,7 @@ fn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNod
if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) { if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) {
Ok(SafekeeperNode::from_env(env, node)) Ok(SafekeeperNode::from_env(env, node))
} else { } else {
bail!("could not find safekeeper {id}") bail!("could not find safekeeper '{}'", id)
} }
} }
@@ -809,22 +807,22 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
} }
fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> { fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
// Endpoints are not started automatically etcd::start_etcd_process(env)?;
broker::start_broker_process(env)?;
let pageserver = PageServerNode::from_env(env); let pageserver = PageServerNode::from_env(env);
// Postgres nodes are not started automatically
if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) { if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
eprintln!("pageserver {} start failed: {:#}", env.pageserver.id, e); eprintln!("pageserver start failed: {e}");
try_stop_all(env, true); try_stop_etcd_process(env);
exit(1); exit(1);
} }
for node in env.safekeepers.iter() { for node in env.safekeepers.iter() {
let safekeeper = SafekeeperNode::from_env(env, node); let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start() { if let Err(e) = safekeeper.start() {
eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e); eprintln!("safekeeper '{}' start failed: {e}", safekeeper.id);
try_stop_all(env, false); try_stop_etcd_process(env);
exit(1); exit(1);
} }
} }
@@ -835,41 +833,35 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
let immediate = let immediate =
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate"); sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
try_stop_all(env, immediate);
Ok(())
}
fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
let pageserver = PageServerNode::from_env(env); let pageserver = PageServerNode::from_env(env);
// Stop all endpoints // Stop all compute nodes
match ComputeControlPlane::load(env.clone()) { let cplane = ComputeControlPlane::load(env.clone())?;
Ok(cplane) => { for (_k, node) in cplane.nodes {
for (_k, node) in cplane.endpoints { if let Err(e) = node.stop(false) {
if let Err(e) = node.stop(false) { eprintln!("postgres stop failed: {}", e);
eprintln!("postgres stop failed: {e:#}");
}
}
}
Err(e) => {
eprintln!("postgres stop failed, could not restore control plane data from env: {e:#}")
} }
} }
if let Err(e) = pageserver.stop(immediate) { if let Err(e) = pageserver.stop(immediate) {
eprintln!("pageserver {} stop failed: {:#}", env.pageserver.id, e); eprintln!("pageserver stop failed: {}", e);
} }
for node in env.safekeepers.iter() { for node in env.safekeepers.iter() {
let safekeeper = SafekeeperNode::from_env(env, node); let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.stop(immediate) { if let Err(e) = safekeeper.stop(immediate) {
eprintln!("safekeeper {} stop failed: {:#}", safekeeper.id, e); eprintln!("safekeeper '{}' stop failed: {}", safekeeper.id, e);
} }
} }
if let Err(e) = broker::stop_broker_process(env) { try_stop_etcd_process(env);
eprintln!("neon broker stop failed: {e:#}");
Ok(())
}
fn try_stop_etcd_process(env: &local_env::LocalEnv) {
if let Err(e) = etcd::stop_etcd_process(env) {
eprintln!("etcd stop failed: {e}");
} }
} }
@@ -879,9 +871,7 @@ fn cli() -> Command {
.help("Name of the branch to be created or used as an alias for other services") .help("Name of the branch to be created or used as an alias for other services")
.required(false); .required(false);
let endpoint_id_arg = Arg::new("endpoint_id") let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
.help("Postgres endpoint id")
.required(false);
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false); let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
@@ -911,7 +901,6 @@ fn cli() -> Command {
let stop_mode_arg = Arg::new("stop-mode") let stop_mode_arg = Arg::new("stop-mode")
.short('m') .short('m')
.value_parser(["fast", "immediate"]) .value_parser(["fast", "immediate"])
.default_value("fast")
.help("If 'immediate', don't flush repository data at shutdown") .help("If 'immediate', don't flush repository data at shutdown")
.required(false) .required(false)
.value_name("stop-mode"); .value_name("stop-mode");
@@ -933,8 +922,9 @@ fn cli() -> Command {
.version(GIT_VERSION) .version(GIT_VERSION)
.subcommand( .subcommand(
Command::new("init") Command::new("init")
.about("Initialize a new Neon repository, preparing configs for services to start with") .about("Initialize a new Neon repository")
.arg(pageserver_config_args.clone()) .arg(pageserver_config_args.clone())
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg( .arg(
Arg::new("config") Arg::new("config")
.long("config") .long("config")
@@ -996,14 +986,11 @@ fn cli() -> Command {
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline")) .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)) .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
.arg(pg_version_arg.clone()) .arg(pg_version_arg.clone())
.arg(Arg::new("set-default").long("set-default").action(ArgAction::SetTrue).required(false)
.help("Use this tenant in future CLI commands where tenant_id is needed, but not specified"))
) )
.subcommand(Command::new("set-default").arg(tenant_id_arg.clone().required(true))
.about("Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"))
.subcommand(Command::new("config") .subcommand(Command::new("config")
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))) .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
)
) )
.subcommand( .subcommand(
Command::new("pageserver") Command::new("pageserver")
@@ -1035,27 +1022,27 @@ fn cli() -> Command {
) )
) )
.subcommand( .subcommand(
Command::new("endpoint") Command::new("pg")
.arg_required_else_help(true) .arg_required_else_help(true)
.about("Manage postgres instances") .about("Manage postgres instances")
.subcommand(Command::new("list").arg(tenant_id_arg.clone())) .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
.subcommand(Command::new("create") .subcommand(Command::new("create")
.about("Create a compute endpoint") .about("Create a postgres compute node")
.arg(endpoint_id_arg.clone()) .arg(pg_node_arg.clone())
.arg(branch_name_arg.clone()) .arg(branch_name_arg.clone())
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
.arg(lsn_arg.clone()) .arg(lsn_arg.clone())
.arg(port_arg.clone()) .arg(port_arg.clone())
.arg( .arg(
Arg::new("config-only") Arg::new("config-only")
.help("Don't do basebackup, create endpoint directory with only config files") .help("Don't do basebackup, create compute node with only config files")
.long("config-only") .long("config-only")
.required(false)) .required(false))
.arg(pg_version_arg.clone()) .arg(pg_version_arg.clone())
) )
.subcommand(Command::new("start") .subcommand(Command::new("start")
.about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
.arg(endpoint_id_arg.clone()) .arg(pg_node_arg.clone())
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
.arg(branch_name_arg) .arg(branch_name_arg)
.arg(timeline_id_arg) .arg(timeline_id_arg)
@@ -1065,7 +1052,7 @@ fn cli() -> Command {
) )
.subcommand( .subcommand(
Command::new("stop") Command::new("stop")
.arg(endpoint_id_arg) .arg(pg_node_arg)
.arg(tenant_id_arg) .arg(tenant_id_arg)
.arg( .arg(
Arg::new("destroy") Arg::new("destroy")
@@ -1077,13 +1064,6 @@ fn cli() -> Command {
) )
) )
// Obsolete old name for 'endpoint'. We now just print an error if it's used.
.subcommand(
Command::new("pg")
.hide(true)
.arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false))
.trailing_var_arg(true)
)
.subcommand( .subcommand(
Command::new("start") Command::new("start")
.about("Start page server and safekeepers") .about("Start page server and safekeepers")


@@ -1,48 +0,0 @@
use anyhow::Context;
use std::path::PathBuf;
use crate::{background_process, local_env};
pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let broker = &env.broker;
let listen_addr = &broker.listen_addr;
print!("Starting neon broker at {}", listen_addr);
let args = [format!("--listen-addr={listen_addr}")];
let client = reqwest::blocking::Client::new();
background_process::start_process(
"storage_broker",
&env.base_data_dir,
&env.storage_broker_bin(),
args,
[],
background_process::InitialPidFile::Create(&storage_broker_pid_file_path(env)),
|| {
let url = broker.client_url();
let status_url = url.join("status").with_context(|| {
format!("Failed to append /status path to broker endpoint {url}",)
})?;
let request = client
.get(status_url)
.build()
.with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
match client.execute(request) {
Ok(resp) => Ok(resp.status().is_success()),
Err(_) => Ok(false),
}
},
)
.context("Failed to spawn storage_broker subprocess")?;
Ok(())
}
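
The closure handed to background_process::start_process above only answers "is the broker's /status endpoint up yet?"; how it gets polled lives inside background_process and is not shown here. Purely as a hedged sketch, a generic retry loop of that shape could look like the following (the attempt count and sleep interval are invented, and the anyhow crate is assumed):

use std::{thread, time::Duration};

fn wait_until_started<F>(mut is_ready: F) -> anyhow::Result<()>
where
    F: FnMut() -> anyhow::Result<bool>,
{
    for _ in 0..100 {
        if is_ready()? {
            return Ok(());
        }
        thread::sleep(Duration::from_millis(100));
    }
    anyhow::bail!("process did not report ready in time")
}

fn main() -> anyhow::Result<()> {
    let mut probes = 0;
    wait_until_started(|| {
        probes += 1;
        Ok(probes >= 3) // pretend the service answers on the third probe
    })
}
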
pub fn stop_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
background_process::stop_process(true, "storage_broker", &storage_broker_pid_file_path(env))
}
fn storage_broker_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
env.base_data_dir.join("storage_broker.pid")
}


@@ -12,58 +12,69 @@ use std::time::Duration;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use utils::{ use utils::{
connstring::connection_host_port,
id::{TenantId, TimelineId}, id::{TenantId, TimelineId},
lsn::Lsn, lsn::Lsn,
postgres_backend::AuthType,
}; };
use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION}; use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
use crate::pageserver::PageServerNode;
use crate::postgresql_conf::PostgresConf; use crate::postgresql_conf::PostgresConf;
use crate::storage::PageServerNode;
// //
// ComputeControlPlane // ComputeControlPlane
// //
pub struct ComputeControlPlane { pub struct ComputeControlPlane {
base_port: u16, base_port: u16,
// endpoint ID is the key
pub endpoints: BTreeMap<String, Arc<Endpoint>>,
env: LocalEnv,
pageserver: Arc<PageServerNode>, pageserver: Arc<PageServerNode>,
pub nodes: BTreeMap<(TenantId, String), Arc<PostgresNode>>,
env: LocalEnv,
} }
impl ComputeControlPlane { impl ComputeControlPlane {
// Load current endpoints from the endpoints/ subdirectories // Load current nodes with ports from data directories on disk
// Directory structure has the following layout:
// pgdatadirs
// |- tenants
// | |- <tenant_id>
// | | |- <node name>
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> { pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
let pageserver = Arc::new(PageServerNode::from_env(&env)); let pageserver = Arc::new(PageServerNode::from_env(&env));
let mut endpoints = BTreeMap::default(); let mut nodes = BTreeMap::default();
for endpoint_dir in fs::read_dir(env.endpoints_path()) let pgdatadirspath = &env.pg_data_dirs_path();
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
for tenant_dir in fs::read_dir(&pgdatadirspath)
.with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
{ {
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?; let tenant_dir = tenant_dir?;
endpoints.insert(ep.name.clone(), Arc::new(ep)); for timeline_dir in fs::read_dir(tenant_dir.path())
.with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
{
let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node));
}
} }
Ok(ComputeControlPlane { Ok(ComputeControlPlane {
base_port: 55431, base_port: 55431,
endpoints,
env,
pageserver, pageserver,
nodes,
env,
}) })
} }
fn get_port(&mut self) -> u16 { fn get_port(&mut self) -> u16 {
1 + self 1 + self
.endpoints .nodes
.values() .iter()
.map(|ep| ep.address.port()) .map(|(_name, node)| node.address.port())
.max() .max()
.unwrap_or(self.base_port) .unwrap_or(self.base_port)
} }
pub fn new_endpoint( pub fn new_node(
&mut self, &mut self,
tenant_id: TenantId, tenant_id: TenantId,
name: &str, name: &str,
@@ -71,58 +82,56 @@ impl ComputeControlPlane {
lsn: Option<Lsn>, lsn: Option<Lsn>,
port: Option<u16>, port: Option<u16>,
pg_version: u32, pg_version: u32,
) -> Result<Arc<Endpoint>> { ) -> Result<Arc<PostgresNode>> {
let port = port.unwrap_or_else(|| self.get_port()); let port = port.unwrap_or_else(|| self.get_port());
let ep = Arc::new(Endpoint { let node = Arc::new(PostgresNode {
name: name.to_owned(), name: name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
env: self.env.clone(), env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver), pageserver: Arc::clone(&self.pageserver),
is_test: false,
timeline_id, timeline_id,
lsn, lsn,
tenant_id, tenant_id,
uses_wal_proposer: false,
pg_version, pg_version,
}); });
ep.create_pgdata()?; node.create_pgdata()?;
ep.setup_pg_conf()?; node.setup_pg_conf(self.env.pageserver.auth_type)?;
self.endpoints.insert(ep.name.clone(), Arc::clone(&ep)); self.nodes
.insert((tenant_id, node.name.clone()), Arc::clone(&node));
Ok(ep) Ok(node)
} }
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
#[derive(Debug)] #[derive(Debug)]
pub struct Endpoint { pub struct PostgresNode {
/// used as the directory name
name: String,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
// Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
pub lsn: Option<Lsn>,
// port and address of the Postgres server
pub address: SocketAddr, pub address: SocketAddr,
pg_version: u32, name: String,
// These are not part of the endpoint as such, but the environment
// the endpoint runs in.
pub env: LocalEnv, pub env: LocalEnv,
pageserver: Arc<PageServerNode>, pageserver: Arc<PageServerNode>,
is_test: bool,
pub timeline_id: TimelineId,
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
pub tenant_id: TenantId,
uses_wal_proposer: bool,
pg_version: u32,
} }
impl Endpoint { impl PostgresNode {
fn from_dir_entry( fn from_dir_entry(
entry: std::fs::DirEntry, entry: std::fs::DirEntry,
env: &LocalEnv, env: &LocalEnv,
pageserver: &Arc<PageServerNode>, pageserver: &Arc<PageServerNode>,
) -> Result<Endpoint> { ) -> Result<PostgresNode> {
if !entry.file_type()?.is_dir() { if !entry.file_type()?.is_dir() {
anyhow::bail!( anyhow::bail!(
"Endpoint::from_dir_entry failed: '{}' is not a directory", "PostgresNode::from_dir_entry failed: '{}' is not a directory",
entry.path().display() entry.path().display()
); );
} }
@@ -132,7 +141,7 @@ impl Endpoint {
let name = fname.to_str().unwrap().to_string(); let name = fname.to_str().unwrap().to_string();
// Read config file into memory // Read config file into memory
let cfg_path = entry.path().join("pgdata").join("postgresql.conf"); let cfg_path = entry.path().join("postgresql.conf");
let cfg_path_str = cfg_path.to_string_lossy(); let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path) let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?; .with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
@@ -144,6 +153,7 @@ impl Endpoint {
let port: u16 = conf.parse_field("port", &context)?; let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?; let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?; let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
// Read postgres version from PG_VERSION file to determine which postgres version binary to use. // Read postgres version from PG_VERSION file to determine which postgres version binary to use.
// If it doesn't exist, assume broken data directory and use default pg version. // If it doesn't exist, assume broken data directory and use default pg version.
@@ -158,21 +168,23 @@ impl Endpoint {
conf.parse_field_optional("recovery_target_lsn", &context)?; conf.parse_field_optional("recovery_target_lsn", &context)?;
// ok now // ok now
Ok(Endpoint { Ok(PostgresNode {
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
name, name,
env: env.clone(), env: env.clone(),
pageserver: Arc::clone(pageserver), pageserver: Arc::clone(pageserver),
is_test: false,
timeline_id, timeline_id,
lsn: recovery_target_lsn, lsn: recovery_target_lsn,
tenant_id, tenant_id,
uses_wal_proposer,
pg_version, pg_version,
}) })
} }
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> { fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres"); let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
let mut cmd = Command::new(pg_path); let mut cmd = Command::new(&pg_path);
cmd.arg("--sync-safekeepers") cmd.arg("--sync-safekeepers")
.env_clear() .env_clear()
@@ -190,7 +202,7 @@ impl Endpoint {
.stderr(Stdio::piped()); .stderr(Stdio::piped());
if let Some(token) = auth_token { if let Some(token) = auth_token {
cmd.env("NEON_AUTH_TOKEN", token); cmd.env("ZENITH_AUTH_TOKEN", token);
} }
let sync_handle = cmd let sync_handle = cmd
@@ -250,7 +262,7 @@ impl Endpoint {
} }
fn create_pgdata(&self) -> Result<()> { fn create_pgdata(&self) -> Result<()> {
fs::create_dir_all(self.pgdata()).with_context(|| { fs::create_dir_all(&self.pgdata()).with_context(|| {
format!( format!(
"could not create data directory {}", "could not create data directory {}",
self.pgdata().display() self.pgdata().display()
@@ -266,8 +278,8 @@ impl Endpoint {
} }
// Write postgresql.conf with default configuration // Write postgresql.conf with default configuration
// and PG_VERSION file to the data directory of a new endpoint. // and PG_VERSION file to the data directory of a new node.
fn setup_pg_conf(&self) -> Result<()> { fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
let mut conf = PostgresConf::new(); let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10"); conf.append("max_wal_senders", "10");
conf.append("wal_log_hints", "off"); conf.append("wal_log_hints", "off");
@@ -286,13 +298,26 @@ impl Endpoint {
// walproposer panics when basebackup is invalid, it is pointless to restart in this case. // walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off"); conf.append("restart_after_crash", "off");
// Configure the Neon Postgres extension to fetch pages from pageserver // Configure the node to fetch pages from pageserver
let pageserver_connstr = { let pageserver_connstr = {
let config = &self.pageserver.pg_connection_config; let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
let (host, port) = (config.host(), config.port());
// NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere. // Set up authentication
format!("postgresql://no_user@{host}:{port}") //
// $ZENITH_AUTH_TOKEN will be replaced with value from environment
// variable during compute pg startup. It is done this way because
// otherwise user will be able to retrieve the value using SHOW
// command or pg_settings
let password = if let AuthType::NeonJWT = auth_type {
"$ZENITH_AUTH_TOKEN"
} else {
""
};
// NOTE avoiding spaces in connection string, because it is less error prone if we forward it somewhere.
// Also note that not all parameters are supported here. Because in compute we substitute $ZENITH_AUTH_TOKEN
// We parse this string and build it back with token from env var, and for simplicity rebuild
// uses only needed variables namely host, port, user, password.
format!("postgresql://no_user:{password}@{host}:{port}")
}; };
conf.append("shared_preload_libraries", "neon"); conf.append("shared_preload_libraries", "neon");
conf.append_line(""); conf.append_line("");
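
The comment block above leaves the actual placeholder handling to compute startup code elsewhere in the tree. As an illustration only (the function name and the plain string-replace strategy here are assumptions, not the real implementation, which rebuilds the URL from its parts), the substitution amounts to:

fn resolve_pageserver_connstr(template: &str) -> String {
    match std::env::var("ZENITH_AUTH_TOKEN") {
        // NeonJWT: splice the token from the environment into the URL.
        Ok(token) => template.replace("$ZENITH_AUTH_TOKEN", &token),
        // Trust auth: no token available, drop the placeholder.
        Err(_) => template.replace("$ZENITH_AUTH_TOKEN", ""),
    }
}

fn main() {
    std::env::set_var("ZENITH_AUTH_TOKEN", "secret-jwt");
    let resolved =
        resolve_pageserver_connstr("postgresql://no_user:$ZENITH_AUTH_TOKEN@127.0.0.1:64000");
    assert_eq!(resolved, "postgresql://no_user:secret-jwt@127.0.0.1:64000");
    println!("{resolved}");
}
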
@@ -318,11 +343,11 @@ impl Endpoint {
// To be able to restore database in case of pageserver node crash, safekeeper should not // To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3). // (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "15MB"); conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB"); conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() { if !self.env.safekeepers.is_empty() {
// Configure Postgres to connect to the safekeepers // Configure the node to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer"); conf.append("synchronous_standby_names", "walproposer");
let safekeepers = self let safekeepers = self
@@ -357,7 +382,7 @@ impl Endpoint {
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> { fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
let backup_lsn = if let Some(lsn) = self.lsn { let backup_lsn = if let Some(lsn) = self.lsn {
Some(lsn) Some(lsn)
} else if !self.env.safekeepers.is_empty() { } else if self.uses_wal_proposer {
// LSN 0 means that it is bootstrap and we need to download just // LSN 0 means that it is bootstrap and we need to download just
// latest data from the pageserver. That is a bit clumsy but whole bootstrap // latest data from the pageserver. That is a bit clumsy but whole bootstrap
// procedure evolves quite actively right now, so let's think about it again // procedure evolves quite actively right now, so let's think about it again
@@ -377,12 +402,8 @@ impl Endpoint {
Ok(()) Ok(())
} }
pub fn endpoint_path(&self) -> PathBuf {
self.env.endpoints_path().join(&self.name)
}
pub fn pgdata(&self) -> PathBuf { pub fn pgdata(&self) -> PathBuf {
self.endpoint_path().join("pgdata") self.env.pg_data_dir(&self.tenant_id, &self.name)
} }
pub fn status(&self) -> &str { pub fn status(&self) -> &str {
@@ -400,7 +421,7 @@ impl Endpoint {
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> { fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl"); let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
let mut cmd = Command::new(&pg_ctl_path); let mut cmd = Command::new(pg_ctl_path);
cmd.args( cmd.args(
[ [
&[ &[
@@ -423,15 +444,11 @@ impl Endpoint {
"DYLD_LIBRARY_PATH", "DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(), self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
); );
// Pass authentication token used for the connections to pageserver and safekeepers
if let Some(token) = auth_token { if let Some(token) = auth_token {
cmd.env("NEON_AUTH_TOKEN", token); cmd.env("ZENITH_AUTH_TOKEN", token);
} }
let pg_ctl = cmd let pg_ctl = cmd.output().context("pg_ctl failed")?;
.output()
.context(format!("{} failed", pg_ctl_path.display()))?;
if !pg_ctl.status.success() { if !pg_ctl.status.success() {
anyhow::bail!( anyhow::bail!(
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}", "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
@@ -444,11 +461,12 @@ impl Endpoint {
} }
pub fn start(&self, auth_token: &Option<String>) -> Result<()> { pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
// Bail if the node already running.
if self.status() == "running" { if self.status() == "running" {
anyhow::bail!("The endpoint is already running"); anyhow::bail!("The node is already running");
} }
// 1. We always start Postgres from scratch, so // 1. We always start compute node from scratch, so
// if old dir exists, preserve 'postgresql.conf' and drop the directory // if old dir exists, preserve 'postgresql.conf' and drop the directory
let postgresql_conf_path = self.pgdata().join("postgresql.conf"); let postgresql_conf_path = self.pgdata().join("postgresql.conf");
let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| { let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
@@ -457,7 +475,7 @@ impl Endpoint {
postgresql_conf_path.to_str().unwrap() postgresql_conf_path.to_str().unwrap()
) )
})?; })?;
fs::remove_dir_all(self.pgdata())?; fs::remove_dir_all(&self.pgdata())?;
self.create_pgdata()?; self.create_pgdata()?;
// 2. Bring back config files // 2. Bring back config files
@@ -470,17 +488,21 @@ impl Endpoint {
File::create(self.pgdata().join("standby.signal"))?; File::create(self.pgdata().join("standby.signal"))?;
} }
// 4. Finally start postgres // 4. Finally start the compute node postgres
println!("Starting postgres at '{}'", self.connstr()); println!("Starting postgres node at '{}'", self.connstr());
self.pg_ctl(&["start"], auth_token) self.pg_ctl(&["start"], auth_token)
} }
pub fn restart(&self, auth_token: &Option<String>) -> Result<()> {
self.pg_ctl(&["restart"], auth_token)
}
pub fn stop(&self, destroy: bool) -> Result<()> { pub fn stop(&self, destroy: bool) -> Result<()> {
// If we are going to destroy data directory, // If we are going to destroy data directory,
// use immediate shutdown mode, otherwise, // use immediate shutdown mode, otherwise,
// shutdown gracefully to leave the data directory sane. // shutdown gracefully to leave the data directory sane.
// //
// Postgres is always started from scratch, so stop // Compute node always starts from scratch, so stop
// without destroy only used for testing and debugging. // without destroy only used for testing and debugging.
// //
if destroy { if destroy {
@@ -489,7 +511,7 @@ impl Endpoint {
"Destroying postgres data directory '{}'", "Destroying postgres data directory '{}'",
self.pgdata().to_str().unwrap() self.pgdata().to_str().unwrap()
); );
fs::remove_dir_all(self.endpoint_path())?; fs::remove_dir_all(&self.pgdata())?;
} else { } else {
self.pg_ctl(&["stop"], &None)?; self.pg_ctl(&["stop"], &None)?;
} }
@@ -505,4 +527,26 @@ impl Endpoint {
"postgres" "postgres"
) )
} }
// XXX: cache that in control plane
pub fn whoami(&self) -> String {
let output = Command::new("whoami")
.output()
.expect("failed to execute whoami");
assert!(output.status.success(), "whoami failed");
String::from_utf8(output.stdout).unwrap().trim().to_string()
}
}
impl Drop for PostgresNode {
// destructor to clean up state after test is done
// XXX: we may detect failed test by setting some flag in catch_unwind()
// and checking it here. But let just clean datadirs on start.
fn drop(&mut self) {
if self.is_test {
let _ = self.stop(true);
}
}
} }

control_plane/src/etcd.rs (new file, 101 lines)

@@ -0,0 +1,101 @@
use std::{
fs,
path::PathBuf,
process::{Command, Stdio},
};
use anyhow::Context;
use nix::{
sys::signal::{kill, Signal},
unistd::Pid,
};
use crate::{local_env, read_pidfile};
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_broker = &env.etcd_broker;
println!(
"Starting etcd broker using {}",
etcd_broker.etcd_binary_path.display()
);
let etcd_data_dir = env.base_data_dir.join("etcd");
fs::create_dir_all(&etcd_data_dir).with_context(|| {
format!(
"Failed to create etcd data dir: {}",
etcd_data_dir.display()
)
})?;
let etcd_stdout_file =
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
format!(
"Failed to create etcd stout file in directory {}",
etcd_data_dir.display()
)
})?;
let etcd_stderr_file =
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
format!(
"Failed to create etcd stderr file in directory {}",
etcd_data_dir.display()
)
})?;
let client_urls = etcd_broker.comma_separated_endpoints();
let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
.args(&[
format!("--data-dir={}", etcd_data_dir.display()),
format!("--listen-client-urls={client_urls}"),
format!("--advertise-client-urls={client_urls}"),
// Set --quota-backend-bytes to keep the etcd virtual memory
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
// etcd doesn't compact (vacuum) with default settings,
// enable it to prevent space exhaustion.
"--auto-compaction-mode=revision".to_string(),
"--auto-compaction-retention=1".to_string(),
])
.stdout(Stdio::from(etcd_stdout_file))
.stderr(Stdio::from(etcd_stderr_file))
.spawn()
.context("Failed to spawn etcd subprocess")?;
let pid = etcd_process.id();
let etcd_pid_file_path = etcd_pid_file_path(env);
fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
format!(
"Failed to create etcd pid file at {}",
etcd_pid_file_path.display()
)
})?;
Ok(())
}
pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_path = &env.etcd_broker.etcd_binary_path;
println!("Stopping etcd broker at {}", etcd_path.display());
let etcd_pid_file_path = etcd_pid_file_path(env);
let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
format!(
"Failed to read etcd pid file at {}",
etcd_pid_file_path.display()
)
})?);
kill(pid, Signal::SIGTERM).with_context(|| {
format!(
"Failed to stop etcd with pid {pid} at {}",
etcd_pid_file_path.display()
)
})?;
Ok(())
}
fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
env.base_data_dir.join("etcd.pid")
}
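
start_etcd_process above wires the child's stdout and stderr straight into log files via Stdio::from. The same pattern in isolation, with throwaway paths and `echo` standing in for etcd:

use std::fs::File;
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    let stdout_log = File::create("/tmp/child.stdout.log")?;
    let stderr_log = File::create("/tmp/child.stderr.log")?;
    let child = Command::new("echo")
        .arg("hello from the child")
        .stdout(Stdio::from(stdout_log)) // the File becomes the child's stdout
        .stderr(Stdio::from(stderr_log))
        .spawn()?;
    println!("spawned pid {}", child.id());
    Ok(())
}
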


@@ -6,11 +6,59 @@
// Intended to be used in integration tests and in CLI tools for // Intended to be used in integration tests and in CLI tools for
// local installations. // local installations.
// //
use anyhow::{anyhow, bail, Context, Result};
use std::fs;
use std::path::Path;
use std::process::Command;
mod background_process; pub mod compute;
pub mod broker; pub mod etcd;
pub mod endpoint;
pub mod local_env; pub mod local_env;
pub mod pageserver;
pub mod postgresql_conf; pub mod postgresql_conf;
pub mod safekeeper; pub mod safekeeper;
pub mod storage;
/// Read a PID file
///
/// We expect a file that contains a single integer.
/// We return an i32 for compatibility with libc and nix.
pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
let pid_str = fs::read_to_string(pidfile)
.with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
let pid: i32 = pid_str
.parse()
.map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
if pid < 1 {
bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
}
Ok(pid)
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}
}
cmd
}


@@ -4,22 +4,18 @@
//! script which will use local paths. //! script which will use local paths.
use anyhow::{bail, ensure, Context}; use anyhow::{bail, ensure, Context};
use postgres_backend::AuthType;
use reqwest::Url; use reqwest::Url;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr}; use serde_with::{serde_as, DisplayFromStr};
use std::collections::HashMap; use std::collections::HashMap;
use std::env; use std::env;
use std::fs; use std::fs;
use std::net::IpAddr;
use std::net::Ipv4Addr;
use std::net::SocketAddr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
use utils::{ use utils::{
auth::{encode_from_key_file, Claims}, auth::{encode_from_key_file, Claims, Scope},
id::{NodeId, TenantId, TenantTimelineId, TimelineId}, id::{NodeId, TenantId, TenantTimelineId, TimelineId},
postgres_backend::AuthType,
}; };
use crate::safekeeper::SafekeeperNode; use crate::safekeeper::SafekeeperNode;
@@ -66,7 +62,7 @@ pub struct LocalEnv {
#[serde(default)] #[serde(default)]
pub private_key_path: PathBuf, pub private_key_path: PathBuf,
pub broker: NeonBroker, pub etcd_broker: EtcdBroker,
pub pageserver: PageServerConf, pub pageserver: PageServerConf,
@@ -82,26 +78,67 @@ pub struct LocalEnv {
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>, branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
} }
/// Broker config for cluster internal communication. /// Etcd broker config for cluster internal communication.
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)] pub struct EtcdBroker {
pub struct NeonBroker { /// A prefix to all to any key when pushing/polling etcd from a node.
/// Broker listen address for storage nodes coordination, e.g. '127.0.0.1:50051'. #[serde(default)]
pub listen_addr: SocketAddr, pub broker_etcd_prefix: Option<String>,
/// Broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
#[serde(default)]
#[serde_as(as = "Vec<DisplayFromStr>")]
pub broker_endpoints: Vec<Url>,
/// Etcd binary path to use.
#[serde(default)]
pub etcd_binary_path: PathBuf,
} }
// Dummy Default impl to satisfy Deserialize derive. impl EtcdBroker {
impl Default for NeonBroker { pub fn locate_etcd() -> anyhow::Result<PathBuf> {
fn default() -> Self { let which_output = Command::new("which")
NeonBroker { .arg("etcd")
listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0), .output()
} .context("Failed to run 'which etcd' command")?;
let stdout = String::from_utf8_lossy(&which_output.stdout);
ensure!(
which_output.status.success(),
"'which etcd' invocation failed. Status: {}, stdout: {stdout}, stderr: {}",
which_output.status,
String::from_utf8_lossy(&which_output.stderr)
);
let etcd_path = PathBuf::from(stdout.trim());
ensure!(
etcd_path.is_file(),
"'which etcd' invocation was successful, but the path it returned is not a file or does not exist: {}",
etcd_path.display()
);
Ok(etcd_path)
} }
}
impl NeonBroker { pub fn comma_separated_endpoints(&self) -> String {
pub fn client_url(&self) -> Url { self.broker_endpoints
Url::parse(&format!("http://{}", self.listen_addr)).expect("failed to construct url") .iter()
.map(|url| {
// URL by default adds a '/' path at the end, which is not what etcd CLI wants.
let url_string = url.as_str();
if url_string.ends_with('/') {
&url_string[0..url_string.len() - 1]
} else {
url_string
}
})
.fold(String::new(), |mut comma_separated_urls, url| {
if !comma_separated_urls.is_empty() {
comma_separated_urls.push(',');
}
comma_separated_urls.push_str(url);
comma_separated_urls
})
} }
} }
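
comma_separated_endpoints is doing two things: stripping the trailing '/' that the url crate normalizes onto bare authorities, and joining the endpoints with commas for etcd's client-URL flags. The fold can be written more directly; a standalone sketch, assuming only the url crate:

use url::Url;

fn comma_separated(endpoints: &[Url]) -> String {
    endpoints
        .iter()
        .map(|url| {
            let s = url.as_str();
            // Url renders "http://127.0.0.1:2379" as "http://127.0.0.1:2379/";
            // etcd's flags do not want that trailing slash.
            s.strip_suffix('/').unwrap_or(s)
        })
        .collect::<Vec<_>>()
        .join(",")
}

fn main() {
    let endpoints = vec![
        Url::parse("http://127.0.0.1:2379").unwrap(),
        Url::parse("http://127.0.0.1:2380").unwrap(),
    ];
    assert_eq!(
        comma_separated(&endpoints),
        "http://127.0.0.1:2379,http://127.0.0.1:2380"
    );
}
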
@@ -110,14 +147,15 @@ impl NeonBroker {
pub struct PageServerConf { pub struct PageServerConf {
// node id // node id
pub id: NodeId, pub id: NodeId,
// Pageserver connection settings // Pageserver connection settings
pub listen_pg_addr: String, pub listen_pg_addr: String,
pub listen_http_addr: String, pub listen_http_addr: String,
// auth type used for the PG and HTTP ports // used to determine which auth type is used
pub pg_auth_type: AuthType, pub auth_type: AuthType,
pub http_auth_type: AuthType,
// jwt auth token used for communication with pageserver
pub auth_token: String,
} }
impl Default for PageServerConf { impl Default for PageServerConf {
@@ -126,8 +164,8 @@ impl Default for PageServerConf {
id: NodeId(0), id: NodeId(0),
listen_pg_addr: String::new(), listen_pg_addr: String::new(),
listen_http_addr: String::new(), listen_http_addr: String::new(),
pg_auth_type: AuthType::Trust, auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust, auth_token: String::new(),
} }
} }
} }
@@ -188,20 +226,22 @@ impl LocalEnv {
} }
} }
pub fn pageserver_bin(&self) -> PathBuf { pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
self.neon_distrib_dir.join("pageserver") Ok(self.neon_distrib_dir.join("pageserver"))
} }
pub fn safekeeper_bin(&self) -> PathBuf { pub fn safekeeper_bin(&self) -> anyhow::Result<PathBuf> {
self.neon_distrib_dir.join("safekeeper") Ok(self.neon_distrib_dir.join("safekeeper"))
} }
pub fn storage_broker_bin(&self) -> PathBuf { pub fn pg_data_dirs_path(&self) -> PathBuf {
self.neon_distrib_dir.join("storage_broker") self.base_data_dir.join("pgdatadirs").join("tenants")
} }
pub fn endpoints_path(&self) -> PathBuf { pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf {
self.base_data_dir.join("endpoints") self.pg_data_dirs_path()
.join(tenant_id.to_string())
.join(branch_name)
} }
// TODO: move pageserver files into ./pageserver // TODO: move pageserver files into ./pageserver
@@ -289,6 +329,11 @@ impl LocalEnv {
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned(); env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
} }
// If no initial tenant ID was given, generate it.
if env.default_tenant_id.is_none() {
env.default_tenant_id = Some(TenantId::generate());
}
env.base_data_dir = base_path(); env.base_data_dir = base_path();
Ok(env) Ok(env)
@@ -392,36 +437,51 @@ impl LocalEnv {
} }
} }
fs::create_dir(base_path)?; fs::create_dir(&base_path)?;
// Generate keypair for JWT. // generate keys for jwt
// // openssl genrsa -out private_key.pem 2048
// The keypair is only needed if authentication is enabled in any of the let private_key_path;
// components. For convenience, we generate the keypair even if authentication
// is not enabled, so that you can easily enable it after the initialization
// step. However, if the key generation fails, we treat it as non-fatal if
// authentication was not enabled.
if self.private_key_path == PathBuf::new() { if self.private_key_path == PathBuf::new() {
match generate_auth_keys( private_key_path = base_path.join("auth_private_key.pem");
base_path.join("auth_private_key.pem").as_path(), let keygen_output = Command::new("openssl")
base_path.join("auth_public_key.pem").as_path(), .arg("genrsa")
) { .args(&["-out", private_key_path.to_str().unwrap()])
Ok(()) => { .arg("2048")
self.private_key_path = PathBuf::from("auth_private_key.pem"); .stdout(Stdio::null())
} .output()
Err(e) => { .context("failed to generate auth private key")?;
if !self.auth_keys_needed() { if !keygen_output.status.success() {
eprintln!("Could not generate keypair for JWT authentication: {e}"); bail!(
eprintln!("Continuing anyway because authentication was not enabled"); "openssl failed: '{}'",
self.private_key_path = PathBuf::from("auth_private_key.pem"); String::from_utf8_lossy(&keygen_output.stderr)
} else { );
return Err(e); }
} self.private_key_path = PathBuf::from("auth_private_key.pem");
}
let public_key_path = base_path.join("auth_public_key.pem");
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
let keygen_output = Command::new("openssl")
.arg("rsa")
.args(&["-in", private_key_path.to_str().unwrap()])
.arg("-pubout")
.args(&["-outform", "PEM"])
.args(&["-out", public_key_path.to_str().unwrap()])
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
} }
} }
fs::create_dir_all(self.endpoints_path())?; self.pageserver.auth_token =
self.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
fs::create_dir_all(self.pg_data_dirs_path())?;
for safekeeper in &self.safekeepers { for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?; fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
@@ -429,12 +489,6 @@ impl LocalEnv {
self.persist_config(base_path) self.persist_config(base_path)
} }
fn auth_keys_needed(&self) -> bool {
self.pageserver.pg_auth_type == AuthType::NeonJWT
|| self.pageserver.http_auth_type == AuthType::NeonJWT
|| self.safekeepers.iter().any(|sk| sk.auth_enabled)
}
} }
fn base_path() -> PathBuf { fn base_path() -> PathBuf {
@@ -444,43 +498,6 @@ fn base_path() -> PathBuf {
} }
} }
/// Generate a public/private key pair for JWT authentication
fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow::Result<()> {
// Generate the key pair
//
// openssl genpkey -algorithm ed25519 -out auth_private_key.pem
let keygen_output = Command::new("openssl")
.arg("genpkey")
.args(["-algorithm", "ed25519"])
.args(["-out", private_key_path.to_str().unwrap()])
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
// Extract the public key from the private key file
//
// openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
let keygen_output = Command::new("openssl")
.arg("pkey")
.args(["-in", private_key_path.to_str().unwrap()])
.arg("-pubout")
.args(["-out", public_key_path.to_str().unwrap()])
.output()
.context("failed to extract public key from private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
Ok(())
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -494,8 +511,8 @@ mod tests {
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}" "failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
); );
let string_to_replace = "listen_addr = '127.0.0.1:50051'"; let string_to_replace = "broker_endpoints = ['http://127.0.0.1:2379']";
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'"; let spoiled_url_str = "broker_endpoints = ['!@$XOXO%^&']";
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str); let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
assert!( assert!(
spoiled_url_toml.contains(spoiled_url_str), spoiled_url_toml.contains(spoiled_url_str),

Some files were not shown because too many files have changed in this diff.