Compare commits

..

8 Commits

Author SHA1 Message Date
Bojan Serafimov
edc77e8a24 Add pyo3 prototype 2022-08-30 11:28:22 -04:00
Bojan Serafimov
324c09d19f Cleanup 2022-08-18 15:17:30 -04:00
Bojan Serafimov
1501dbd5a5 Cleanup 2022-08-18 15:16:14 -04:00
Bojan Serafimov
4e91ff31ca Fmt 2022-08-18 13:20:01 -04:00
Bojan Serafimov
fe1b471cfc Cleanup 2022-08-18 13:14:05 -04:00
Bojan Serafimov
3e0b57e766 Convert bin to lib 2022-08-18 12:41:46 -04:00
Bojan Serafimov
7bff7c4014 Add library 2022-08-18 12:40:14 -04:00
Bojan Serafimov
11a1f2a1d8 Initial experiment 2022-08-18 12:27:43 -04:00
677 changed files with 36341 additions and 123447 deletions

View File

@@ -11,6 +11,3 @@ opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1
[alias]
build_testing = ["build", "--features", "testing"]

View File

@@ -4,7 +4,7 @@
hakari-package = "workspace_hack"
# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
dep-format-version = "3"
dep-format-version = "2"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.

View File

@@ -1,24 +1,18 @@
*
**/.git/
**/__pycache__
**/.pytest_cache
!rust-toolchain.toml
!Cargo.toml
!Cargo.lock
!Makefile
.git
target
tmp_check
tmp_install
tmp_check_cli
test_output
.vscode
.neon
integration_tests/.neon
.mypy_cache
Dockerfile
.dockerignore
!.cargo/
!.config/
!control_plane/
!compute_tools/
!libs/
!pageserver/
!pgxn/
!proxy/
!safekeeper/
!storage_broker/
!trace/
!vendor/postgres-v14/
!vendor/postgres-v15/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh
!vm-cgconfig.conf

View File

@@ -1 +0,0 @@
4c2bb43775947775401cbb9d774823c5723a91f8

View File

@@ -1,23 +0,0 @@
---
name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
assignees: ''
---
## Steps to reproduce
## Expected result
## Actual result
## Environment
## Logs, links
-

View File

@@ -1,25 +0,0 @@
---
name: Epic Template
about: A set of related tasks contributing towards specific outcome, comprising of
more than 1 week of work.
title: 'Epic: '
labels: t/Epic
assignees: ''
---
## Motivation
## DoD
## Implementation ideas
## Tasks
- [ ]
## Other related tasks and Epics
-

View File

@@ -1,20 +0,0 @@
## Release 202Y-MM-DD
**NB: this PR must be merged only by 'Create a merge commit'!**
### Checklist when preparing for release
- [ ] Read or refresh [the release flow guide](https://github.com/neondatabase/cloud/wiki/Release:-general-flow)
- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers?
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time)
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->

View File

@@ -1,254 +0,0 @@
name: 'Create Allure report'
description: 'Create and publish Allure report'
inputs:
action:
desctiption: 'generate or store'
required: true
build_type:
description: '`build_type` from run-python-test-set action'
required: true
test_selection:
description: '`test_selector` from run-python-test-set action'
required: false
outputs:
report-url:
description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }}
report-json-url:
description: 'Allure report JSON URL'
value: ${{ steps.generate-report.outputs.report-json-url }}
runs:
using: "composite"
steps:
# We're using some of env variables quite offen, so let's set them once.
#
# It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
#
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
#
- name: Set common environment variables
shell: bash -euxo pipefail {0}
run: |
echo "BUILD_TYPE=${BUILD_TYPE}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
env:
BUILD_TYPE: ${{ inputs.build_type }}
BUCKET: neon-github-public-dev
TEST_OUTPUT: /tmp/test_output
- name: Validate input parameters
shell: bash -euxo pipefail {0}
run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1
fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo 2>&1 "inputs.test_selection must be set for 'store' action"
exit 2
fi
- name: Calculate variables
id: calculate-vars
shell: bash -euxo pipefail {0}
run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number}
elif [ "${GITHUB_REF_NAME}" = "main" ]; then
# Shortcut for a special branch
key=main
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for a special branch
key=release
else
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
# Sanitize test selection to remove `/` and any other special characters
# Use printf instead of echo to avoid having `\n` at the end of the string
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
with:
distribution: 'temurin'
java-version: '17'
- name: Install Allure
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
run: |
if ! which allure; then
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
unzip -q ${ALLURE_ZIP}
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.21.0
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
shell: bash -euxo pipefail {0}
run: |
# Add metadata
cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
{
"name": "GitHub Actions",
"type": "github",
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
"buildOrder": ${GITHUB_RUN_ID},
"buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
"reportName": "Allure Report"
}
EOF
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
TEST_SELECTION=${{ inputs.test_selection }}
BUILD_TYPE=${BUILD_TYPE}
EOF
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
# Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
- name: Acquire Allure lock
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
LOCK_TIMEOUT=300 # seconds
for _ in $(seq 1 5); do
for i in $(seq 1 ${LOCK_TIMEOUT}); do
LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
break
fi
sleep 1
done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# A double-check that exactly WE have acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
break
fi
done
- name: Generate and publish final Allure report
if: ${{ inputs.action == 'generate' }}
id: generate-report
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
shell: bash -euxo pipefail {0}
run: |
# Get previously uploaded data for this run
ZSTD_NBTHREADS=0
s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key')
if [ -z "$s3_filepaths" ]; then
# There's no previously uploaded data for this run
exit 0
fi
for s3_filepath in ${s3_filepaths}; do
aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
mkdir -p ${archive%.tar.zst}
tar -xf ${archive} -C ${archive%.tar.zst}
rm -f ${archive}
done
# Get history trend
aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
# Generate report
allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
# Replace a logo link with a redirect to the latest version of the report
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
# Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect
cat <<EOF > ${TEST_OUTPUT}/allure/index.html
<!DOCTYPE html>
<meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF
aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
rm -rf ${TEST_OUTPUT}/allure
- uses: actions/github-script@v6
if: ${{ inputs.action == 'generate' && always() }}
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, BUILD_TYPE, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Allure report / ${BUILD_TYPE}`,
})

View File

@@ -12,9 +12,6 @@ inputs:
description: "Allow to skip if file doesn't exist, fail otherwise"
default: false
required: false
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
@@ -26,23 +23,23 @@ runs:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
PREFIX=artifacts/${GITHUB_RUN_ID}
FILENAME=$(basename $ARCHIVE)
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::true'
exit 0
else
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
fi
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::false'
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}

View File

@@ -1,138 +0,0 @@
name: 'Create Branch'
description: 'Create Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
value: ${{ steps.change-password.outputs.dsn }}
branch_id:
description: 'Created Branch ID'
value: ${{ steps.create-branch.outputs.branch_id }}
runs:
using: "composite"
steps:
- name: Create New Branch
id: create-branch
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"branch\": {
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
},
\"endpoints\": [
{
\"type\": \"read_write\"
}
]
}")
if [ -z "${branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
exit 1
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
echo "branch_id=${branch_id}" >> $GITHUB_OUTPUT
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
echo "host=${host}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
- name: Get Role name
id: role-name
shell: bash -euxo pipefail {0}
run: |
roles=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
- name: Change Password
id: change-password
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
for i in $(seq 1 10); do
reset_password=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles/${ROLE_NAME}/reset_password" \
--request POST \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${reset_password}" ]; then
sleep 1
continue
fi
password=$(echo $reset_password | jq --raw-output '.role.password')
if [ "${password}" == "null" ]; then
sleep 1
continue
fi
echo "::add-mask::${password}"
break
done
if [ -z "${password}" ] || [ "${password}" == "null" ]; then
echo 2>&1 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
exit 1
fi
dsn="postgres://${ROLE_NAME}:${password}@${HOST}/neondb"
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}
HOST: ${{ steps.create-branch.outputs.host }}

View File

@@ -1,58 +0,0 @@
name: 'Delete Branch'
description: 'Delete Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
desctiption: 'ID of the branch to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Branch
# Do not try to delete a branch if .github/actions/neon-project-create
# or .github/actions/neon-branch-create failed before
if: ${{ inputs.project_id != '' && inputs.branch_id != '' }}
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
deleted_branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}" \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${deleted_branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $deleted_branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
exit 1
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ inputs.branch_id }}

View File

@@ -1,64 +0,0 @@
name: 'Create Neon Project'
description: 'Create Neon Project using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
region_id:
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Project DSN (for main database)'
value: ${{ steps.create-neon-project.outputs.dsn }}
project_id:
description: 'Created Project ID'
value: ${{ steps.create-neon-project.outputs.project_id }}
runs:
using: "composite"
steps:
- name: Create Neon Project
id: create-neon-project
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
project=$(curl \
"https://${API_HOST}/api/v2/projects" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION},
\"region_id\": \"${REGION_ID}\",
\"settings\": { }
}
}")
# Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
dsn=$(echo $project | jq --raw-output '.connection_uris[0].connection_uri')
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
project_id=$(echo $project | jq --raw-output '.project.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "Project ${project_id} has been created"
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }}

View File

@@ -1,35 +0,0 @@
name: 'Delete Neon Project'
description: 'Delete Neon Project using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Neon Project
# Do not try to delete a project if .github/actions/neon-project-create failed before
if: ${{ inputs.project_id != '' }}
shell: bash -euxo pipefail {0}
run: |
curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}" \
--fail \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
echo "Project ${PROJECT_ID} has been deleted"
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}

View File

@@ -3,7 +3,10 @@ description: 'Runs a Neon python test set, performing all the required preparati
inputs:
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug", or "remote" for the remote cluster'
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
required: true
rust_toolchain:
description: 'Rust toolchain version to fetch the caches'
required: true
test_selection:
description: 'A python test suite to run'
@@ -21,7 +24,7 @@ inputs:
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set'
description: 'Whether to upload the performance report'
required: false
default: 'false'
run_with_real_s3:
@@ -44,37 +47,16 @@ inputs:
description: 'Secret access key'
required: false
default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Get Neon artifact
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
@@ -96,28 +78,16 @@ runs:
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin
COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
# and it is needed to distinguish different environments
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
fi
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
@@ -128,12 +98,7 @@ runs:
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n16 uses sixteen processes to run tests via pytest-xdist
EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist
EXTRA_PARAMS="--dist=loadgroup $EXTRA_PARAMS"
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
@@ -144,28 +109,16 @@ runs:
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
else
cov_prefix=()
fi
# Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
fi
# Run the tests.
@@ -175,36 +128,35 @@ runs:
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
#
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \
--tb=short \
--verbose \
-m "not remote_cluster" \
-rA $TEST_SELECTION $EXTRA_PARAMS
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO="$PLATFORM"
scripts/generate_and_push_perf_report.sh
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO=local
scripts/generate_and_push_perf_report.sh
fi
fi
- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
- name: Delete all data but logs
shell: bash -euxo pipefail {0}
if: always()
run: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- name: Upload python test logs
if: always()
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: store
build_type: ${{ inputs.build_type }}
test_selection: ${{ inputs.test_selection }}
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
path: /tmp/test_output/

View File

@@ -7,9 +7,6 @@ inputs:
path:
description: "A directory or file to upload"
required: true
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
@@ -45,14 +42,14 @@ runs:
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
PREFIX=artifacts/${GITHUB_RUN_ID}
FILENAME=$(basename $ARCHIVE)
FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${FILENAME}
time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}
# Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}

View File

@@ -1,5 +1,4 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version
collections/*
!collections/.keep

View File

@@ -1,7 +1,7 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:
@@ -14,8 +14,7 @@
- safekeeper
- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
debug: msg="Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper
@@ -36,7 +35,7 @@
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:
@@ -59,47 +58,24 @@
creates: "/storage/pageserver/data/tenants"
environment:
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
- pageserver
- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver
- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver
- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver
- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
- name: update remote storage (s3) config
lineinfile:
path: /storage/pageserver/data/pageserver.toml
line: "{{ item }}"
loop:
- "[remote_storage]"
- "bucket_name = '{{ bucket_name }}'"
- "bucket_region = '{{ bucket_region }}'"
- "prefix_in_bucket = '{{ inventory_hostname }}'"
become: true
tags:
- pageserver
# used in `pageserver.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/pageserver.service
@@ -126,15 +102,14 @@
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/pageservers/$INSTANCE_ID
tags:
- pageserver
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:
@@ -157,20 +132,11 @@
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
- safekeeper
# used in `safekeeper.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
# in the future safekeepers should discover pageservers byself
# but currently use first pageserver that was discovered
- name: set first pageserver var for safekeepers
@@ -205,7 +171,6 @@
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/safekeepers/$INSTANCE_ID
tags:
- safekeeper

View File

@@ -21,16 +21,10 @@ docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
docker rm -vf ${ID}
# store version to file (for ansible playbooks) and create binaries tarball

20
.github/ansible/neon-stress.hosts vendored Normal file
View File

@@ -0,0 +1,20 @@
[pageservers]
neon-stress-ps-1 console_region_id=1
neon-stress-ps-2 console_region_id=1
[safekeepers]
neon-stress-sk-1 console_region_id=1
neon-stress-sk-2 console_region_id=1
neon-stress-sk-3 console_region_id=1
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1
etcd_endpoints = etcd-stress.local:2379
safekeeper_enable_s3_offload = false

View File

@@ -1,48 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-ap-southeast-1
bucket_region: ap-southeast-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: ap-southeast-1
ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
console_region_id: aws-ap-southeast-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-064de8ea28bdb495b
pageserver-1.ap-southeast-1.aws.neon.tech:
ansible_host: i-0b180defcaeeb6b93
safekeepers:
hosts:
safekeeper-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-0d6f1dc5161eef894
safekeeper-2.ap-southeast-1.aws.neon.tech:
ansible_host: i-04fb63634e4679eb9
safekeeper-3.ap-southeast-1.aws.neon.tech:
ansible_host: i-05481f3bc88cfc2d4

View File

@@ -1,50 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-eu-central-1
bucket_region: eu-central-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.tech:
ansible_host: i-0cd8d316ecbb715be
pageserver-1.eu-central-1.aws.neon.tech:
ansible_host: i-090044ed3d383fef0
pageserver-2.eu-central-1.aws.neon.tech:
ansible_host: i-033584edf3f4b6742
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.tech:
ansible_host: i-0b238612d2318a050
safekeeper-1.eu-central-1.aws.neon.tech:
ansible_host: i-07b9c45e5c2637cd4
safekeeper-2.eu-central-1.aws.neon.tech:
ansible_host: i-020257302c3c93d88

View File

@@ -1,51 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.tech:
ansible_host: i-062227ba7f119eb8c
pageserver-1.us-east-2.aws.neon.tech:
ansible_host: i-0b3ec0afab5968938
pageserver-2.us-east-2.aws.neon.tech:
ansible_host: i-0d7a1c4325e71421d
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.tech:
ansible_host: i-0e94224750c57d346
safekeeper-1.us-east-2.aws.neon.tech:
ansible_host: i-06d113fb73bfddeb0
safekeeper-2.us-east-2.aws.neon.tech:
ansible_host: i-09f66c8e04afff2e8

View File

@@ -1,53 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-west-2
bucket_region: us-west-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-west-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
console_region_id: aws-us-west-2-new
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-west-2.aws.neon.tech:
ansible_host: i-0d9f6dfae0e1c780d
pageserver-1.us-west-2.aws.neon.tech:
ansible_host: i-0c834be1dddba8b3f
pageserver-2.us-west-2.aws.neon.tech:
ansible_host: i-051642d372c0a4f32
pageserver-3.us-west-2.aws.neon.tech:
ansible_host: i-00c3844beb9ad1c6b
safekeepers:
hosts:
safekeeper-0.us-west-2.aws.neon.tech:
ansible_host: i-00719d8a74986fda6
safekeeper-1.us-west-2.aws.neon.tech:
ansible_host: i-074682f9d3c712e7c
safekeeper-2.us-west-2.aws.neon.tech:
ansible_host: i-042b7efb1729d7966

20
.github/ansible/production.hosts vendored Normal file
View File

@@ -0,0 +1,20 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1
[safekeepers]
zenith-1-sk-1 console_region_id=1
zenith-1-sk-2 console_region_id=1
zenith-1-sk-3 console_region_id=1
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = zenith-1-etcd.local:2379

View File

@@ -1,10 +1,7 @@
#!/bin/sh
# fetch params from meta-data service
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
# store fqdn hostname in var
HOST=$(hostname -f)
@@ -15,22 +12,18 @@ cat <<EOF | tee /tmp/payload
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false,
"availability_zone_id": "${AZ_ID}",
"disk_size": ${DISK_SIZE},
"instance_type": "${INSTANCE_TYPE}"
"http_port": 9898
}
EOF
# check if pageserver already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/pageservers -d@/tmp/payload | jq -r '.ID')
# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data

View File

@@ -14,18 +14,18 @@ cat <<EOF | tee /tmp/payload
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
"availability_zone_id": "${AZ_ID}"
}
EOF
# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers -d@/tmp/payload | jq -r '.ID')
# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

View File

@@ -1,2 +0,0 @@
ansible_connection: aws_ssm
ansible_python_interpreter: /usr/bin/python3

View File

@@ -1,46 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

20
.github/ansible/staging.hosts vendored Normal file
View File

@@ -0,0 +1,20 @@
[pageservers]
#zenith-us-stage-ps-1 console_region_id=27
zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-ps-3 console_region_id=27
[safekeepers]
zenith-us-stage-sk-4 console_region_id=27
zenith-us-stage-sk-5 console_region_id=27
zenith-us-stage-sk-6 console_region_id=27
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1
etcd_endpoints = zenith-us-stage-etcd.local:2379

View File

@@ -1,56 +0,0 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
pageserver-2.us-east-2.aws.neon.build:
ansible_host: i-01e31cdf7e970586a
pageserver-3.us-east-2.aws.neon.build:
ansible_host: i-0602a0291365ef7cc
pageserver-99.us-east-2.aws.neon.build:
ansible_host: i-0c39491109bb88824
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5

View File

@@ -1,12 +1,12 @@
[Unit]
Description=Neon pageserver
Description=Zenith pageserver
After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -1,12 +1,12 @@
[Unit]
Description=Neon safekeeper
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }}
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -1 +0,0 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}

View File

@@ -1,75 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"

View File

@@ -1,67 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
domain: "pg.neon.build"
sentryEnvironment: "staging"
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy-link pods
podLabels:
neon_service: proxy
neon_env: dev
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,60 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.cloud.stage.neon.tech"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram-legacy
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,75 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"

View File

@@ -0,0 +1,26 @@
fullnameOverride: "neon-stress-proxy-scram"
settings:
authBackend: "console"
authEndpoint: "http://neon-stress-console.local/management/api/v2"
domain: "*.stress.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -0,0 +1,34 @@
fullnameOverride: "neon-stress-proxy"
settings:
authEndpoint: "https://console.dev.neon.tech/authenticate_proxy_request/"
uri: "https://console.dev.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-stress-proxy.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.dev.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.ap-southeast-1.aws.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: ap-southeast-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.eu-central-1.aws.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,58 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
domain: "pg.neon.tech"
sentryEnvironment: "production"
# -- Additional labels for zenith-proxy pods
podLabels:
neon_service: proxy
neon_env: production
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-2.aws.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.cloud.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -0,0 +1,24 @@
settings:
authBackend: "console"
authEndpoint: "http://console-release.local/management/api/v2"
domain: "*.cloud.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -0,0 +1,32 @@
settings:
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: proxy-release.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -0,0 +1,31 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.cloud.stage.neon.tech"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: cloud.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

30
.github/helm-values/staging.proxy.yaml vendored Normal file
View File

@@ -0,0 +1,30 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,14 +0,0 @@
## Describe your changes
## Issue ticket number and link
## Checklist before requesting a review
- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
## Checklist before merging
- [ ] Do not forget to reformat commit message to not include the above checklist

View File

@@ -11,18 +11,9 @@ on:
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 3 * * *' # run once a day, timezone is utc
- cron: '36 4 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
inputs:
region_id:
description: 'Use a particular region. If not set the default region will be used'
required: false
default: 'aws-us-east-2'
save_perf_report:
type: boolean
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
required: false
defaults:
run:
@@ -35,68 +26,94 @@ concurrency:
jobs:
bench:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: "neon-staging"
# this workflow runs on self hosteed runner
# it's environment is quite different from usual guthub runner
# probably the most important difference is that it doesn't start from clean workspace each time
# e g if you install system packages they are not cleaned up since you install them directly in host machine
# not a container or something
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
steps:
- uses: actions/checkout@v3
- name: Checkout zenith repo
uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
# so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
# there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtuealenvs
- name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused there shouldn't be any troubles with install every time
./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
$POSTGRES_DISTRIB_DIR/bin/pgbench --version
# FIXME cluster setup is skipped due to various changes in console API
# for now pre created cluster is used. When API gain some stability
# after massive changes dynamic cluster setup will be revived.
# So use pre created cluster. It needs to be started manually, but stop is automatic after 5 minutes of inactivity
- name: Setup cluster
env:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
run: |
set -e
echo "Starting cluster"
# wake up the cluster
$POSTGRES_DISTRIB_DIR/bin/psql $BENCHMARK_CONNSTR -c "SELECT 1"
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
# pgbench is installed system wide from official repo
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# via
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
# [pgdg13]
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# enabled=1
# gpgcheck=0
# EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env:
# The pgbench test runs two tests of given duration against each scale.
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on self hosted runner
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 5400
- name: Submit result
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -108,489 +125,104 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-compare:
strategy:
fail-fast: false
matrix:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
db_size: [ 10gb ]
runner: [ us-east-2 ]
include:
- platform: neon-captest-prefetch
db_size: 50gb
runner: us-east-2
- platform: rds-aurora
db_size: 50gb
runner: us-east-2
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
REMOTE_ENV: "1"
POSTGRES_DISTRIB_DIR: /usr
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, "${{ matrix.runner }}", x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
strategy:
fail-fast: false
matrix:
connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Install Python deps
run: ./scripts/pysync
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon-captest-new | neon-captest-prefetch)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
- name: Calculate platform
id: calculate-platform
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
CONNSTR: ${{ matrix.connstr }}
run: |
if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
PLATFORM=neon-captest
elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
PLATFORM=rds-aurora
else
echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
exit 1
fi
echo "::set-output name=PLATFORM::${PLATFORM}"
- name: Install Deps
run: |
echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt -y update
sudo apt install -y postgresql-14 postgresql-client-14
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
mkdir -p perf-report-captest
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Benchmark simple-update
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Benchmark select-only
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
- name: Upload logs
if: always()
uses: ./.github/actions/upload
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }}
path: /tmp/test_output/
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.
# Run this job only when pgbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: success() || failure()
needs: [ pgbench-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
tpch-compare:
# TCP-H DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare & clickbench-compare.
# Run this job only when clickbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: success() || failure()
needs: [ clickbench-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: success() || failure()
needs: [ tpch-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Run user examples
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

File diff suppressed because it is too large Load Diff

135
.github/workflows/codestyle.yml vendored Normal file
View File

@@ -0,0 +1,135 @@
name: Check code style and build
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
jobs:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 60
name: run regression test suite
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
components: rustfmt, clippy
override: true
- name: Check formatting
run: cargo fmt --all -- --check
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
- name: Install macOS postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison openssl
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v2
with:
path: |
tmp_install/
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
- name: Set extra env for macOS
if: matrix.os == 'macos-latest'
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: make postgres
# Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
# and the real cause will be inside config.log
- name: Print configure logs in case of failure
if: failure()
continue-on-error: true
run: |
echo '' && echo '=== config.log ===' && echo ''
cat tmp_install/build/config.log
echo '' && echo '=== configure.log ===' && echo ''
cat tmp_install/build/configure.log
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --all --all-targets
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .

View File

@@ -1,179 +0,0 @@
name: Neon Deploy dev
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'main'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
concurrency:
group: deploy-dev
cancel-in-progress: false
jobs:
deploy-storage-new:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
options: --user root --privileged
if: inputs.deployStorage
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1, us-east-2 ]
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
- name: Cleanup ansible folder
run: rm -rf ~/.ansible
deploy-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
deploy_link_proxy: true
deploy_legacy_scram_proxy: true
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-storage-broker-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployStorageBroker
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache

View File

@@ -1,167 +0,0 @@
name: Neon Deploy prod
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'release'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
disclamerAcknowledged:
description: 'I confirm that there is an emergency and I can not use regular release workflow'
required: true
type: boolean
default: false
concurrency:
group: deploy-prod
cancel-in-progress: false
jobs:
deploy-prod-new:
runs-on: prod
container:
image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
options: --user root --privileged
if: inputs.deployStorage && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployProxy && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
deploy_link_proxy: true
deploy_legacy_scram_proxy: false
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
deploy_link_proxy: false
deploy_legacy_scram_proxy: true
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s

View File

@@ -1,154 +0,0 @@
name: Check neon with extra platform builds
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-macos-build:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')
timeout-minutes: 90
runs-on: macos-latest
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Cache cargo deps
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15 -j$(nproc)
- name: Build neon extensions
run: make neon-pg-ext -j$(nproc)
- name: Run cargo build
run: cargo build --all --release
- name: Check that no warnings are produced
run: ./run_clippy.sh
gather-rust-build-stats:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats')
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
- name: Produce the build stats
run: cargo build --all --release --timings
- name: Upload the build stats
id: upload-stats
env:
BUCKET: neon-github-public-dev
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Publish build stats report
uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Build stats (release)`,
})

45
.github/workflows/notifications.yml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: Send Notifications
on:
push:
branches: [ main ]
jobs:
send-notifications:
timeout-minutes: 30
name: send commit notifications
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Form variables for notification message
id: git_info_grab
run: |
git_stat=$(git show --stat=50)
git_stat="${git_stat//'%'/'%25'}"
git_stat="${git_stat//$'\n'/'%0A'}"
git_stat="${git_stat//$'\r'/'%0D'}"
git_stat="${git_stat// /}" # space -> 'Space En', as github tends to eat ordinary spaces
echo "::set-output name=git_stat::$git_stat"
echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
- name: Send notification
uses: appleboy/telegram-action@master
with:
to: ${{ secrets.TELEGRAM_TO }}
token: ${{ secrets.TELEGRAM_TOKEN }}
format: markdown
args: |
*@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
```
${{ steps.git_info_grab.outputs.git_stat }}
```

View File

@@ -23,7 +23,6 @@ jobs:
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
@@ -48,23 +47,17 @@ jobs:
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
@@ -72,13 +65,6 @@ jobs:
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs

View File

@@ -1,33 +0,0 @@
name: Create Release Branch
on:
schedule:
- cron: '0 10 * * 2'
jobs:
create_release_branch:
runs-on: [ubuntu-latest]
steps:
- name: Check out code
uses: actions/checkout@v3
with:
ref: main
- name: Get current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Create release branch
run: git checkout -b releases/${{ steps.date.outputs.date }}
- name: Push new branch
run: git push origin releases/${{ steps.date.outputs.date }}
- name: Create pull request into release
uses: thomaseizinger/create-pull-request@e3972219c86a56550fb70708d96800d8e24ba862 # 1.3.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
head: releases/${{ steps.date.outputs.date }}
base: release
title: Release ${{ steps.date.outputs.date }}

6
.gitignore vendored
View File

@@ -1,6 +1,7 @@
/pg_install
/target
/bindings/python/neon-dev-utils/target
/tmp_check
/tmp_install
/tmp_check_cli
__pycache__/
test_output/
@@ -15,6 +16,3 @@ test_output/
*.key
*.crt
*.o
*.so
*.Po

12
.gitmodules vendored
View File

@@ -1,8 +1,4 @@
[submodule "vendor/postgres-v14"]
path = vendor/postgres-v14
url = https://github.com/neondatabase/postgres.git
branch = REL_14_STABLE_neon
[submodule "vendor/postgres-v15"]
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
branch = REL_15_STABLE_neon
[submodule "vendor/postgres"]
path = vendor/postgres
url = https://github.com/zenithdb/postgres
branch = main

10
.yapfignore Normal file
View File

@@ -0,0 +1,10 @@
# This file is only read when `yapf` is run from this directory.
# Hence we only top-level directories here to avoid confusion.
# See source code for the exact file format: https://github.com/google/yapf/blob/c6077954245bc3add82dafd853a1c7305a6ebd20/yapf/yapflib/file_resources.py#L40-L43
vendor/
target/
tmp_install/
__pycache__/
test_output/
.neon/
.git/

View File

@@ -1,11 +0,0 @@
/compute_tools/ @neondatabase/control-plane
/control_plane/ @neondatabase/compute @neondatabase/storage
/libs/pageserver_api/ @neondatabase/compute @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute
/libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/safekeepers
/pageserver/ @neondatabase/compute @neondatabase/storage
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

3208
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -5,221 +5,22 @@ members = [
"pageserver",
"proxy",
"safekeeper",
"storage_broker",
"workspace_hack",
"trace",
"neon_local",
"integration_tests",
"libs/*",
]
exclude = [
"bindings/python/neon-dev-utils",
]
[workspace.package]
edition = "2021"
license = "Apache-2.0"
## All dependency versions, used in the project
[workspace.dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
async-stream = "0.3"
async-trait = "0.1"
atty = "0.2.14"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
aws-smithy-http = "0.51.0"
aws-types = "0.51.0"
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.61"
bstr = "1.0"
byteorder = "1.4"
bytes = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive"] }
close_fds = "0.3.2"
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-utils = "0.8.5"
either = "1.8"
enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
hashbrown = "0.13"
hashlink = "0.8.1"
hex = "0.4"
hex-literal = "0.3"
hmac = "0.12.1"
hostname = "0.3.1"
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
hyper-tungstenite = "0.9"
itertools = "0.10"
jsonwebtoken = "8"
libc = "0.2"
md5 = "0.7.0"
memoffset = "0.8"
nix = "0.26"
notify = "5.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.18.0"
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.10.0"
parking_lot = "0.12"
pin-project-lite = "0.2"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
regex = "1.4"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
reqwest-middleware = "0.2.0"
routerify = "3"
rpds = "0.12.0"
rustls = "0.20"
rustls-pemfile = "1"
rustls-split = "0.3"
scopeguard = "1.1"
sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
sha2 = "0.10.2"
signal-hook = "0.3"
socket2 = "0.4.4"
strum = "0.24"
strum_macros = "0.24"
svg_fmt = "0.4.1"
sync_wrapper = "0.1.2"
tar = "0.4"
test-context = "0.1"
thiserror = "1.0"
tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-postgres-rustls = "0.9.0"
tokio-rustls = "0.23"
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["io"] }
toml = "0.5"
toml_edit = { version = "0.17", features = ["easy"] }
tonic = {version = "0.8", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-opentelemetry = "0.18.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.2"
uuid = { version = "1.2", features = ["v4", "serde"] }
walkdir = "2.3.2"
webpki-roots = "0.22.5"
x509-parser = "0.14"
## TODO replace this with tracing
env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
## Local libraries
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
## Common library dependency
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
## Build dependencies
criterion = "0.4"
rcgen = "0.10"
rstest = "0.16"
tempfile = "3.4"
tonic-build = "0.8"
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
################# Binary contents sections
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true
# disable debug symbols for all packages except this one to decrease binaries size
[profile.release.package."*"]
debug = false
[profile.release-line-debug]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
[profile.release-line-debug-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
lto = true
[profile.release-line-debug-size]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-line-debug-zize]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-line-debug-size-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-line-debug-zize-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
[profile.release-no-debug]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
[profile.release-no-debug-size]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-no-debug-zize]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-no-debug-size-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-no-debug-zize-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }

View File

@@ -1,29 +1,18 @@
### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries.
### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
### inside this image in the real deployments.
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build
WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
COPY vendor/postgres vendor/postgres
COPY Makefile Makefile
ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
&& mold -run make -j $(nproc) -s postgres \
&& rm -rf tmp_install/build \
&& tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz .
# Build neon binaries
FROM $REPOSITORY/$IMAGE:$TAG AS build
# Build zenith binaries
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
@@ -37,14 +26,13 @@ ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --chown=nonroot . .
COPY --from=pg-build /home/nonroot/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
&& mold -run cargo build --release \
&& cachepot -s
# Build final image
@@ -60,31 +48,17 @@ RUN set -e \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
&& chown -R neon:neon /data
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/tmp_install/ /usr/local/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoint='http://storage_broker:50051'" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"
VOLUME ["/data"]
USER neon
USER zenith
EXPOSE 6400
EXPOSE 9898
CMD ["pageserver"]

View File

@@ -1,494 +0,0 @@
ARG PG_VERSION
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \
--with-libxml --with-libxslt && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
# Enable some of contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
protobuf-c-compiler xsltproc
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* /
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && \
cd build && \
cmake .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "unit-pg-build"
# compile unit extension
#
#########################################################################################
FROM build-deps AS unit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
# We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is build. So we need to adjust the path.
# This one-liner removes pgsql/ part of the path.
# NOTE: Other extensions that rely on MODULEDIR variable after building phase will need the same fix.
find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control
#########################################################################################
#
# Layer "vector-pg-build"
# compile pgvector extension
#
#########################################################################################
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
#########################################################################################
#
# Layer "pgjwt-pg-build"
# compile pgjwt extension
#
#########################################################################################
FROM build-deps AS pgjwt-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
#########################################################################################
#
# Layer "hypopg-pg-build"
# compile hypopg extension
#
#########################################################################################
FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "pg-hashids-pg-build"
# compile pg_hashids extension
#
#########################################################################################
FROM build-deps AS pg-hashids-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
#########################################################################################
#
# Layer "rum-pg-build"
# compile rum extension
#
#########################################################################################
FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
#########################################################################################
#
# Layer "pgtap-pg-build"
# compile pgTAP extension
#
#########################################################################################
FROM build-deps AS pgtap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
#########################################################################################
#
# Layer "prefix-pg-build"
# compile Prefix extension
#
#########################################################################################
FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
#########################################################################################
#
# Layer "hll-pg-build"
# compile hll extension
#
#########################################################################################
FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
#########################################################################################
#
# Layer "plpgsql-check-pg-build"
# compile plpgsql_check extension
#
#########################################################################################
FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgx` deps
#
#########################################################################################
FROM build-deps AS rust-extensions-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y curl libclang-dev cmake && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
cargo install --locked --version 0.7.3 cargo-pgx && \
/bin/bash -c 'cargo pgx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layer "pg-jsonschema-pg-build"
# Compile "pg_jsonschema" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-jsonschema-pg-build
# there is no release tag yet, but we need it due to the superuser fix in the control file
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
#########################################################################################
#
# Layer "pg-graphql-pg-build"
# Compile "pg_graphql" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-graphql-pg-build
# Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the
# same 1.1 version we've used before.
RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yrashk/pg_graphql && \
cd pg_graphql && \
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \
cargo pgx install --release && \
# it's needed to enable extension because it uses untrusted C language
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
#########################################################################################
#
# Layer "pg-tiktoken-build"
# Compile "pg_tiktoken" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-tiktoken-pg-build
RUN git clone --depth=1 --single-branch https://github.com/kelvich/pg_tiktoken && \
cd pg_tiktoken && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /sfcgal/* /
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
RUN apt update && \
apt install --no-install-recommends -y \
locales \
libicu67 \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 \
libsfcgal1 \
libxml2 \
libxslt1.1 \
gdb && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,10 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS rust-build
WORKDIR /home/nonroot
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
@@ -20,7 +16,7 @@ ARG CACHEPOT_BUCKET=neon-github-dev
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --locked --release \
&& mold -run cargo build -p compute_tools --release \
&& cachepot -s
# Final image that only has one binary

View File

@@ -1,70 +0,0 @@
# Note: this file *mostly* just builds on Dockerfile.compute-node
ARG SRC_IMAGE
ARG VM_INFORMANT_VERSION=v0.1.14
# on libcgroup update, make sure to check bootstrap.sh for changes
ARG LIBCGROUP_VERSION=v2.0.3
# Pull VM informant, to copy from later
FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ARG LIBCGROUP_VERSION
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
# Combine, starting from non-VM compute node image.
FROM $SRC_IMAGE as base
# Temporarily set user back to root so we can run adduser, set inittab
USER root
RUN adduser vm-informant --disabled-password --no-create-home
RUN set -e \
&& rm -f /etc/inittab \
&& touch /etc/inittab
RUN set -e \
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
&& CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \
&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
USER postgres
ADD vm-cgconfig.conf /etc/cgconfig.conf
COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]

188
Makefile
View File

@@ -1,7 +1,15 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
SECCOMP = --with-libseccomp
else
SECCOMP =
endif
#
# We differentiate between release / debug build types using the BUILD_TYPE
@@ -20,26 +28,14 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif
# Use -C option so that when PostgreSQL "make install" installs the
# headers, the mtime of the headers are not changed when there have
# been no changes to the files. Changing the mtime triggers an
# unnecessary rebuild of 'postgres_ffi'.
PG_CONFIGURE_OPTS += INSTALL='$(ROOT_PROJECT_DIR)/scripts/ninstall.sh -C'
# Choose whether we should be silent or verbose
CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
# Fix for a corner case when make doesn't pass a jobserver
@@ -52,137 +48,63 @@ CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
#
# Top level Makefile to build Neon and PostgreSQL
# Top level Makefile to build Zenith and PostgreSQL
#
.PHONY: all
all: neon postgres neon-pg-ext
all: zenith postgres
### Neon Rust bits
### Zenith Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers
+@echo "Compiling Neon"
.PHONY: zenith
zenith: postgres-headers
+@echo "Compiling Zenith"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
+@echo "Configuring Postgres $* build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
CFLAGS='$(PG_CFLAGS)' \
$(POSTGRES_INSTALL_DIR)/build/config.status:
+@echo "Configuring postgres build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build
(cd $(POSTGRES_INSTALL_DIR)/build && \
$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
$(SECCOMP) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)
# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v15
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-headers-%
postgres-headers-%: postgres-configure-%
+@echo "Installing PostgreSQL $* headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL
.PHONY: postgres-%
postgres-%: postgres-configure-% \
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
+@echo "Compiling libpq $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
+@echo "Compiling pg_prewarm $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
+@echo "Compiling pageinspect $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
.PHONY: postgres-clean-%
postgres-clean-%:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-ext-clean-%
neon-pg-ext-clean-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
.PHONY: neon-pg-ext
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: $(POSTGRES_INSTALL_DIR)/build/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/neon
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install
+@echo "Compiling contrib/neon"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install
+@echo "Compiling contrib/neon_test_utils"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install
+@echo "Compiling pg_buffercache"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install
+@echo "Compiling pageinspect"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15
postgres-clean:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-ext-clean
clean:
cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything

4
NOTICE
View File

@@ -1,5 +1,5 @@
Neon
Copyright 2022 Neon Inc.
The PostgreSQL submodules in vendor/postgres-v14 and vendor/postgres-v15 are licensed under the
PostgreSQL license. See vendor/postgres-v14/COPYRIGHT and vendor/postgres-v15/COPYRIGHT.
The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.

110
README.md
View File

@@ -2,21 +2,29 @@
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation).
## Architecture overview
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
A Neon installation consists of compute nodes and a Neon storage engine.
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
Pageserver consists of:
- Repository - Neon storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request
## Running local installation
@@ -26,21 +34,13 @@ See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more inf
* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
```bash
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
protobuf-devel
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
```
* On Arch based systems, these packages are needed:
```bash
pacman -S base-devel readline zlib libseccomp openssl clang \
postgresql-libs cmake postgresql protobuf
```
Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
@@ -48,14 +48,11 @@ Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your di
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
#### Installing dependencies on macOS (12.3.1)
#### Installing dependencies on OSX (12.3.1)
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison
# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
brew install protobuf etcd openssl
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -71,17 +68,6 @@ brew install libpq
brew link --force libpq
```
#### Rustc version
The project uses [rust toolchain file](./rust-toolchain.toml) to define the version it's built with in CI for testing and local builds.
This file is automatically picked up by [`rustup`](https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file) that installs (if absent) and uses the toolchain version pinned in the file.
rustup users who want to build with another toolchain can use [`rustup override`](https://rust-lang.github.io/rustup/overrides.html#directory-overrides) command to set a specific toolchain for the project's directory.
non-rustup users most probably are not getting the same toolchain automatically from the file, so are responsible to manually verify their toolchain matches the version in the file.
Newer rustc versions most probably will work fine, yet older ones might not be supported due to some new features used by the project or the crates.
#### Building on Linux
1. Build neon and patched postgres
@@ -91,12 +77,11 @@ Newer rustc versions most probably will work fine, yet older ones might not be s
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`nproc` -s"
# Remove -s for the verbose build log
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. If you want to use a release
# build, utilize "BUILD_TYPE=release make -j`nproc`"
make -j`nproc` -s
make -j`nproc`
```
#### Building on OSX
@@ -108,19 +93,18 @@ make -j`nproc` -s
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s"
# Remove -s for the verbose build log
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. If you want to use a release
# build, utilize "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"
make -j`sysctl -n hw.logicalcpu` -s
make -j`sysctl -n hw.logicalcpu`
```
#### Dependency installation notes
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.
#### Running neon database
@@ -129,26 +113,22 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
created initial timeline de200bd42b49cc1814412c7e592dd6e9 timeline.lsn 0/16B5A50
initial timeline de200bd42b49cc1814412c7e592dd6e9 created
pageserver init succeeded
# start pageserver, safekeeper, and broker for their intercommunication
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting neon broker at 127.0.0.1:50051
storage_broker started, pid: 2918372
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2918386
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2918437
# create initial tenant and use it as a default for every future neon_local invocation
> ./target/debug/neon_local tenant create --set-default
tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
initializing for sk 1 for 7676
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
@@ -228,28 +208,18 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
```sh
git clone --recursive https://github.com/neondatabase/neon.git
CARGO_BUILD_FLAGS="--features=testing" make
make # builds also postgres and installs it to ./tmp_install
./scripts/pytest
```
## Documentation
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
Now we use README files to cover design ideas and overall architecture for each module and `rustdoc` style documentation comments. See also [/docs/](/docs/) a top-level overview of all available markdown documentation.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
See also README files in some source directories, and `rustdoc` style documentation comments.
Other resources:
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
### Postgres-specific terms
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.

264
bindings/python/neon-dev-utils/Cargo.lock generated Normal file
View File

@@ -0,0 +1,264 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "indoc"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8"
dependencies = [
"indoc-impl",
"proc-macro-hack",
]
[[package]]
name = "indoc-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
"unindent",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "lock_api"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "neon-dev-utils"
version = "0.1.0"
dependencies = [
"pyo3",
]
[[package]]
name = "once_cell"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e"
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "paste"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880"
dependencies = [
"paste-impl",
"proc-macro-hack",
]
[[package]]
name = "paste-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6"
dependencies = [
"proc-macro-hack",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d41d50a7271e08c7c8a54cd24af5d62f73ee3a6f6a314215281ebdec421d5752"
dependencies = [
"cfg-if",
"indoc",
"libc",
"parking_lot",
"paste",
"pyo3-build-config",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "779239fc40b8e18bc8416d3a37d280ca9b9fb04bda54b98037bb6748595c2410"
dependencies = [
"once_cell",
]
[[package]]
name = "pyo3-macros"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b247e8c664be87998d8628e86f282c25066165f1f8dda66100c48202fdb93a"
dependencies = [
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a8c2812c412e00e641d99eeb79dd478317d981d938aa60325dfa7157b607095"
dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "smallvec"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
[[package]]
name = "syn"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
[[package]]
name = "unindent"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -0,0 +1,16 @@
[package]
name = "neon-dev-utils"
version = "0.1.0"
edition = "2021"
[lib]
name = "neon_dev_utils"
# "cdylib" is necessary to produce a shared library for Python to import from.
#
# Downstream Rust code (including code in `bin/`, `examples/`, and `tests/`) will not be able
# to `use string_sum;` unless the "rlib" or "lib" crate type is also included, e.g.:
# crate-type = ["cdylib", "rlib"]
crate-type = ["cdylib"]
[dependencies]
pyo3 = { version = "0.15.1", features = ["extension-module"] }

View File

@@ -0,0 +1,31 @@
[[package]]
name = "maturin"
version = "0.13.2"
description = "Build and publish crates with pyo3, rust-cpython and cffi bindings as well as rust binaries as python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
[package.extras]
zig = ["ziglang (>=0.9.0,<0.10.0)"]
patchelf = ["patchelf"]
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "4e177514d6cf74b58bcd8ca30ef300c10a833b3e6b1d809aa57337ee20efeb47"
[metadata.files]
maturin = []
tomli = []

View File

@@ -0,0 +1,15 @@
[tool.poetry]
name = "neon-dev-utils"
version = "0.1.0"
description = "Python bindings for common neon development utils"
authors = ["Your Name <you@example.com>"]
[tool.poetry.dependencies]
python = "^3.10"
[tool.poetry.dev-dependencies]
maturin = "^0.13.2"
[build-system]
requires = ["maturin>=0.13.2", "poetry-core>=1.0.0"]
build-backend = "maturin"

View File

@@ -0,0 +1,17 @@
use pyo3::prelude::*;
/// Formats the sum of two numbers as string.
#[pyfunction]
fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
Ok((a + b).to_string())
}
/// A Python module implemented in Rust. The name of this function must match
/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
/// import the module.
#[pymodule]
fn neon_dev_utils(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
Ok(())
}

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert some a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)

View File

@@ -1,30 +1,21 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition.workspace = true
license.workspace = true
edition = "2021"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
clap.workspace = true
futures.workspace = true
hyper = { workspace = true, features = ["full"] }
notify.workspace = true
num_cpus.workspace = true
opentelemetry.workspace = true
postgres.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
tar.workspace = true
reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
url.workspace = true
workspace_hack.workspace = true
anyhow = "1.0"
chrono = "0.4"
clap = "3.0"
env_logger = "0.9"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -19,10 +19,6 @@ Also `compute_ctl` spawns two separate service threads:
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
last activity requests.
If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
downscaling and (eventually) will request immediate upscaling under resource pressure.
Usage example:
```sh
compute_ctl -D /var/db/postgres/compute \

View File

@@ -18,10 +18,6 @@
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
//! last activity requests.
//!
//! If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
//! compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
//! downscaling and (eventually) will request immediate upscaling under resource pressure.
//!
//! Usage example:
//! ```sh
//! compute_ctl -D /var/db/postgres/compute \
@@ -40,7 +36,7 @@ use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use tracing::{error, info};
use log::{error, info};
use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
use compute_tools::http::api::launch_http_server;
@@ -52,24 +48,56 @@ use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
// TODO: re-use `utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
let matches = cli().get_matches();
// Env variable is set by `cargo`
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
let matches = clap::App::new("compute_ctl")
.version(version.unwrap_or("unknown"))
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.get_matches();
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.value_of("connstr")
.expect("Postgres connection string is required");
let spec = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
let spec = matches.value_of("spec");
let spec_path = matches.value_of("spec-path");
// Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap();
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
let spec: ComputeSpec = match spec {
// First, try to get cluster spec from the cli argument
@@ -80,60 +108,17 @@ fn main() -> Result<()> {
let path = Path::new(sp);
let file = File::open(path)?;
serde_json::from_reader(file)?
} else if let Some(id) = compute_id {
if let Some(cp_base) = control_plane_uri {
let cp_uri = format!("{cp_base}/management/api/v1/{id}/spec");
let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
Ok(v) => v,
Err(_) => "".to_string(),
};
reqwest::blocking::Client::new()
.get(cp_uri)
.header("Authorization", jwt)
.send()?
.json()?
} else {
panic!(
"must specify --control-plane-uri \"{:#?}\" and --compute-id \"{:#?}\"",
control_plane_uri, compute_id
);
}
} else {
panic!("compute spec should be provided via --spec or --spec-path argument");
panic!("cluster spec should be provided via --spec or --spec-path argument");
}
}
};
// Extract OpenTelemetry context for the startup actions from the spec, and
// attach it to the current tracing context.
//
// This is used to propagate the context for the 'start_compute' operation
// from the neon control plane. This allows linking together the wider
// 'start_compute' operation that creates the compute container, with the
// startup actions here within the container.
//
// Switch to the startup context here, and exit it once the startup has
// completed and Postgres is up and running.
//
// NOTE: This is supposed to only cover the *startup* actions. Once
// postgres is configured and up-and-running, we exit this span. Any other
// actions that are performed on incoming HTTP requests, for example, are
// performed in separate spans.
let startup_context_guard = if let Some(ref carrier) = spec.startup_tracing_context {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
Some(TraceContextPropagator::new().extract(carrier).attach())
} else {
None
};
let pageserver_connstr = spec
.cluster
.settings
.find("neon.pageserver_connstring")
.expect("pageserver connstr should be provided");
let storage_auth_token = spec.storage_auth_token.clone();
let tenant = spec
.cluster
.settings
@@ -154,8 +139,7 @@ fn main() -> Result<()> {
tenant,
timeline,
pageserver_connstr,
storage_auth_token,
metrics: ComputeMetrics::default(),
metrics: ComputeMetrics::new(),
state: RwLock::new(ComputeState::new()),
};
let compute = Arc::new(compute_state);
@@ -165,108 +149,27 @@ fn main() -> Result<()> {
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
// Start Postgres
let mut delay_exit = false;
let mut exit_code = None;
let pg = match compute.start_compute() {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:?}", err);
// Run compute (Postgres) and hang waiting on it.
match compute.prepare_and_run() {
Ok(ec) => {
let code = ec.code().unwrap_or(1);
info!("Postgres exited with code {}, shutting down", code);
exit(code)
}
Err(error) => {
error!("could not start the compute node: {:?}", error);
let mut state = compute.state.write().unwrap();
state.error = Some(format!("{:?}", err));
state.error = Some(format!("{:?}", error));
state.status = ComputeStatus::Failed;
drop(state);
delay_exit = true;
None
// Keep serving HTTP requests, so the cloud control plane was able to
// get the actual error.
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
info!("shutting down");
Err(error)
}
};
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
if let Some(mut pg) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
info!("Postgres exited with code {}, shutting down", ecode);
exit_code = ecode.code()
}
if let Err(err) = compute.check_for_core_dumps() {
error!("error while checking for core dumps: {err:?}");
}
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
info!("shutting down tracing");
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit.
tracing_utils::shutdown_tracing();
info!("shutting down");
exit(exit_code.unwrap_or(1))
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.arg(
Arg::new("compute-id")
.short('i')
.long("compute-id")
.value_name("COMPUTE_ID"),
)
.arg(
Arg::new("control-plane-uri")
.short('p')
.long("control-plane-uri")
.value_name("CONTROL_PLANE"),
)
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -1,12 +1,11 @@
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
use tokio_postgres::NoTls;
use tracing::{error, instrument};
use crate::compute::ComputeNode;
#[instrument(skip_all)]
pub fn create_writability_check_data(client: &mut Client) -> Result<()> {
pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
let query = "
CREATE TABLE IF NOT EXISTS health_check (
id serial primary key,
@@ -22,7 +21,6 @@ pub fn create_writability_check_data(client: &mut Client) -> Result<()> {
Ok(())
}
#[instrument(skip_all)]
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {

View File

@@ -17,19 +17,17 @@
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::process::{Command, ExitStatus, Stdio};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use log::info;
use postgres::{Client, NoTls};
use serde::{Serialize, Serializer};
use tokio_postgres;
use tracing::{info, instrument, warn};
use crate::checker::create_writability_check_data;
use crate::checker::create_writablity_check_data;
use crate::config;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -45,7 +43,6 @@ pub struct ComputeNode {
pub tenant: String,
pub timeline: String,
pub pageserver_connstr: String,
pub storage_auth_token: Option<String>,
pub metrics: ComputeMetrics,
/// Volatile part of the `ComputeNode` so should be used under `RwLock`
/// to allow HTTP API server to serve status requests, while configuration
@@ -94,7 +91,7 @@ pub enum ComputeStatus {
Failed,
}
#[derive(Default, Serialize)]
#[derive(Serialize)]
pub struct ComputeMetrics {
pub sync_safekeepers_ms: AtomicU64,
pub basebackup_ms: AtomicU64,
@@ -102,6 +99,23 @@ pub struct ComputeMetrics {
pub total_startup_ms: AtomicU64,
}
impl ComputeMetrics {
pub fn new() -> Self {
Self {
sync_safekeepers_ms: AtomicU64::new(0),
basebackup_ms: AtomicU64::new(0),
config_ms: AtomicU64::new(0),
total_startup_ms: AtomicU64::new(0),
}
}
}
impl Default for ComputeMetrics {
fn default() -> Self {
Self::new()
}
}
impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) {
self.state.write().unwrap().status = status;
@@ -124,22 +138,10 @@ impl ComputeNode {
// Get basebackup from the libpq connection to pageserver using `connstr` and
// unarchive it to `pgdata` directory overriding all its previous content.
#[instrument(skip(self))]
fn get_basebackup(&self, lsn: &str) -> Result<()> {
let start_time = Utc::now();
let mut config = postgres::Config::from_str(&self.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
if let Some(storage_auth_token) = &self.storage_auth_token {
info!("Got storage auth token from spec file");
config.password(storage_auth_token);
} else {
info!("Storage auth token not set");
}
let mut client = config.connect(NoTls)?;
let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
let basebackup_cmd = match lsn {
"0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
_ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
@@ -169,18 +171,12 @@ impl ComputeNode {
// Run `postgres` in a special mode with `--sync-safekeepers` argument
// and return the reported LSN back to the caller.
#[instrument(skip(self))]
fn sync_safekeepers(&self) -> Result<String> {
let start_time = Utc::now();
let sync_handle = Command::new(&self.pgbin)
.args(["--sync-safekeepers"])
.args(&["--sync-safekeepers"])
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
.envs(if let Some(storage_auth_token) = &self.storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.stdout(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
@@ -191,13 +187,10 @@ impl ComputeNode {
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
"postgres --sync-safekeepers exited with non-zero status: {}",
sync_output.status,
String::from_utf8(sync_output.stdout)
.expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
);
}
@@ -217,7 +210,6 @@ impl ComputeNode {
/// Do all the preparations like PGDATA directory creation, configuration,
/// safekeepers sync, basebackup, etc.
#[instrument(skip(self))]
pub fn prepare_pgdata(&self) -> Result<()> {
let spec = &self.spec;
let pgdata_path = Path::new(&self.pgdata);
@@ -251,32 +243,30 @@ impl ComputeNode {
/// Start Postgres as a child process and manage DBs/roles.
/// After that this will hang waiting on the postmaster process to exit.
#[instrument(skip(self))]
pub fn start_postgres(&self) -> Result<std::process::Child> {
pub fn run(&self) -> Result<ExitStatus> {
let start_time = Utc::now();
let pgdata_path = Path::new(&self.pgdata);
// Run postgres as a child process.
let mut pg = Command::new(&self.pgbin)
.args(["-D", &self.pgdata])
.envs(if let Some(storage_auth_token) = &self.storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.args(&["-D", &self.pgdata])
.spawn()
.expect("cannot start postgres process");
wait_for_postgres(&mut pg, pgdata_path)?;
// Try default Postgres port if it is not provided
let port = self
.spec
.cluster
.settings
.find("port")
.unwrap_or_else(|| "5432".to_string());
wait_for_postgres(&mut pg, &port, pgdata_path)?;
Ok(pg)
}
#[instrument(skip(self))]
pub fn apply_config(&self) -> Result<()> {
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// In this case we need to connect with old `zenith_admin`name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
@@ -302,44 +292,16 @@ impl ComputeNode {
Ok(client) => client,
};
// Proceed with post-startup configuration. Note, that order of operations is important.
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(self, &mut client)?;
create_writability_check_data(&mut client)?;
handle_extensions(&self.spec, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection
drop(client);
info!(
"finished configuration of compute for project {}",
self.spec.cluster.cluster_id
);
Ok(())
}
#[instrument(skip(self))]
pub fn start_compute(&self) -> Result<std::process::Child> {
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
self.spec.cluster.cluster_id,
self.spec.operation_uuid.as_ref().unwrap(),
self.tenant,
self.timeline,
);
self.prepare_pgdata()?;
let start_time = Utc::now();
let pg = self.start_postgres()?;
self.apply_config()?;
let startup_end_time = Utc::now();
self.metrics.config_ms.store(
startup_end_time
.signed_duration_since(start_time)
@@ -359,109 +321,30 @@ impl ComputeNode {
self.set_status(ComputeStatus::Running);
Ok(pg)
info!(
"finished configuration of compute for project {}",
self.spec.cluster.cluster_id
);
// Wait for child Postgres process basically forever. In this state Ctrl+C
// will propagate to Postgres and it will be shut down as well.
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
Ok(ecode)
}
// Look for core dumps and collect backtraces.
//
// EKS worker nodes have following core dump settings:
// /proc/sys/kernel/core_pattern -> core
// /proc/sys/kernel/core_uses_pid -> 1
// ulimint -c -> unlimited
// which results in core dumps being written to postgres data directory as core.<pid>.
//
// Use that as a default location and pattern, except macos where core dumps are written
// to /cores/ directory by default.
pub fn check_for_core_dumps(&self) -> Result<()> {
let core_dump_dir = match std::env::consts::OS {
"macos" => Path::new("/cores/"),
_ => Path::new(&self.pgdata),
};
pub fn prepare_and_run(&self) -> Result<ExitStatus> {
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
self.spec.cluster.cluster_id,
self.spec.operation_uuid.as_ref().unwrap(),
self.tenant,
self.timeline,
);
// Collect core dump paths if any
info!("checking for core dumps in {}", core_dump_dir.display());
let files = fs::read_dir(core_dump_dir)?;
let cores = files.filter_map(|entry| {
let entry = entry.ok()?;
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
Some(entry.path())
});
// Print backtrace for each core dump
for core_path in cores {
warn!(
"core dump found: {}, collecting backtrace",
core_path.display()
);
// Try first with gdb
let backtrace = Command::new("gdb")
.args(["--batch", "-q", "-ex", "bt", &self.pgbin])
.arg(&core_path)
.output();
// Try lldb if no gdb is found -- that is handy for local testing on macOS
let backtrace = match backtrace {
Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {
warn!("cannot find gdb, trying lldb");
Command::new("lldb")
.arg("-c")
.arg(&core_path)
.args(["--batch", "-o", "bt all", "-o", "quit"])
.output()
}
_ => backtrace,
}?;
warn!(
"core dump backtrace: {}",
String::from_utf8_lossy(&backtrace.stdout)
);
warn!(
"debugger stderr: {}",
String::from_utf8_lossy(&backtrace.stderr)
);
}
Ok(())
}
/// Select `pg_stat_statements` data and return it as a stringified JSON
pub async fn collect_insights(&self) -> String {
let mut result_rows: Vec<String> = Vec::new();
let connect_result = tokio_postgres::connect(self.connstr.as_str(), NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
let result = client
.simple_query(
"SELECT
row_to_json(pg_stat_statements)
FROM
pg_stat_statements
WHERE
userid != 'cloud_admin'::regrole::oid
ORDER BY
(mean_exec_time + mean_plan_time) DESC
LIMIT 100",
)
.await;
if let Ok(raw_rows) = result {
for message in raw_rows.iter() {
if let postgres::SimpleQueryMessage::Row(row) = message {
if let Some(json) = row.get(0) {
result_rows.push(json.to_string());
}
}
}
format!("{{\"pg_stat_statements\": [{}]}}", result_rows.join(","))
} else {
"{{\"pg_stat_statements\": []}}".to_string()
}
self.prepare_pgdata()?;
self.run()
}
}

View File

@@ -3,23 +3,35 @@ use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use crate::compute::ComputeNode;
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use num_cpus;
use log::{error, info};
use serde_json;
use tracing::{error, info};
use tracing_utils::http::OtelName;
use crate::compute::{ComputeNode, ComputeStatus};
// Service function to handle all available routes.
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
//
// NOTE: The URI path is currently included in traces. That's OK because
// it doesn't contain any variable parts or sensitive information. But
// please keep that in mind if you change the routing here.
//
async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
match (req.method(), req.uri().path()) {
// Timestamp of the last Postgres activity in the plain text.
// DEPRECATED in favour of /status
(&Method::GET, "/last_activity") => {
info!("serving /last_active GET request");
let state = compute.state.read().unwrap();
// Use RFC3339 format for consistency.
Response::new(Body::from(state.last_active.to_rfc3339()))
}
// Has compute setup process finished? -> true/false.
// DEPRECATED in favour of /status
(&Method::GET, "/ready") => {
info!("serving /ready GET request");
let status = compute.get_status();
Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
}
// Serialized compute state.
(&Method::GET, "/status") => {
info!("serving /status GET request");
@@ -34,31 +46,23 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
}
// Collect Postgres current usage insights
(&Method::GET, "/insights") => {
info!("serving /insights GET request");
let insights = compute.collect_insights().await;
Response::new(Body::from(insights))
}
(&Method::POST, "/check_writability") => {
info!("serving /check_writability POST request");
let res = crate::checker::check_writability(compute).await;
// DEPRECATED, use POST instead
(&Method::GET, "/check_writability") => {
info!("serving /check_writability GET request");
let res = crate::checker::check_writability(&compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => Response::new(Body::from(e.to_string())),
}
}
(&Method::GET, "/info") => {
let num_cpus = num_cpus::get_physical();
info!("serving /info GET request. num_cpus: {}", num_cpus);
Response::new(Body::from(
serde_json::json!({
"num_cpus": num_cpus,
})
.to_string(),
))
(&Method::POST, "/check_writability") => {
info!("serving /check_writability POST request");
let res = crate::checker::check_writability(&compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => Response::new(Body::from(e.to_string())),
}
}
// Return the `404 Not Found` for any other routes.
@@ -80,19 +84,7 @@ async fn serve(state: Arc<ComputeNode>) {
async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone();
async move {
Ok::<_, Infallible>(
// NOTE: We include the URI path in the string. It
// doesn't contain any variable parts or sensitive
// information in this API.
tracing_utils::http::tracing_handler(
req,
|req| routes(req, &state),
OtelName::UriPath,
)
.await,
)
}
async move { Ok::<_, Infallible>(routes(req, state).await) }
}))
}
});

View File

@@ -10,12 +10,12 @@ paths:
/status:
get:
tags:
- Info
- "info"
summary: Get compute node internal status
description: ""
operationId: getComputeStatus
responses:
200:
"200":
description: ComputeState
content:
application/json:
@@ -25,58 +25,78 @@ paths:
/metrics.json:
get:
tags:
- Info
- "info"
summary: Get compute node startup metrics in JSON format
description: ""
operationId: getComputeMetricsJSON
responses:
200:
"200":
description: ComputeMetrics
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeMetrics"
/insights:
get:
tags:
- Info
summary: Get current compute insights in JSON format
description: |
Note, that this doesn't include any historical data
operationId: getComputeInsights
responses:
200:
description: Compute insights
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeInsights"
/info:
/ready:
get:
deprecated: true
tags:
- "info"
summary: Get info about the compute Pod/VM
summary: Check whether compute startup process finished successfully
description: ""
operationId: getInfo
operationId: computeIsReady
responses:
"200":
description: Info
description: Compute is ready ('true') or not ('false')
content:
application/json:
text/plain:
schema:
$ref: "#/components/schemas/Info"
type: string
example: "true"
/last_activity:
get:
deprecated: true
tags:
- "info"
summary: Get timestamp of the last compute activity
description: ""
operationId: getLastComputeActivityTS
responses:
"200":
description: Timestamp of the last compute activity
content:
text/plain:
schema:
type: string
example: "2022-10-12T07:20:50.52Z"
/check_writability:
get:
deprecated: true
tags:
- "check"
summary: Check that we can write new data on this compute
description: ""
operationId: checkComputeWritabilityDeprecated
responses:
"200":
description: Check result
content:
text/plain:
schema:
type: string
description: Error text or 'true' if check passed
example: "true"
post:
tags:
- Check
- "check"
summary: Check that we can write new data on this compute
description: ""
operationId: checkComputeWritability
responses:
200:
"200":
description: Check result
content:
text/plain:
@@ -111,15 +131,6 @@ components:
total_startup_ms:
type: integer
Info:
type: object
description: Information about VM/Pod
required:
- num_cpus
properties:
num_cpus:
type: integer
ComputeState:
type: object
required:
@@ -136,15 +147,6 @@ components:
type: string
description: Text of the error during compute startup, if any
ComputeInsights:
type: object
properties:
pg_stat_statements:
description: Contains raw output from pg_stat_statements in JSON format
type: array
items:
type: object
ComputeStatus:
type: string
enum:

View File

@@ -1,37 +1,43 @@
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::*;
use std::io::Write;
/// Initialize logging to stderr, and OpenTelemetry tracing and exporter.
///
/// Logging is configured using either `default_log_level` or
use anyhow::Result;
use chrono::Utc;
use env_logger::{Builder, Env};
macro_rules! info_println {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
println!($($tts)*);
}
}
}
macro_rules! info_print {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
print!($($tts)*);
}
}
}
/// Initialize `env_logger` using either `default_level` or
/// `RUST_LOG` environment variable as default log level.
///
/// OpenTelemetry is configured with OTLP/HTTP exporter. It picks up
/// configuration from environment variables. For example, to change the destination,
/// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See
/// `tracing-utils` package description.
///
pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
// Initialize Logging
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
pub fn init_logger(default_level: &str) -> Result<()> {
let env = Env::default().filter_or("RUST_LOG", default_level);
let fmt_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_writer(std::io::stderr);
// Initialize OpenTelemetry
let otlp_layer =
tracing_utils::init_tracing_without_runtime("compute_ctl").map(OpenTelemetryLayer::new);
// Put it all together
tracing_subscriber::registry()
.with(env_filter)
.with(otlp_layer)
.with(fmt_layer)
Builder::from_env(env)
.format(|buf, record| {
let thread_handle = std::thread::current();
writeln!(
buf,
"{} [{}] {}: {}",
Utc::now().format("%Y-%m-%d %H:%M:%S%.3f %Z"),
thread_handle.name().unwrap_or("main"),
record.level(),
record.args()
)
})
.init();
tracing::info!("logging and tracing started");
Ok(())
}

View File

@@ -3,8 +3,8 @@ use std::{thread, time};
use anyhow::Result;
use chrono::{DateTime, Utc};
use log::{debug, info};
use postgres::{Client, NoTls};
use tracing::{debug, info};
use crate::compute::ComputeNode;
@@ -52,16 +52,10 @@ fn watch_compute_activity(compute: &ComputeNode) {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() {
let state: String = match b.try_get("state") {
Ok(state) => state,
Err(_) => continue,
};
let state: String = b.get("state");
let change: String = b.get("state_change");
if state == "idle" {
let change: String = match b.try_get("state_change") {
Ok(state_change) => state_change,
Err(_) => continue,
};
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
@@ -80,8 +74,10 @@ fn watch_compute_activity(compute: &ComputeNode) {
}
}
// Get idle backend `state_change` with the max timestamp.
if let Some(last) = idle_backs.iter().max() {
// Sort idle backend `state_change` timestamps. The last one corresponds
// to the last activity.
idle_backs.sort();
if let Some(last) = idle_backs.last() {
last_active = *last;
}
}

View File

@@ -1,9 +1,3 @@
pub const DEFAULT_LOG_LEVEL: &str = "info";
// From Postgres docs:
// To ease transition from the md5 method to the newer SCRAM method, if md5 is specified
// as a method in pg_hba.conf but the user's password on the server is encrypted for SCRAM
// (see below), then SCRAM-based authentication will automatically be chosen instead.
// https://www.postgresql.org/docs/15/auth-password.html
//
// So it's safe to set md5 here, as `control-plane` anyway uses SCRAM for all roles.
pub const DEFAULT_CONNSTRING: &str = "host=localhost user=postgres";
pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";

View File

@@ -1,19 +1,18 @@
use std::fmt::Write;
use std::fs;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::Child;
use std::time::{Duration, Instant};
use std::str::FromStr;
use std::{fs, thread, time};
use anyhow::{bail, Result};
use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction};
use serde::Deserialize;
use tracing::{debug, instrument};
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
const POSTGRES_WAIT_TIMEOUT: u64 = 60 * 1000; // milliseconds
/// Rust representation of Postgres role info with only those fields
/// that matter for us.
@@ -47,23 +46,12 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
/// Escape a string for including it in a SQL literal
fn escape_literal(s: &str) -> String {
s.replace('\'', "''").replace('\\', "\\\\")
}
/// Escape a string so that it can be used in postgresql.conf.
/// Same as escape_literal, currently.
fn escape_conf_value(s: &str) -> String {
s.replace('\'', "''").replace('\\', "\\\\")
}
impl GenericOption {
/// Represent `GenericOption` as SQL statement parameter.
pub fn to_pg_option(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} '{}'", self.name, escape_literal(val)),
"string" => format!("{} '{}'", self.name, val),
_ => format!("{} {}", self.name, val),
}
} else {
@@ -75,7 +63,7 @@ impl GenericOption {
pub fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", self.name, escape_conf_value(val)),
"string" => format!("{} = '{}'", self.name, val),
_ => format!("{} = {}", self.name, val),
}
} else {
@@ -111,7 +99,6 @@ impl PgOptionsSerialize for GenericOptions {
.map(|op| op.to_pg_setting())
.collect::<Vec<String>>()
.join("\n")
+ "\n" // newline after last setting
} else {
"".to_string()
}
@@ -125,9 +112,16 @@ pub trait GenericOptionsSearch {
impl GenericOptionsSearch for GenericOptions {
/// Lookup option by name
fn find(&self, name: &str) -> Option<String> {
let ops = self.as_ref()?;
let op = ops.iter().find(|s| s.name == name)?;
op.value.clone()
match &self {
Some(ops) => {
let op = ops.iter().find(|s| s.name == name);
match op {
Some(op) => op.value.clone(),
None => None,
}
}
None => None,
}
}
}
@@ -135,8 +129,8 @@ impl Role {
/// Serialize a list of role parameters into a Postgres-acceptable
/// string of arguments.
pub fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
// For now, we do not use generic `options` for roles. Once used, add
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in Rails.
// For now we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string();
@@ -160,14 +154,6 @@ impl Role {
}
impl Database {
pub fn new(name: PgIdent, owner: PgIdent) -> Self {
Self {
name,
owner,
options: None,
}
}
/// Serialize a list of database parameters into a Postgres-acceptable
/// string of arguments.
/// NB: `TEMPLATE` is actually also an identifier, but so far we only need
@@ -175,7 +161,7 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.pg_quote())
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params
@@ -186,17 +172,18 @@ impl Database {
/// intended to be used for DB / role names.
pub type PgIdent = String;
/// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL.
pub trait Escaping {
fn pg_quote(&self) -> String;
/// Generic trait used to provide quoting for strings used in the
/// Postgres SQL queries. Currently used only to implement quoting
/// of identifiers, but could be used for literals in the future.
pub trait PgQuote {
fn quote(&self) -> String;
}
impl Escaping for PgIdent {
impl PgQuote for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes provided string with `""` and escapes every `"`.
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
fn pg_quote(&self) -> String {
/// always quotes provided string with `""` and escapes every `"`. Not idempotent,
/// i.e. if string is already escaped it will be escaped again.
fn quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
result
}
@@ -226,119 +213,62 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
&[],
)?
.iter()
.map(|row| Database::new(row.get("datname"), row.get("owner")))
.map(|row| Database {
name: row.get("datname"),
owner: row.get("owner"),
options: None,
})
.collect();
Ok(postgres_dbs)
}
/// Wait for Postgres to become ready to accept connections. It's ready to
/// accept connections when the state-field in `pgdata/postmaster.pid` says
/// 'ready'.
#[instrument(skip(pg))]
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
/// Wait for Postgres to become ready to accept connections:
/// - state should be `ready` in the `pgdata/postmaster.pid`
/// - and we should be able to connect to 127.0.0.1:5432
pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid");
let mut slept: u64 = 0; // ms
let pause = time::Duration::from_millis(100);
// PostgreSQL writes line "ready" to the postmaster.pid file, when it has
// completed initialization and is ready to accept connections. We want to
// react quickly and perform the rest of our initialization as soon as
// PostgreSQL starts accepting connections. Use 'notify' to be notified
// whenever the PID file is changed, and whenever it changes, read it to
// check if it's now "ready".
//
// You cannot actually watch a file before it exists, so we first watch the
// data directory, and once the postmaster.pid file appears, we switch to
// watch the file instead. We also wake up every 100 ms to poll, just in
// case we miss some events for some reason. Not strictly necessary, but
// better safe than sorry.
let (tx, rx) = std::sync::mpsc::channel();
let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
let _ = tx.send(res);
}) {
Ok(watcher) => (Box::new(watcher), rx),
Err(e) => {
match e.kind {
notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
// docker on m1 macs does not support recommended_watcher
// but return "Function not implemented (os error 38)"
// see https://github.com/notify-rs/notify/issues/423
let (tx, rx) = std::sync::mpsc::channel();
let timeout = time::Duration::from_millis(10);
let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
// let's poll it faster than what we check the results for (100ms)
let config =
notify::Config::default().with_poll_interval(Duration::from_millis(50));
let watcher = notify::PollWatcher::new(
move |res| {
let _ = tx.send(res);
},
config,
)?;
(Box::new(watcher), rx)
}
_ => return Err(e.into()),
}
}
};
watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
let started_at = Instant::now();
let mut postmaster_pid_seen = false;
loop {
// Sleep POSTGRES_WAIT_TIMEOUT at max (a bit longer actually if consider a TCP timeout,
// but postgres starts listening almost immediately, even if it is not really
// ready to accept connections).
if slept >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
if let Ok(Some(status)) = pg.try_wait() {
// Postgres exited, that is not what we expected, bail out earlier.
let code = status.code().unwrap_or(-1);
bail!("Postgres exited unexpectedly with code {}", code);
}
let res = rx.recv_timeout(Duration::from_millis(100));
debug!("woken up by notify: {res:?}");
// If there are multiple events in the channel already, we only need to be
// check once. Swallow the extra events before we go ahead to check the
// pid file.
while let Ok(res) = rx.try_recv() {
debug!("swallowing extra event: {res:?}");
}
// Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) {
if !postmaster_pid_seen {
debug!("postmaster.pid appeared");
watcher
.unwatch(pgdata)
.expect("Failed to remove pgdata dir watch");
watcher
.watch(&pid_path, RecursiveMode::NonRecursive)
.expect("Failed to add postmaster.pid file watch");
postmaster_pid_seen = true;
}
let file = BufReader::new(file);
let last_line = file.lines().last();
// Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line {
let status = line.trim();
debug!("last line of postmaster.pid: {status:?}");
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Now Postgres is ready to accept connections
if status == "ready" {
if status == "ready" && can_connect {
break;
}
}
}
// Give up after POSTGRES_WAIT_TIMEOUT.
let duration = started_at.elapsed();
if duration >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
thread::sleep(pause);
slept += 100;
}
tracing::info!("PostgreSQL is now running, continuing to configure it");
Ok(())
}

View File

@@ -1,12 +1,9 @@
use std::collections::HashMap;
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use postgres::config::Config;
use log::{info, log_enabled, warn, Level};
use postgres::{Client, NoTls};
use serde::Deserialize;
use tracing::{info, info_span, instrument, span_enabled, warn, Level};
use crate::compute::ComputeNode;
use crate::config;
@@ -23,10 +20,6 @@ pub struct ComputeSpec {
/// Expected cluster state at the end of transition process.
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
pub storage_auth_token: Option<String>,
pub startup_tracing_context: Option<HashMap<String, String>>,
}
/// Cluster state seen from the perspective of the external tools
@@ -84,25 +77,23 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
info!("postgres roles:");
for r in &existing_roles {
info!(
" - {}:{}",
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
}
info!("postgres roles:");
for r in &existing_roles {
info_println!(
"{} - {}:{}",
" ".repeat(27 + 5),
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
}
// Process delta operations first
@@ -124,8 +115,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
op.name.quote(),
new_name.quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
@@ -143,80 +134,58 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec roles:");
for role in &spec.cluster.roles {
let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
enum RoleAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_role {
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
RoleAction::Update
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5' prefix first if any)
//
// This is a backward compatibility hack, which comes from the times when we were using
// md5 for everyone and hashes were stored in the console db without md5 prefix. So when
// role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix,
// but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix.
// Here is the only place so far where we compare hashes, so it seems to be the best candidate
// to place this compatibility layer.
let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") {
stripped
} else {
pg_pwd
};
if pg_pwd != *role.encrypted_password.as_ref().unwrap() {
RoleAction::Update
} else {
RoleAction::None
}
} else {
RoleAction::None
}
} else {
RoleAction::Create
};
match action {
RoleAction::None => {}
RoleAction::Update => {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
RoleAction::Create => {
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
info!("role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
}
}
if span_enabled!(Level::INFO) {
let pwd = if role.encrypted_password.is_some() {
info_print!(
"{} - {}:{}",
" ".repeat(27 + 5),
name,
if role.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
};
let action_str = match action {
RoleAction::None => "",
RoleAction::Create => " -> create",
RoleAction::Update => " -> update",
};
info!(" - {}:{}{}", name, pwd, action_str);
}
);
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
if let Some(r) = pg_role {
let mut update_role = false;
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
update_role = true;
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5:' prefix first)
update_role = pg_pwd[3..] != *role.encrypted_password.as_ref().unwrap();
}
if update_role {
let mut query: String = format!("ALTER ROLE {} ", name.quote());
info_print!(" -> update");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
} else {
info!("role name: '{}'", &name);
let mut query: String = format!("CREATE ROLE {} ", name.quote());
info!("role create query: '{}'", &query);
info_print!(" -> create");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
}
info_print!("\n");
}
xact.commit()?;
@@ -225,43 +194,33 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
}
/// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)]
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
if let Some(ops) = &node.spec.delta_operations {
// First, reassign all dependent objects to db owners.
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
// Fetch existing roles. We could've exported and used `existing_roles` from
// `handle_roles()`, but we only make this list there before creating new roles.
// Which is probably fine as we never create to-be-deleted roles, but that'd
// just look a bit untidy. Anyway, the entire `pg_roles` should be in shared
// buffers already, so this shouldn't be a big deal.
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
xact.commit()?;
for op in ops {
// Check that role is still present in Postgres, as this could be a
// restart with the same spec after role deletion.
if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
let mut xact = client.transaction()?;
for op in ops {
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
xact.commit()?;
}
Ok(())
@@ -271,16 +230,17 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = conf.connect(NoTls)?;
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.pg_quote(),
db.owner.pg_quote()
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
@@ -289,7 +249,7 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
@@ -302,16 +262,13 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only in debug mode)
if span_enabled!(Level::INFO) {
info!("postgres databases:");
for r in &existing_dbs {
info!(" {}:{}", r.name, r.owner);
}
info!("postgres databases:");
for r in &existing_dbs {
info_println!("{} - {}:{}", " ".repeat(27 + 5), r.name, r.owner);
}
// Process delta operations first
@@ -322,7 +279,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// We do not check either DB exists or not,
// Postgres will take care of it for us
"delete_db" => {
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
warn!("deleting database '{}'", &op.name);
client.execute(query.as_str(), &[])?;
@@ -334,8 +291,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if existing_dbs.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
op.name.quote(),
new_name.quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
@@ -354,60 +311,39 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
for db in &spec.cluster.databases {
let name = &db.name;
info_print!("{} - {}:{}", " ".repeat(27 + 5), db.name, db.owner);
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
let pg_db = existing_dbs.iter().find(|r| r.name == *name);
enum DatabaseAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_db {
if let Some(r) = pg_db {
// XXX: db owner name is returned as quoted string from Postgres,
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
db.owner.pg_quote()
db.owner.quote()
} else {
db.owner.clone()
};
if new_owner != r.owner {
// Update the owner
DatabaseAction::Update
} else {
DatabaseAction::None
}
} else {
DatabaseAction::Create
};
match action {
DatabaseAction::None => {}
DatabaseAction::Update => {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.pg_quote(),
db.owner.pg_quote()
name.quote(),
db.owner.quote()
);
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
DatabaseAction::Create => {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
query.push_str(&db.to_pg_options());
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
};
info_print!(" -> update");
if span_enabled!(Level::INFO) {
let action_str = match action {
DatabaseAction::None => "",
DatabaseAction::Create => " -> create",
DatabaseAction::Update => " -> update",
};
info!(" - {}:{}{}", db.name, db.owner, action_str);
client.execute(query.as_str(), &[])?;
}
} else {
let mut query: String = format!("CREATE DATABASE {} ", name.quote());
info_print!(" -> create");
query.push_str(&db.to_pg_options());
client.execute(query.as_str(), &[])?;
}
info_print!("\n");
}
Ok(())
@@ -415,7 +351,6 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
@@ -431,7 +366,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
.cluster
.roles
.iter()
.map(|r| r.name.pg_quote())
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
@@ -439,7 +374,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.pg_quote(),
dbname.quote(),
roles.join(", ")
);
info!("grant query {}", &query);
@@ -450,11 +385,12 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
let mut db_connstr = node.connstr.clone();
for db in &node.spec.cluster.databases {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
// database name is always the last and the only component of the path
db_connstr.set_path(&db.name);
let mut db_client = conf.connect(NoTls)?;
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
@@ -483,51 +419,9 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
END IF;\n\
END\n\
$$;",
db.owner.pg_quote()
db.owner.quote()
);
db_client.simple_query(&alter_query)?;
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
// This is needed because since postgres 15 this privilege is removed by default.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
) AND\n\
current_setting('server_version_num')::int/10000 >= 15\n\
THEN\n\
IF EXISTS(\n\
SELECT rolname\n\
FROM pg_catalog.pg_roles\n\
WHERE rolname = 'web_access'\n\
)\n\
THEN\n\
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
END\n\
$$;"
.to_string();
info!("grant query for db {} : {}", &db.name, &grant_query);
db_client.simple_query(&grant_query)?;
}
Ok(())
}
/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
// Create extension only if this compute really needs it
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
info!("creating system extensions with query: {}", query);
client.simple_query(query)?;
}
}
Ok(())

View File

@@ -85,7 +85,7 @@
"vartype": "bool"
},
{
"name": "neon.safekeepers",
"name": "safekeepers",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
@@ -178,14 +178,10 @@
"name": "neon.pageserver_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
},
{
"name": "test.escaping",
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
"vartype": "string"
}
]
},
"delta_operations": [
{
"action": "delete_db",

View File

@@ -28,66 +28,14 @@ mod pg_helpers_tests {
assert_eq!(
spec.cluster.settings.as_pg_settings(),
r#"fsync = off
wal_level = replica
hot_standby = on
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on
shared_buffers = 32768
port = 55432
max_connections = 100
max_wal_senders = 10
listen_addresses = '0.0.0.0'
wal_sender_timeout = 0
password_encryption = md5
maintenance_work_mem = 65536
max_parallel_workers = 8
max_worker_processes = 8
neon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'
max_replication_slots = 10
neon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'
shared_preload_libraries = 'neon'
synchronous_standby_names = 'walproposer'
neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
"#
"fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
);
}
#[test]
fn ident_pg_quote() {
fn quote_ident() {
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
}
#[test]
fn generic_options_search() {
let generic_options: GenericOptions = Some(vec![
GenericOption {
name: "present_value".into(),
value: Some("value".into()),
vartype: "string".into(),
},
GenericOption {
name: "missed_value".into(),
value: None,
vartype: "int".into(),
},
]);
assert_eq!(generic_options.find("present_value"), Some("value".into()));
assert_eq!(generic_options.find("missed_value"), None);
assert_eq!(generic_options.find("invalid_value"), None);
let empty_generic_options: GenericOptions = Some(vec![]);
assert_eq!(empty_generic_options.find("present_value"), None);
assert_eq!(empty_generic_options.find("missed_value"), None);
assert_eq!(empty_generic_options.find("invalid_value"), None);
let none_generic_options: GenericOptions = None;
assert_eq!(none_generic_options.find("present_value"), None);
assert_eq!(none_generic_options.find("missed_value"), None);
assert_eq!(none_generic_options.find("invalid_value"), None);
assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
}
}

View File

@@ -1,33 +1,22 @@
[package]
name = "control_plane"
version = "0.1.0"
edition.workspace = true
license.workspace = true
edition = "2021"
[dependencies]
anyhow.workspace = true
clap.workspace = true
comfy-table.workspace = true
git-version.workspace = true
nix.workspace = true
once_cell.workspace = true
postgres.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] }
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
tar.workspace = true
thiserror.workspace = true
toml.workspace = true
url.workspace = true
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
# instead, so that recompile times are better.
pageserver_api.workspace = true
postgres_backend.workspace = true
safekeeper_api.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true
tar = "0.4.38"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"
toml = "0.5"
once_cell = "1.13.0"
regex = "1"
anyhow = "1.0"
thiserror = "1"
nix = "0.23"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
workspace_hack.workspace = true
pageserver = { path = "../pageserver" }
safekeeper = { path = "../safekeeper" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -2,8 +2,7 @@
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
pg_auth_type = 'Trust'
http_auth_type = 'Trust'
auth_type = 'Trust'
[[safekeepers]]
id = 1

Some files were not shown because too many files have changed in this diff Show More