Compare commits

..

1 Commits

Author SHA1 Message Date
Patrick Insinger
32dd786650 pageserver - add naive layer IDs 2021-11-01 00:20:50 -07:00
777 changed files with 29488 additions and 167383 deletions

View File

@@ -1,16 +0,0 @@
# The binaries are really slow, if you compile them in 'dev' mode with the defaults.
# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
# optimizations enabled by "opt-level=1" don't affect debuggability too much.
#
# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
#
[profile.dev.package."*"]
# Set the default for dependencies in Development mode.
opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1
[alias]
build_testing = ["build", "--features", "testing"]

410
.circleci/config.yml Normal file
View File

@@ -0,0 +1,410 @@
version: 2.1
executors:
zenith-build-executor:
resource_class: xlarge
docker:
- image: cimg/rust:1.55.0
zenith-python-executor:
docker:
- image: cimg/python:3.7.10 # Oldest available 3.7 with Ubuntu 20.04 (for GLIBC and Rust) at CirlceCI
jobs:
check-codestyle:
executor: zenith-build-executor
steps:
- checkout
- run:
name: rustfmt
when: always
command: |
cargo fmt --all -- --check
# A job to build postgres
build-postgres:
executor: zenith-build-executor
parameters:
build_type:
type: enum
enum: ["debug", "release"]
environment:
BUILD_TYPE: << parameters.build_type >>
steps:
# Checkout the git repo (circleci doesn't have a flag to enable submodules here)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checkout out yet.
- run:
name: Get postgres cache key
command: |
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
# FIXME We could cache our own docker container, instead of installing packages every time.
- run:
name: apt install dependencies
command: |
if [ ! -e tmp_install/bin/postgres ]; then
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
fi
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
# it only compares file timestamps.
- run:
name: build postgres
command: |
if [ ! -e tmp_install/bin/postgres ]; then
# "depth 1" saves some time by not cloning the whole repo
git submodule update --init --depth 1
make postgres -j8
fi
- save_cache:
name: Save postgres cache
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
# A job to build zenith rust code
build-zenith:
executor: zenith-build-executor
parameters:
build_type:
type: enum
enum: ["debug", "release"]
steps:
- run:
name: apt install dependencies
command: |
sudo apt update
sudo apt install libssl-dev clang
# Checkout the git repo (without submodules)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checkout out yet.
- run:
name: Get postgres cache key
command: |
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
name: Rust build << parameters.build_type >>
command: |
export CARGO_INCREMENTAL=0
BUILD_TYPE="<< parameters.build_type >>"
if [[ $BUILD_TYPE == "debug" ]]; then
echo "Build in debug mode"
cargo build --bins --tests
elif [[ $BUILD_TYPE == "release" ]]; then
echo "Build in release mode"
cargo build --release --bins --tests
fi
- save_cache:
name: Save rust cache
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
- target
# Run style checks
# has to run separately from cargo fmt section
# since needs to run with dependencies
- run:
name: clippy
command: |
./run_clippy.sh
# Run rust unit tests
- run: cargo test
# Install the rust binaries, for use by test jobs
# `--locked` is required; otherwise, `cargo install` will ignore Cargo.lock.
# FIXME: this is a really silly way to install; maybe we should just output
# a tarball as an artifact? Or a .deb package?
- run:
name: cargo install
command: |
export CARGO_INCREMENTAL=0
BUILD_TYPE="<< parameters.build_type >>"
if [[ $BUILD_TYPE == "debug" ]]; then
echo "Install debug mode"
CARGO_FLAGS="--debug"
elif [[ $BUILD_TYPE == "release" ]]; then
echo "Install release mode"
# The default is release mode; there is no --release flag.
CARGO_FLAGS=""
fi
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path pageserver
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path walkeeper
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path zenith
# Install the postgres binaries, for use by test jobs
# FIXME: this is a silly way to do "install"; maybe just output a standard
# postgres package, whatever the favored form is (tarball? .deb package?)
# Note that pg_regress needs some build artifacts that probably aren't
# in the usual package...?
- run:
name: postgres install
command: |
cp -a tmp_install /tmp/zenith/pg_install
# Save the rust output binaries for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
check-python:
executor: zenith-python-executor
steps:
- checkout
- run:
name: Install deps
working_directory: test_runner
command: pipenv --python 3.7 install --dev
- run:
name: Run yapf to ensure code format
when: always
working_directory: test_runner
command: pipenv run yapf --recursive --diff .
- run:
name: Run mypy to check types
when: always
working_directory: test_runner
command: pipenv run mypy .
run-pytest:
executor: zenith-python-executor
parameters:
# pytest args to specify the tests to run.
#
# This can be a test file name, e.g. 'test_pgbench.py, or a subdirectory,
# or '-k foobar' to run tests containing string 'foobar'. See pytest man page
# section SPECIFYING TESTS / SELECTING TESTS for details.
#
# Select the type of Rust build. Must be "release" or "debug".
build_type:
type: string
default: "debug"
# This parameter is required, to prevent the mistake of running all tests in one job.
test_selection:
type: string
default: ""
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
extra_params:
type: string
default: ""
needs_postgres_source:
type: boolean
default: false
run_in_parallel:
type: boolean
default: true
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- when:
condition: << parameters.needs_postgres_source >>
steps:
- run: git submodule update --init --depth 1
- run:
name: Install deps
working_directory: test_runner
command: pipenv --python 3.7 install
- run:
name: Run pytest
working_directory: test_runner
environment:
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
command: |
TEST_SELECTION="<< parameters.test_selection >>"
EXTRA_PARAMS="<< parameters.extra_params >>"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if << parameters.run_in_parallel >>; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi;
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
- run:
# CircleCI artifacts are preserved one file at a time, so skipping
# this step isn't a good idea. If you want to extract the
# pageserver state, perhaps a tarball would be a better idea.
name: Delete all data but logs
when: always
command: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
du -sh /tmp/test_output/*
- store_artifacts:
path: /tmp/test_output
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
# Build zenithdb/zenith:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
docker build -t zenithdb/zenith:latest . && docker push zenithdb/zenith:latest
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"zenith-remote-ci\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"zenith-remote-ci\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
- check-codestyle
- check-python
- build-postgres:
name: build-postgres-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
- build-zenith:
name: build-zenith-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
requires:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_pg_regress
needs_postgres_source: true
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: other-tests-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_others
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: benchmarks
build_type: release
test_selection: performance
run_in_parallel: false
requires:
- build-zenith-release
- docker-image:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "zenithdb/console"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-zenith-debug
# - pg_regress-tests-release
# - other-tests-release

View File

@@ -1,26 +0,0 @@
# This file contains settings for `cargo hakari`.
# See https://docs.rs/cargo-hakari/latest/cargo_hakari/config for a full list of options.
hakari-package = "workspace_hack"
# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
dep-format-version = "4"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.
# For more about the new feature resolver, see:
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
# Have to keep the resolver still here since hakari requires this field,
# despite it's now the default for 2021 edition & cargo.
resolver = "2"
# Add triples corresponding to platforms commonly used by developers here.
# https://doc.rust-lang.org/rustc/platform-support.html
platforms = [
# "x86_64-unknown-linux-gnu",
# "x86_64-apple-darwin",
# "x86_64-pc-windows-msvc",
]
# Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true

View File

@@ -1,24 +1,18 @@
*
**/.git/
**/__pycache__
**/.pytest_cache
!rust-toolchain.toml
!Cargo.toml
!Cargo.lock
!Makefile
.git
target
tmp_check
tmp_install
tmp_check_cli
test_output
.vscode
.zenith
integration_tests/.zenith
.mypy_cache
Dockerfile
.dockerignore
!.cargo/
!.config/
!control_plane/
!compute_tools/
!libs/
!pageserver/
!pgxn/
!proxy/
!safekeeper/
!storage_broker/
!trace/
!vendor/postgres-v14/
!vendor/postgres-v15/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh
!vm-cgconfig.conf

View File

@@ -1 +0,0 @@
4c2bb43775947775401cbb9d774823c5723a91f8

View File

@@ -1,23 +0,0 @@
---
name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
assignees: ''
---
## Steps to reproduce
## Expected result
## Actual result
## Environment
## Logs, links
-

View File

@@ -1,25 +0,0 @@
---
name: Epic Template
about: A set of related tasks contributing towards specific outcome, comprising of
more than 1 week of work.
title: 'Epic: '
labels: t/Epic
assignees: ''
---
## Motivation
## DoD
## Implementation ideas
## Tasks
- [ ]
## Other related tasks and Epics
-

View File

@@ -1,21 +0,0 @@
## Release 202Y-MM-DD
**NB: this PR must be merged only by 'Create a merge commit'!**
### Checklist when preparing for release
- [ ] Read or refresh [the release flow guide](https://github.com/neondatabase/cloud/wiki/Release:-general-flow)
- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers?
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them).
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time)
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->

View File

@@ -1,254 +0,0 @@
name: 'Create Allure report'
description: 'Create and publish Allure report'
inputs:
action:
desctiption: 'generate or store'
required: true
build_type:
description: '`build_type` from run-python-test-set action'
required: true
test_selection:
description: '`test_selector` from run-python-test-set action'
required: false
outputs:
report-url:
description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }}
report-json-url:
description: 'Allure report JSON URL'
value: ${{ steps.generate-report.outputs.report-json-url }}
runs:
using: "composite"
steps:
# We're using some of env variables quite offen, so let's set them once.
#
# It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
#
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
#
- name: Set common environment variables
shell: bash -euxo pipefail {0}
run: |
echo "BUILD_TYPE=${BUILD_TYPE}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
env:
BUILD_TYPE: ${{ inputs.build_type }}
BUCKET: neon-github-public-dev
TEST_OUTPUT: /tmp/test_output
- name: Validate input parameters
shell: bash -euxo pipefail {0}
run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1
fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo >&2 "inputs.test_selection must be set for 'store' action"
exit 2
fi
- name: Calculate variables
id: calculate-vars
shell: bash -euxo pipefail {0}
run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number}
elif [ "${GITHUB_REF_NAME}" = "main" ]; then
# Shortcut for a special branch
key=main
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for a special branch
key=release
else
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
# Sanitize test selection to remove `/` and any other special characters
# Use printf instead of echo to avoid having `\n` at the end of the string
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
with:
distribution: 'temurin'
java-version: '17'
- name: Install Allure
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
run: |
if ! which allure; then
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
unzip -q ${ALLURE_ZIP}
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.21.0
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
shell: bash -euxo pipefail {0}
run: |
# Add metadata
cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
{
"name": "GitHub Actions",
"type": "github",
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
"buildOrder": ${GITHUB_RUN_ID},
"buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
"reportName": "Allure Report"
}
EOF
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
TEST_SELECTION=${{ inputs.test_selection }}
BUILD_TYPE=${BUILD_TYPE}
EOF
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
# Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
- name: Acquire Allure lock
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
LOCK_TIMEOUT=300 # seconds
for _ in $(seq 1 5); do
for i in $(seq 1 ${LOCK_TIMEOUT}); do
LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
break
fi
sleep 1
done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# A double-check that exactly WE have acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
break
fi
done
- name: Generate and publish final Allure report
if: ${{ inputs.action == 'generate' }}
id: generate-report
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
shell: bash -euxo pipefail {0}
run: |
# Get previously uploaded data for this run
ZSTD_NBTHREADS=0
s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key')
if [ -z "$s3_filepaths" ]; then
# There's no previously uploaded data for this run
exit 0
fi
for s3_filepath in ${s3_filepaths}; do
aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
mkdir -p ${archive%.tar.zst}
tar -xf ${archive} -C ${archive%.tar.zst}
rm -f ${archive}
done
# Get history trend
aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
# Generate report
allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
# Replace a logo link with a redirect to the latest version of the report
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
# Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect
cat <<EOF > ${TEST_OUTPUT}/allure/index.html
<!DOCTYPE html>
<meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF
aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
rm -rf ${TEST_OUTPUT}/allure
- uses: actions/github-script@v6
if: ${{ inputs.action == 'generate' && always() }}
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, BUILD_TYPE, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Allure report / ${BUILD_TYPE}`,
})

View File

@@ -1,59 +0,0 @@
name: "Download an artifact"
description: "Custom download action"
inputs:
name:
description: "Artifact name"
required: true
path:
description: "A directory to put artifact into"
default: "."
required: false
skip-if-does-not-exist:
description: "Allow to skip if file doesn't exist, fail otherwise"
default: false
required: false
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
steps:
- name: Download artifact
id: download-artifact
shell: bash -euxo pipefail {0}
env:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
exit 0
else
echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
fi
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
- name: Extract artifact
if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}
shell: bash -euxo pipefail {0}
env:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
run: |
mkdir -p ${TARGET}
time tar -xf ${ARCHIVE} -C ${TARGET}
rm -f ${ARCHIVE}

View File

@@ -1,138 +0,0 @@
name: 'Create Branch'
description: 'Create Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
value: ${{ steps.change-password.outputs.dsn }}
branch_id:
description: 'Created Branch ID'
value: ${{ steps.create-branch.outputs.branch_id }}
runs:
using: "composite"
steps:
- name: Create New Branch
id: create-branch
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"branch\": {
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
},
\"endpoints\": [
{
\"type\": \"read_write\"
}
]
}")
if [ -z "${branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo >&2 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
exit 1
fi
branch_id=$(echo $branch | jq --raw-output '.branch.id')
echo "branch_id=${branch_id}" >> $GITHUB_OUTPUT
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
echo "host=${host}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
- name: Get Role name
id: role-name
shell: bash -euxo pipefail {0}
run: |
roles=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
- name: Change Password
id: change-password
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
for i in $(seq 1 10); do
reset_password=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}/roles/${ROLE_NAME}/reset_password" \
--request POST \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${reset_password}" ]; then
sleep 1
continue
fi
password=$(echo $reset_password | jq --raw-output '.role.password')
if [ "${password}" == "null" ]; then
sleep 1
continue
fi
echo "::add-mask::${password}"
break
done
if [ -z "${password}" ] || [ "${password}" == "null" ]; then
echo >&2 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
exit 1
fi
dsn="postgres://${ROLE_NAME}:${password}@${HOST}/neondb"
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}
HOST: ${{ steps.create-branch.outputs.host }}

View File

@@ -1,58 +0,0 @@
name: 'Delete Branch'
description: 'Delete Branch using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
desctiption: 'ID of the branch to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Branch
# Do not try to delete a branch if .github/actions/neon-project-create
# or .github/actions/neon-branch-create failed before
if: ${{ inputs.project_id != '' && inputs.branch_id != '' }}
shell: bash -euxo pipefail {0}
run: |
for i in $(seq 1 10); do
deleted_branch=$(curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}/branches/${BRANCH_ID}" \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
)
if [ -z "${deleted_branch}" ]; then
sleep 1
continue
fi
branch_id=$(echo $deleted_branch | jq --raw-output '.branch.id')
if [ "${branch_id}" == "null" ]; then
sleep 1
continue
fi
break
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo >&2 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
exit 1
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ inputs.branch_id }}

View File

@@ -1,80 +0,0 @@
name: 'Create Neon Project'
description: 'Create Neon Project using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
region_id:
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
provisioner:
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units:
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]'
outputs:
dsn:
description: 'Created Project DSN (for main database)'
value: ${{ steps.create-neon-project.outputs.dsn }}
project_id:
description: 'Created Project ID'
value: ${{ steps.create-neon-project.outputs.project_id }}
runs:
using: "composite"
steps:
- name: Create Neon Project
id: create-neon-project
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
fi
project=$(curl \
"https://${API_HOST}/api/v2/projects" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION},
\"region_id\": \"${REGION_ID}\",
\"provisioner\": \"${PROVISIONER}\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": { }
}
}")
# Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
dsn=$(echo $project | jq --raw-output '.connection_uris[0].connection_uri')
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
project_id=$(echo $project | jq --raw-output '.project.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "Project ${project_id} has been created"
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }}
PROVISIONER: ${{ inputs.provisioner }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}

View File

@@ -1,35 +0,0 @@
name: 'Delete Neon Project'
description: 'Delete Neon Project using API'
inputs:
api_key:
desctiption: 'Neon API key'
required: true
project_id:
desctiption: 'ID of the Project to delete'
required: true
api_host:
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Delete Neon Project
# Do not try to delete a project if .github/actions/neon-project-create failed before
if: ${{ inputs.project_id != '' }}
shell: bash -euxo pipefail {0}
run: |
curl \
"https://${API_HOST}/api/v2/projects/${PROJECT_ID}" \
--fail \
--request DELETE \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
echo "Project ${PROJECT_ID} has been deleted"
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}

View File

@@ -1,210 +0,0 @@
name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug", or "remote" for the remote cluster'
required: true
test_selection:
description: 'A python test suite to run'
required: true
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set'
required: false
default: 'false'
run_with_real_s3:
description: 'Whether to pass real s3 credentials to the test suite'
required: false
default: 'false'
real_s3_bucket:
description: 'Bucket name for real s3 tests'
required: false
default: ''
real_s3_region:
description: 'Region name for real s3 tests'
required: false
default: ''
real_s3_access_key_id:
description: 'Access key id'
required: false
default: ''
real_s3_secret_access_key:
description: 'Secret access key'
required: false
default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Get Neon artifact
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
COMPATIBILITY_NEON_BIN: /tmp/neon-previous/bin
COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
# and it is needed to distinguish different environments
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
fi
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
EXTRA_PARAMS="${{ inputs.extra_params }}"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n16 uses sixteen processes to run tests via pytest-xdist
EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist
EXTRA_PARAMS="--dist=loadgroup $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
echo "REAL S3 ENABLED"
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
else
cov_prefix=()
fi
# Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
fi
# Run the tests.
#
# The junit.xml file allows CI tools to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
#
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \
--tb=short \
--verbose \
-rA $TEST_SELECTION $EXTRA_PARAMS
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO="$PLATFORM"
scripts/generate_and_push_perf_report.sh
fi
- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: store
build_type: ${{ inputs.build_type }}
test_selection: ${{ inputs.test_selection }}

View File

@@ -1,22 +0,0 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -euxo pipefail {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Download previous coverage data into the same directory
uses: ./.github/actions/download
with:
name: coverage-data-artifact
path: /tmp/coverage
skip-if-does-not-exist: true # skip if there's no previous coverage to download
- name: Upload coverage data
uses: ./.github/actions/upload
with:
name: coverage-data-artifact
path: /tmp/coverage

View File

@@ -1,58 +0,0 @@
name: "Upload an artifact"
description: "Custom upload action"
inputs:
name:
description: "Artifact name"
required: true
path:
description: "A directory or file to upload"
required: true
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
steps:
- name: Prepare artifact
shell: bash -euxo pipefail {0}
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
run: |
mkdir -p $(dirname $ARCHIVE)
if [ -f ${ARCHIVE} ]; then
echo >&2 "File ${ARCHIVE} already exist. Something went wrong before"
exit 1
fi
ZSTD_NBTHREADS=0
if [ -d ${SOURCE} ]; then
time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo >&2 "${SOURCE} does not exist"
exit 2
else
echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3
fi
- name: Upload artifact
shell: bash -euxo pipefail {0}
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)
FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${FILENAME}
# Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}

View File

@@ -1,5 +0,0 @@
neon_install.tar.gz
.neon_current_version
collections/*
!collections/.keep

View File

@@ -1,12 +0,0 @@
[defaults]
localhost_warning = False
host_key_checking = False
timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp neither worked for me
transfer_method = piped
pipelining = True

View File

@@ -1,15 +0,0 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
Host * !tele.zenith.tech
User admin
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
ProxyJump tele.zenith.tech

View File

View File

@@ -1,211 +0,0 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: get latest version of Neon binaries
register: current_version_file
set_fact:
current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
tags:
- pageserver
- safekeeper
- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper
- name: upload and extract Neon binaries to /usr/local
ansible.builtin.unarchive:
owner: root
group: root
src: neon_install.tar.gz
dest: /usr/local
become: true
tags:
- pageserver
- safekeeper
- binaries
- putbinaries
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_pageserver.sh
dest: /tmp/init_pageserver.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- pageserver
- name: init pageserver
shell:
cmd: /tmp/init_pageserver.sh
args:
creates: "/storage/pageserver/data/tenants"
environment:
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- pageserver
- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver
- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver
- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver
- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
become: true
tags:
- pageserver
# used in `pageserver.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/pageserver.service
dest: /etc/systemd/system/pageserver.service
owner: root
group: root
mode: '0644'
become: true
tags:
- pageserver
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: pageserver
enabled: yes
state: restarted
become: true
tags:
- pageserver
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
tags:
- pageserver
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_safekeeper.sh
dest: /tmp/init_safekeeper.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- safekeeper
- name: init safekeeper
shell:
cmd: /tmp/init_safekeeper.sh
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- safekeeper
# used in `safekeeper.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
# in the future safekeepers should discover pageservers byself
# but currently use first pageserver that was discovered
- name: set first pageserver var for safekeepers
set_fact:
first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
tags:
- safekeeper
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/safekeeper.service
dest: /etc/systemd/system/safekeeper.service
owner: root
group: root
mode: '0644'
become: true
tags:
- safekeeper
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: safekeeper
enabled: yes
state: restarted
become: true
tags:
- safekeeper
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
tags:
- safekeeper

View File

@@ -1,42 +0,0 @@
#!/bin/bash
set -e
if [ -n "${DOCKER_TAG}" ]; then
# Verson is DOCKER_TAG but without prefix
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
else
echo "Please set DOCKER_TAG environment variable"
exit 1
fi
# do initial cleanup
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
mkdir neon_install
# retrieve binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
docker rm -vf ${ID}
# store version to file (for ansible playbooks) and create binaries tarball
echo ${VERSION} > neon_install/.neon_current_version
echo ${VERSION} > .neon_current_version
tar -czf neon_install.tar.gz -C neon_install .
# do final cleaup
rm -rf neon_install postgres_install.tar.gz

View File

@@ -1,48 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-ap-southeast-1
bucket_region: ap-southeast-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: ap-southeast-1
ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
console_region_id: aws-ap-southeast-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-064de8ea28bdb495b
pageserver-1.ap-southeast-1.aws.neon.tech:
ansible_host: i-0b180defcaeeb6b93
safekeepers:
hosts:
safekeeper-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-0d6f1dc5161eef894
safekeeper-2.ap-southeast-1.aws.neon.tech:
ansible_host: i-04fb63634e4679eb9
safekeeper-3.ap-southeast-1.aws.neon.tech:
ansible_host: i-05481f3bc88cfc2d4

View File

@@ -1,50 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-eu-central-1
bucket_region: eu-central-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.tech:
ansible_host: i-0cd8d316ecbb715be
pageserver-1.eu-central-1.aws.neon.tech:
ansible_host: i-090044ed3d383fef0
pageserver-2.eu-central-1.aws.neon.tech:
ansible_host: i-033584edf3f4b6742
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.tech:
ansible_host: i-0b238612d2318a050
safekeeper-1.eu-central-1.aws.neon.tech:
ansible_host: i-07b9c45e5c2637cd4
safekeeper-2.eu-central-1.aws.neon.tech:
ansible_host: i-020257302c3c93d88

View File

@@ -1,51 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.tech:
ansible_host: i-062227ba7f119eb8c
pageserver-1.us-east-2.aws.neon.tech:
ansible_host: i-0b3ec0afab5968938
pageserver-2.us-east-2.aws.neon.tech:
ansible_host: i-0d7a1c4325e71421d
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.tech:
ansible_host: i-0e94224750c57d346
safekeeper-1.us-east-2.aws.neon.tech:
ansible_host: i-06d113fb73bfddeb0
safekeeper-2.us-east-2.aws.neon.tech:
ansible_host: i-09f66c8e04afff2e8

View File

@@ -1,53 +0,0 @@
storage:
vars:
bucket_name: neon-prod-storage-us-west-2
bucket_region: us-west-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-west-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
console_region_id: aws-us-west-2-new
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-west-2.aws.neon.tech:
ansible_host: i-0d9f6dfae0e1c780d
pageserver-1.us-west-2.aws.neon.tech:
ansible_host: i-0c834be1dddba8b3f
pageserver-2.us-west-2.aws.neon.tech:
ansible_host: i-051642d372c0a4f32
pageserver-3.us-west-2.aws.neon.tech:
ansible_host: i-00c3844beb9ad1c6b
safekeepers:
hosts:
safekeeper-0.us-west-2.aws.neon.tech:
ansible_host: i-00719d8a74986fda6
safekeeper-1.us-west-2.aws.neon.tech:
ansible_host: i-074682f9d3c712e7c
safekeeper-2.us-west-2.aws.neon.tech:
ansible_host: i-042b7efb1729d7966

View File

@@ -1,37 +0,0 @@
#!/bin/sh
# fetch params from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false,
"availability_zone_id": "${AZ_ID}",
"disk_size": ${DISK_SIZE},
"instance_type": "${INSTANCE_TYPE}"
}
EOF
# check if pageserver already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
fi

View File

@@ -1,31 +0,0 @@
#!/bin/sh
# fetch params from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
}
EOF
# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

View File

@@ -1,2 +0,0 @@
ansible_connection: aws_ssm
ansible_python_interpreter: /usr/bin/python3

View File

@@ -1,46 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

View File

@@ -1,56 +0,0 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
pageserver-2.us-east-2.aws.neon.build:
ansible_host: i-01e31cdf7e970586a
pageserver-3.us-east-2.aws.neon.build:
ansible_host: i-0602a0291365ef7cc
pageserver-99.us-east-2.aws.neon.build:
ansible_host: i-0c39491109bb88824
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Neon pageserver
After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Neon safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }}
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -1 +0,0 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}

View File

@@ -1,75 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"

View File

@@ -1,67 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
domain: "pg.neon.build"
sentryEnvironment: "staging"
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy-link pods
podLabels:
neon_service: proxy
neon_env: dev
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.cloud.stage.neon.tech"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram-legacy
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"

View File

@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.ap-southeast-1.aws.neon.tech"
extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: ap-southeast-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.eu-central-1.aws.neon.tech"
extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,58 +0,0 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
domain: "pg.neon.tech"
sentryEnvironment: "production"
# -- Additional labels for zenith-proxy pods
podLabels:
neon_service: proxy
neon_env: production
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-2.aws.neon.tech"
extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,76 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.cloud.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,77 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,52 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,14 +0,0 @@
## Describe your changes
## Issue ticket number and link
## Checklist before requesting a review
- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
## Checklist before merging
- [ ] Do not forget to reformat commit message to not include the above checklist

View File

@@ -1,581 +0,0 @@
name: Benchmarking
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ your branch ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 3 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
inputs:
region_id:
description: 'Use a particular region. If not set the default region will be used'
required: false
default: 'aws-us-east-2'
save_perf_report:
type: boolean
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
required: false
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
bench:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
generate-matrices:
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:
# - neon-captest-new: Freshly created project (1 CU)
# - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neon-captest-reuse: Reusing existing project
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
runs-on: ubuntu-latest
outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
steps:
- name: Generate matrix for pgbench benchmark
id: pgbench-compare-matrix
run: |
matrix='{
"platform": [
"neon-captest-new",
"neon-captest-reuse"
],
"db_size": [ "10gb" ],
"include": [
{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" }
]
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "platform": "rds-aurora", "db_size": "50gb"}]')
fi
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
- name: Generate matrix for OLAP benchmarks
id: olap-compare-matrix
run: |
matrix='{
"platform": [
"neon-captest-reuse"
]
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
pgbench-compare:
needs: [ generate-matrices ]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrices.outputs.pgbench-compare-matrix)}}
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon-captest-new | neon-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark simple-update
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark select-only
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.
# Run this job only when pgbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
tpch-compare:
# TCP-H DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare & clickbench-compare.
# Run this job only when clickbench-compare is finished to avoid the intersection.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Run user examples
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

File diff suppressed because it is too large Load Diff

View File

@@ -1,179 +0,0 @@
name: Neon Deploy dev
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'main'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
concurrency:
group: deploy-dev
cancel-in-progress: false
jobs:
deploy-storage-new:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
options: --user root --privileged
if: inputs.deployStorage
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1, us-east-2 ]
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
- name: Cleanup ansible folder
run: rm -rf ~/.ansible
deploy-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
deploy_link_proxy: true
deploy_legacy_scram_proxy: true
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-storage-broker-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployStorageBroker
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache

View File

@@ -1,167 +0,0 @@
name: Neon Deploy prod
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'release'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
disclamerAcknowledged:
description: 'I confirm that there is an emergency and I can not use regular release workflow'
required: true
type: boolean
default: false
concurrency:
group: deploy-prod
cancel-in-progress: false
jobs:
deploy-prod-new:
runs-on: prod
container:
image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
options: --user root --privileged
if: inputs.deployStorage && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployProxy && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
deploy_link_proxy: true
deploy_legacy_scram_proxy: false
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
deploy_link_proxy: false
deploy_legacy_scram_proxy: true
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s

View File

@@ -1,154 +0,0 @@
name: Check neon with extra platform builds
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-macos-build:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')
timeout-minutes: 90
runs-on: macos-latest
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Cache cargo deps
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15 -j$(nproc)
- name: Build neon extensions
run: make neon-pg-ext -j$(nproc)
- name: Run cargo build
run: cargo build --all --release
- name: Check that no warnings are produced
run: ./run_clippy.sh
gather-rust-build-stats:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats')
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
- name: Produce the build stats
run: cargo build --all --release --timings
- name: Upload the build stats
id: upload-stats
env:
BUCKET: neon-github-public-dev
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Publish build stats report
uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Build stats (release)`,
})

45
.github/workflows/notifications.yml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: Send Notifications
on:
push:
branches: [ main ]
jobs:
send-notifications:
timeout-minutes: 30
name: send commit notifications
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Form variables for notification message
id: git_info_grab
run: |
git_stat=$(git show --stat=50)
git_stat="${git_stat//'%'/'%25'}"
git_stat="${git_stat//$'\n'/'%0A'}"
git_stat="${git_stat//$'\r'/'%0D'}"
git_stat="${git_stat// /}" # space -> 'Space En', as github tends to eat ordinary spaces
echo "::set-output name=git_stat::$git_stat"
echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
- name: Send notification
uses: appleboy/telegram-action@master
with:
to: ${{ secrets.TELEGRAM_TO }}
token: ${{ secrets.TELEGRAM_TOKEN }}
format: markdown
args: |
*@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
```
${{ steps.git_info_grab.outputs.git_stat }}
```

View File

@@ -1,99 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,33 +0,0 @@
name: Create Release Branch
on:
schedule:
- cron: '0 10 * * 2'
jobs:
create_release_branch:
runs-on: [ubuntu-latest]
steps:
- name: Check out code
uses: actions/checkout@v3
with:
ref: main
- name: Get current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Create release branch
run: git checkout -b releases/${{ steps.date.outputs.date }}
- name: Push new branch
run: git push origin releases/${{ steps.date.outputs.date }}
- name: Create pull request into release
uses: thomaseizinger/create-pull-request@e3972219c86a56550fb70708d96800d8e24ba862 # 1.3.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
head: releases/${{ steps.date.outputs.date }}
base: release
title: Release ${{ steps.date.outputs.date }}

73
.github/workflows/testing.yml vendored Normal file
View File

@@ -0,0 +1,73 @@
name: Build and Test
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
regression-check:
strategy:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
os: [ubuntu-latest]
timeout-minutes: 30
name: run regression test suite
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
override: true
- name: Install postgres dependencies
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v2
with:
path: |
tmp_install/
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
make postgres
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Run cargo build
run: |
cargo build --workspace --bins --examples --tests
- name: Run cargo test
run: |
cargo test -- --nocapture --test-threads=1

17
.gitignore vendored
View File

@@ -1,20 +1,9 @@
/pg_install
/target
/tmp_check
/tmp_install
/tmp_check_cli
__pycache__/
test_output/
.vscode
.idea
/.neon
/integration_tests/.neon
# Coverage
*.profraw
*.profdata
*.key
*.crt
*.o
*.so
*.Po
/.zenith
/integration_tests/.zenith

12
.gitmodules vendored
View File

@@ -1,8 +1,4 @@
[submodule "vendor/postgres-v14"]
path = vendor/postgres-v14
url = https://github.com/neondatabase/postgres.git
branch = REL_14_STABLE_neon
[submodule "vendor/postgres-v15"]
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
branch = REL_15_STABLE_neon
[submodule "vendor/postgres"]
path = vendor/postgres
url = https://github.com/zenithdb/postgres
branch = main

View File

@@ -1,11 +0,0 @@
/compute_tools/ @neondatabase/control-plane
/control_plane/ @neondatabase/compute @neondatabase/storage
/libs/pageserver_api/ @neondatabase/compute @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute
/libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/safekeepers
/pageserver/ @neondatabase/compute @neondatabase/storage
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

View File

@@ -11,15 +11,17 @@ than it was before.
## Submitting changes
1. Get at least one +1 on your PR before you push.
1. Make a PR for every change.
Even seemingly trivial patches can break things in surprising ways.
Use of common sense is OK. If you're only fixing a typo in a comment,
it's probably fine to just push it. But if in doubt, open a PR.
2. Get at least one +1 on your PR before you push.
For simple patches, it will only take a minute for someone to review
it.
2. Don't force push small changes after making the PR ready for review.
Doing so will force readers to re-read your entire PR, which will delay
the review process.
3. Always keep the CI green.
Do not push, if the CI failed on your PR. Even if you think it's not

20
COPYRIGHT Normal file
View File

@@ -0,0 +1,20 @@
This software is licensed under the Apache 2.0 License:
----------------------------------------------------------------------------
Copyright 2021 Zenith Labs, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------
The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.

4606
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,228 +1,17 @@
[workspace]
members = [
"compute_tools",
"control_plane",
"pageserver",
"postgres_ffi",
"proxy",
"safekeeper",
"storage_broker",
"walkeeper",
"workspace_hack",
"trace",
"libs/*",
"zenith",
"zenith_metrics",
"zenith_utils",
]
[workspace.package]
edition = "2021"
license = "Apache-2.0"
## All dependency versions, used in the project
[workspace.dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
async-stream = "0.3"
async-trait = "0.1"
atty = "0.2.14"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
aws-smithy-http = "0.51.0"
aws-types = "0.55"
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.65"
bstr = "1.0"
byteorder = "1.4"
bytes = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive"] }
close_fds = "0.3.2"
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-utils = "0.8.5"
either = "1.8"
enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
hashbrown = "0.13"
hashlink = "0.8.1"
hex = "0.4"
hex-literal = "0.4"
hmac = "0.12.1"
hostname = "0.3.1"
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = { version = "0.14", features = ["http2", "tcp", "runtime", "http1"]}
hyper-tungstenite = "0.9"
itertools = "0.10"
jsonwebtoken = "8"
libc = "0.2"
md5 = "0.7.0"
memoffset = "0.8"
nix = "0.26"
notify = "5.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.18.0"
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.10.0"
parking_lot = "0.12"
pin-project-lite = "0.2"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
regex = "1.4"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
reqwest-middleware = "0.2.0"
routerify = "3"
rpds = "0.13"
rustls = "0.20"
rustls-pemfile = "1"
rustls-split = "0.3"
scopeguard = "1.1"
sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
sha2 = "0.10.2"
signal-hook = "0.3"
socket2 = "0.5"
strum = "0.24"
strum_macros = "0.24"
svg_fmt = "0.4.1"
sync_wrapper = "0.1.2"
tar = "0.4"
test-context = "0.1"
thiserror = "1.0"
tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.9.0"
tokio-rustls = "0.23"
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["io"] }
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-opentelemetry = "0.18.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.2"
uuid = { version = "1.2", features = ["v4", "serde"] }
walkdir = "2.3.2"
webpki-roots = "0.23"
x509-parser = "0.15"
percent-encoding = "1.0"
## TODO replace this with tracing
env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e", features = ["with-chrono-0_4", "array-impls"] }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e", features = ["array-impls"] }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e", features = ["array-impls"] }
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
## Common library dependency
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
## Build dependencies
criterion = "0.4"
rcgen = "0.10"
rstest = "0.17"
tempfile = "3.4"
tonic-build = "0.9"
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
################# Binary contents sections
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true
# disable debug symbols for all packages except this one to decrease binaries size
[profile.release.package."*"]
debug = false
[profile.release-line-debug]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
[profile.release-line-debug-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
lto = true
[profile.release-line-debug-size]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-line-debug-zize]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-line-debug-size-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-line-debug-zize-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
[profile.release-no-debug]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
[profile.release-no-debug-size]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-no-debug-zize]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-no-debug-size-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-no-debug-zize-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true

View File

@@ -1,90 +1,60 @@
### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries.
### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
### inside this image in the real deployments.
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
# Build neon binaries
FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --chown=nonroot . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
&& cachepot -s
# Build final image
#
FROM debian:bullseye-slim
# Docker image for console integration testing.
#
#
# Build Postgres separately --- this layer will be rebuilt only if one of
# mentioned paths will get any changes.
#
FROM zenithdb/build:buster AS pg-build
WORKDIR /zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
ENV BUILD_TYPE release
RUN make -j $(getconf _NPROCESSORS_ONLN) -s postgres
RUN rm -rf postgres_install/build
#
# Build zenith binaries
#
# TODO: build cargo deps as separate layer. We used cargo-chef before but that was
# net time waste in a lot of cases. Copying Cargo.lock with empty lib.rs should do the work.
#
FROM zenithdb/build:buster AS build
WORKDIR /zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
RUN cargo build --release
#
# Copy binaries to resulting image.
#
FROM debian:buster-slim
WORKDIR /data
RUN set -e \
&& apt update \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
&& chown -R neon:neon /data
RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl ca-certificates && \
mkdir zenith_install
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install postgres_install
COPY docker-entrypoint.sh /docker-entrypoint.sh
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# Remove build artifacts (~ 500 MB)
RUN rm -rf postgres_install/build && \
# 'Install' Postgres binaries locally
cp -r postgres_install/* /usr/local/ && \
# Prepare an archive of Postgres binaries (should be around 11 MB)
# and keep it inside container for an ease of deploy pipeline.
cd postgres_install && tar -czf /data/postgres_install.tar.gz . && cd .. && \
rm -rf postgres_install
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoint='http://storage_broker:50051'" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"
RUN useradd -d /data zenith && chown -R zenith:zenith /data
VOLUME ["/data"]
USER neon
USER zenith
EXPOSE 6400
EXPOSE 9898
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

95
Dockerfile.alpine Normal file
View File

@@ -0,0 +1,95 @@
#
# Docker image for console integration testing.
#
# We may also reuse it in CI to unify installation process and as a general binaries building
# tool for production servers.
#
# Dynamic linking is used for librocksdb and libstdc++ bacause librocksdb-sys calls
# bindgen with "dynamic" feature flag. This also prevents usage of dockerhub alpine-rust
# images which are statically linked and have guards against any dlopen. I would rather
# prefer all static binaries so we may change the way librocksdb-sys builds or wait until
# we will have our own storage and drop rockdb dependency.
#
# Cargo-chef is used to separate dependencies building from main binaries building. This
# way `docker build` will download and install dependencies only of there are changes to
# out Cargo.toml files.
#
#
# build postgres separately -- this layer will be rebuilt only if one of
# mentioned paths will get any changes
#
FROM alpine:3.13 as pg-build
RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
WORKDIR zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
# Build using clang and lld
RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
#
# Calculate cargo dependencies.
# This will always run, but only generate recipe.json with list of dependencies without
# installing them.
#
FROM alpine:20210212 as cargo-deps-inspect
RUN apk add --update rust cargo
RUN cargo install cargo-chef
WORKDIR zenith
COPY . .
RUN cargo chef prepare --recipe-path recipe.json
#
# Build cargo dependencies.
# This temp cantainner would be build only if recipe.json was changed.
#
FROM alpine:20210212 as deps-build
RUN apk add --update rust cargo openssl-dev clang build-base
# rust-rocksdb can be built against system-wide rocksdb -- that saves about
# 10 minutes during build. Rocksdb apk package is in testing now, but use it
# anyway. In case of any troubles we can download and build rocksdb here manually
# (to cache it as a docker layer).
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
#
# Build zenith binaries
#
FROM alpine:20210212 as build
RUN apk add --update rust cargo openssl-dev clang build-base
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY . .
# Copy cached dependencies
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=deps-build /zenith/target target
COPY --from=deps-build /root/.cargo /root/.cargo
RUN cargo build --release
#
# Copy binaries to resulting image.
# build-base hare to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
# out how to statically link rocksdb or avoid it at all).
#
FROM alpine:3.13
RUN apk add --update openssl build-base libseccomp-dev
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install /usr/local
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN addgroup zenith && adduser -h /data -D -G zenith zenith
VOLUME ["/data"]
WORKDIR /data
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

15
Dockerfile.build Normal file
View File

@@ -0,0 +1,15 @@
#
# Image with all the required dependencies to build https://github.com/zenithdb/zenith
# and Postgres from https://github.com/zenithdb/postgres
# Also includes some rust development and build tools.
#
FROM rust:slim-buster
WORKDIR /zenith
# Install postgres and zenith build dependencies
# clang is for rocksdb
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libseccomp-dev pkg-config libssl-dev clang
# Install rust tools
RUN rustup component add clippy && cargo install cargo-audit

View File

@@ -1,600 +0,0 @@
ARG PG_VERSION
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${PG_VERSION}" != "v14" ]; then \
# zstd is available only from PG15
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
fi && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
# Enable some of contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
protobuf-c-compiler xsltproc
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* /
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && \
cd build && \
cmake .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "unit-pg-build"
# compile unit extension
#
#########################################################################################
FROM build-deps AS unit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
# We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is build. So we need to adjust the path.
# This one-liner removes pgsql/ part of the path.
# NOTE: Other extensions that rely on MODULEDIR variable after building phase will need the same fix.
find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control
#########################################################################################
#
# Layer "vector-pg-build"
# compile pgvector extension
#
#########################################################################################
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
#########################################################################################
#
# Layer "pgjwt-pg-build"
# compile pgjwt extension
#
#########################################################################################
FROM build-deps AS pgjwt-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
#########################################################################################
#
# Layer "hypopg-pg-build"
# compile hypopg extension
#
#########################################################################################
FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "pg-hashids-pg-build"
# compile pg_hashids extension
#
#########################################################################################
FROM build-deps AS pg-hashids-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
#########################################################################################
#
# Layer "rum-pg-build"
# compile rum extension
#
#########################################################################################
FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
#########################################################################################
#
# Layer "pgtap-pg-build"
# compile pgTAP extension
#
#########################################################################################
FROM build-deps AS pgtap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
#########################################################################################
#
# Layer "ip4r-pg-build"
# compile ip4r extension
#
#########################################################################################
FROM build-deps AS ip4r-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \
echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
#########################################################################################
#
# Layer "prefix-pg-build"
# compile Prefix extension
#
#########################################################################################
FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \
echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
#########################################################################################
#
# Layer "hll-pg-build"
# compile hll extension
#
#########################################################################################
FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \
echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
#########################################################################################
#
# Layer "plpgsql-check-pg-build"
# compile plpgsql_check extension
#
#########################################################################################
FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \
echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
#########################################################################################
#
# Layer "timescaledb-pg-build"
# compile timescaledb extension
#
#########################################################################################
FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN apt-get update && \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
cd build && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control
#########################################################################################
#
# Layer "pg-hint-plan-pg-build"
# compile pg_hint_plan extension
#
#########################################################################################
FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
;; \
"v15") \
export PG_HINT_PLAN_VERSION=15_1_5_0 \
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgx` deps
#
#########################################################################################
FROM build-deps AS rust-extensions-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y curl libclang-dev cmake && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
cargo install --locked --version 0.7.3 cargo-pgx && \
/bin/bash -c 'cargo pgx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layer "pg-jsonschema-pg-build"
# Compile "pg_jsonschema" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-jsonschema-pg-build
# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023
# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
#########################################################################################
#
# Layer "pg-graphql-pg-build"
# Compile "pg_graphql" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-graphql-pg-build
# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch)
# Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the
# same 1.1 version we've used before.
RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \
cargo pgx install --release && \
# it's needed to enable extension because it uses untrusted C language
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
#########################################################################################
#
# Layer "pg-tiktoken-build"
# Compile "pg_tiktoken" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-tiktoken-pg-build
# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /sfcgal/* /
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# liblz4-1 for lz4
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
# libzstd1 for zstd
RUN apt update && \
apt install --no-install-recommends -y \
gdb \
locales \
libicu67 \
liblz4-1 \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 \
libsfcgal1 \
libxml2 \
libxslt1.1 \
libzstd1 \
procps && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,29 +0,0 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
WORKDIR /home/nonroot
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --locked --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:bullseye-slim
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -1,70 +0,0 @@
# Note: this file *mostly* just builds on Dockerfile.compute-node
ARG SRC_IMAGE
ARG VM_INFORMANT_VERSION=v0.1.14
# on libcgroup update, make sure to check bootstrap.sh for changes
ARG LIBCGROUP_VERSION=v2.0.3
# Pull VM informant, to copy from later
FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ARG LIBCGROUP_VERSION
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
# Combine, starting from non-VM compute node image.
FROM $SRC_IMAGE as base
# Temporarily set user back to root so we can run adduser, set inittab
USER root
RUN adduser vm-informant --disabled-password --no-create-home
RUN set -e \
&& rm -f /etc/inittab \
&& touch /etc/inittab
RUN set -e \
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
&& CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \
&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
USER postgres
ADD vm-cgconfig.conf /etc/cgconfig.conf
COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]

190
Makefile
View File

@@ -1,7 +1,10 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
SECCOMP = --with-libseccomp
else
SECCOMP =
endif
#
# We differentiate between release / debug build types using the BUILD_TYPE
@@ -9,37 +12,17 @@ POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
#
BUILD_TYPE ?= debug
ifeq ($(BUILD_TYPE),release)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
PG_CONFIGURE_OPTS = --enable-debug
PG_CFLAGS = -O2 -g3 $(CFLAGS)
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
PG_CFLAGS = -O0 -g3 $(CFLAGS)
else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
endif
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# Use -C option so that when PostgreSQL "make install" installs the
# headers, the mtime of the headers are not changed when there have
# been no changes to the files. Changing the mtime triggers an
# unnecessary rebuild of 'postgres_ffi'.
PG_CONFIGURE_OPTS += INSTALL='$(ROOT_PROJECT_DIR)/scripts/ninstall.sh -C'
# Choose whether we should be silent or verbose
CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
# Fix for a corner case when make doesn't pass a jobserver
@@ -52,143 +35,64 @@ CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
#
# Top level Makefile to build Neon and PostgreSQL
# Top level Makefile to build Zenith and PostgreSQL
#
.PHONY: all
all: neon postgres neon-pg-ext
all: zenith postgres
### Neon Rust bits
### Zenith Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers
+@echo "Compiling Neon"
.PHONY: zenith
zenith: postgres-headers
+@echo "Compiling Zenith"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
+@echo "Configuring Postgres $* build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
CFLAGS='$(PG_CFLAGS)' \
tmp_install/build/config.status:
+@echo "Configuring postgres build"
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
$(SECCOMP) \
--prefix=$(abspath tmp_install) > configure.log)
# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v15
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-headers-%
postgres-headers-%: postgres-configure-%
+@echo "Installing PostgreSQL $* headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL
.PHONY: postgres-%
postgres-%: postgres-configure-% \
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
+@echo "Compiling libpq $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
+@echo "Compiling pg_prewarm $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
+@echo "Compiling pageinspect $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
.PHONY: postgres-clean-%
postgres-clean-%:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-ext-clean-%
neon-pg-ext-clean-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
.PHONY: neon-pg-ext
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: tmp_install/build/config.status
# Install the PostgreSQL header files into tmp_install/include
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/zenith
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
+@echo "Compiling contrib/zenith"
$(MAKE) -C tmp_install/build/contrib/zenith install
+@echo "Compiling contrib/zenith_test_utils"
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15
postgres-clean:
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-ext-clean
clean:
cd tmp_install/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf $(POSTGRES_INSTALL_DIR)
rm -rf tmp_install
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -197,4 +101,4 @@ fmt:
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
ln -s -f ../../pre-commit.py .git/hooks/pre-commit

5
NOTICE
View File

@@ -1,5 +0,0 @@
Neon
Copyright 2022 Neon Inc.
The PostgreSQL submodules in vendor/postgres-v14 and vendor/postgres-v15 are licensed under the
PostgreSQL license. See vendor/postgres-v14/COPYRIGHT and vendor/postgres-v15/COPYRIGHT.

1
Pipfile Symbolic link
View File

@@ -0,0 +1 @@
./test_runner/Pipfile

1
Pipfile.lock generated Symbolic link
View File

@@ -0,0 +1 @@
./test_runner/Pipfile.lock

262
README.md
View File

@@ -1,166 +1,82 @@
# Neon
# Zenith
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
Alternatively, compile and run the project [locally](#running-local-installation).
Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes
## Architecture overview
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
A Zenith installation consists of Compute nodes and Storage engine.
The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
Compute nodes are stateless PostgreSQL nodes, backed by zenith storage.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
Zenith storage engine consists of two major components:
- Pageserver. Scalable storage backend for compute nodes.
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
Pageserver consists of:
- Repository - Zenith storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request.
## Running local installation
1. Install build dependencies and other useful packages
#### Installing dependencies on Linux
1. Install build dependencies and other applicable packages
* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
```bash
On Ubuntu or Debian this set of packages should be sufficient to build the code:
```text
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
protobuf-devel
```
* On Arch based systems, these packages are needed:
```bash
pacman -S base-devel readline zlib libseccomp openssl clang \
postgresql-libs cmake postgresql protobuf
libssl-dev clang pkg-config libpq-dev
```
Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
[Rust] 1.55 or later is also required.
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
# recommended approach from https://www.rust-lang.org/tools/install
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
#### Installing dependencies on macOS (12.3.1)
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison
To run the integration tests (not required to use the code), install
Python (3.7 or higher), and install python3 packages with `pipenv` using `pipenv install` in the project directory.
# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
# recommended approach from https://www.rust-lang.org/tools/install
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
3. Install PostgreSQL Client
```
# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
brew install libpq
brew link --force libpq
```
#### Rustc version
The project uses [rust toolchain file](./rust-toolchain.toml) to define the version it's built with in CI for testing and local builds.
This file is automatically picked up by [`rustup`](https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file) that installs (if absent) and uses the toolchain version pinned in the file.
rustup users who want to build with another toolchain can use [`rustup override`](https://rust-lang.github.io/rustup/overrides.html#directory-overrides) command to set a specific toolchain for the project's directory.
non-rustup users most probably are not getting the same toolchain automatically from the file, so are responsible to manually verify their toolchain matches the version in the file.
Newer rustc versions most probably will work fine, yet older ones might not be supported due to some new features used by the project or the crates.
#### Building on Linux
1. Build neon and patched postgres
```
# Note: The path to the neon sources can not contain a space.
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`nproc` -s"
# Remove -s for the verbose build log
make -j`nproc` -s
```
#### Building on OSX
1. Build neon and patched postgres
```
# Note: The path to the neon sources can not contain a space.
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s"
# Remove -s for the verbose build log
make -j`sysctl -n hw.logicalcpu` -s
```
#### Dependency installation notes
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
#### Running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
2. Build zenith and patched postgres
```sh
# Create repository in .neon with proper paths to binaries and data
git clone --recursive https://github.com/zenithdb/zenith.git
cd zenith
make -j5
```
3. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .zenith with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
> ./target/debug/zenith init
initializing tenantid c03ba6b7ad4c5e9cf556f059ade44229
created initial timeline 5b014a9e41b4b63ce1a1febc04503636 timeline.lsn 0/169C3C8
created main branch
pageserver init succeeded
# start pageserver, safekeeper, and broker for their intercommunication
> ./target/debug/neon_local start
Starting neon broker at 127.0.0.1:50051
storage_broker started, pid: 2918372
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2918386
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2918437
# create initial tenant and use it as a default for every future neon_local invocation
> ./target/debug/neon_local tenant create --set-default
tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
# start pageserver and safekeeper
> ./target/debug/zenith start
Starting pageserver at 'localhost:64000' in '.zenith'
Pageserver started
initializing for single for 7676
Starting safekeeper at 'localhost:5454' in '.zenith/safekeepers/single'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local endpoint start main
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
> ./target/debug/zenith pg start main
Starting new postgres main on main...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
waiting for server to start.... done
server started
# check list of running postgres instances
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
> ./target/debug/zenith pg list
BRANCH ADDRESS LSN STATUS
main 127.0.0.1:55432 0/1609610 running
```
2. Now, it is possible to connect to postgres and run some queries:
4. Now it is possible to connect to postgres and run some queries:
```text
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1,1);
@@ -172,32 +88,25 @@ postgres=# select * from t;
(1 row)
```
3. And create branches and run postgres on them:
5. And create branches and run postgres on them:
```sh
# create branch named migration_check
> ./target/debug/neon_local timeline branch --branch-name migration_check
Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'
> ./target/debug/zenith branch migration_check main
Created branch 'migration_check' at 0/1609610
# check branches tree
> ./target/debug/neon_local timeline list
(L) main [de200bd42b49cc1814412c7e592dd6e9]
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
> ./target/debug/zenith branch
main
┗━ @0/1609610: migration_check
# start postgres on that branch
> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
> ./target/debug/zenith pg start migration_check
Starting postgres node at 'host=127.0.0.1 port=55433 user=stas'
waiting for server to start.... done
# this new postgres instance will have all the data from 'main' postgres,
# but all modifications would not affect data in original postgres
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
postgres=# select * from t;
key | value
-----+-------
@@ -206,60 +115,41 @@ postgres=# select * from t;
postgres=# insert into t values(2,2);
INSERT 0 1
# check that the new change doesn't affect the 'main' postgres
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
postgres=# select * from t;
key | value
-----+-------
1 | 1
(1 row)
```
4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
you have just started. You can terminate them all with one command:
6. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
you have just started. You can stop them all with one command:
```sh
> ./target/debug/neon_local stop
> ./target/debug/zenith stop
```
## Running tests
Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
```sh
git clone --recursive https://github.com/neondatabase/neon.git
CARGO_BUILD_FLAGS="--features=testing" make
./scripts/pytest
git clone --recursive https://github.com/zenithdb/zenith.git
make # builds also postgres and installs it to ./tmp_install
cd test_runner
pytest
```
## Documentation
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
Now we use README files to cover design ideas and overall architecture for each module and `rustdoc` style documentation comments. See also [/docs/](/docs/) a top-level overview of all available markdown documentation.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
See also README files in some source directories, and `rustdoc` style documentation comments.
Other resources:
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
### Postgres-specific terms
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
The same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
To get more familiar with this aspect, refer to:
- [Neon glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
- [Zenith glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))
## Join the development

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert some a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)

View File

@@ -1 +0,0 @@
target

View File

@@ -1 +0,0 @@
target

View File

@@ -1,32 +0,0 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
chrono.workspace = true
clap.workspace = true
futures.workspace = true
hyper = { workspace = true, features = ["full"] }
notify.workspace = true
num_cpus.workspace = true
opentelemetry.workspace = true
postgres.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
tar.workspace = true
reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
url.workspace = true
compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true

View File

@@ -1,85 +0,0 @@
# Compute node tools
Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
`ExecStart` option. It will handle all the `Neon` specifics during compute node
initialization:
- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
- Every start is a fresh start, so the data directory is removed and
initialized again on each run.
- Next it will put configuration files into the `PGDATA` directory.
- Sync safekeepers and get commit LSN.
- Get `basebackup` from pageserver using the returned on the previous step LSN.
- Try to start `postgres` and wait until it is ready to accept connections.
- Check and alter/drop/create roles and databases.
- Hang waiting on the `postmaster` process to exit.
Also `compute_ctl` spawns two separate service threads:
- `compute-monitor` checks the last Postgres activity timestamp and saves it
into the shared `ComputeNode`;
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
last activity requests.
If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
downscaling and (eventually) will request immediate upscaling under resource pressure.
Usage example:
```sh
compute_ctl -D /var/db/postgres/compute \
-C 'postgresql://cloud_admin@localhost/postgres' \
-S /var/db/postgres/specs/current.json \
-b /usr/local/bin/postgres
```
## Tests
Cargo formatter:
```sh
cargo fmt
```
Run tests:
```sh
cargo test
```
Clippy linter:
```sh
cargo clippy --all --all-targets -- -Dwarnings -Drust-2018-idioms
```
## Cross-platform compilation
Imaging that you are on macOS (x86) and you want a Linux GNU (`x86_64-unknown-linux-gnu` platform in `rust` terminology) executable.
### Using docker
You can use a throw-away Docker container ([rustlang/rust](https://hub.docker.com/r/rustlang/rust/) image) for doing that:
```sh
docker run --rm \
-v $(pwd):/compute_tools \
-w /compute_tools \
-t rustlang/rust:nightly cargo build --release --target=x86_64-unknown-linux-gnu
```
or one-line:
```sh
docker run --rm -v $(pwd):/compute_tools -w /compute_tools -t rust:latest cargo build --release --target=x86_64-unknown-linux-gnu
```
### Using rust native cross-compilation
Another way is to add `x86_64-unknown-linux-gnu` target on your host system:
```sh
rustup target add x86_64-unknown-linux-gnu
```
Install macOS cross-compiler toolchain:
```sh
brew tap SergioBenitez/osxct
brew install x86_64-unknown-linux-gnu
```
And finally run `cargo build`:
```sh
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-unknown-linux-gnu-gcc cargo build --target=x86_64-unknown-linux-gnu --release
```

View File

@@ -1 +0,0 @@
max_width = 100

View File

@@ -1,301 +0,0 @@
//!
//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
//! initialization:
//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
//! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run.
//! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN.
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
//! - Try to start `postgres` and wait until it is ready to accept connections.
//! - Check and alter/drop/create roles and databases.
//! - Hang waiting on the `postmaster` process to exit.
//!
//! Also `compute_ctl` spawns two separate service threads:
//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
//! into the shared `ComputeNode`;
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
//! last activity requests.
//!
//! If the `vm-informant` binary is present at `/bin/vm-informant`, it will also be started. For VM
//! compute nodes, `vm-informant` communicates with the VM autoscaling system. It coordinates
//! downscaling and (eventually) will request immediate upscaling under resource pressure.
//!
//! Usage example:
//! ```sh
//! compute_ctl -D /var/db/postgres/compute \
//! -C 'postgresql://cloud_admin@localhost/postgres' \
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres
//! ```
//!
use std::fs::File;
use std::panic;
use std::path::Path;
use std::process::exit;
use std::sync::{mpsc, Arc, Condvar, Mutex};
use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use tracing::{error, info};
use url::Url;
use compute_api::responses::ComputeStatus;
use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_tools::configurator::launch_configurator;
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::spec::*;
fn main() -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let matches = cli().get_matches();
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
// Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap();
let mut spec = None;
let mut live_config_allowed = false;
match spec_json {
// First, try to get cluster spec from the cli argument
Some(json) => {
spec = Some(serde_json::from_str(json)?);
}
None => {
// Second, try to read it from the file if path is provided
if let Some(sp) = spec_path {
let path = Path::new(sp);
let file = File::open(path)?;
spec = Some(serde_json::from_reader(file)?);
} else if let Some(id) = compute_id {
if let Some(cp_base) = control_plane_uri {
live_config_allowed = true;
if let Ok(s) = get_spec_from_control_plane(cp_base, id) {
spec = Some(s);
}
} else {
panic!("must specify both --control-plane-uri and --compute-id or none");
}
} else {
panic!(
"compute spec should be provided by one of the following ways: \
--spec OR --spec-path OR --control-plane-uri and --compute-id"
);
}
}
};
let mut new_state = ComputeState::new();
let spec_set;
if let Some(spec) = spec {
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
new_state.pspec = Some(pspec);
spec_set = true;
} else {
spec_set = false;
}
let compute_node = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
live_config_allowed,
state: Mutex::new(new_state),
state_changed: Condvar::new(),
};
let compute = Arc::new(compute_node);
// Launch http service first, so we were able to serve control-plane
// requests, while configuration is still in progress.
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
if !spec_set {
// No spec provided, hang waiting for it.
info!("no compute spec provided, waiting");
let mut state = compute.state.lock().unwrap();
while state.status != ComputeStatus::ConfigurationPending {
state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::ConfigurationPending {
info!("got spec, continue configuration");
// Spec is already set by the http server handler.
break;
}
}
}
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
let pspec = state.pspec.as_ref().expect("spec must be set");
let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
drop(state);
// Extract OpenTelemetry context for the startup actions from the spec, and
// attach it to the current tracing context.
//
// This is used to propagate the context for the 'start_compute' operation
// from the neon control plane. This allows linking together the wider
// 'start_compute' operation that creates the compute container, with the
// startup actions here within the container.
//
// Switch to the startup context here, and exit it once the startup has
// completed and Postgres is up and running.
//
// NOTE: This is supposed to only cover the *startup* actions. Once
// postgres is configured and up-and-running, we exit this span. Any other
// actions that are performed on incoming HTTP requests, for example, are
// performed in separate spans.
let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
Some(TraceContextPropagator::new().extract(carrier).attach())
} else {
None
};
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
let _configurator_handle =
launch_configurator(&compute).expect("cannot launch configurator thread");
// Start Postgres
let mut delay_exit = false;
let mut exit_code = None;
let pg = match compute.start_compute() {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:?}", err);
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed;
drop(state);
delay_exit = true;
None
}
};
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
if let Some(mut pg) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
info!("Postgres exited with code {}, shutting down", ecode);
exit_code = ecode.code()
}
if let Err(err) = compute.check_for_core_dumps() {
error!("error while checking for core dumps: {err:?}");
}
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example:
// - https://github.com/open-telemetry/opentelemetry-rust/issues/868
// - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636
//
// Yet, we want computes to shut down fast enough, as we may need a new one
// for the same timeline ASAP. So wait no longer than 2s for the shutdown to
// complete, then just error out and exit the main thread.
info!("shutting down tracing");
let (sender, receiver) = mpsc::channel();
let _ = thread::spawn(move || {
tracing_utils::shutdown_tracing();
sender.send(()).ok()
});
let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000));
if shutdown_res.is_err() {
error!("timed out while shutting down tracing, exiting anyway");
}
info!("shutting down");
exit(exit_code.unwrap_or(1))
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.arg(
Arg::new("compute-id")
.short('i')
.long("compute-id")
.value_name("COMPUTE_ID"),
)
.arg(
Arg::new("control-plane-uri")
.short('p')
.long("control-plane-uri")
.value_name("CONTROL_PLANE_API_BASE_URI"),
)
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -1,45 +0,0 @@
use anyhow::{anyhow, Result};
use tokio_postgres::NoTls;
use tracing::{error, instrument};
use crate::compute::ComputeNode;
/// Update timestamp in a row in a special service table to check
/// that we can actually write some data in this particular timeline.
/// Create table if it's missing.
#[instrument(skip_all)]
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
// Connect to the database.
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
// The connection object performs the actual communication with the database,
// so spawn it off to run on its own.
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let query = "
CREATE TABLE IF NOT EXISTS health_check (
id serial primary key,
updated_at timestamptz default now()
);
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();";
let result = client.simple_query(query).await?;
if result.len() != 2 {
return Err(anyhow::format_err!(
"expected 2 query results, but got {}",
result.len()
));
}
Ok(())
}

View File

@@ -1,540 +0,0 @@
//
// XXX: This starts to be scarry similar to the `PostgresNode` from `control_plane`,
// but there are several things that makes `PostgresNode` usage inconvenient in the
// cloud:
// - it inherits from `LocalEnv`, which contains **all-all** the information about
// a complete service running
// - it uses `PageServerNode` with information about http endpoint, which we do not
// need in the cloud again
// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud
//
// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
// attributes (not required for the cloud). Yet, it is still tempting to unify these
// `PostgresNode` and `ComputeNode` and use one in both places.
//
// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
//
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::{Condvar, Mutex};
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use tokio_postgres;
use tracing::{info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::ComputeSpec;
use crate::config;
use crate::pg_helpers::*;
use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub pgdata: String,
pub pgbin: String,
/// We should only allow live re- / configuration of the compute node if
/// it uses 'pull model', i.e. it can go to control-plane and fetch
/// the latest configuration. Otherwise, there could be a case:
/// - we start compute with some spec provided as argument
/// - we push new spec and it does reconfiguration
/// - but then something happens and compute pod / VM is destroyed,
/// so k8s controller starts it again with the **old** spec
/// and the same for empty computes:
/// - we started compute without any spec
/// - we push spec and it does configuration
/// - but then it is restarted without any spec again
pub live_config_allowed: bool,
/// Volatile part of the `ComputeNode`, which should be used under `Mutex`.
/// To allow HTTP API server to serving status requests, while configuration
/// is in progress, lock should be held only for short periods of time to do
/// read/write, not the whole configuration process.
pub state: Mutex<ComputeState>,
/// `Condvar` to allow notifying waiters about state changes.
pub state_changed: Condvar,
}
#[derive(Clone, Debug)]
pub struct ComputeState {
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity
pub last_active: DateTime<Utc>,
pub error: Option<String>,
pub pspec: Option<ParsedSpec>,
pub metrics: ComputeMetrics,
}
impl ComputeState {
pub fn new() -> Self {
Self {
status: ComputeStatus::Empty,
last_active: Utc::now(),
error: None,
pspec: None,
metrics: ComputeMetrics::default(),
}
}
}
impl Default for ComputeState {
fn default() -> Self {
Self::new()
}
}
#[derive(Clone, Debug)]
pub struct ParsedSpec {
pub spec: ComputeSpec,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub pageserver_connstr: String,
pub storage_auth_token: Option<String>,
}
impl TryFrom<ComputeSpec> for ParsedSpec {
type Error = String;
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
let pageserver_connstr = spec
.cluster
.settings
.find("neon.pageserver_connstring")
.ok_or("pageserver connstr should be provided")?;
let storage_auth_token = spec.storage_auth_token.clone();
let tenant_id: TenantId = spec
.cluster
.settings
.find("neon.tenant_id")
.ok_or("tenant id should be provided")
.map(|s| TenantId::from_str(&s))?
.or(Err("invalid tenant id"))?;
let timeline_id: TimelineId = spec
.cluster
.settings
.find("neon.timeline_id")
.ok_or("timeline id should be provided")
.map(|s| TimelineId::from_str(&s))?
.or(Err("invalid timeline id"))?;
Ok(ParsedSpec {
spec,
pageserver_connstr,
storage_auth_token,
tenant_id,
timeline_id,
})
}
}
impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap();
state.status = status;
self.state_changed.notify_all();
}
pub fn get_status(&self) -> ComputeStatus {
self.state.lock().unwrap().status
}
// Remove `pgdata` directory and create it again with right permissions.
fn create_pgdata(&self) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
// If it is something different then create_dir() will error out anyway.
let _ok = fs::remove_dir_all(&self.pgdata);
fs::create_dir(&self.pgdata)?;
fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
}
// Get basebackup from the libpq connection to pageserver using `connstr` and
// unarchive it to `pgdata` directory overriding all its previous content.
#[instrument(skip(self, compute_state))]
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Utc::now();
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
if let Some(storage_auth_token) = &spec.storage_auth_token {
info!("Got storage auth token from spec file");
config.password(storage_auth_token);
} else {
info!("Storage auth token not set");
}
let mut client = config.connect(NoTls)?;
let basebackup_cmd = match lsn {
Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute
_ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
};
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata)?;
self.state.lock().unwrap().metrics.basebackup_ms = Utc::now()
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64;
Ok(())
}
// Run `postgres` in a special mode with `--sync-safekeepers` argument
// and return the reported LSN back to the caller.
#[instrument(skip(self, storage_auth_token))]
fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
let start_time = Utc::now();
let sync_handle = Command::new(&self.pgbin)
.args(["--sync-safekeepers"])
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
.envs(if let Some(storage_auth_token) = &storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.stdout(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
// `postgres --sync-safekeepers` will print all log output to stderr and
// final LSN to stdout. So we pipe only stdout, while stderr will be automatically
// redirected to the caller output.
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
sync_output.status,
String::from_utf8(sync_output.stdout)
.expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
);
}
self.state.lock().unwrap().metrics.sync_safekeepers_ms = Utc::now()
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64;
let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?;
Ok(lsn)
}
/// Do all the preparations like PGDATA directory creation, configuration,
/// safekeepers sync, basebackup, etc.
#[instrument(skip(self, compute_state))]
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let pgdata_path = Path::new(&self.pgdata);
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;
info!("starting safekeepers syncing");
let lsn = self
.sync_safekeepers(pspec.storage_auth_token.clone())
.with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", lsn);
info!(
"getting basebackup@{} from pageserver {}",
lsn, &pspec.pageserver_connstr
);
self.get_basebackup(compute_state, lsn).with_context(|| {
format!(
"failed to get basebackup@{} from pageserver {}",
lsn, &pspec.pageserver_connstr
)
})?;
// Update pg_hba.conf received with basebackup.
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Start Postgres as a child process and manage DBs/roles.
/// After that this will hang waiting on the postmaster process to exit.
#[instrument(skip(self))]
pub fn start_postgres(
&self,
storage_auth_token: Option<String>,
) -> Result<std::process::Child> {
let pgdata_path = Path::new(&self.pgdata);
// Run postgres as a child process.
let mut pg = Command::new(&self.pgbin)
.args(["-D", &self.pgdata])
.envs(if let Some(storage_auth_token) = &storage_auth_token {
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
} else {
vec![]
})
.spawn()
.expect("cannot start postgres process");
wait_for_postgres(&mut pg, pgdata_path)?;
Ok(pg)
}
/// Do initial configuration of the already started Postgres.
#[instrument(skip(self, compute_state))]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
// Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
handle_grants(spec, self.connstr.as_str(), &mut client)?;
handle_extensions(spec, &mut client)?;
// 'Close' connection
drop(client);
info!(
"finished configuration of compute for project {}",
spec.cluster.cluster_id
);
Ok(())
}
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
// have opened connection to Postgres and superuser access.
#[instrument(skip(self, client))]
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
client.simple_query("SELECT pg_reload_conf()")?;
Ok(())
}
/// Similar to `apply_config()`, but does a bit different sequence of operations,
/// as it's used to reconfigure a previously started and configured Postgres node.
#[instrument(skip(self))]
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
// Write new config
let pgdata_path = Path::new(&self.pgdata);
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
self.pg_reload_conf(&mut client)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
handle_extensions(&spec, &mut client)?;
// 'Close' connection
drop(client);
let unknown_op = "unknown".to_string();
let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
info!(
"finished reconfiguration of compute node for operation {}",
op_id
);
Ok(())
}
#[instrument(skip(self))]
pub fn start_compute(&self) -> Result<std::process::Child> {
let compute_state = self.state.lock().unwrap().clone();
let spec = compute_state.pspec.as_ref().expect("spec must be set");
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
spec.spec.cluster.cluster_id,
spec.spec.operation_uuid.as_deref().unwrap_or("None"),
spec.tenant_id,
spec.timeline_id,
);
self.prepare_pgdata(&compute_state)?;
let start_time = Utc::now();
let pg = self.start_postgres(spec.storage_auth_token.clone())?;
self.apply_config(&compute_state)?;
let startup_end_time = Utc::now();
{
let mut state = self.state.lock().unwrap();
state.metrics.config_ms = startup_end_time
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64;
state.metrics.total_startup_ms = startup_end_time
.signed_duration_since(self.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
}
self.set_status(ComputeStatus::Running);
Ok(pg)
}
// Look for core dumps and collect backtraces.
//
// EKS worker nodes have following core dump settings:
// /proc/sys/kernel/core_pattern -> core
// /proc/sys/kernel/core_uses_pid -> 1
// ulimint -c -> unlimited
// which results in core dumps being written to postgres data directory as core.<pid>.
//
// Use that as a default location and pattern, except macos where core dumps are written
// to /cores/ directory by default.
pub fn check_for_core_dumps(&self) -> Result<()> {
let core_dump_dir = match std::env::consts::OS {
"macos" => Path::new("/cores/"),
_ => Path::new(&self.pgdata),
};
// Collect core dump paths if any
info!("checking for core dumps in {}", core_dump_dir.display());
let files = fs::read_dir(core_dump_dir)?;
let cores = files.filter_map(|entry| {
let entry = entry.ok()?;
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
Some(entry.path())
});
// Print backtrace for each core dump
for core_path in cores {
warn!(
"core dump found: {}, collecting backtrace",
core_path.display()
);
// Try first with gdb
let backtrace = Command::new("gdb")
.args(["--batch", "-q", "-ex", "bt", &self.pgbin])
.arg(&core_path)
.output();
// Try lldb if no gdb is found -- that is handy for local testing on macOS
let backtrace = match backtrace {
Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {
warn!("cannot find gdb, trying lldb");
Command::new("lldb")
.arg("-c")
.arg(&core_path)
.args(["--batch", "-o", "bt all", "-o", "quit"])
.output()
}
_ => backtrace,
}?;
warn!(
"core dump backtrace: {}",
String::from_utf8_lossy(&backtrace.stdout)
);
warn!(
"debugger stderr: {}",
String::from_utf8_lossy(&backtrace.stderr)
);
}
Ok(())
}
/// Select `pg_stat_statements` data and return it as a stringified JSON
pub async fn collect_insights(&self) -> String {
let mut result_rows: Vec<String> = Vec::new();
let connect_result = tokio_postgres::connect(self.connstr.as_str(), NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
let result = client
.simple_query(
"SELECT
row_to_json(pg_stat_statements)
FROM
pg_stat_statements
WHERE
userid != 'cloud_admin'::regrole::oid
ORDER BY
(mean_exec_time + mean_plan_time) DESC
LIMIT 100",
)
.await;
if let Ok(raw_rows) = result {
for message in raw_rows.iter() {
if let postgres::SimpleQueryMessage::Row(row) = message {
if let Some(json) = row.get(0) {
result_rows.push(json.to_string());
}
}
}
format!("{{\"pg_stat_statements\": [{}]}}", result_rows.join(","))
} else {
"{{\"pg_stat_statements\": []}}".to_string()
}
}
}

View File

@@ -1,51 +0,0 @@
use std::fs::{File, OpenOptions};
use std::io;
use std::io::prelude::*;
use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::ComputeSpec;
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
let mut file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.append(false)
.open(path)?;
let buf = io::BufReader::new(&file);
let mut count: usize = 0;
for l in buf.lines() {
if l? == line {
return Ok(false);
}
count = 1;
}
write!(file, "{}{}", "\n".repeat(count), line)?;
Ok(true)
}
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
// File::create() destroys the file content if it exists.
let mut postgres_conf = File::create(path)?;
write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
Ok(())
}
// Write Postgres config block wrapped with generated comment section
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
writeln!(file, "# Managed by compute_ctl: begin")?;
writeln!(file, "{}", buf)?;
writeln!(file, "# Managed by compute_ctl: end")?;
Ok(())
}

View File

@@ -1,54 +0,0 @@
use std::sync::Arc;
use std::thread;
use anyhow::Result;
use tracing::{error, info, instrument};
use compute_api::responses::ComputeStatus;
use crate::compute::ComputeNode;
#[instrument(skip(compute))]
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
info!("waiting for reconfiguration requests");
loop {
let state = compute.state.lock().unwrap();
let mut state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state);
let mut new_status = ComputeStatus::Failed;
if let Err(e) = compute.reconfigure() {
error!("could not configure compute node: {}", e);
} else {
new_status = ComputeStatus::Running;
info!("compute node configured");
}
// XXX: used to test that API is blocking
// std::thread::sleep(std::time::Duration::from_millis(10000));
compute.set_status(new_status);
} else if state.status == ComputeStatus::Failed {
info!("compute node is now in Failed state, exiting");
break;
} else {
info!("woken up for compute status: {:?}, sleeping", state.status);
}
}
}
pub fn launch_configurator(compute: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let compute = Arc::clone(compute);
Ok(thread::Builder::new()
.name("compute-configurator".into())
.spawn(move || {
configurator_main_loop(&compute);
info!("configurator thread is exited");
})?)
}

View File

@@ -1,264 +0,0 @@
use std::convert::Infallible;
use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use num_cpus;
use serde_json;
use tokio::task;
use tracing::{error, info};
use tracing_utils::http::OtelName;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
ComputeStatusResponse {
tenant: state
.pspec
.as_ref()
.map(|pspec| pspec.tenant_id.to_string()),
timeline: state
.pspec
.as_ref()
.map(|pspec| pspec.timeline_id.to_string()),
status: state.status,
last_active: state.last_active,
error: state.error.clone(),
}
}
// Service function to handle all available routes.
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
//
// NOTE: The URI path is currently included in traces. That's OK because
// it doesn't contain any variable parts or sensitive information. But
// please keep that in mind if you change the routing here.
//
match (req.method(), req.uri().path()) {
// Serialized compute state.
(&Method::GET, "/status") => {
info!("serving /status GET request");
let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
}
// Startup metrics in JSON format. Keep /metrics reserved for a possible
// future use for Prometheus metrics format.
(&Method::GET, "/metrics.json") => {
info!("serving /metrics.json GET request");
let metrics = compute.state.lock().unwrap().metrics.clone();
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
}
// Collect Postgres current usage insights
(&Method::GET, "/insights") => {
info!("serving /insights GET request");
let status = compute.get_status();
if status != ComputeStatus::Running {
let msg = format!("compute is not running, current status: {:?}", status);
error!(msg);
return Response::new(Body::from(msg));
}
let insights = compute.collect_insights().await;
Response::new(Body::from(insights))
}
(&Method::POST, "/check_writability") => {
info!("serving /check_writability POST request");
let status = compute.get_status();
if status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for check_writability request: {:?}",
status
);
error!(msg);
return Response::new(Body::from(msg));
}
let res = crate::checker::check_writability(compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => {
error!("check_writability failed: {}", e);
Response::new(Body::from(e.to_string()))
}
}
}
(&Method::GET, "/info") => {
let num_cpus = num_cpus::get_physical();
info!("serving /info GET request. num_cpus: {}", num_cpus);
Response::new(Body::from(
serde_json::json!({
"num_cpus": num_cpus,
})
.to_string(),
))
}
// Accept spec in JSON format and request compute configuration. If
// anything goes wrong after we set the compute status to `ConfigurationPending`
// and update compute state with new spec, we basically leave compute
// in the potentially wrong state. That said, it's control-plane's
// responsibility to watch compute state after reconfiguration request
// and to clean restart in case of errors.
(&Method::POST, "/configure") => {
info!("serving /configure POST request");
match handle_configure_request(req, compute).await {
Ok(msg) => Response::new(Body::from(msg)),
Err((msg, code)) => {
error!("error handling /configure request: {msg}");
render_json_error(&msg, code)
}
}
}
// Return the `404 Not Found` for any other routes.
_ => {
let mut not_found = Response::new(Body::from("404 Not Found"));
*not_found.status_mut() = StatusCode::NOT_FOUND;
not_found
}
}
}
async fn handle_configure_request(
req: Request<Body>,
compute: &Arc<ComputeNode>,
) -> Result<String, (String, StatusCode)> {
if !compute.live_config_allowed {
return Err((
"live configuration is not allowed for this compute node".to_string(),
StatusCode::PRECONDITION_FAILED,
));
}
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
}
}
fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
let error = GenericAPIError {
error: e.to_string(),
};
Response::builder()
.status(status)
.body(Body::from(serde_json::to_string(&error).unwrap()))
.unwrap()
}
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main]
async fn serve(state: Arc<ComputeNode>) {
let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
let make_service = make_service_fn(move |_conn| {
let state = state.clone();
async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone();
async move {
Ok::<_, Infallible>(
// NOTE: We include the URI path in the string. It
// doesn't contain any variable parts or sensitive
// information in this API.
tracing_utils::http::tracing_handler(
req,
|req| routes(req, &state),
OtelName::UriPath,
)
.await,
)
}
}))
}
});
info!("starting HTTP server on {}", addr);
let server = Server::bind(&addr).serve(make_service);
// Run this server forever
if let Err(e) = server.await {
error!("server error: {}", e);
}
}
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("http-endpoint".into())
.spawn(move || serve(state))?)
}

View File

@@ -1 +0,0 @@
pub mod api;

View File

@@ -1,233 +0,0 @@
openapi: "3.0.2"
info:
title: Compute node control API
version: "1.0"
servers:
- url: "http://localhost:3080"
paths:
/status:
get:
tags:
- Info
summary: Get compute node internal status.
description: ""
operationId: getComputeStatus
responses:
200:
description: ComputeState
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeState"
/metrics.json:
get:
tags:
- Info
summary: Get compute node startup metrics in JSON format.
description: ""
operationId: getComputeMetricsJSON
responses:
200:
description: ComputeMetrics
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeMetrics"
/insights:
get:
tags:
- Info
summary: Get current compute insights in JSON format.
description: |
Note, that this doesn't include any historical data.
operationId: getComputeInsights
responses:
200:
description: Compute insights
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeInsights"
/info:
get:
tags:
- Info
summary: Get info about the compute pod / VM.
description: ""
operationId: getInfo
responses:
200:
description: Info
content:
application/json:
schema:
$ref: "#/components/schemas/Info"
/check_writability:
post:
tags:
- Check
summary: Check that we can write new data on this compute.
description: ""
operationId: checkComputeWritability
responses:
200:
description: Check result
content:
text/plain:
schema:
type: string
description: Error text or 'true' if check passed.
example: "true"
/configure:
post:
tags:
- Configure
summary: Perform compute node configuration.
description: |
This is a blocking API endpoint, i.e. it blocks waiting until
compute is finished configuration and is in `Running` state.
Optional non-blocking mode could be added later.
operationId: configureCompute
requestBody:
description: Configuration request.
required: true
content:
application/json:
schema:
type: object
required:
- spec
properties:
spec:
# XXX: I don't want to explain current spec in the OpenAPI format,
# as it could be changed really soon. Consider doing it later.
type: object
responses:
200:
description: Compute configuration finished.
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeState"
400:
description: Provided spec is invalid.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
412:
description: |
It's not possible to do live-configuration of the compute.
It's either in the wrong state, or compute doesn't use pull
mode of configuration.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: |
Compute configuration request was processed, but error
occurred. Compute will likely shutdown soon.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
components:
securitySchemes:
JWT:
type: http
scheme: bearer
bearerFormat: JWT
schemas:
ComputeMetrics:
type: object
description: Compute startup metrics.
required:
- sync_safekeepers_ms
- basebackup_ms
- config_ms
- total_startup_ms
properties:
sync_safekeepers_ms:
type: integer
basebackup_ms:
type: integer
config_ms:
type: integer
total_startup_ms:
type: integer
Info:
type: object
description: Information about VM/Pod.
required:
- num_cpus
properties:
num_cpus:
type: integer
ComputeState:
type: object
required:
- status
- last_active
properties:
status:
$ref: '#/components/schemas/ComputeStatus'
last_active:
type: string
description: The last detected compute activity timestamp in UTC and RFC3339 format.
example: "2022-10-12T07:20:50.52Z"
error:
type: string
description: Text of the error during compute startup, if any.
example: ""
tenant:
type: string
description: Identifier of the current tenant served by compute node, if any.
example: c9269c359e9a199fad1ea0981246a78f
timeline:
type: string
description: Identifier of the current timeline served by compute node, if any.
example: ece7de74d4b8cbe5433a68ce4d1b97b4
ComputeInsights:
type: object
properties:
pg_stat_statements:
description: Contains raw output from pg_stat_statements in JSON format.
type: array
items:
type: object
ComputeStatus:
type: string
enum:
- init
- failed
- running
example: running
#
# Errors
#
GenericError:
type: object
required:
- error
properties:
error:
type: string
security:
- JWT: []

View File

@@ -1,15 +0,0 @@
//!
//! Various tools and helpers to handle cluster / compute node (Postgres)
//! configuration.
//!
pub mod checker;
pub mod config;
pub mod configurator;
pub mod http;
#[macro_use]
pub mod logger;
pub mod compute;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;

View File

@@ -1,37 +0,0 @@
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::*;
/// Initialize logging to stderr, and OpenTelemetry tracing and exporter.
///
/// Logging is configured using either `default_log_level` or
/// `RUST_LOG` environment variable as default log level.
///
/// OpenTelemetry is configured with OTLP/HTTP exporter. It picks up
/// configuration from environment variables. For example, to change the destination,
/// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See
/// `tracing-utils` package description.
///
pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
// Initialize Logging
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
let fmt_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_writer(std::io::stderr);
// Initialize OpenTelemetry
let otlp_layer =
tracing_utils::init_tracing_without_runtime("compute_ctl").map(OpenTelemetryLayer::new);
// Put it all together
tracing_subscriber::registry()
.with(env_filter)
.with(otlp_layer)
.with(fmt_layer)
.init();
tracing::info!("logging and tracing started");
Ok(())
}

View File

@@ -1,113 +0,0 @@
use std::sync::Arc;
use std::{thread, time};
use anyhow::Result;
use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use tracing::{debug, info};
use crate::compute::ComputeNode;
const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
loop {
// Should be outside of the write lock to allow others to read while we sleep.
thread::sleep(timeout);
match &mut client {
Ok(cli) => {
if cli.is_closed() {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
continue;
}
// Get all running client backends except ourself, use RFC3339 DateTime format.
let backends = cli
.query(
"SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
&[],
);
let mut last_active = compute.state.lock().unwrap().last_active;
if let Ok(backs) = backends {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() {
let state: String = match b.try_get("state") {
Ok(state) => state,
Err(_) => continue,
};
if state == "idle" {
let change: String = match b.try_get("state_change") {
Ok(state_change) => state_change,
Err(_) => continue,
};
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
Err(e) => {
info!("cannot parse backend state_change DateTime: {}", e);
continue;
}
}
} else {
// Found non-idle backend, so the last activity is NOW.
// Save it and exit the for loop. Also clear the idle backend
// `state_change` timestamps array as it doesn't matter now.
last_active = Utc::now();
idle_backs.clear();
break;
}
}
// Get idle backend `state_change` with the max timestamp.
if let Some(last) = idle_backs.iter().max() {
last_active = *last;
}
}
// Update the last activity in the shared state if we got a more recent one.
let mut state = compute.state.lock().unwrap();
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {}", last_active);
}
}
Err(e) => {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
}
}
}
}
/// Launch a separate compute monitor thread and return its `JoinHandle`.
pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || watch_compute_activity(&state))?)
}

View File

@@ -1,9 +0,0 @@
pub const DEFAULT_LOG_LEVEL: &str = "info";
// From Postgres docs:
// To ease transition from the md5 method to the newer SCRAM method, if md5 is specified
// as a method in pg_hba.conf but the user's password on the server is encrypted for SCRAM
// (see below), then SCRAM-based authentication will automatically be chosen instead.
// https://www.postgresql.org/docs/15/auth-password.html
//
// So it's safe to set md5 here, as `control-plane` anyway uses SCRAM for all roles.
pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";

View File

@@ -1,328 +0,0 @@
use std::fmt::Write;
use std::fs;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::Child;
use std::time::{Duration, Instant};
use anyhow::{bail, Result};
use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction};
use tracing::{debug, instrument};
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
/// Escape a string for including it in a SQL literal
fn escape_literal(s: &str) -> String {
s.replace('\'', "''").replace('\\', "\\\\")
}
/// Escape a string so that it can be used in postgresql.conf.
/// Same as escape_literal, currently.
fn escape_conf_value(s: &str) -> String {
s.replace('\'', "''").replace('\\', "\\\\")
}
trait GenericOptionExt {
fn to_pg_option(&self) -> String;
fn to_pg_setting(&self) -> String;
}
impl GenericOptionExt for GenericOption {
/// Represent `GenericOption` as SQL statement parameter.
fn to_pg_option(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} '{}'", self.name, escape_literal(val)),
_ => format!("{} {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
/// Represent `GenericOption` as configuration option.
fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", self.name, escape_conf_value(val)),
_ => format!("{} = {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
}
pub trait PgOptionsSerialize {
fn as_pg_options(&self) -> String;
fn as_pg_settings(&self) -> String;
}
impl PgOptionsSerialize for GenericOptions {
/// Serialize an optional collection of `GenericOption`'s to
/// Postgres SQL statement arguments.
fn as_pg_options(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_option())
.collect::<Vec<String>>()
.join(" ")
} else {
"".to_string()
}
}
/// Serialize an optional collection of `GenericOption`'s to
/// `postgresql.conf` compatible format.
fn as_pg_settings(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_setting())
.collect::<Vec<String>>()
.join("\n")
+ "\n" // newline after last setting
} else {
"".to_string()
}
}
}
pub trait GenericOptionsSearch {
fn find(&self, name: &str) -> Option<String>;
}
impl GenericOptionsSearch for GenericOptions {
/// Lookup option by name
fn find(&self, name: &str) -> Option<String> {
let ops = self.as_ref()?;
let op = ops.iter().find(|s| s.name == name)?;
op.value.clone()
}
}
pub trait RoleExt {
fn to_pg_options(&self) -> String;
}
impl RoleExt for Role {
/// Serialize a list of role parameters into a Postgres-acceptable
/// string of arguments.
fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
// For now, we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string();
if let Some(pass) = &self.encrypted_password {
// Some time ago we supported only md5 and treated all encrypted_password as md5.
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
write!(params, " PASSWORD '{pass}'")
.expect("String is documented to not to error during write operations");
} else {
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented to not to error during write operations");
}
} else {
params.push_str(" PASSWORD NULL");
}
params
}
}
pub trait DatabaseExt {
fn to_pg_options(&self) -> String;
}
impl DatabaseExt for Database {
/// Serialize a list of database parameters into a Postgres-acceptable
/// string of arguments.
/// NB: `TEMPLATE` is actually also an identifier, but so far we only need
/// to use `template0` and `template1`, so it is not a problem. Yet in the future
/// it may require a proper quoting too.
fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.pg_quote())
.expect("String is documented to not to error during write operations");
params
}
}
/// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL.
pub trait Escaping {
fn pg_quote(&self) -> String;
}
impl Escaping for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes provided string with `""` and escapes every `"`.
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
fn pg_quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
result
}
}
/// Build a list of existing Postgres roles
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
let postgres_roles = xact
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
.iter()
.map(|row| Role {
name: row.get("rolname"),
encrypted_password: row.get("rolpassword"),
options: None,
})
.collect();
Ok(postgres_roles)
}
/// Build a list of existing Postgres databases
pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
let postgres_dbs = client
.query(
"SELECT datname, datdba::regrole::text as owner
FROM pg_catalog.pg_database;",
&[],
)?
.iter()
.map(|row| Database {
name: row.get("datname"),
owner: row.get("owner"),
options: None,
})
.collect();
Ok(postgres_dbs)
}
/// Wait for Postgres to become ready to accept connections. It's ready to
/// accept connections when the state-field in `pgdata/postmaster.pid` says
/// 'ready'.
#[instrument(skip(pg))]
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid");
// PostgreSQL writes line "ready" to the postmaster.pid file, when it has
// completed initialization and is ready to accept connections. We want to
// react quickly and perform the rest of our initialization as soon as
// PostgreSQL starts accepting connections. Use 'notify' to be notified
// whenever the PID file is changed, and whenever it changes, read it to
// check if it's now "ready".
//
// You cannot actually watch a file before it exists, so we first watch the
// data directory, and once the postmaster.pid file appears, we switch to
// watch the file instead. We also wake up every 100 ms to poll, just in
// case we miss some events for some reason. Not strictly necessary, but
// better safe than sorry.
let (tx, rx) = std::sync::mpsc::channel();
let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
let _ = tx.send(res);
}) {
Ok(watcher) => (Box::new(watcher), rx),
Err(e) => {
match e.kind {
notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
// docker on m1 macs does not support recommended_watcher
// but return "Function not implemented (os error 38)"
// see https://github.com/notify-rs/notify/issues/423
let (tx, rx) = std::sync::mpsc::channel();
// let's poll it faster than what we check the results for (100ms)
let config =
notify::Config::default().with_poll_interval(Duration::from_millis(50));
let watcher = notify::PollWatcher::new(
move |res| {
let _ = tx.send(res);
},
config,
)?;
(Box::new(watcher), rx)
}
_ => return Err(e.into()),
}
}
};
watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
let started_at = Instant::now();
let mut postmaster_pid_seen = false;
loop {
if let Ok(Some(status)) = pg.try_wait() {
// Postgres exited, that is not what we expected, bail out earlier.
let code = status.code().unwrap_or(-1);
bail!("Postgres exited unexpectedly with code {}", code);
}
let res = rx.recv_timeout(Duration::from_millis(100));
debug!("woken up by notify: {res:?}");
// If there are multiple events in the channel already, we only need to be
// check once. Swallow the extra events before we go ahead to check the
// pid file.
while let Ok(res) = rx.try_recv() {
debug!("swallowing extra event: {res:?}");
}
// Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) {
if !postmaster_pid_seen {
debug!("postmaster.pid appeared");
watcher
.unwatch(pgdata)
.expect("Failed to remove pgdata dir watch");
watcher
.watch(&pid_path, RecursiveMode::NonRecursive)
.expect("Failed to add postmaster.pid file watch");
postmaster_pid_seen = true;
}
let file = BufReader::new(file);
let last_line = file.lines().last();
// Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line {
let status = line.trim();
debug!("last line of postmaster.pid: {status:?}");
// Now Postgres is ready to accept connections
if status == "ready" {
break;
}
}
}
// Give up after POSTGRES_WAIT_TIMEOUT.
let duration = started_at.elapsed();
if duration >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
}
tracing::info!("PostgreSQL is now running, continuing to configure it");
Ok(())
}
/// Remove `pgdata` directory and create it again with right permissions.
pub fn create_pgdata(pgdata: &str) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
// If it is something different then create_dir() will error out anyway.
let _ok = fs::remove_dir_all(pgdata);
fs::create_dir(pgdata)?;
fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
}

View File

@@ -1,520 +0,0 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::{anyhow, bail, Result};
use postgres::config::Config;
use postgres::{Client, NoTls};
use tracing::{info, info_span, instrument, span_enabled, warn, Level};
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
use compute_api::responses::ControlPlaneSpecResponse;
use compute_api::spec::{ComputeSpec, Database, PgIdent, Role};
/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
/// env variable is set, it will be used for authorization.
pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result<ComputeSpec> {
let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
Ok(v) => v,
Err(_) => "".to_string(),
};
info!("getting spec from control plane: {}", cp_uri);
// TODO: check the response. We should distinguish cases when it's
// - network error, then retry
// - no spec for compute yet, then wait
// - compute id is unknown or any other error, then bail out
let resp: ControlPlaneSpecResponse = reqwest::blocking::Client::new()
.get(cp_uri)
.header("Authorization", jwt)
.send()
.map_err(|e| anyhow!("could not send spec request to control plane: {}", e))?
.json()
.map_err(|e| anyhow!("could not get compute spec from control plane: {}", e))?;
if let Some(spec) = resp.spec {
Ok(spec)
} else {
bail!("could not get compute spec from control plane")
}
}
/// It takes cluster specification and does the following:
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
/// - Update `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
// File `postgresql.conf` is no longer included into `basebackup`, so just
// always write all config into it creating new file.
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Check `pg_hba.conf` and update if needed to allow external connections.
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
// XXX: consider making it a part of spec.json
info!("checking pg_hba.conf");
let pghba_path = pgdata_path.join("pg_hba.conf");
if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
info!("updated pg_hba.conf to allow external connections");
} else {
info!("pg_hba.conf is up-to-date");
}
Ok(())
}
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
info!("postgres roles:");
for r in &existing_roles {
info!(
" - {}:{}",
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
}
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
for op in ops {
match op.action.as_ref() {
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
}
// Renaming role drops its password, since role name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
"rename_role" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
xact.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres roles info to handle possible roles renaming
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!("cluster spec roles:");
for role in &spec.cluster.roles {
let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
enum RoleAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_role {
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
RoleAction::Update
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5' prefix first if any)
//
// This is a backward compatibility hack, which comes from the times when we were using
// md5 for everyone and hashes were stored in the console db without md5 prefix. So when
// role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix,
// but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix.
// Here is the only place so far where we compare hashes, so it seems to be the best candidate
// to place this compatibility layer.
let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") {
stripped
} else {
pg_pwd
};
if pg_pwd != *role.encrypted_password.as_ref().unwrap() {
RoleAction::Update
} else {
RoleAction::None
}
} else {
RoleAction::None
}
} else {
RoleAction::Create
};
match action {
RoleAction::None => {}
RoleAction::Update => {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
RoleAction::Create => {
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
info!("role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
}
}
if span_enabled!(Level::INFO) {
let pwd = if role.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
};
let action_str = match action {
RoleAction::None => "",
RoleAction::Create => " -> create",
RoleAction::Update => " -> update",
};
info!(" - {}:{}{}", name, pwd, action_str);
}
}
xact.commit()?;
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)]
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
if let Some(ops) = &spec.delta_operations {
// First, reassign all dependent objects to db owners.
info!("reassigning dependent objects of to-be-deleted roles");
// Fetch existing roles. We could've exported and used `existing_roles` from
// `handle_roles()`, but we only make this list there before creating new roles.
// Which is probably fine as we never create to-be-deleted roles, but that'd
// just look a bit untidy. Anyway, the entire `pg_roles` should be in shared
// buffers already, so this shouldn't be a big deal.
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
xact.commit()?;
for op in ops {
// Check that role is still present in Postgres, as this could be a
// restart with the same spec after role deletion.
if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
reassign_owned_objects(spec, connstr, &op.name)?;
}
}
// Second, proceed with role deletions.
info!("processing role deletions");
let mut xact = client.transaction()?;
for op in ops {
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
xact.commit()?;
}
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
for db in &spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
let mut client = conf.connect(NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.pg_quote(),
db.owner.pg_quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()` excepting that we
/// does not use an explicit transactions block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only in debug mode)
if span_enabled!(Level::INFO) {
info!("postgres databases:");
for r in &existing_dbs {
info!(" {}:{}", r.name, r.owner);
}
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on databases");
for op in ops {
match op.action.as_ref() {
// We do not check either DB exists or not,
// Postgres will take care of it for us
"delete_db" => {
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
warn!("deleting database '{}'", &op.name);
client.execute(query.as_str(), &[])?;
}
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_dbs.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
client.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres databases info to handle possible renames
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
info!("cluster spec databases:");
for db in &spec.cluster.databases {
let name = &db.name;
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
let pg_db = existing_dbs.iter().find(|r| r.name == *name);
enum DatabaseAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_db {
// XXX: db owner name is returned as quoted string from Postgres,
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
db.owner.pg_quote()
} else {
db.owner.clone()
};
if new_owner != r.owner {
// Update the owner
DatabaseAction::Update
} else {
DatabaseAction::None
}
} else {
DatabaseAction::Create
};
match action {
DatabaseAction::None => {}
DatabaseAction::Update => {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.pg_quote(),
db.owner.pg_quote()
);
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
DatabaseAction::Create => {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
query.push_str(&db.to_pg_options());
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
};
if span_enabled!(Level::INFO) {
let action_str = match action {
DatabaseAction::None => "",
DatabaseAction::Create => " -> create",
DatabaseAction::Update => " -> update",
};
info!(" - {}:{}{}", db.name, db.owner, action_str);
}
}
Ok(())
}
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. And also we grant a
// read / write all data privilege to every role. So also grant
// create to everyone.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.pg_quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.pg_quote(),
roles.join(", ")
);
info!("grant query {}", &query);
client.execute(query.as_str(), &[])?;
}
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
for db in &spec.cluster.databases {
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
// and database owner cannot do anything with it. SQL procedure ensures
// that it won't error out if schema `public` doesn't exist.
let alter_query = format!(
"DO $$\n\
DECLARE\n\
schema_owner TEXT;\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
SELECT nspowner::regrole::text\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
INTO schema_owner;\n\
\n\
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
THEN\n\
ALTER SCHEMA public OWNER TO {};\n\
END IF;\n\
END IF;\n\
END\n\
$$;",
db.owner.pg_quote()
);
db_client.simple_query(&alter_query)?;
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
// This is needed because since postgres 15 this privilege is removed by default.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
) AND\n\
current_setting('server_version_num')::int/10000 >= 15\n\
THEN\n\
IF EXISTS(\n\
SELECT rolname\n\
FROM pg_catalog.pg_roles\n\
WHERE rolname = 'web_access'\n\
)\n\
THEN\n\
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
END\n\
$$;"
.to_string();
info!("grant query for db {} : {}", &db.name, &grant_query);
db_client.simple_query(&grant_query)?;
}
Ok(())
}
/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
// Create extension only if this compute really needs it
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
info!("creating system extensions with query: {}", query);
client.simple_query(query)?;
}
}
Ok(())
}

View File

@@ -1,48 +0,0 @@
#[cfg(test)]
mod config_tests {
use std::fs::{remove_file, File};
use std::io::{Read, Write};
use std::path::Path;
use compute_tools::config::*;
fn write_test_file(path: &Path, content: &str) {
let mut file = File::create(path).unwrap();
file.write_all(content.as_bytes()).unwrap();
}
fn check_file_content(path: &Path, expected_content: &str) {
let mut file = File::open(path).unwrap();
let mut content = String::new();
file.read_to_string(&mut content).unwrap();
assert_eq!(content, expected_content);
}
#[test]
fn test_line_in_file() {
let path = Path::new("./tests/tmp/config_test.txt");
write_test_file(path, "line1\nline2.1\t line2.2\nline3");
let line = "line2.1\t line2.2";
let result = line_in_file(path, line).unwrap();
assert!(!result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3\nline4");
remove_file(path).unwrap();
let path = Path::new("./tests/tmp/new_config_test.txt");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line4");
remove_file(path).unwrap();
}
}

Some files were not shown because too many files have changed in this diff Show More