Compare commits

..

10 Commits

Author  SHA1  Message  Date
Konstantin Knizhnik  458ca82d75  Bump postgres version  2021-09-27 12:08:55 +03:00
Konstantin Knizhnik  0dface838d  Bump postgres version  2021-09-24 19:21:16 +03:00
Konstantin Knizhnik  f7878c5157  Bump postgres version  2021-09-24 19:10:18 +03:00
Konstantin Knizhnik  53c870b8e7  Store file cache outside pgdata dir  2021-09-24 19:07:30 +03:00
Konstantin Knizhnik  c23b65914e  Sete zenith.file_cache_size parameter  2021-09-24 18:19:41 +03:00
Konstantin Knizhnik  b51d3f6b2b  Revert "Save page received from page server in local file cache" (This reverts commit 137472db91.)  2021-09-24 18:04:21 +03:00
Konstantin Knizhnik  137472db91  Save page received from page server in local file cache  2021-09-24 15:15:54 +03:00
Konstantin Knizhnik  f817985f2b  Do not throw error on attempt to drop unexisted relation  2021-09-23 12:11:06 +03:00
Konstantin Knizhnik  2e94e1428e  Remove import_timeline_wal function  2021-09-23 12:11:06 +03:00
Konstantin Knizhnik  9a486ca109  Do not write WAL at pageserver  2021-09-23 12:11:06 +03:00
586 changed files with 21383 additions and 105174 deletions

View File

@@ -1,16 +0,0 @@
# The binaries are really slow, if you compile them in 'dev' mode with the defaults.
# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
# optimizations enabled by "opt-level=1" don't affect debuggability too much.
#
# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
#
[profile.dev.package."*"]
# Set the default for dependencies in Development mode.
opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1
[alias]
build_testing = ["build", "--features", "testing"]
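As a usage note for the alias above (a sketch, assuming the workspace actually exposes a `testing` cargo feature, as the alias implies): Cargo expands aliases defined under [alias] into subcommands, so
cargo build_testing    # equivalent to: cargo build --features testing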

384  .circleci/config.yml  Normal file
View File

@@ -0,0 +1,384 @@
version: 2.1
orbs:
python: circleci/python@1.4.0
executors:
zenith-build-executor:
resource_class: xlarge
docker:
- image: cimg/rust:1.52.1
jobs:
check-codestyle:
executor: zenith-build-executor
steps:
- checkout
- run:
name: rustfmt
when: always
command: |
cargo fmt --all -- --check
# A job to build postgres
build-postgres:
executor: zenith-build-executor
steps:
# Checkout the git repo (circleci doesn't have a flag to enable submodules here)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checked out yet.
- run:
name: Get postgres cache key
command: |
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
# FIXME We could cache our own docker container, instead of installing packages every time.
- run:
name: apt install dependencies
command: |
if [ ! -e tmp_install/bin/postgres ]; then
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
fi
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
# it only compares file timestamps.
- run:
name: build postgres
command: |
if [ ! -e tmp_install/bin/postgres ]; then
# "depth 1" saves some time by not cloning the whole repo
git submodule update --init --depth 1
make postgres
fi
- save_cache:
name: Save postgres cache
key: v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
# A job to build zenith rust code
build-zenith:
executor: zenith-build-executor
parameters:
build_type:
type: enum
enum: ["debug", "release"]
steps:
- run:
name: apt install dependencies
command: |
sudo apt update
sudo apt install libssl-dev clang
# Checkout the git repo (without submodules)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checked out yet.
- run:
name: Get postgres cache key
command: |
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
name: Rust build << parameters.build_type >>
command: |
export CARGO_INCREMENTAL=0
BUILD_TYPE="<< parameters.build_type >>"
if [[ $BUILD_TYPE == "debug" ]]; then
echo "Build in debug mode"
cargo build --bins --tests
elif [[ $BUILD_TYPE == "release" ]]; then
echo "Build in release mode"
cargo build --release --bins --tests
fi
- save_cache:
name: Save rust cache
key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
- target
# Run style checks
# it has to run separately from the cargo fmt section
# since it needs to run with the dependencies available
- run:
name: clippy
command: |
./run_clippy.sh
# Run rust unit tests
- run: cargo test
# Install the rust binaries, for use by test jobs
# `--locked` is required; otherwise, `cargo install` will ignore Cargo.lock.
# FIXME: this is a really silly way to install; maybe we should just output
# a tarball as an artifact? Or a .deb package?
- run:
name: cargo install
command: |
export CARGO_INCREMENTAL=0
BUILD_TYPE="<< parameters.build_type >>"
if [[ $BUILD_TYPE == "debug" ]]; then
echo "Install debug mode"
CARGO_FLAGS="--debug"
elif [[ $BUILD_TYPE == "release" ]]; then
echo "Install release mode"
# The default is release mode; there is no --release flag.
CARGO_FLAGS=""
fi
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path pageserver
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path walkeeper
cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path zenith
# Install the postgres binaries, for use by test jobs
# FIXME: this is a silly way to do "install"; maybe just output a standard
# postgres package, whatever the favored form is (tarball? .deb package?)
# Note that pg_regress needs some build artifacts that probably aren't
# in the usual package...?
- run:
name: postgres install
command: |
cp -a tmp_install /tmp/zenith/pg_install
# Save the rust output binaries for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
run-pytest:
#description: "Run pytest"
executor: python/default
parameters:
# Select the type of Rust build. Must be "release" or "debug".
build_type:
type: string
default: "debug"
# pytest args to specify the tests to run.
#
# This can be a test file name, e.g. 'test_pgbench.py', or a subdirectory,
# or '-k foobar' to run tests containing the string 'foobar'. See the pytest man page,
# section SPECIFYING TESTS / SELECTING TESTS, for details.
#
# This parameter is required, to prevent the mistake of running all tests in one job.
test_selection:
type: string
default: ""
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
extra_params:
type: string
default: ""
needs_postgres_source:
type: boolean
default: false
run_in_parallel:
type: boolean
default: true
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- when:
condition: << parameters.needs_postgres_source >>
steps:
- run: git submodule update --init --depth 1
- run:
name: Install pipenv & deps
working_directory: test_runner
command: |
pip install pipenv
pipenv install
- run:
name: Run pytest
working_directory: test_runner
environment:
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
command: |
TEST_SELECTION="<< parameters.test_selection >>"
EXTRA_PARAMS="<< parameters.extra_params >>"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if << parameters.run_in_parallel >>; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi;
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
# in its "Tests" tab in the results page.
# -s prevents pytest from capturing output, which helps to see
# what's going on if the test hangs
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short -s --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
- run:
# CircleCI artifacts are preserved one file at a time, so skipping
# this step isn't a good idea. If you want to extract the
# pageserver state, perhaps a tarball would be a better idea.
name: Delete all data but logs
when: always
command: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "wal_acceptor.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" -delete
du -sh /tmp/test_output/*
- store_artifacts:
path: /tmp/test_output
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
# Build zenithdb/zenith:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
docker build -t zenithdb/zenith:latest . && docker push zenithdb/zenith:latest
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"zenith-remote-ci\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"zenith-remote-ci\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\",
\"zenith_image_branch\": \"$CIRCLE_BRANCH\"
}
}"
workflows:
build_and_test:
jobs:
- check-codestyle
- build-postgres
- build-zenith:
name: build-zenith-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
requires:
- build-postgres
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_pg_regress
needs_postgres_source: true
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: other-tests-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_others
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: benchmarks
build_type: release
test_selection: performance
run_in_parallel: false
requires:
- build-zenith-release
- docker-image:
# Context gives the ability to log in
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "zenithdb/console"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-zenith-debug
# - pg_regress-tests-release
# - other-tests-release
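A short aside on the "Get postgres cache key" steps above: `git rev-parse HEAD:<path>` reads the submodule commit recorded in the superproject's tree, so it works before the submodule is checked out. A minimal sketch from a fresh clone of this repository (placeholder URL):
git clone <repo-url> && cd <repo>            # no `git submodule update` run yet
git rev-parse HEAD:vendor/postgres           # prints the pinned postgres submodule commit
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres   # the value CI hashes into the cache key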

View File

@@ -1,26 +0,0 @@
# This file contains settings for `cargo hakari`.
# See https://docs.rs/cargo-hakari/latest/cargo_hakari/config for a full list of options.
hakari-package = "workspace_hack"
# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
dep-format-version = "2"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.
# For more about the new feature resolver, see:
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
# We have to keep the resolver here since hakari requires this field,
# even though it's now the default for the 2021 edition & cargo.
resolver = "2"
# Add triples corresponding to platforms commonly used by developers here.
# https://doc.rust-lang.org/rustc/platform-support.html
platforms = [
# "x86_64-unknown-linux-gnu",
# "x86_64-apple-darwin",
# "x86_64-pc-windows-msvc",
]
# Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true
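For orientation, the file above configures `cargo hakari`, which maintains the workspace_hack crate. A hedged sketch of the usual commands (assuming cargo-hakari is installed, e.g. via `cargo install cargo-hakari`):
cargo hakari generate      # regenerate workspace_hack/Cargo.toml from the current workspace
cargo hakari manage-deps   # add or update the `workspace_hack = ...` lines in member Cargo.toml files
cargo hakari verify        # check that the generated contents are still up to date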

View File

@@ -1,21 +1,13 @@
*
**/.git/
**/__pycache__
**/.pytest_cache
!rust-toolchain.toml
!Cargo.toml
!Cargo.lock
!Makefile
!.cargo/
!.config/
!control_plane/
!compute_tools/
!libs/
!pageserver/
!pgxn/
!proxy/
!safekeeper/
!vendor/postgres-v14/
!vendor/postgres-v15/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh
/target
/tmp_check
/tmp_install
/tmp_check_cli
/test_output
/.vscode
/.zenith
/integration_tests/.zenith
/Dockerfile

View File

@@ -1 +0,0 @@
4c2bb43775947775401cbb9d774823c5723a91f8

View File

@@ -1,23 +0,0 @@
---
name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
assignees: ''
---
## Steps to reproduce
## Expected result
## Actual result
## Environment
## Logs, links
-

View File

@@ -1,25 +0,0 @@
---
name: Epic Template
about: A set of related tasks contributing towards a specific outcome, comprising
more than one week of work.
title: 'Epic: '
labels: t/Epic
assignees: ''
---
## Motivation
## DoD
## Implementation ideas
## Tasks
- [ ]
## Other related tasks and Epics
-

View File

@@ -1,20 +0,0 @@
## Release 202Y-MM-DD
**NB: this PR must be merged only by 'Create a merge commit'!**
### Checklist when preparing for release
- [ ] Read or refresh [the release flow guide](https://github.com/neondatabase/cloud/wiki/Release:-general-flow)
- [ ] Announce in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to roll out the release. Any blockers?
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://observer.zenith.tech/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://observer.zenith.tech/d/5OkYJEmVz/compute-startup-time)
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->

View File

@@ -1,221 +0,0 @@
name: 'Create Allure report'
description: 'Create and publish Allure report'
inputs:
action:
description: 'generate or store'
required: true
build_type:
description: '`build_type` from run-python-test-set action'
required: true
test_selection:
description: '`test_selection` from run-python-test-set action'
required: false
outputs:
report-url:
description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }}
runs:
using: "composite"
steps:
- name: Validate input parameters
shell: bash -euxo pipefail {0}
run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1
fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo 2>&1 "inputs.test_selection must be set for 'store' action"
exit 2
fi
- name: Calculate key
id: calculate-key
shell: bash -euxo pipefail {0}
run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number}
elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then
# Shortcut for a special branch
key=main
else
key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
with:
distribution: 'temurin'
java-version: '17'
- name: Install Allure
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
run: |
if ! which allure; then
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
unzip -q ${ALLURE_ZIP}
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.19.0
ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}
env:
REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
shell: bash -euxo pipefail {0}
run: |
# Add metadata
cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
{
"name": "GitHub Actions",
"type": "github",
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
"buildOrder": ${GITHUB_RUN_ID},
"buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
"reportName": "Allure Report"
}
EOF
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
TEST_SELECTION=${{ inputs.test_selection }}
BUILD_TYPE=${{ inputs.build_type }}
EOF
ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
export ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
# Potentially we could have several builds running for the same key (for example, for the main branch), so we use an improvised lock for this
- name: Acquire Allure lock
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
BUCKET: neon-github-public-dev
run: |
LOCK_TIMEOUT=300 # seconds
for _ in $(seq 1 5); do
for i in $(seq 1 ${LOCK_TIMEOUT}); do
LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
break
fi
sleep 1
done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# Double-check that it was exactly us who acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
break
fi
done
- name: Generate and publish final Allure report
if: ${{ inputs.action == 'generate' }}
id: generate-report
env:
REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
TEST_OUTPUT: /tmp/test_output
BUCKET: neon-github-public-dev
shell: bash -euxo pipefail {0}
run: |
# Get previously uploaded data for this run
export ZSTD_NBTHREADS=0
s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key')
if [ -z "$s3_filepaths" ]; then
# There's no previously uploaded data for this run
exit 0
fi
for s3_filepath in ${s3_filepaths}; do
aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
mkdir -p ${archive%.tar.zst}
tar -xf ${archive} -C ${archive%.tar.zst}
rm -f ${archive}
done
# Get history trend
aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
# Generate report
allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
# Replace a logo link with a redirect to the latest version of the report
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
# Upload the history and the final report (in this particular order, so as not to have duplicated history in two places)
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect
cat <<EOF > ./index.html
<!DOCTYPE html>
<meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF
aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
BUCKET: neon-github-public-dev
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi
- uses: actions/github-script@v6
if: ${{ inputs.action == 'generate' && always() }}
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
BUILD_TYPE: ${{ inputs.build_type }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, BUILD_TYPE, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Allure report / ${BUILD_TYPE}`,
})
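The "Acquire Allure lock" and "Release Allure lock" steps above implement an improvised lock on top of S3. A condensed sketch of the same pattern as a standalone bash function (assuming BUCKET, LOCK_FILE and a caller-chosen MY_ID are set, and that the aws CLI and jq are available):
acquire_lock() {
  local timeout=300
  for _ in $(seq 1 5); do
    # wait until the lock object is absent or older than the timeout
    for _ in $(seq 1 "$timeout"); do
      added=$(aws s3api head-object --bucket "$BUCKET" --key "$LOCK_FILE" | jq -r '.LastModified' || true)
      if [ -z "$added" ] || [ "$(( $(date +%s) - $(date --date="$added" +%s) ))" -gt "$timeout" ]; then
        break
      fi
      sleep 1
    done
    echo "$MY_ID" > lock.txt
    aws s3 mv --only-show-errors lock.txt "s3://$BUCKET/$LOCK_FILE"   # optimistic write
    aws s3 cp --only-show-errors "s3://$BUCKET/$LOCK_FILE" lock.txt   # read back
    if [ "$(cat lock.txt)" = "$MY_ID" ]; then                         # we hold the lock only if our ID survived
      return 0
    fi
  done
  return 1
}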

View File

@@ -1,59 +0,0 @@
name: "Download an artifact"
description: "Custom download action"
inputs:
name:
description: "Artifact name"
required: true
path:
description: "A directory to put artifact into"
default: "."
required: false
skip-if-does-not-exist:
description: "Allow to skip if file doesn't exist, fail otherwise"
default: false
required: false
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
steps:
- name: Download artifact
id: download-artifact
shell: bash -euxo pipefail {0}
env:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
exit 0
else
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
fi
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
- name: Extract artifact
if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}
shell: bash -euxo pipefail {0}
env:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
run: |
mkdir -p ${TARGET}
time tar -xf ${ARCHIVE} -C ${TARGET}
rm -f ${ARCHIVE}
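A note on the artifact lookup above: `${PREFIX%$GITHUB_RUN_ATTEMPT}` strips the trailing attempt number from the prefix, so the listing matches artifacts uploaded by any attempt of the run, and `sort --version-sort | tail -1` then picks the newest one. A minimal illustration with hypothetical values:
PREFIX="artifacts/4242/3"; GITHUB_RUN_ATTEMPT=3
echo "${PREFIX%$GITHUB_RUN_ATTEMPT}"        # -> artifacts/4242/  (prefix without the attempt)
printf 'artifacts/4242/1/neon.tar.zst\nartifacts/4242/2/neon.tar.zst\n' | sort --version-sort | tail -1
                                            # -> artifacts/4242/2/neon.tar.zst (latest attempt wins)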

View File

@@ -1,82 +0,0 @@
name: 'Create Neon Project'
description: 'Create Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
region_id:
description: 'Region ID; if not set, the project will be created in the default region'
required: false
outputs:
dsn:
description: 'Created Project DSN (for main database)'
value: ${{ steps.create-neon-project.outputs.dsn }}
project_id:
description: 'Created Project ID'
value: ${{ steps.create-neon-project.outputs.project_id }}
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
REGION_ID=${REGION_ID:-eu-west-1}
;;
staging)
API_HOST=console.stage.neon.tech
REGION_ID=${REGION_ID:-us-east-1}
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}
- name: Create Neon Project
id: create-neon-project
# A shell without `set -x`, so as not to expose the password/DSN in logs
shell: bash -euo pipefail {0}
run: |
project=$(curl \
"https://${API_HOST}/api/v1/projects" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
--data "{
\"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"platform_id\": \"aws\",
\"region_id\": \"${REGION_ID}\",
\"settings\": { }
}
}")
# Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
project_id=$(echo $project | jq --raw-output '.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
env:
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
REGION_ID: ${{ steps.parse-input.outputs.region_id }}

View File

@@ -1,54 +0,0 @@
name: 'Delete Neon Project'
description: 'Delete Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
project_id:
description: 'ID of the Project to delete'
required: true
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
;;
staging)
API_HOST=console.stage.neon.tech
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Delete Neon Project
shell: bash -euxo pipefail {0}
run: |
# Allow PROJECT_ID to be empty/null for cases when .github/actions/neon-project-create failed
if [ -n "${PROJECT_ID}" ]; then
curl -X "POST" \
"https://${API_HOST}/api/v1/projects/${PROJECT_ID}/delete" \
--fail \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
fi
env:
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

View File

@@ -1,181 +0,0 @@
name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug", or "remote" for the remote cluster'
required: true
test_selection:
description: 'A python test suite to run'
required: true
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set'
required: false
default: 'false'
run_with_real_s3:
description: 'Whether to pass real s3 credentials to the test suite'
required: false
default: 'false'
real_s3_bucket:
description: 'Bucket name for real s3 tests'
required: false
default: ''
real_s3_region:
description: 'Region name for real s3 tests'
required: false
default: ''
real_s3_access_key_id:
description: 'Access key id'
required: false
default: ''
real_s3_secret_access_key:
description: 'Secret access key'
required: false
default: ''
runs:
using: "composite"
steps:
- name: Get Neon artifact
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Download compatibility snapshot for Postgres 14
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BREAKING_CHANGES: ${{ contains(github.event.pull_request.labels.*.name, 'breaking changes') }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
# and it is needed to distinguish different environments
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
fi
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
EXTRA_PARAMS="${{ inputs.extra_params }}"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
echo "REAL S3 ENABLED"
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
else
cov_prefix=()
fi
# Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
fi
# Run the tests.
#
# The junit.xml file allows CI tools to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used (it would prevent pytest from capturing output), because tests are running
# in parallel and their logs would be mixed between different tests
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \
--tb=short \
--verbose \
-rA $TEST_SELECTION $EXTRA_PARAMS
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO="$PLATFORM"
scripts/generate_and_push_perf_report.sh
fi
- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Create Allure report
if: always()
uses: ./.github/actions/allure-report
with:
action: store
build_type: ${{ inputs.build_type }}
test_selection: ${{ inputs.test_selection }}
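A brief aside on the `cov_prefix` pattern in the "Run pytest" step above: expanding an empty bash array with "${arr[@]}" contributes no words to the command line, so the same invocation transparently gains or drops the coverage wrapper. A minimal, self-contained illustration (hypothetical command, bash 4.4+):
#!/usr/bin/env bash
build_type="release"
if [[ "$build_type" == "debug" ]]; then
  cov_prefix=(scripts/coverage run)      # wrap the command when coverage is wanted
else
  cov_prefix=()                          # expands to nothing: the command runs unwrapped
fi
"${cov_prefix[@]}" echo "running tests"  # with the empty array this is just: echo "running tests"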

View File

@@ -1,22 +0,0 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -euxo pipefail {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Download previous coverage data into the same directory
uses: ./.github/actions/download
with:
name: coverage-data-artifact
path: /tmp/coverage
skip-if-does-not-exist: true # skip if there's no previous coverage to download
- name: Upload coverage data
uses: ./.github/actions/upload
with:
name: coverage-data-artifact
path: /tmp/coverage

View File

@@ -1,58 +0,0 @@
name: "Upload an artifact"
description: "Custom upload action"
inputs:
name:
description: "Artifact name"
required: true
path:
description: "A directory or file to upload"
required: true
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
using: "composite"
steps:
- name: Prepare artifact
shell: bash -euxo pipefail {0}
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
run: |
mkdir -p $(dirname $ARCHIVE)
if [ -f ${ARCHIVE} ]; then
echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before"
exit 1
fi
export ZSTD_NBTHREADS=0
if [ -d ${SOURCE} ]; then
time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo 2>&1 "${SOURCE} does not exist"
exit 2
else
echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3
fi
- name: Upload artifact
shell: bash -euxo pipefail {0}
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)
FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${FILENAME}
# Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}

View File

@@ -1,7 +0,0 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version
collections/*
!collections/.keep

View File

@@ -1,13 +0,0 @@
[defaults]
localhost_warning = False
host_key_checking = False
timeout = 30
collections_paths = ./collections
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp didn't work for me either
transfer_method = piped
pipelining = True

View File

@@ -1,15 +0,0 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
Host * !tele.zenith.tech
User admin
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
ProxyJump tele.zenith.tech

View File

View File

@@ -1,191 +0,0 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: get latest version of Neon binaries
register: current_version_file
set_fact:
current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
tags:
- pageserver
- safekeeper
- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper
- name: upload and extract Neon binaries to /usr/local
ansible.builtin.unarchive:
owner: root
group: root
src: neon_install.tar.gz
dest: /usr/local
become: true
tags:
- pageserver
- safekeeper
- binaries
- putbinaries
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_pageserver.sh
dest: /tmp/init_pageserver.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- pageserver
- name: init pageserver
shell:
cmd: /tmp/init_pageserver.sh
args:
creates: "/storage/pageserver/data/tenants"
environment:
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- pageserver
- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver
- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver
- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver
- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
become: true
tags:
- pageserver
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/pageserver.service
dest: /etc/systemd/system/pageserver.service
owner: root
group: root
mode: '0644'
become: true
tags:
- pageserver
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: pageserver
enabled: yes
state: restarted
become: true
tags:
- pageserver
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/pageservers/$INSTANCE_ID
tags:
- pageserver
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_safekeeper.sh
dest: /tmp/init_safekeeper.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- safekeeper
- name: init safekeeper
shell:
cmd: /tmp/init_safekeeper.sh
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- safekeeper
# in the future safekeepers should discover pageservers by themselves,
# but for now we use the first pageserver that was discovered
- name: set first pageserver var for safekeepers
set_fact:
first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
tags:
- safekeeper
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/safekeeper.service
dest: /etc/systemd/system/safekeeper.service
owner: root
group: root
mode: '0644'
become: true
tags:
- safekeeper
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: safekeeper
enabled: yes
state: restarted
become: true
tags:
- safekeeper
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -d '{"version": {{ current_version }} }' -X PATCH {{ console_mgmt_base_url }}/api/v1/safekeepers/$INSTANCE_ID
tags:
- safekeeper

View File

@@ -1,41 +0,0 @@
#!/bin/bash
set -e
if [ -n "${DOCKER_TAG}" ]; then
# Version is DOCKER_TAG but without the prefix
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
else
echo "Please set DOCKER_TAG environment variable"
exit 1
fi
# do initial cleanup
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
mkdir neon_install
# retrieve binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
docker rm -vf ${ID}
# store version to file (for ansible playbooks) and create binaries tarball
echo ${VERSION} > neon_install/.neon_current_version
echo ${VERSION} > .neon_current_version
tar -czf neon_install.tar.gz -C neon_install .
# do final cleanup
rm -rf neon_install postgres_install.tar.gz

View File

@@ -1,31 +0,0 @@
storage:
vars:
bucket_name: neon-storage-ireland
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-stress-console.local
etcd_endpoints: neon-stress-etcd.local:2379
safekeeper_enable_s3_offload: 'false'
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: neon-stress/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
neon-stress-ps-1:
console_region_id: aws-eu-west-1
neon-stress-ps-2:
console_region_id: aws-eu-west-1
safekeepers:
hosts:
neon-stress-sk-1:
console_region_id: aws-eu-west-1
neon-stress-sk-2:
console_region_id: aws-eu-west-1
neon-stress-sk-3:
console_region_id: aws-eu-west-1

View File

@@ -1,33 +0,0 @@
---
storage:
vars:
console_mgmt_base_url: http://console-release.local
bucket_name: zenith-storage-oregon
bucket_region: us-west-2
etcd_endpoints: zenith-1-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: prod-1/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-1-ps-2:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
safekeepers:
hosts:
zenith-1-sk-1:
console_region_id: aws-us-west-2
zenith-1-sk-2:
console_region_id: aws-us-west-2
zenith-1-sk-3:
console_region_id: aws-us-west-2

View File

@@ -1,31 +0,0 @@
#!/bin/sh
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false
}
EOF
# check if pageserver already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
fi

View File

@@ -1,31 +0,0 @@
#!/bin/sh
# fetch params from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
}
EOF
# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

View File

@@ -1,3 +0,0 @@
ansible_connection: aws_ssm
ansible_aws_ssm_bucket_name: neon-dev-bucket
ansible_python_interpreter: /usr/bin/python3

View File

@@ -1,34 +0,0 @@
storage:
vars:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: us-stage/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-us-stage-ps-2:
console_region_id: aws-us-east-1
zenith-us-stage-ps-3:
console_region_id: aws-us-east-1
zenith-us-stage-ps-4:
console_region_id: aws-us-east-1
safekeepers:
hosts:
zenith-us-stage-sk-4:
console_region_id: aws-us-east-1
zenith-us-stage-sk-5:
console_region_id: aws-us-east-1
zenith-us-stage-sk-6:
console_region_id: aws-us-east-1

View File

@@ -1,32 +0,0 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
console_region_id: aws-us-east-2
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Neon pageserver
After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Neon safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -1 +0,0 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: us-east-2
zenith_region_slug: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,26 +0,0 @@
fullnameOverride: "neon-stress-proxy-scram"
settings:
authBackend: "console"
authEndpoint: "http://neon-stress-console.local/management/api/v2"
domain: "*.stress.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,35 +0,0 @@
fullnameOverride: "neon-stress-proxy"
settings:
authBackend: "link"
authEndpoint: "https://console.dev.neon.tech/authenticate_proxy_request/"
uri: "https://console.dev.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: eu-west-1
zenith_region_slug: ireland
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-stress-proxy.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.dev.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,24 +0,0 @@
settings:
authBackend: "console"
authEndpoint: "http://console-release.local/management/api/v2"
domain: "*.cloud.neon.tech"
podLabels:
zenith_service: proxy-scram
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,33 +0,0 @@
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: production
zenith_region: us-west-2
zenith_region_slug: oregon
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: proxy-release.local
type: LoadBalancer
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,31 +0,0 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.cloud.stage.neon.tech"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: cloud.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,31 +0,0 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

View File

@@ -1,289 +0,0 @@
name: Benchmarking
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ your branch ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 3 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
inputs:
environment:
description: 'Environment to run remote tests on (dev or staging)'
required: false
region_id:
description: 'Use a particular region. If not set the default region will be used'
required: false
save_perf_report:
type: boolean
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
required: false
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
bench:
# this workflow runs on a self-hosted runner
# its environment is quite different from the usual GitHub runner
# probably the most important difference is that it doesn't start from a clean workspace each time
# e.g. if you install system packages, they are not cleaned up, since you install them directly on the host machine,
# not in a container or similar
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
env:
POSTGRES_DISTRIB_DIR: /usr/pgsql
DEFAULT_PG_VERSION: 14
steps:
- name: Checkout zenith repo
uses: actions/checkout@v3
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
# so the simplest solution is to use the already installed system Python and spin up virtualenvs for job runs.
# there is Python 3.7.10 already installed on the machine, so use it to install poetry and then use poetry's virtualenvs
- name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused, there shouldn't be any trouble with installing every time
./scripts/pysync
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
environment: ${{ github.event.inputs.environment || 'staging' }}
api_key: ${{ ( github.event.inputs.environment || 'staging' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Run benchmark
# pgbench is installed system-wide from the official repo
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# via
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
# [pgdg13]
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# enabled=1
# gpgcheck=0
# EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env:
# The pgbench test runs two tests of the given duration against each scale.
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on the self-hosted runner,
# since that might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set the --sparse-ordering option of the pytest-order plugin to ensure tests run in the order they appear in the file;
# this is important for the test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-compare:
strategy:
fail-fast: false
matrix:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
db_size: 50gb
- platform: neon-captest-prefetch
db_size: 50gb
- platform: rds-aurora
db_size: 50gb
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
environment: ${{ github.event.inputs.environment || 'dev' }}
api_key: ${{ ( github.event.inputs.environment || 'dev' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon-captest-new | neon-captest-prefetch)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark simple-update
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark select-only
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
if: always()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_CAPTEST_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
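
Both jobs in this workflow ultimately drive the same pytest entry point. A minimal local sketch of the bench job's invocation, assuming ./scripts/pysync has been run; the connection string and output directory are placeholders:
# Illustrative only; values below are placeholders, not real credentials.
export REMOTE_ENV=1                                   # no local zenith binaries
export BENCHMARK_CONNSTR="postgres://user:pass@host:5432/main"
export TEST_PG_BENCH_DURATIONS_MATRIX="300"
export TEST_PG_BENCH_SCALES_MATRIX="10,100"
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" \
  --sparse-ordering --out-dir perf-report-local --timeout 5400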

View File

@@ -1,876 +0,0 @@
name: Test and Deploy
on:
push:
branches:
- main
- release
pull_request:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
tag:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get build tag
run: |
echo run:$GITHUB_RUN_ID
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
fi
shell: bash
id: build-tag
build-neon:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
GIT_VERSION: ${{ github.sha }}
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
# We run tests with additional features that are turned off by default (e.g. in release builds); see the
# corresponding Cargo.toml files for their descriptions.
- name: Set env variables
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FEATURES="--features testing"
CARGO_FLAGS="--locked --timings $CARGO_FEATURES"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FEATURES="--features testing,profiling"
CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES"
fi
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
shell: bash -euxo pipefail {0}
# Don't include the ~/.cargo/registry/src directory. It contains just
# uncompressed versions of the crates in ~/.cargo/registry/cache
# directory, and it's faster to let 'cargo' to rebuild it from the
# compressed crates.
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
!~/.cargo/registry/src
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Run cargo build
run: |
${cov_prefix} mold -run cargo build $CARGO_FLAGS --bins --tests
shell: bash -euxo pipefail {0}
- name: Run cargo test
run: |
${cov_prefix} cargo test $CARGO_FLAGS
shell: bash -euxo pipefail {0}
- name: Install rust binaries
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
shell: bash -euxo pipefail {0}
- name: Install postgres binaries
run: cp -a pg_install /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
- name: Upload Neon artifact
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: /tmp/neon
- name: Prepare cargo build timing stats for storing
run: |
mkdir -p "/tmp/neon/cargo-timings/$BUILD_TYPE/"
cp -r ./target/cargo-timings/* "/tmp/neon/cargo-timings/$BUILD_TYPE/"
shell: bash -euxo pipefail {0}
- name: Upload cargo build stats
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-build-stats
path: /tmp/neon/cargo-timings/
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
regress-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest regression tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
test_selection: regress
needs_postgres_source: true
run_with_real_s3: true
real_s3_bucket: ci-tests-s3
real_s3_region: us-west-2
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
upload-latest-artifacts:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests ]
if: github.ref_name == 'main'
steps:
- name: Copy Neon artifact to the latest directory
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/${{ github.run_id }}
run: |
for build_type in debug release; do
FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/artifacts/latest/${FILENAME}
done
benchmarks:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ build-neon ]
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
fail-fast: false
matrix:
build_type: [ release ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest benchmarks
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref == 'refs/heads/main' }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
merge-allure-report:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests, benchmarks ]
if: always()
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
- name: Create Allure report
id: create-allure-report
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ matrix.build_type }}
- name: Store Allure test stat in the DB
if: ${{ steps.create-allure-report.outputs.report-url }}
env:
BUILD_TYPE: ${{ matrix.build_type }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPORT_URL: ${{ steps.create-allure-report.outputs.report-url }}
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
shell: bash -euxo pipefail {0}
run: |
curl --fail --output suites.json ${REPORT_URL%/index.html}/data/suites.json
./scripts/pysync
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
coverage-report:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests ]
strategy:
fail-fast: false
matrix:
build_type: [ debug ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: /tmp/neon
- name: Get coverage artifact
uses: ./.github/actions/download
with:
name: coverage-data-artifact
path: /tmp/coverage
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
shell: bash -euxo pipefail {0}
- name: Build and upload coverage report
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\"
}
}"
neon-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build neon
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
compute-tools-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
# compute-node uses Postgres 14, which is the default now.
# The cloud repo depends on this image name, hence the duplication;
# remove compute-node once the cloud repo is updated
- name: Kaniko build compute node with extensions v14 (compatibility)
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
compute-node-image-v14:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute node with extensions v14
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
compute-node-image-v15:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute node with extensions v15
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
promote-images:
runs-on: dev
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
fail-fast: false
matrix:
# compute-node uses Postgres 14, which is the default now.
# The cloud repo depends on this image name, hence the duplication;
# remove compute-node once the cloud repo is updated
name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]
steps:
- name: Promote image to latest
run: |
export MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=${{needs.tag.outputs.build-tag}} --query 'images[].imageManifest' --output text)
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
steps:
- name: Install Crane & ECR helper
run: |
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
- name: Configure ECR login
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
- name: Pull compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
- name: Pull compute node v15 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
- name: Pull rust image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
- name: Push images to production ECR
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest
- name: Configure Docker Hub login
run: |
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
echo "" > /github/home/.docker/config.json
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
- name: Push neon image to Docker Hub
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
- name: Push compute tools image to Docker Hub
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
- name: Push compute node image to Docker Hub
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
- name: Push compute node v14 image to Docker Hub
run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
- name: Push compute node v15 image to Docker Hub
run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}
- name: Push rust image to Docker Hub
run: crane push rust neondatabase/rust:pinned
- name: Add latest tag to images in Docker Hub
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
matrix-include: ${{ steps.set-matrix.outputs.include }}
steps:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# We need both storage **and** compute images for deploy, because the control plane picks the compute version based on the storage version.
# If it notices a fresh storage version, it may bump the compute version; and if the compute image failed to build, that may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup ansible
run: |
export PATH="/root/.local/bin:$PATH"
pip install --progress-bar off --user ansible boto3 toml
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
eval $(ssh-agent)
echo "${{ secrets.TELEPORT_SSH_KEY }}" | tr -d '\n'| base64 --decode >ssh-key
echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because the control plane picks the compute version based on the storage version.
# If it notices a fresh storage version, it may bump the compute version; and if the compute image failed to build, that may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Add curl
run: apt update && apt install curl -y
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
promote-compatibility-test-snapshot:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ deploy, deploy-proxy ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/latest
run: |
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done
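
For reference, the branch-dependent tag computed by the tag job at the top of this workflow reduces to a small shell conditional. A standalone sketch, where GITHUB_REF_NAME and GITHUB_RUN_ID would come from the Actions environment:
# Sketch of the tag job's logic outside of Actions.
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
  tag="$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
  tag="release-$(git rev-list --count HEAD)"
else
  tag="$GITHUB_RUN_ID"   # fallback for other refs
fi
echo "tag=${tag}"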

View File

@@ -1,166 +0,0 @@
name: Check code style and build
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# XXX: both OSes have rustup
# * https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md#rust-tools
# * https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#rust-tools
# this is all we need to install our toolchain later via rust-toolchain.toml
# so don't install any toolchain explicitly.
os: [ubuntu-latest, macos-latest]
timeout-minutes: 90
name: check codestyle rust and postgres
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Check formatting
run: cargo fmt --all -- --check
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
- name: Install macOS postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison openssl
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
shell: bash -euxo pipefail {0}
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
if: matrix.os == 'macos-latest'
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: make postgres-v14
shell: bash -euxo pipefail {0}
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15
shell: bash -euxo pipefail {0}
- name: Build neon extensions
run: make neon-pg-ext
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --locked --all --all-targets
check-rust-dependencies:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check every project module is covered by Hakari
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
shell: bash -euxo pipefail {0}
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run isort to ensure code format
run: poetry run isort --diff --check .
- name: Run black to ensure code format
run: poetry run black --diff --check .
- name: Run flake8 to ensure code format
run: poetry run flake8 .
- name: Run mypy to check types
run: poetry run mypy .
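
All of the checks in this workflow can be reproduced locally with the same commands the jobs run; a quick sketch:
# Rust checks (same as check-codestyle-rust)
cargo fmt --all -- --check
./run_clippy.sh
cargo build --locked --all --all-targets
# Python checks (same as check-codestyle-python)
./scripts/pysync
poetry run isort --diff --check .
poetry run black --diff --check .
poetry run flake8 .
poetry run mypy .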

45
.github/workflows/notifications.yml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: Send Notifications
on:
push:
branches: [ main ]
jobs:
send-notifications:
timeout-minutes: 30
name: send commit notifications
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Form variables for notification message
id: git_info_grab
run: |
git_stat=$(git show --stat=50)
git_stat="${git_stat//'%'/'%25'}"
git_stat="${git_stat//$'\n'/'%0A'}"
git_stat="${git_stat//$'\r'/'%0D'}"
git_stat="${git_stat// /}" # space -> 'Space En', as github tends to eat ordinary spaces
echo "::set-output name=git_stat::$git_stat"
echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
- name: Send notification
uses: appleboy/telegram-action@master
with:
to: ${{ secrets.TELEGRAM_TO }}
token: ${{ secrets.TELEGRAM_TOKEN }}
format: markdown
args: |
*@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
```
${{ steps.git_info_grab.outputs.git_stat }}
```

View File

@@ -1,99 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
environment: staging
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# The test framework expects us to have a psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's upload-artifact action because `ubuntu-latest` doesn't have the AWS CLI configured.
# This will be fixed after switching to the gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

73
.github/workflows/testing.yml vendored Normal file
View File

@@ -0,0 +1,73 @@
name: Build and Test
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
regression-check:
strategy:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
os: [ubuntu-latest]
timeout-minutes: 30
name: run regression test suite
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
override: true
- name: Install postgres dependencies
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v2
with:
path: |
tmp_install/
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
make postgres
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Run cargo build
run: |
cargo build --workspace --bins --examples --tests
- name: Run cargo test
run: |
cargo test -- --nocapture --test-threads=1
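
The new workflow boils down to the same commands a developer would run locally after installing the listed apt packages; roughly:
# Local equivalent of the regression-check job (Ubuntu).
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
make postgres
cargo build --workspace --bins --examples --tests
cargo test -- --nocapture --test-threads=1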

17
.gitignore vendored
View File

@@ -1,20 +1,9 @@
/pg_install
/target
/tmp_check
/tmp_install
/tmp_check_cli
__pycache__/
test_output/
.vscode
.idea
/.neon
/integration_tests/.neon
# Coverage
*.profraw
*.profdata
*.key
*.crt
*.o
*.so
*.Po
/.zenith
/integration_tests/.zenith

10
.gitmodules vendored
View File

@@ -1,8 +1,4 @@
[submodule "vendor/postgres-v14"]
path = vendor/postgres-v14
url = https://github.com/neondatabase/postgres.git
[submodule "vendor/postgres"]
path = vendor/postgres
url = https://github.com/zenithdb/postgres
branch = main
[submodule "vendor/postgres-v15"]
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
branch = REL_15_STABLE_neon

View File

@@ -11,15 +11,17 @@ than it was before.
## Submitting changes
1. Get at least one +1 on your PR before you push.
1. Make a PR for every change.
Even seemingly trivial patches can break things in surprising ways.
Use of common sense is OK. If you're only fixing a typo in a comment,
it's probably fine to just push it. But if in doubt, open a PR.
2. Get at least one +1 on your PR before you push.
For simple patches, it will only take a minute for someone to review
it.
2. Don't force push small changes after making the PR ready for review.
Doing so will force readers to re-read your entire PR, which will delay
the review process.
3. Always keep the CI green.
Do not push, if the CI failed on your PR. Even if you think it's not

20
COPYRIGHT Normal file
View File

@@ -0,0 +1,20 @@
This software is licensed under the Apache 2.0 License:
----------------------------------------------------------------------------
Copyright 2021 Zenith Labs, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------
The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.

3402
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -1,84 +1,17 @@
# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
# build work with older cargo versions.
#
# We have this because as of this writing, the latest cargo Debian package
# that's available is 1.56. (Confusingly, the Debian package version number
# is 0.57, whereas 'cargo --version' says 1.56.)
#
# See https://tracker.debian.org/pkg/cargo for the current status of the
# package. When that gets updated, we can remove this.
cargo-features = ["named-profiles"]
[workspace]
members = [
"compute_tools",
"control_plane",
"pageserver",
"postgres_ffi",
"proxy",
"safekeeper",
"walkeeper",
"workspace_hack",
"libs/*",
"zenith",
"zenith_metrics",
"zenith_utils",
]
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true
[profile.release-line-debug]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
[profile.release-line-debug-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
lto = true
[profile.release-line-debug-size]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-line-debug-zize]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-line-debug-size-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-line-debug-zize-lto]
inherits = "release"
debug = 1 # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
[profile.release-no-debug]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
[profile.release-no-debug-size]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
[profile.release-no-debug-zize]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
[profile.release-no-debug-size-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "s"
lto = true
[profile.release-no-debug-zize-lto]
inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
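
The extra release profiles listed above are selected with cargo's --profile flag, which requires a toolchain new enough for named profiles (see the cargo-features note). Hypothetical usage:
# Illustrative builds using the custom profiles defined above.
cargo build --profile release-line-debug -p pageserver
cargo build --profile release-no-debug-size-lto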

View File

@@ -1,89 +1,78 @@
### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries.
### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
### from this image in real deployments.
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
# Build neon binaries
FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to the local filesystem if S3 is misconfigured, without failing the build
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
&& cachepot -s
# Build final image
#
FROM debian:bullseye-slim
# Docker image for console integration testing.
#
#
# Build Postgres separately --- this layer will be rebuilt only if one of the
# mentioned paths gets any changes.
#
FROM zenithdb/build:buster AS pg-build
WORKDIR /zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
RUN make -j $(getconf _NPROCESSORS_ONLN) -s postgres
#
# Calculate cargo dependencies.
# This will always run, but only generates recipe.json with the list of dependencies, without
# installing them.
#
FROM zenithdb/build:buster AS cargo-deps-inspect
WORKDIR /zenith
COPY . .
RUN cargo chef prepare --recipe-path /zenith/recipe.json
#
# Build cargo dependencies.
# This temp container should be rebuilt only if recipe.json has changed.
#
FROM zenithdb/build:buster AS deps-build
WORKDIR /zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=cargo-deps-inspect /usr/local/cargo/bin/cargo-chef /usr/local/cargo/bin/
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
#
# Build zenith binaries
#
FROM zenithdb/build:buster AS build
WORKDIR /zenith
COPY . .
# Copy cached dependencies
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=deps-build /zenith/target target
COPY --from=deps-build /usr/local/cargo/ /usr/local/cargo/
RUN cargo build --release
#
# Copy binaries to resulting image.
#
FROM debian:buster-slim
WORKDIR /data
RUN set -e \
&& apt update \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
&& chown -R neon:neon /data
RUN apt-get update && apt-get -yq install librocksdb-dev libseccomp-dev openssl && \
mkdir zenith_install
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install postgres_install
COPY docker-entrypoint.sh /docker-entrypoint.sh
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# Remove build artifacts (~ 500 MB)
RUN rm -rf postgres_install/build && \
# 'Install' Postgres binaries locally
cp -r postgres_install/* /usr/local/ && \
# Prepare an archive of Postgres binaries (should be around 11 MB)
# and keep it inside container for an ease of deploy pipeline.
cd postgres_install && tar -czf /data/postgres_install.tar.gz . && cd .. && \
rm -rf postgres_install
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoints=['http://etcd:2379']" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"
RUN useradd -d /data zenith && chown -R zenith:zenith /data
VOLUME ["/data"]
USER neon
USER zenith
EXPOSE 6400
EXPOSE 9898
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

95
Dockerfile.alpine Normal file
View File

@@ -0,0 +1,95 @@
#
# Docker image for console integration testing.
#
# We may also reuse it in CI to unify the installation process and as a general binary-building
# tool for production servers.
#
# Dynamic linking is used for librocksdb and libstdc++ because librocksdb-sys calls
# bindgen with the "dynamic" feature flag. This also prevents usage of the dockerhub alpine-rust
# images, which are statically linked and have guards against any dlopen. I would rather
# prefer all-static binaries, so we may change the way librocksdb-sys builds or wait until
# we have our own storage and drop the rocksdb dependency.
#
# Cargo-chef is used to separate dependency building from building the main binaries. This
# way `docker build` will download and install dependencies only if there are changes to
# our Cargo.toml files.
#
#
# build postgres separately -- this layer will be rebuilt only if one of the
# mentioned paths gets any changes
#
FROM alpine:3.13 as pg-build
RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
WORKDIR zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
# Build using clang and lld
RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
#
# Calculate cargo dependencies.
# This will always run, but it only generates recipe.json with the list of dependencies,
# without installing them.
#
FROM alpine:20210212 as cargo-deps-inspect
RUN apk add --update rust cargo
RUN cargo install cargo-chef
WORKDIR zenith
COPY . .
RUN cargo chef prepare --recipe-path recipe.json
#
# Build cargo dependencies.
# This temp container will be rebuilt only if recipe.json was changed.
#
FROM alpine:20210212 as deps-build
RUN apk add --update rust cargo openssl-dev clang build-base
# rust-rocksdb can be built against a system-wide rocksdb -- that saves about
# 10 minutes during the build. The rocksdb apk package is in testing now, but use it
# anyway. In case of any trouble we can download and build rocksdb here manually
# (to cache it as a docker layer).
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
#
# Build zenith binaries
#
FROM alpine:20210212 as build
RUN apk add --update rust cargo openssl-dev clang build-base
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY . .
# Copy cached dependencies
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=deps-build /zenith/target target
COPY --from=deps-build /root/.cargo /root/.cargo
RUN cargo build --release
#
# Copy binaries to resulting image.
# build-base is here to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
# out how to statically link rocksdb or avoid it altogether).
#
FROM alpine:3.13
RUN apk add --update openssl build-base libseccomp-dev
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install /usr/local
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN addgroup zenith && adduser -h /data -D -G zenith zenith
VOLUME ["/data"]
WORKDIR /data
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

15
Dockerfile.build Normal file
View File

@@ -0,0 +1,15 @@
#
# Image with all the required dependencies to build https://github.com/zenithdb/zenith
# and Postgres from https://github.com/zenithdb/postgres
# Also includes some rust development and build tools.
#
FROM rust:slim-buster
WORKDIR /zenith
# Install postgres and zenith build dependencies
# clang is for rocksdb
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libseccomp-dev pkg-config libssl-dev librocksdb-dev clang
# Install rust tools
RUN rustup component add clippy && cargo install cargo-chef cargo-audit
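
The debian-based Dockerfile above references this builder image as `zenithdb/build:buster`, so it presumably needs to be built and tagged under that name first. A minimal sketch:

```sh
# Build and tag the builder image so that "FROM zenithdb/build:buster" stages can find it locally.
docker build -f Dockerfile.build -t zenithdb/build:buster .
```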

View File

@@ -1,218 +0,0 @@
#
# This file is identical to the Dockerfile.compute-node-v15 file
# except for the version of Postgres that is built.
#
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v14 postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install newer gold version manually as debian-testing binutils version updates
# libc version, which in turn breaks other extension built against non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
cd h3-4.0.1 && \
mkdir build && \
cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
tar xvzf h3-pg.tgz && \
cd h3-pg-4.0.1 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove now-useless PGXS src infrastructure
RUN rm -r /usr/local/pgsql/lib/pgxs/src
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
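
As a side note, the `trusted = true` lines appended to the extension control files above are what allow a non-superuser role to install the bundled extensions. A hedged illustration, assuming a compute node is reachable with the `cloud_admin` connection parameters used elsewhere in this repository's README:

```sh
# Install two of the extensions marked trusted above without superuser rights.
psql -h 127.0.0.1 -p 55432 -U cloud_admin postgres -c 'CREATE EXTENSION IF NOT EXISTS postgis;'
psql -h 127.0.0.1 -p 55432 -U cloud_admin postgres -c 'CREATE EXTENSION IF NOT EXISTS h3;'
```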

View File

@@ -1,218 +0,0 @@
#
# This file is identical to the Dockerfile.compute-node-v14 file
# except for the version of Postgres that is built.
#
ARG TAG=pinned
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v15 postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install newer gold version manually as debian-testing binutils version updates
# libc version, which in turn breaks other extension built against non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
cd h3-4.0.1 && \
mkdir build && \
cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
tar xvzf h3-pg.tgz && \
cd h3-pg-4.0.1 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove now-useless PGXS src infrastructure
RUN rm -r /usr/local/pgsql/lib/pgxs/src
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
RUN apt update && \
apt install --no-install-recommends -y \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,88 +0,0 @@
#
# Legacy version of the Dockerfile for the compute node.
# Used by e2e CI. Building Dockerfile.compute-node will take
# an unreasonable amount of time without v2 runners.
#
# TODO: remove once cloud repo CI is moved to v2 runners.
#
# Allow specifying a different compute-tools tag and image repo, so we are
# able to use different images
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=compute-tools
ARG TAG=latest
#
# Image with pre-built tools
#
FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps
# Only to get the ready compute_ctl binary as a dependency
#
# Image with Postgres build deps
#
FROM debian:bullseye-slim AS build-deps
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev
#
# Image with built Postgres
#
FROM build-deps AS pg-build
# Add user postgres
RUN adduser postgres
RUN mkdir /pg && chown postgres:postgres /pg
# Copy source files
# version 14 is default for now
COPY ./vendor/postgres-v14 /pg/
COPY ./pgxn /pg/
# Build and install Postgres locally
RUN mkdir /pg/compute_build && cd /pg/compute_build && \
../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \
# Install main binaries and contribs
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install
# Install neon contrib
RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install
USER postgres
WORKDIR /pg
#
# Final compute node image to be exported
#
FROM debian:bullseye-slim
# libreadline-dev is required to run psql
RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute
# Copy ready Postgres binaries
COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local
# Copy binaries from compute-tools
COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl
# XXX: temporary symlink for compatibility with old control-plane
RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
# Add postgres shared objects to the search path
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
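
A hedged sketch of launching this legacy compute image: the image tag and the host path of the spec file are assumptions, while the `compute_ctl` flags mirror the usage example in the compute_tools README further down in this diff.

```sh
# ENTRYPOINT is compute_ctl, so everything after the image name is passed to it.
docker run --rm \
    -v "$(pwd)/spec.json:/var/db/postgres/specs/current.json" \
    neon-compute:local \
    -D /var/db/postgres/compute \
    -C 'postgresql://cloud_admin@localhost/postgres' \
    -S /var/db/postgres/specs/current.json \
    -b /usr/local/bin/postgres
```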

View File

@@ -1,29 +0,0 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
WORKDIR /home/nonroot
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --locked --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:bullseye-slim
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl

221
Makefile
View File

@@ -1,196 +1,75 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
#
# We differentiate between release / debug build types using the BUILD_TYPE
# environment variable.
#
BUILD_TYPE ?= debug
ifeq ($(BUILD_TYPE),release)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
PG_CFLAGS = -O2 -g3 $(CFLAGS)
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CFLAGS = -O0 -g3 $(CFLAGS)
else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
PG_CONFIGURE_OPTS += --with-libseccomp
SECCOMP = --with-libseccomp
else
SECCOMP =
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif
# Use -C option so that when PostgreSQL "make install" installs the
# headers, the mtime of the headers are not changed when there have
# been no changes to the files. Changing the mtime triggers an
# unnecessary rebuild of 'postgres_ffi'.
PG_CONFIGURE_OPTS += INSTALL='$(ROOT_PROJECT_DIR)/scripts/ninstall.sh -C'
# Choose whether we should be silent or verbose
CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
# Fix for a corner case when make doesn't pass a jobserver
CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
# This option has a side effect of passing make jobserver to cargo.
# However, we shouldn't do this if `make -n` (--dry-run) has been asked.
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
#
# Top level Makefile to build Neon and PostgreSQL
# Top level Makefile to build Zenith and PostgreSQL
#
.PHONY: all
all: neon postgres neon-pg-ext
all: zenith postgres
### Neon Rust bits
# We don't want to run 'cargo build' in parallel with the postgres build,
# because interleaving cargo build output with postgres build output looks
# confusing. Also, 'cargo build' is parallel on its own, so it would be too
# much parallelism. (Recursive invocation of postgres target still gets any
# '-j' flag from the command line, so 'make -j' is still useful.)
.NOTPARALLEL:
### Zenith Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-v14-headers postgres-v15-headers
+@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
.PHONY: zenith
zenith: postgres-headers
cargo build
### PostgreSQL parts
# The rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/v14/config.status:
+@echo "Configuring Postgres v14 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
tmp_install/build/config.status:
+@echo "Configuring postgres build"
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='-O0 -g3 $(CFLAGS)' \
--enable-cassert \
--enable-debug \
--enable-depend \
$(SECCOMP) \
--prefix=$(abspath tmp_install) > configure.log)
$(POSTGRES_INSTALL_DIR)/build/v15/config.status:
+@echo "Configuring Postgres v15 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: tmp_install/build/config.status
# nicer alias to run 'configure'
.PHONY: postgres-v14-configure
postgres-v14-configure: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# Install the PostgreSQL header files into tmp_install/include
.PHONY: postgres-headers
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
.PHONY: postgres-v15-configure
postgres-v15-configure: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-v14-headers
postgres-v14-headers: postgres-v14-configure
+@echo "Installing PostgreSQL v14 headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/zenith
.PHONY: postgres
postgres: postgres-configure
+@echo "Compiling PostgreSQL"
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
+@echo "Compiling contrib/zenith"
$(MAKE) -C tmp_install/build/contrib/zenith install
+@echo "Compiling contrib/zenith_test_utils"
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
.PHONY: postgres-v15-headers
postgres-v15-headers: postgres-v15-configure
+@echo "Installing PostgreSQL v15 headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL
.PHONY: postgres-v14
postgres-v14: postgres-v14-configure \
postgres-v14-headers # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
+@echo "Compiling libpq v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+@echo "Compiling pg_buffercache v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
+@echo "Compiling pageinspect v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect install
.PHONY: postgres-v15
postgres-v15: postgres-v15-configure \
postgres-v15-headers # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
+@echo "Compiling libpq v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
+@echo "Compiling pg_buffercache v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
+@echo "Compiling pageinspect v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect install
# shorthand to build all Postgres versions
postgres: postgres-v14 postgres-v15
.PHONY: postgres-v14-clean
postgres-v14-clean:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq clean
.PHONY: postgres-v15-clean
postgres-v15-clean:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq clean
neon-pg-ext-v14: postgres-v14
+@echo "Compiling neon v14"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v14
(cd $(POSTGRES_INSTALL_DIR)/build/neon-v14 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+@echo "Compiling neon_test_utils" v14
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14
(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
neon-pg-ext-v15: postgres-v15
+@echo "Compiling neon v15"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v15
(cd $(POSTGRES_INSTALL_DIR)/build/neon-v15 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+@echo "Compiling neon_test_utils" v15
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15
(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15 && \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
.PHONY: neon-pg-ext-clean
$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon clean
$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils clean
neon-pg-ext: neon-pg-ext-v14 neon-pg-ext-v15
postgres-headers: postgres-v14-headers postgres-v15-headers
postgres-clean: postgres-v14-clean postgres-v15-clean
postgres-clean:
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean:
cd $(POSTGRES_INSTALL_DIR)/build/v14 && $(MAKE) clean
cd $(POSTGRES_INSTALL_DIR)/build/v15 && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
cd pgxn/neon && $(MAKE) clean
cd pgxn/neon_test_utils && $(MAKE) clean
cd tmp_install/build && ${MAKE} clean
cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf $(POSTGRES_INSTALL_DIR)
$(CARGO_CMD_PREFIX) cargo clean
rm -rf tmp_install
cargo clean
.PHONY: fmt
fmt:
@@ -198,4 +77,4 @@ fmt:
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
ln -s -f ../../pre-commit.py .git/hooks/pre-commit
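
To make the new build knobs concrete, here is a small sketch of how the targets above are typically driven; the alternative install prefix is just an example path.

```sh
# Default: debug build of the Rust bits plus both Postgres versions and the neon extensions.
make -j`nproc`

# Optimized build; BUILD_TYPE=release switches the Postgres CFLAGS and adds --release to cargo.
BUILD_TYPE=release make -j`nproc`

# Build and install only Postgres v15 into a non-default prefix (example path).
POSTGRES_INSTALL_DIR=/opt/neon-pg make postgres-v15
```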

5
NOTICE
View File

@@ -1,5 +0,0 @@
Neon
Copyright 2022 Neon Inc.
The PostgreSQL submodules in vendor/postgres-v14 and vendor/postgres-v15 are licensed under the
PostgreSQL license. See vendor/postgres-v14/COPYRIGHT and vendor/postgres-v15/COPYRIGHT.

1
Pipfile Symbolic link
View File

@@ -0,0 +1 @@
./test_runner/Pipfile

1
Pipfile.lock generated Symbolic link
View File

@@ -0,0 +1 @@
./test_runner/Pipfile.lock

232
README.md
View File

@@ -1,160 +1,73 @@
# Neon
# Zenith
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation).
Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes
## Architecture overview
A Neon installation consists of compute nodes and a Neon storage engine.
A Zenith installation consists of Compute nodes and Storage engine.
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
Compute nodes are stateless PostgreSQL nodes, backed by zenith storage.
The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
Zenith storage engine consists of two major components:
- Pageserver. Scalable storage backend for compute nodes.
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
Pageserver consists of:
- Repository - Neon storage implementation.
- Repository - Zenith storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request
- WAL redo - service that builds pages from base images and WAL records on Page service request.
## Running local installation
1. Install build dependencies and other useful packages
#### Installing dependencies on Linux
1. Install build dependencies and other applicable packages
* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
```bash
On Ubuntu or Debian this set of packages should be sufficient to build the code:
```text
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
libssl-dev clang
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
# recommended approach from https://www.rust-lang.org/tools/install
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
[Rust] 1.52 or later is also required.
#### Installing dependencies on OSX (12.3.1)
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl
```
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
# recommended approach from https://www.rust-lang.org/tools/install
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
To run the integration tests (not required to use the code), install
Python (3.6 or higher), and install python3 packages with `pipenv` using `pipenv install` in the project directory.
3. Install PostgreSQL Client
```
# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
brew install libpq
brew link --force libpq
```
#### Rustc version
The project uses a [rust toolchain file](./rust-toolchain.toml) to define the version it's built with in CI for testing and local builds.
This file is automatically picked up by [`rustup`](https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file), which installs (if absent) and uses the toolchain version pinned in the file.
rustup users who want to build with another toolchain can use the [`rustup override`](https://rust-lang.github.io/rustup/overrides.html#directory-overrides) command to set a specific toolchain for the project's directory.
Non-rustup users most probably do not get the same toolchain automatically from the file, so they are responsible for manually verifying that their toolchain matches the version in the file.
Newer rustc versions will most probably work fine, yet older ones might not be supported due to new features used by the project or its crates.
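For example, a rustup user could temporarily pin a different toolchain for this directory (the version below is purely illustrative):
```sh
cd neon
rustup override set 1.65.0    # build with a specific toolchain instead of the pinned one
rustup override unset         # go back to whatever rust-toolchain.toml specifies
```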
#### Building on Linux
1. Build neon and patched postgres
```
# Note: The path to the neon sources can not contain a space.
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default option is a debug build. This will create a
# noticeably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`nproc`"
make -j`nproc`
```
#### Building on OSX
1. Build neon and patched postgres
```
# Note: The path to the neon sources can not contain a space.
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default option is a debug build. This will create a
# noticeably slower build than a release build. For a release build,
# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"
make -j`sysctl -n hw.logicalcpu`
```
#### Dependency installation notes
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.
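For instance, to use the locally built client binaries without installing `postgresql-client`, something like the following sketch (run from the repository root, assuming the v14 build; substitute v15 as needed) should work:
```sh
# Paths follow the Makefile layout above: each Postgres version is installed under its own prefix.
export PATH="$(pwd)/pg_install/v14/bin:$PATH"
export LD_LIBRARY_PATH="$(pwd)/pg_install/v14/lib:$LD_LIBRARY_PATH"
```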
#### Running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
2. Build zenith and patched postgres
```sh
# Create repository in .neon with proper paths to binaries and data
git clone --recursive https://github.com/zenithdb/zenith.git
cd zenith
make -j5
```
3. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .zenith with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'
> ./target/debug/zenith init
pageserver init succeeded
# start pageserver
> ./target/debug/zenith start
Starting pageserver at '127.0.0.1:64000' in .zenith
Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting etcd broker using /usr/bin/etcd
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# start postgres on top on the pageserver
> ./target/debug/zenith pg start main
Starting postgres node at 'host=127.0.0.1 port=55432 user=stas'
waiting for server to start.... done
# check list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
> ./target/debug/zenith pg list
BRANCH ADDRESS LSN STATUS
main 127.0.0.1:55432 0/1609610 running
```
2. Now, it is possible to connect to postgres and run some queries:
4. Now it is possible to connect to postgres and run some queries:
```text
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1,1);
@@ -166,32 +79,25 @@ postgres=# select * from t;
(1 row)
```
3. And create branches and run postgres on them:
5. And create branches and run postgres on them:
```sh
# create branch named migration_check
> ./target/debug/neon_local timeline branch --branch-name migration_check
Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'
> ./target/debug/zenith branch migration_check main
Created branch 'migration_check' at 0/1609610
# check branches tree
> ./target/debug/neon_local timeline list
(L) main [de200bd42b49cc1814412c7e592dd6e9]
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
> ./target/debug/zenith branch
main
┗━ @0/1609610: migration_check
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
> ./target/debug/zenith pg start migration_check
Starting postgres node at 'host=127.0.0.1 port=55433 user=stas'
waiting for server to start.... done
# this new postgres instance will have all the data from 'main' postgres,
# but all modifications would not affect data in original postgres
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
postgres=# select * from t;
key | value
-----+-------
@@ -200,35 +106,15 @@ postgres=# select * from t;
postgres=# insert into t values(2,2);
INSERT 0 1
# check that the new change doesn't affect the 'main' postgres
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
postgres=# select * from t;
key | value
-----+-------
1 | 1
(1 row)
```
4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
you have just started. You can terminate them all with one command:
```sh
> ./target/debug/neon_local stop
```
## Running tests
Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
```sh
git clone --recursive https://github.com/neondatabase/neon.git
# either:
CARGO_BUILD_FLAGS="--features=testing" make
# or:
make debug
./scripts/pytest
git clone --recursive https://github.com/zenithdb/zenith.git
make # builds also postgres and installs it to ./tmp_install
cd test_runner
pytest
```
## Documentation
@@ -241,14 +127,14 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d
### Postgres-specific terms
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
The same applies to certain spelling: e.g. we use MB to denote 1024 * 1024 bytes; while MiB would be technically more correct, it is inconsistent with what the PostgreSQL code and its documentation use.
Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
To get more familiar with this aspect, refer to:
- [Neon glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
- [Zenith glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))
## Join the development

View File

@@ -1 +0,0 @@
target

View File

@@ -1 +0,0 @@
target

View File

@@ -1,23 +0,0 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0"
chrono = "0.4"
clap = "4.0"
env_logger = "0.9"
futures = "0.3.13"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
notify = "5.0.0"
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -1,81 +0,0 @@
# Compute node tools
Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
`ExecStart` option. It will handle all the `Neon` specifics during compute node
initialization:
- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
- Every start is a fresh start, so the data directory is removed and
initialized again on each run.
- Next it will put configuration files into the `PGDATA` directory.
- Sync safekeepers and get commit LSN.
- Get a `basebackup` from the pageserver using the LSN returned in the previous step.
- Try to start `postgres` and wait until it is ready to accept connections.
- Check and alter/drop/create roles and databases.
- Hang waiting on the `postmaster` process to exit.
Also `compute_ctl` spawns two separate service threads:
- `compute-monitor` checks the last Postgres activity timestamp and saves it
into the shared `ComputeNode`;
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
last activity requests.
Usage example:
```sh
compute_ctl -D /var/db/postgres/compute \
-C 'postgresql://cloud_admin@localhost/postgres' \
-S /var/db/postgres/specs/current.json \
-b /usr/local/bin/postgres
```
## Tests
Cargo formatter:
```sh
cargo fmt
```
Run tests:
```sh
cargo test
```
Clippy linter:
```sh
cargo clippy --all --all-targets -- -Dwarnings -Drust-2018-idioms
```
## Cross-platform compilation
Imagine that you are on macOS (x86) and you want a Linux GNU (`x86_64-unknown-linux-gnu` platform in `rust` terminology) executable.
### Using docker
You can use a throw-away Docker container ([rustlang/rust](https://hub.docker.com/r/rustlang/rust/) image) for doing that:
```sh
docker run --rm \
-v $(pwd):/compute_tools \
-w /compute_tools \
-t rustlang/rust:nightly cargo build --release --target=x86_64-unknown-linux-gnu
```
or one-line:
```sh
docker run --rm -v $(pwd):/compute_tools -w /compute_tools -t rust:latest cargo build --release --target=x86_64-unknown-linux-gnu
```
### Using rust native cross-compilation
Another way is to add `x86_64-unknown-linux-gnu` target on your host system:
```sh
rustup target add x86_64-unknown-linux-gnu
```
Install macOS cross-compiler toolchain:
```sh
brew tap SergioBenitez/osxct
brew install x86_64-unknown-linux-gnu
```
And finally run `cargo build`:
```sh
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-unknown-linux-gnu-gcc cargo build --target=x86_64-unknown-linux-gnu --release
```

View File

@@ -1 +0,0 @@
max_width = 100

View File

@@ -1,186 +0,0 @@
//!
//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
//! initialization:
//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
//! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run.
//! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN.
//! - Get a `basebackup` from the pageserver using the LSN returned in the previous step.
//! - Try to start `postgres` and wait until it is ready to accept connections.
//! - Check and alter/drop/create roles and databases.
//! - Hang waiting on the `postmaster` process to exit.
//!
//! Also `compute_ctl` spawns two separate service threads:
//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
//! into the shared `ComputeNode`;
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
//! last activity requests.
//!
//! Usage example:
//! ```sh
//! compute_ctl -D /var/db/postgres/compute \
//! -C 'postgresql://cloud_admin@localhost/postgres' \
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres
//! ```
//!
use std::fs::File;
use std::panic;
use std::path::Path;
use std::process::exit;
use std::sync::{Arc, RwLock};
use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use log::{error, info};
use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
let matches = cli().get_matches();
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.expect("Postgres connection string is required");
let spec = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
// Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap();
let spec: ComputeSpec = match spec {
// First, try to get cluster spec from the cli argument
Some(json) => serde_json::from_str(json)?,
None => {
// Second, try to read it from the file if path is provided
if let Some(sp) = spec_path {
let path = Path::new(sp);
let file = File::open(path)?;
serde_json::from_reader(file)?
} else {
panic!("cluster spec should be provided via --spec or --spec-path argument");
}
}
};
let pageserver_connstr = spec
.cluster
.settings
.find("neon.pageserver_connstring")
.expect("pageserver connstr should be provided");
let tenant = spec
.cluster
.settings
.find("neon.tenant_id")
.expect("tenant id should be provided");
let timeline = spec
.cluster
.settings
.find("neon.timeline_id")
.expect("timeline id should be provided");
let compute_state = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,
tenant,
timeline,
pageserver_connstr,
metrics: ComputeMetrics::new(),
state: RwLock::new(ComputeState::new()),
};
let compute = Arc::new(compute_state);
// Launch service threads first, so we are able to serve availability
// requests while configuration is still in progress.
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
// Run compute (Postgres) and hang waiting on it.
match compute.prepare_and_run() {
Ok(ec) => {
let code = ec.code().unwrap_or(1);
info!("Postgres exited with code {}, shutting down", code);
exit(code)
}
Err(error) => {
error!("could not start the compute node: {:?}", error);
let mut state = compute.state.write().unwrap();
state.error = Some(format!("{:?}", error));
state.status = ComputeStatus::Failed;
drop(state);
// Keep serving HTTP requests, so the cloud control plane is able to
// get the actual error.
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
info!("shutting down");
Err(error)
}
}
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -1,43 +0,0 @@
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
use tokio_postgres::NoTls;
use crate::compute::ComputeNode;
pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
let query = "
CREATE TABLE IF NOT EXISTS health_check (
id serial primary key,
updated_at timestamptz default now()
);
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();";
let result = client.simple_query(query)?;
if result.len() < 2 {
return Err(anyhow::format_err!("executed {} queries", result.len()));
}
Ok(())
}
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let result = client
.simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
.await?;
if result.len() != 1 {
return Err(anyhow!("statement can't be executed"));
}
Ok(())
}

View File

@@ -1,346 +0,0 @@
//
// XXX: This starts to be scarily similar to the `PostgresNode` from `control_plane`,
// but there are several things that make `PostgresNode` usage inconvenient in the
// cloud:
// - it inherits from `LocalEnv`, which contains absolutely all the information about
// a complete running service
// - it uses `PageServerNode` with information about the http endpoint, which we again
// do not need in the cloud
// - there are many tiny differences, for example, we do not use `pg_ctl` in the cloud
//
// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
// attributes (not required for the cloud). Yet, it is still tempting to unify these
// `PostgresNode` and `ComputeNode` and use one in both places.
//
// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
//
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::{Command, ExitStatus, Stdio};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use log::info;
use postgres::{Client, NoTls};
use serde::{Serialize, Serializer};
use crate::checker::create_writablity_check_data;
use crate::config;
use crate::pg_helpers::*;
use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub pgdata: String,
pub pgbin: String,
pub spec: ComputeSpec,
pub tenant: String,
pub timeline: String,
pub pageserver_connstr: String,
pub metrics: ComputeMetrics,
/// Volatile part of the `ComputeNode` so should be used under `RwLock`
/// to allow HTTP API server to serve status requests, while configuration
/// is in progress.
pub state: RwLock<ComputeState>,
}
fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
x.to_rfc3339().serialize(s)
}
#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ComputeState {
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity
#[serde(serialize_with = "rfc3339_serialize")]
pub last_active: DateTime<Utc>,
pub error: Option<String>,
}
impl ComputeState {
pub fn new() -> Self {
Self {
status: ComputeStatus::Init,
last_active: Utc::now(),
error: None,
}
}
}
impl Default for ComputeState {
fn default() -> Self {
Self::new()
}
}
#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ComputeStatus {
Init,
Running,
Failed,
}
#[derive(Serialize)]
pub struct ComputeMetrics {
pub sync_safekeepers_ms: AtomicU64,
pub basebackup_ms: AtomicU64,
pub config_ms: AtomicU64,
pub total_startup_ms: AtomicU64,
}
impl ComputeMetrics {
pub fn new() -> Self {
Self {
sync_safekeepers_ms: AtomicU64::new(0),
basebackup_ms: AtomicU64::new(0),
config_ms: AtomicU64::new(0),
total_startup_ms: AtomicU64::new(0),
}
}
}
impl Default for ComputeMetrics {
fn default() -> Self {
Self::new()
}
}
impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) {
self.state.write().unwrap().status = status;
}
pub fn get_status(&self) -> ComputeStatus {
self.state.read().unwrap().status
}
// Remove the `pgdata` directory and create it again with the right permissions.
fn create_pgdata(&self) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
// If it is something different then create_dir() will error out anyway.
let _ok = fs::remove_dir_all(&self.pgdata);
fs::create_dir(&self.pgdata)?;
fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
}
// Get a basebackup from the pageserver over a libpq connection (using `pageserver_connstr`)
// and unpack it into the `pgdata` directory, overriding all of its previous content.
fn get_basebackup(&self, lsn: &str) -> Result<()> {
let start_time = Utc::now();
let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
let basebackup_cmd = match lsn {
"0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
_ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
};
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata)?;
self.metrics.basebackup_ms.store(
Utc::now()
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
Ok(())
}
// Run `postgres` in a special mode with `--sync-safekeepers` argument
// and return the reported LSN back to the caller.
fn sync_safekeepers(&self) -> Result<String> {
let start_time = Utc::now();
let sync_handle = Command::new(&self.pgbin)
.args(&["--sync-safekeepers"])
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
.stdout(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
// `postgres --sync-safekeepers` will print all log output to stderr and
// final LSN to stdout. So we pipe only stdout, while stderr will be automatically
// redirected to the caller output.
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
sync_output.status,
String::from_utf8(sync_output.stdout)
.expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
);
}
self.metrics.sync_safekeepers_ms.store(
Utc::now()
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
Ok(lsn)
}
/// Do all the preparations like PGDATA directory creation, configuration,
/// safekeepers sync, basebackup, etc.
pub fn prepare_pgdata(&self) -> Result<()> {
let spec = &self.spec;
let pgdata_path = Path::new(&self.pgdata);
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
info!("starting safekeepers syncing");
let lsn = self
.sync_safekeepers()
.with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", lsn);
info!(
"getting basebackup@{} from pageserver {}",
lsn, &self.pageserver_connstr
);
self.get_basebackup(&lsn).with_context(|| {
format!(
"failed to get basebackup@{} from pageserver {}",
lsn, &self.pageserver_connstr
)
})?;
// Update pg_hba.conf received with basebackup.
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Start Postgres as a child process and manage DBs/roles.
/// After that this will hang waiting on the postmaster process to exit.
pub fn run(&self) -> Result<ExitStatus> {
let start_time = Utc::now();
let pgdata_path = Path::new(&self.pgdata);
// Run postgres as a child process.
let mut pg = Command::new(&self.pgbin)
.args(&["-D", &self.pgdata])
.spawn()
.expect("cannot start postgres process");
wait_for_postgres(&mut pg, pgdata_path)?;
// If the connection fails,
// it may be an old node with the `zenith_admin` superuser.
//
// In this case we need to connect with the old `zenith_admin` name
// and create a new user. We cannot simply rename the connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with the connection string with the expected username
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(self, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection
drop(client);
let startup_end_time = Utc::now();
self.metrics.config_ms.store(
startup_end_time
.signed_duration_since(start_time)
.to_std()
.unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
self.metrics.total_startup_ms.store(
startup_end_time
.signed_duration_since(self.start_time)
.to_std()
.unwrap()
.as_millis() as u64,
Ordering::Relaxed,
);
self.set_status(ComputeStatus::Running);
info!(
"finished configuration of compute for project {}",
self.spec.cluster.cluster_id
);
// Wait for child Postgres process basically forever. In this state Ctrl+C
// will propagate to Postgres and it will be shut down as well.
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
Ok(ecode)
}
pub fn prepare_and_run(&self) -> Result<ExitStatus> {
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
self.spec.cluster.cluster_id,
self.spec.operation_uuid.as_ref().unwrap(),
self.tenant,
self.timeline,
);
self.prepare_pgdata()?;
self.run()
}
}

View File

@@ -1,51 +0,0 @@
use std::fs::{File, OpenOptions};
use std::io;
use std::io::prelude::*;
use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::PgOptionsSerialize;
use crate::spec::ComputeSpec;
/// Check that `line` is inside a text file and put it there if it is not.
/// Create the file if it doesn't exist.
pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
let mut file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.append(false)
.open(path)?;
let buf = io::BufReader::new(&file);
let mut count: usize = 0;
for l in buf.lines() {
if l? == line {
return Ok(false);
}
count = 1;
}
write!(file, "{}{}", "\n".repeat(count), line)?;
Ok(true)
}
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
// File::create() destroys the file content if it exists.
let mut postgres_conf = File::create(path)?;
write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
Ok(())
}
// Write Postgres config block wrapped with generated comment section
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
writeln!(file, "# Managed by compute_ctl: begin")?;
writeln!(file, "{}", buf)?;
writeln!(file, "# Managed by compute_ctl: end")?;
Ok(())
}

View File

@@ -1,109 +0,0 @@
use std::convert::Infallible;
use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use log::{error, info};
use serde_json;
use crate::compute::{ComputeNode, ComputeStatus};
// Service function to handle all available routes.
async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
match (req.method(), req.uri().path()) {
// Timestamp of the last Postgres activity in plain text.
// DEPRECATED in favour of /status
(&Method::GET, "/last_activity") => {
info!("serving /last_activity GET request");
let state = compute.state.read().unwrap();
// Use RFC3339 format for consistency.
Response::new(Body::from(state.last_active.to_rfc3339()))
}
// Has compute setup process finished? -> true/false.
// DEPRECATED in favour of /status
(&Method::GET, "/ready") => {
info!("serving /ready GET request");
let status = compute.get_status();
Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
}
// Serialized compute state.
(&Method::GET, "/status") => {
info!("serving /status GET request");
let state = compute.state.read().unwrap();
Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
}
// Startup metrics in JSON format. Keep /metrics reserved for a possible
// future use for Prometheus metrics format.
(&Method::GET, "/metrics.json") => {
info!("serving /metrics.json GET request");
Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
}
// DEPRECATED, use POST instead
(&Method::GET, "/check_writability") => {
info!("serving /check_writability GET request");
let res = crate::checker::check_writability(&compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => Response::new(Body::from(e.to_string())),
}
}
(&Method::POST, "/check_writability") => {
info!("serving /check_writability POST request");
let res = crate::checker::check_writability(&compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => Response::new(Body::from(e.to_string())),
}
}
// Return the `404 Not Found` for any other routes.
_ => {
let mut not_found = Response::new(Body::from("404 Not Found"));
*not_found.status_mut() = StatusCode::NOT_FOUND;
not_found
}
}
}
// Main Hyper HTTP server function; it runs the server and blocks waiting on it forever.
#[tokio::main]
async fn serve(state: Arc<ComputeNode>) {
let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
let make_service = make_service_fn(move |_conn| {
let state = state.clone();
async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone();
async move { Ok::<_, Infallible>(routes(req, state).await) }
}))
}
});
info!("starting HTTP server on {}", addr);
let server = Server::bind(&addr).serve(make_service);
// Run this server forever
if let Err(e) = server.await {
error!("server error: {}", e);
}
}
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("http-endpoint".into())
.spawn(move || serve(state))?)
}

View File

@@ -1 +0,0 @@
pub mod api;

View File

@@ -1,158 +0,0 @@
openapi: "3.0.2"
info:
title: Compute node control API
version: "1.0"
servers:
- url: "http://localhost:3080"
paths:
/status:
get:
tags:
- "info"
summary: Get compute node internal status
description: ""
operationId: getComputeStatus
responses:
"200":
description: ComputeState
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeState"
/metrics.json:
get:
tags:
- "info"
summary: Get compute node startup metrics in JSON format
description: ""
operationId: getComputeMetricsJSON
responses:
"200":
description: ComputeMetrics
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeMetrics"
/ready:
get:
deprecated: true
tags:
- "info"
summary: Check whether compute startup process finished successfully
description: ""
operationId: computeIsReady
responses:
"200":
description: Compute is ready ('true') or not ('false')
content:
text/plain:
schema:
type: string
example: "true"
/last_activity:
get:
deprecated: true
tags:
- "info"
summary: Get timestamp of the last compute activity
description: ""
operationId: getLastComputeActivityTS
responses:
"200":
description: Timestamp of the last compute activity
content:
text/plain:
schema:
type: string
example: "2022-10-12T07:20:50.52Z"
/check_writability:
get:
deprecated: true
tags:
- "check"
summary: Check that we can write new data on this compute
description: ""
operationId: checkComputeWritabilityDeprecated
responses:
"200":
description: Check result
content:
text/plain:
schema:
type: string
description: Error text or 'true' if check passed
example: "true"
post:
tags:
- "check"
summary: Check that we can write new data on this compute
description: ""
operationId: checkComputeWritability
responses:
"200":
description: Check result
content:
text/plain:
schema:
type: string
description: Error text or 'true' if check passed
example: "true"
components:
securitySchemes:
JWT:
type: http
scheme: bearer
bearerFormat: JWT
schemas:
ComputeMetrics:
type: object
description: Compute startup metrics
required:
- sync_safekeepers_ms
- basebackup_ms
- config_ms
- total_startup_ms
properties:
sync_safekeepers_ms:
type: integer
basebackup_ms:
type: integer
config_ms:
type: integer
total_startup_ms:
type: integer
ComputeState:
type: object
required:
- status
- last_active
properties:
status:
$ref: '#/components/schemas/ComputeStatus'
last_active:
type: string
description: The last detected compute activity timestamp in UTC and RFC3339 format
example: "2022-10-12T07:20:50.52Z"
error:
type: string
description: Text of the error during compute startup, if any
ComputeStatus:
type: string
enum:
- init
- failed
- running
security:
- JWT: []

View File

@@ -1,14 +0,0 @@
//!
//! Various tools and helpers to handle cluster / compute node (Postgres)
//! configuration.
//!
pub mod checker;
pub mod config;
pub mod http;
#[macro_use]
pub mod logger;
pub mod compute;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;

View File

@@ -1,43 +0,0 @@
use std::io::Write;
use anyhow::Result;
use chrono::Utc;
use env_logger::{Builder, Env};
macro_rules! info_println {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
println!($($tts)*);
}
}
}
macro_rules! info_print {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
print!($($tts)*);
}
}
}
/// Initialize `env_logger` using either `default_level` or
/// `RUST_LOG` environment variable as default log level.
pub fn init_logger(default_level: &str) -> Result<()> {
let env = Env::default().filter_or("RUST_LOG", default_level);
Builder::from_env(env)
.format(|buf, record| {
let thread_handle = std::thread::current();
writeln!(
buf,
"{} [{}] {}: {}",
Utc::now().format("%Y-%m-%d %H:%M:%S%.3f %Z"),
thread_handle.name().unwrap_or("main"),
record.level(),
record.args()
)
})
.init();
Ok(())
}

View File

@@ -1,109 +0,0 @@
use std::sync::Arc;
use std::{thread, time};
use anyhow::Result;
use chrono::{DateTime, Utc};
use log::{debug, info};
use postgres::{Client, NoTls};
use crate::compute::ComputeNode;
const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
loop {
// Should be outside of the write lock to allow others to read while we sleep.
thread::sleep(timeout);
match &mut client {
Ok(cli) => {
if cli.is_closed() {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
continue;
}
// Get all running client backends except ourself, use RFC3339 DateTime format.
let backends = cli
.query(
"SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
&[],
);
let mut last_active = compute.state.read().unwrap().last_active;
if let Ok(backs) = backends {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() {
let state: String = b.get("state");
let change: String = b.get("state_change");
if state == "idle" {
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
Err(e) => {
info!("cannot parse backend state_change DateTime: {}", e);
continue;
}
}
} else {
// Found non-idle backend, so the last activity is NOW.
// Save it and exit the for loop. Also clear the idle backend
// `state_change` timestamps array as it doesn't matter now.
last_active = Utc::now();
idle_backs.clear();
break;
}
}
// Sort idle backend `state_change` timestamps. The last one corresponds
// to the last activity.
idle_backs.sort();
if let Some(last) = idle_backs.last() {
last_active = *last;
}
}
// Update the last activity in the shared state if we got a more recent one.
let mut state = compute.state.write().unwrap();
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {}", last_active);
}
}
Err(e) => {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
}
}
}
}
/// Launch a separate compute monitor thread and return its `JoinHandle`.
pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || watch_compute_activity(&state))?)
}

View File

@@ -1,3 +0,0 @@
pub const DEFAULT_LOG_LEVEL: &str = "info";
pub const DEFAULT_CONNSTRING: &str = "host=localhost user=postgres";
pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";

View File

@@ -1,350 +0,0 @@
use std::fmt::Write;
use std::fs;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::Child;
use std::time::{Duration, Instant};
use anyhow::{bail, Result};
use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction};
use serde::Deserialize;
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
/// Rust representation of Postgres role info with only those fields
/// that matter for us.
#[derive(Clone, Deserialize)]
pub struct Role {
pub name: PgIdent,
pub encrypted_password: Option<String>,
pub options: GenericOptions,
}
/// Rust representation of Postgres database info with only those fields
/// that matter for us.
#[derive(Clone, Deserialize)]
pub struct Database {
pub name: PgIdent,
pub owner: PgIdent,
pub options: GenericOptions,
}
/// Common type representing both SQL statement params with or without value,
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
/// options like `wal_level = logical`.
#[derive(Clone, Deserialize)]
pub struct GenericOption {
pub name: String,
pub value: Option<String>,
pub vartype: String,
}
/// Optional collection of `GenericOption`'s. Type alias allows us to
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
impl GenericOption {
/// Represent `GenericOption` as SQL statement parameter.
pub fn to_pg_option(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} '{}'", self.name, val),
_ => format!("{} {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
/// Represent `GenericOption` as configuration option.
pub fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
let name = match self.name.as_str() {
"safekeepers" => "neon.safekeepers",
"wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
"wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
it => it,
};
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", name, val),
_ => format!("{} = {}", name, val),
}
} else {
self.name.to_owned()
}
}
}
pub trait PgOptionsSerialize {
fn as_pg_options(&self) -> String;
fn as_pg_settings(&self) -> String;
}
impl PgOptionsSerialize for GenericOptions {
/// Serialize an optional collection of `GenericOption`'s to
/// Postgres SQL statement arguments.
fn as_pg_options(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_option())
.collect::<Vec<String>>()
.join(" ")
} else {
"".to_string()
}
}
/// Serialize an optional collection of `GenericOption`'s to
/// `postgresql.conf` compatible format.
fn as_pg_settings(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_setting())
.collect::<Vec<String>>()
.join("\n")
} else {
"".to_string()
}
}
}
pub trait GenericOptionsSearch {
fn find(&self, name: &str) -> Option<String>;
}
impl GenericOptionsSearch for GenericOptions {
/// Lookup option by name
fn find(&self, name: &str) -> Option<String> {
match &self {
Some(ops) => {
let op = ops.iter().find(|s| s.name == name);
match op {
Some(op) => op.value.clone(),
None => None,
}
}
None => None,
}
}
}
impl Role {
/// Serialize a list of role parameters into a Postgres-acceptable
/// string of arguments.
pub fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in Rails.
// For now we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string();
if let Some(pass) = &self.encrypted_password {
// Some time ago we supported only md5 and treated all encrypted_password as md5.
// Now we also support SCRAM-SHA-256 and, to preserve compatibility,
// we treat every encrypted_password as md5 unless it starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
write!(params, " PASSWORD '{pass}'")
.expect("String is documented not to error during write operations");
} else {
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented not to error during write operations");
}
} else {
params.push_str(" PASSWORD NULL");
}
params
}
}
impl Database {
/// Serialize a list of database parameters into a Postgres-acceptable
/// string of arguments.
/// NB: `TEMPLATE` is actually also an identifier, but so far we only need
/// to use `template0` and `template1`, so it is not a problem. Yet in the future
/// it may require proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.pg_quote())
.expect("String is documented not to error during write operations");
params
}
}
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
pub type PgIdent = String;
/// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL.
pub trait Escaping {
fn pg_quote(&self) -> String;
}
impl Escaping for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes the provided string with `""` and escapes every `"`.
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
fn pg_quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
result
}
}
/// Build a list of existing Postgres roles
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
let postgres_roles = xact
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
.iter()
.map(|row| Role {
name: row.get("rolname"),
encrypted_password: row.get("rolpassword"),
options: None,
})
.collect();
Ok(postgres_roles)
}
/// Build a list of existing Postgres databases
pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
let postgres_dbs = client
.query(
"SELECT datname, datdba::regrole::text as owner
FROM pg_catalog.pg_database;",
&[],
)?
.iter()
.map(|row| Database {
name: row.get("datname"),
owner: row.get("owner"),
options: None,
})
.collect();
Ok(postgres_dbs)
}
/// Wait for Postgres to become ready to accept connections. It's ready to
/// accept connections when the state-field in `pgdata/postmaster.pid` says
/// 'ready'.
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid");
// PostgreSQL writes line "ready" to the postmaster.pid file, when it has
// completed initialization and is ready to accept connections. We want to
// react quickly and perform the rest of our initialization as soon as
// PostgreSQL starts accepting connections. Use 'notify' to be notified
// whenever the PID file is changed, and whenever it changes, read it to
// check if it's now "ready".
//
// You cannot actually watch a file before it exists, so we first watch the
// data directory, and once the postmaster.pid file appears, we switch to
// watch the file instead. We also wake up every 100 ms to poll, just in
// case we miss some events for some reason. Not strictly necessary, but
// better safe than sorry.
let (tx, rx) = std::sync::mpsc::channel();
let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
let _ = tx.send(res);
}) {
Ok(watcher) => (Box::new(watcher), rx),
Err(e) => {
match e.kind {
notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
// docker on m1 macs does not support recommended_watcher
// but return "Function not implemented (os error 38)"
// see https://github.com/notify-rs/notify/issues/423
let (tx, rx) = std::sync::mpsc::channel();
// let's poll it faster than what we check the results for (100ms)
let config =
notify::Config::default().with_poll_interval(Duration::from_millis(50));
let watcher = notify::PollWatcher::new(
move |res| {
let _ = tx.send(res);
},
config,
)?;
(Box::new(watcher), rx)
}
_ => return Err(e.into()),
}
}
};
watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
let started_at = Instant::now();
let mut postmaster_pid_seen = false;
loop {
if let Ok(Some(status)) = pg.try_wait() {
// Postgres exited; that is not what we expected, so bail out early.
let code = status.code().unwrap_or(-1);
bail!("Postgres exited unexpectedly with code {}", code);
}
let res = rx.recv_timeout(Duration::from_millis(100));
log::debug!("woken up by notify: {res:?}");
// If there are multiple events in the channel already, we only need to
// check once. Swallow the extra events before we go ahead to check the
// pid file.
while let Ok(res) = rx.try_recv() {
log::debug!("swallowing extra event: {res:?}");
}
// Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) {
if !postmaster_pid_seen {
log::debug!("postmaster.pid appeared");
watcher
.unwatch(pgdata)
.expect("Failed to remove pgdata dir watch");
watcher
.watch(&pid_path, RecursiveMode::NonRecursive)
.expect("Failed to add postmaster.pid file watch");
postmaster_pid_seen = true;
}
let file = BufReader::new(file);
let last_line = file.lines().last();
// Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line {
let status = line.trim();
log::debug!("last line of postmaster.pid: {status:?}");
// Now Postgres is ready to accept connections
if status == "ready" {
break;
}
}
}
// Give up after POSTGRES_WAIT_TIMEOUT.
let duration = started_at.elapsed();
if duration >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
}
log::info!("PostgreSQL is now running, continuing to configure it");
Ok(())
}
/// Remove the `pgdata` directory and create it again with the right permissions.
pub fn create_pgdata(pgdata: &str) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
// If it is something different then create_dir() will error out anyway.
let _ok = fs::remove_dir_all(pgdata);
fs::create_dir(pgdata)?;
fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
}

View File

@@ -1,455 +0,0 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::config::Config;
use postgres::{Client, NoTls};
use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
/// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description.
#[derive(Clone, Deserialize)]
pub struct ComputeSpec {
pub format_version: f32,
pub timestamp: String,
pub operation_uuid: Option<String>,
/// Expected cluster state at the end of transition process.
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
}
/// Cluster state seen from the perspective of the external tools
/// like Rails web console.
#[derive(Clone, Deserialize)]
pub struct Cluster {
pub cluster_id: String,
pub name: String,
pub state: Option<String>,
pub roles: Vec<Role>,
pub databases: Vec<Database>,
pub settings: GenericOptions,
}
/// Single cluster state changing operation that could not be represented as
/// a static `Cluster` structure. For example:
/// - DROP DATABASE
/// - DROP ROLE
/// - ALTER ROLE name RENAME TO new_name
/// - ALTER DATABASE name RENAME TO new_name
#[derive(Clone, Deserialize)]
pub struct DeltaOp {
pub action: String,
pub name: PgIdent,
pub new_name: Option<PgIdent>,
}
/// It takes cluster specification and does the following:
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
/// - Update `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
// The file `postgresql.conf` is no longer included in `basebackup`, so just
// always write the whole config into it, creating a new file.
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Check `pg_hba.conf` and update if needed to allow external connections.
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
// XXX: consider making it a part of spec.json
info!("checking pg_hba.conf");
let pghba_path = pgdata_path.join("pg_hba.conf");
if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
info!("updated pg_hba.conf to allow external connections");
} else {
info!("pg_hba.conf is up-to-date");
}
Ok(())
}
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
// Print a list of existing Postgres roles (only in debug mode)
info!("postgres roles:");
for r in &existing_roles {
info_println!(
"{} - {}:{}",
" ".repeat(27 + 5),
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
for op in ops {
match op.action.as_ref() {
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
}
// Renaming role drops its password, since role name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
"rename_role" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
xact.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres roles info to handle possible roles renaming
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!("cluster spec roles:");
for role in &spec.cluster.roles {
let name = &role.name;
info_print!(
"{} - {}:{}",
" ".repeat(27 + 5),
name,
if role.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
if let Some(r) = pg_role {
let mut update_role = false;
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
update_role = true;
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether the password changed or not (trim the 'md5' prefix first)
update_role = pg_pwd[3..] != *role.encrypted_password.as_ref().unwrap();
}
if update_role {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
info_print!(" -> update");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
} else {
info!("role name: '{}'", &name);
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
info!("role create query: '{}'", &query);
info_print!(" -> create");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
}
info_print!("\n");
}
xact.commit()?;
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
for op in ops {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
for op in ops {
// We do not check whether the role exists or not;
// Postgres will take care of that for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
}
// Commit the transaction, otherwise the role deletions above would be rolled back.
xact.commit()?;
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
let mut client = conf.connect(NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.pg_quote(),
db.owner.pg_quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()`, except that we
/// do not use an explicit transaction block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks,
/// which together give us idempotency.
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only in debug mode)
info!("postgres databases:");
for r in &existing_dbs {
info_println!("{} - {}:{}", " ".repeat(27 + 5), r.name, r.owner);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on databases");
for op in ops {
match op.action.as_ref() {
// We do not check whether the DB exists or not;
// Postgres will take care of that for us
"delete_db" => {
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
warn!("deleting database '{}'", &op.name);
client.execute(query.as_str(), &[])?;
}
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_dbs.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
client.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres databases info to handle possible renames
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
info!("cluster spec databases:");
for db in &spec.cluster.databases {
let name = &db.name;
info_print!("{} - {}:{}", " ".repeat(27 + 5), db.name, db.owner);
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
let pg_db = existing_dbs.iter().find(|r| r.name == *name);
if let Some(r) = pg_db {
// XXX: the db owner name is returned from Postgres as a quoted string
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
db.owner.pg_quote()
} else {
db.owner.clone()
};
if new_owner != r.owner {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.pg_quote(),
db.owner.pg_quote()
);
info_print!(" -> update");
client.execute(query.as_str(), &[])?;
}
} else {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
info_print!(" -> create");
query.push_str(&db.to_pg_options());
client.execute(query.as_str(), &[])?;
}
info_print!("\n");
}
Ok(())
}
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. And also we grant a
// read / write all data privilege to every role. So also grant
// create to everyone.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.pg_quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.pg_quote(),
roles.join(", ")
);
info!("grant query {}", &query);
client.execute(query.as_str(), &[])?;
}
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
for db in &node.spec.cluster.databases {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it the owner of the `public` schema will be `cloud_admin`
// and the database owner cannot do anything with it. The SQL procedure ensures
// that it won't error out if the schema `public` doesn't exist.
let alter_query = format!(
"DO $$\n\
DECLARE\n\
schema_owner TEXT;\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
SELECT nspowner::regrole::text\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
INTO schema_owner;\n\
\n\
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
THEN\n\
ALTER SCHEMA public OWNER TO {};\n\
END IF;\n\
END IF;\n\
END\n\
$$;",
db.owner.pg_quote()
);
db_client.simple_query(&alter_query)?;
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
// This is needed because since postgres 15 this privilege is removed by default.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
) AND\n\
current_setting('server_version_num')::int/10000 >= 15\n\
THEN\n\
IF EXISTS(\n\
SELECT rolname\n\
FROM pg_catalog.pg_roles\n\
WHERE rolname = 'web_access'\n\
)\n\
THEN\n\
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
END\n\
$$;"
.to_string();
info!("grant query for db {} : {}", &db.name, &grant_query);
db_client.simple_query(&grant_query)?;
}
Ok(())
}

View File

@@ -1,204 +0,0 @@
{
"format_version": 1.0,
"timestamp": "2021-05-23T18:25:43.511Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
"cluster": {
"cluster_id": "test-cluster-42",
"name": "Zenith Test",
"state": "restarted",
"roles": [
{
"name": "postgres",
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "alexk",
"encrypted_password": null,
"options": null
},
{
"name": "zenith \"new\"",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "zen",
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "\"name\";\\n select 1;",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "MyRole",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
}
],
"databases": [
{
"name": "DB2",
"owner": "alexk",
"options": [
{
"name": "LC_COLLATE",
"value": "C",
"vartype": "string"
},
{
"name": "LC_CTYPE",
"value": "C",
"vartype": "string"
},
{
"name": "TEMPLATE",
"value": "template0",
"vartype": "enum"
}
]
},
{
"name": "zenith",
"owner": "MyRole"
},
{
"name": "zen",
"owner": "zen"
}
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "neon.safekeepers",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "shared_buffers",
"value": "32768",
"vartype": "integer"
},
{
"name": "port",
"value": "55432",
"vartype": "integer"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "wal_sender_timeout",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "maintenance_work_mem",
"value": "65536",
"vartype": "integer"
},
{
"name": "max_parallel_workers",
"value": "8",
"vartype": "integer"
},
{
"name": "max_worker_processes",
"value": "8",
"vartype": "integer"
},
{
"name": "neon.tenant_id",
"value": "b0554b632bd4d547a63b86c3630317e8",
"vartype": "string"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "neon.timeline_id",
"value": "2414a61ffc94e428f14b5758fe308e13",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"vartype": "string"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
}
]
},
"delta_operations": [
{
"action": "delete_db",
"name": "zenith_test"
},
{
"action": "rename_db",
"name": "DB",
"new_name": "DB2"
},
{
"action": "delete_role",
"name": "zenith2"
},
{
"action": "rename_role",
"name": "zenith new",
"new_name": "zenith \"new\""
}
]
}

View File

@@ -1,48 +0,0 @@
#[cfg(test)]
mod config_tests {
use std::fs::{remove_file, File};
use std::io::{Read, Write};
use std::path::Path;
use compute_tools::config::*;
fn write_test_file(path: &Path, content: &str) {
let mut file = File::create(path).unwrap();
file.write_all(content.as_bytes()).unwrap();
}
fn check_file_content(path: &Path, expected_content: &str) {
let mut file = File::open(path).unwrap();
let mut content = String::new();
file.read_to_string(&mut content).unwrap();
assert_eq!(content, expected_content);
}
#[test]
fn test_line_in_file() {
let path = Path::new("./tests/tmp/config_test.txt");
write_test_file(path, "line1\nline2.1\t line2.2\nline3");
let line = "line2.1\t line2.2";
let result = line_in_file(path, line).unwrap();
assert!(!result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3\nline4");
remove_file(path).unwrap();
let path = Path::new("./tests/tmp/new_config_test.txt");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line4");
remove_file(path).unwrap();
}
}

View File

@@ -1,41 +0,0 @@
#[cfg(test)]
mod pg_helpers_tests {
use std::fs::File;
use compute_tools::pg_helpers::*;
use compute_tools::spec::ComputeSpec;
#[test]
fn params_serialize() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.cluster.databases.first().unwrap().to_pg_options(),
"LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \"alexk\""
);
assert_eq!(
spec.cluster.roles.first().unwrap().to_pg_options(),
"LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
);
}
#[test]
fn settings_serialize() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.cluster.settings.as_pg_settings(),
"fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
);
}
#[test]
fn ident_pg_quote() {
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
}
}

View File

@@ -1 +0,0 @@
**/*

View File

@@ -1,27 +1,30 @@
[package]
name = "control_plane"
version = "0.1.0"
edition = "2021"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = "4.0"
comfy-table = "6.1"
git-version = "0.3.5"
tar = "0.4.38"
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
rand = "0.8.3"
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
serde_json = "1"
toml = "0.5"
once_cell = "1.13.0"
lazy_static = "1.4"
regex = "1"
anyhow = "1.0"
thiserror = "1"
nix = "0.25"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
bytes = "1.0.1"
nix = "0.20"
url = "2.2.2"
hex = { version = "0.4.3", features = ["serde"] }
reqwest = { version = "0.11", features = ["blocking", "json"] }
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
# instead, so that recompile times are better.
pageserver_api = { path = "../libs/pageserver_api" }
safekeeper_api = { path = "../libs/safekeeper_api" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
postgres_ffi = { path = "../postgres_ffi" }
zenith_utils = { path = "../zenith_utils" }
workspace_hack = { path = "../workspace_hack" }

View File

@@ -1,20 +0,0 @@
# Page server and three safekeepers.
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
pg_port = 5454
http_port = 7676
[[safekeepers]]
id = 2
pg_port = 5455
http_port = 7677
[[safekeepers]]
id = 3
pg_port = 5456
http_port = 7678

View File

@@ -1,14 +0,0 @@
# Minimal neon environment with one safekeeper. This is equivalent to the built-in
# defaults that you get with no --config
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
pg_port = 5454
http_port = 7676
[etcd_broker]
broker_endpoints = ['http://127.0.0.1:2379']

File diff suppressed because it is too large

View File

@@ -1,25 +1,24 @@
use std::collections::BTreeMap;
use std::fs::{self, File};
use std::fs::{self, File, OpenOptions};
use std::io::Write;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::{collections::BTreeMap, path::PathBuf};
use anyhow::{Context, Result};
use utils::{
connstring::connection_host_port,
id::{TenantId, TimelineId},
lsn::Lsn,
postgres_backend::AuthType,
};
use lazy_static::lazy_static;
use regex::Regex;
use zenith_utils::connstring::connection_host_port;
use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
use crate::postgresql_conf::PostgresConf;
use crate::local_env::LocalEnv;
use crate::storage::PageServerNode;
//
@@ -28,7 +27,7 @@ use crate::storage::PageServerNode;
pub struct ComputeControlPlane {
base_port: u16,
pageserver: Arc<PageServerNode>,
pub nodes: BTreeMap<(TenantId, String), Arc<PostgresNode>>,
pub nodes: BTreeMap<(ZTenantId, String), Arc<PostgresNode>>,
env: LocalEnv,
}
@@ -38,8 +37,10 @@ impl ComputeControlPlane {
// pgdatadirs
// |- tenants
// | |- <tenant_id>
// | | |- <node name>
// | | |- <branch name>
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
// TODO: since pageserver do not have config file yet we believe here that
// it is running on default port. Change that when pageserver will have config.
let pageserver = Arc::new(PageServerNode::from_env(&env));
let mut nodes = BTreeMap::default();
@@ -53,7 +54,7 @@ impl ComputeControlPlane {
.with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
{
let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node));
nodes.insert((node.tenantid, node.name.clone()), Arc::new(node));
}
}
@@ -74,34 +75,43 @@ impl ComputeControlPlane {
.unwrap_or(self.base_port)
}
pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
ComputeControlPlane {
base_port: 65431,
pageserver: Arc::clone(pageserver),
nodes: BTreeMap::new(),
env: local_env.clone(),
}
}
pub fn new_node(
&mut self,
tenant_id: TenantId,
name: &str,
timeline_id: TimelineId,
lsn: Option<Lsn>,
tenantid: ZTenantId,
branch_name: &str,
port: Option<u16>,
pg_version: u32,
) -> Result<Arc<PostgresNode>> {
let timeline_id = self
.pageserver
.branch_get_by_name(&tenantid, branch_name)?
.timeline_id;
let port = port.unwrap_or_else(|| self.get_port());
let node = Arc::new(PostgresNode {
name: name.to_owned(),
name: branch_name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
is_test: false,
timeline_id,
lsn,
tenant_id,
timelineid: timeline_id,
tenantid,
uses_wal_proposer: false,
pg_version,
});
node.create_pgdata()?;
node.setup_pg_conf(self.env.pageserver.auth_type)?;
node.setup_pg_conf(self.env.auth_type)?;
self.nodes
.insert((tenant_id, node.name.clone()), Arc::clone(&node));
.insert((tenantid, node.name.clone()), Arc::clone(&node));
Ok(node)
}
@@ -116,11 +126,9 @@ pub struct PostgresNode {
pub env: LocalEnv,
pageserver: Arc<PageServerNode>,
is_test: bool,
pub timeline_id: TimelineId,
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
pub tenant_id: TenantId,
pub timelineid: ZTimelineId,
pub tenantid: ZTenantId,
uses_wal_proposer: bool,
pg_version: u32,
}
impl PostgresNode {
@@ -136,36 +144,76 @@ impl PostgresNode {
);
}
lazy_static! {
static ref CONF_PORT_RE: Regex = Regex::new(r"(?m)^\s*port\s*=\s*(\d+)\s*$").unwrap();
static ref CONF_TIMELINE_RE: Regex =
Regex::new(r"(?m)^\s*zenith.zenith_timeline\s*=\s*'(\w+)'\s*$").unwrap();
static ref CONF_TENANT_RE: Regex =
Regex::new(r"(?m)^\s*zenith.zenith_tenant\s*=\s*'(\w+)'\s*$").unwrap();
}
// parse data directory name
let fname = entry.file_name();
let name = fname.to_str().unwrap().to_string();
// Read config file into memory
// find out tcp port in config file
let cfg_path = entry.path().join("postgresql.conf");
let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
let conf = PostgresConf::read(&mut conf_file)
.with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
let config = fs::read_to_string(cfg_path.clone()).with_context(|| {
format!(
"failed to read config file in {}",
cfg_path.to_str().unwrap()
)
})?;
// Read a few options from the config file
let context = format!("in config file {}", cfg_path_str);
let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
// parse port
let err_msg = format!(
"failed to find port definition in config file {}",
cfg_path.to_str().unwrap()
);
let port: u16 = CONF_PORT_RE
.captures(config.as_str())
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 1"))?
.iter()
.last()
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 2"))?
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 3"))?
.as_str()
.parse()
.with_context(|| err_msg)?;
// Read postgres version from PG_VERSION file to determine which postgres version binary to use.
// If it doesn't exist, assume broken data directory and use default pg version.
let pg_version_path = entry.path().join("PG_VERSION");
// parse timeline
let err_msg = format!(
"failed to find timeline definition in config file {}",
cfg_path.to_str().unwrap()
);
let timelineid: ZTimelineId = CONF_TIMELINE_RE
.captures(config.as_str())
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 1"))?
.iter()
.last()
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 2"))?
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 3"))?
.as_str()
.parse()
.with_context(|| err_msg)?;
let pg_version_str =
fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
let pg_version = u32::from_str(&pg_version_str)?;
// parse tenant
let err_msg = format!(
"failed to find tenant definition in config file {}",
cfg_path.to_str().unwrap()
);
let tenantid = CONF_TENANT_RE
.captures(config.as_str())
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 1"))?
.iter()
.last()
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 2"))?
.ok_or_else(|| anyhow::Error::msg(err_msg.clone() + " 3"))?
.as_str()
.parse()
.with_context(|| err_msg)?;
// parse recovery_target_lsn, if any
let recovery_target_lsn: Option<Lsn> =
conf.parse_field_optional("recovery_target_lsn", &context)?;
let uses_wal_proposer = config.contains("wal_acceptors");
// ok now
Ok(PostgresNode {
@@ -174,38 +222,23 @@ impl PostgresNode {
env: env.clone(),
pageserver: Arc::clone(pageserver),
is_test: false,
timeline_id,
lsn: recovery_target_lsn,
tenant_id,
timelineid,
tenantid,
uses_wal_proposer,
pg_version,
})
}
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
let mut cmd = Command::new(&pg_path);
cmd.arg("--sync-safekeepers")
fn sync_walkeepers(&self) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir().join("postgres");
let sync_handle = Command::new(pg_path)
.arg("--sync-safekeepers")
.env_clear()
.env(
"LD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
)
.env(
"DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
)
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("PGDATA", self.pgdata().to_str().unwrap())
.stdout(Stdio::piped())
// Comment this to avoid capturing stderr (useful if command hangs)
.stderr(Stdio::piped());
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);
}
let sync_handle = cmd
.stderr(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
@@ -220,7 +253,7 @@ impl PostgresNode {
}
let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
println!("Safekeepers synced on {}", lsn);
println!("Walkeepers synced on {}", lsn);
Ok(lsn)
}
@@ -234,29 +267,24 @@ impl PostgresNode {
);
let sql = if let Some(lsn) = lsn {
format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn)
format!("basebackup {} {} {}", self.tenantid, self.timelineid, lsn)
} else {
format!("basebackup {} {}", self.tenant_id, self.timeline_id)
format!("basebackup {} {}", self.tenantid, self.timelineid)
};
let mut client = self
.pageserver
.page_server_psql_client()
.context("connecting to page server failed")?;
.with_context(|| "connecting to page server failed")?;
let copyreader = client
.copy_out(sql.as_str())
.context("page server 'basebackup' command failed")?;
.with_context(|| "page server 'basebackup' command failed")?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata())
.context("extracting base backup failed")?;
tar::Archive::new(copyreader)
.unpack(&self.pgdata())
.with_context(|| "extracting page backup failed")?;
Ok(())
}
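
The set_ignore_zeros(true) call added above matters because the basebackup arrives over a COPY stream. A sketch of the same pattern against any Read source (an in-memory buffer here, not a real CopyOutReader):

use std::io::Cursor;
use std::path::Path;

fn unpack_stream(data: Vec<u8>, dest: &Path) -> std::io::Result<()> {
    let mut ar = tar::Archive::new(Cursor::new(data));
    // Keep reading past the end-of-archive marker so all remaining Copy data
    // is consumed (see the comment in do_basebackup above).
    ar.set_ignore_zeros(true);
    ar.unpack(dest)
}
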
@@ -277,117 +305,88 @@ impl PostgresNode {
})
}
// Write postgresql.conf with default configuration
// and PG_VERSION file to the data directory of a new node.
// Connect to a page server, get base backup, and untar it to initialize a
// new data directory
fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10");
conf.append("wal_log_hints", "off");
conf.append("max_replication_slots", "10");
conf.append("hot_standby", "on");
conf.append("shared_buffers", "1MB");
conf.append("fsync", "off");
conf.append("max_connections", "100");
conf.append("wal_level", "replica");
// wal_sender_timeout is the maximum time to wait for WAL replication.
// It also defines how often the walreciever will send a feedback message to the wal sender.
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
conf.append("wal_keep_size", "0");
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off");
File::create(self.pgdata().join("postgresql.conf").to_str().unwrap())?;
// Configure the node to fetch pages from pageserver
let pageserver_connstr = {
let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
self.append_conf(
"postgresql.conf",
&format!(
"max_wal_senders = 10\n\
wal_log_hints = on\n\
max_replication_slots = 10\n\
hot_standby = on\n\
shared_buffers = 1MB\n\
fsync = off\n\
max_connections = 100\n\
wal_sender_timeout = 0\n\
wal_level = replica\n\
zenith.file_cache_size = 4096\n\
zenith.file_cache_path = '/tmp/file.cache'\n\
listen_addresses = '{address}'\n\
port = {port}\n",
address = self.address.ip(),
port = self.address.port()
),
)?;
// Set up authentication
//
// $ZENITH_AUTH_TOKEN will be replaced with value from environment
// variable during compute pg startup. It is done this way because
// otherwise user will be able to retrieve the value using SHOW
// command or pg_settings
let password = if let AuthType::NeonJWT = auth_type {
"$ZENITH_AUTH_TOKEN"
} else {
""
};
// NOTE avoiding spaces in connection string, because it is less error prone if we forward it somewhere.
// Also note that not all parameters are supported here. Because in compute we substitute $ZENITH_AUTH_TOKEN
// We parse this string and build it back with token from env var, and for simplicity rebuild
// uses only needed variables namely host, port, user, password.
format!("postgresql://no_user:{password}@{host}:{port}")
};
conf.append("shared_preload_libraries", "neon");
conf.append_line("");
conf.append("neon.pageserver_connstring", &pageserver_connstr);
conf.append("neon.tenant_id", &self.tenant_id.to_string());
conf.append("neon.timeline_id", &self.timeline_id.to_string());
if let Some(lsn) = self.lsn {
conf.append("recovery_target_lsn", &lsn.to_string());
}
// Never clean up old WAL. TODO: We should use a replication
// slot or something proper, to prevent the compute node
// from removing WAL that hasn't been streamed to the safekeeper or
// page server yet. (gh issue #349)
self.append_conf("postgresql.conf", "wal_keep_size='10TB'\n")?;
conf.append_line("");
// Configure backpressure
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
// Actually latency should be much smaller (better if < 1sec). But we assume that recently
// updates pages are not requested from pageserver.
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {
// Configure the node to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
let safekeepers = self
.env
.safekeepers
.iter()
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("neon.safekeepers", &safekeepers);
// set up authentication
let password = if let AuthType::ZenithJWT = auth_type {
"$ZENITH_AUTH_TOKEN"
} else {
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,
// so set more relaxed synchronous_commit.
conf.append("synchronous_commit", "remote_write");
""
};
// Configure the node to stream WAL directly to the pageserver
// This isn't really a supported configuration, but can be useful for
// testing.
conf.append("synchronous_standby_names", "pageserver");
}
// Configure that node to take pages from pageserver
let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
self.append_conf(
"postgresql.conf",
format!(
concat!(
"shared_preload_libraries = zenith\n",
// $ZENITH_AUTH_TOKEN will be replaced with value from environment variable during compute pg startup
// it is done this way because otherwise user will be able to retrieve the value using SHOW command or pg_settings
"zenith.page_server_connstring = 'host={} port={} password={}'\n",
"zenith.zenith_timeline='{}'\n",
"zenith.zenith_tenant='{}'\n",
),
host, port, password, self.timelineid, self.tenantid,
)
.as_str(),
)?;
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
file.write_all(conf.to_string().as_bytes())?;
let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
file.write_all(self.pg_version.to_string().as_bytes())?;
// Configure the node to stream WAL directly to the pageserver
self.append_conf(
"postgresql.conf",
format!(
concat!(
"synchronous_standby_names = 'pageserver'\n", // TODO: add a new function arg?
"zenith.callmemaybe_connstring = '{}'\n", // FIXME escaping
),
self.connstr(),
)
.as_str(),
)?;
Ok(())
}
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
let backup_lsn = if let Some(lsn) = self.lsn {
Some(lsn)
} else if self.uses_wal_proposer {
fn load_basebackup(&self) -> Result<()> {
let lsn = if self.uses_wal_proposer {
// LSN 0 means that it is bootstrap and we need to download just
// latest data from the pageserver. That is a bit clumsy but whole bootstrap
// procedure evolves quite actively right now, so let's think about it again
// when things would be more stable (TODO).
let lsn = self.sync_safekeepers(auth_token, self.pg_version)?;
let lsn = self.sync_walkeepers()?;
if lsn == Lsn(0) {
None
} else {
@@ -397,13 +396,13 @@ impl PostgresNode {
None
};
self.do_basebackup(backup_lsn)?;
self.do_basebackup(lsn)?;
Ok(())
}
pub fn pgdata(&self) -> PathBuf {
self.env.pg_data_dir(&self.tenant_id, &self.name)
self.env.pg_data_dir(&self.tenantid, &self.name)
}
pub fn status(&self) -> &str {
@@ -419,8 +418,16 @@ impl PostgresNode {
}
}
pub fn append_conf(&self, config: &str, opts: &str) -> Result<()> {
OpenOptions::new()
.append(true)
.open(self.pgdata().join(config).to_str().unwrap())?
.write_all(opts.as_bytes())?;
Ok(())
}
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
let pg_ctl_path = self.env.pg_bin_dir().join("pg_ctl");
let mut cmd = Command::new(pg_ctl_path);
cmd.args(
[
@@ -436,26 +443,16 @@ impl PostgresNode {
.concat(),
)
.env_clear()
.env(
"LD_LIBRARY_PATH",
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
)
.env(
"DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
);
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);
}
let pg_ctl = cmd.status().with_context(|| "pg_ctl failed")?;
let pg_ctl = cmd.output().context("pg_ctl failed")?;
if !pg_ctl.status.success() {
anyhow::bail!(
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
pg_ctl.status,
String::from_utf8_lossy(&pg_ctl.stdout),
String::from_utf8_lossy(&pg_ctl.stderr),
);
if !pg_ctl.success() {
anyhow::bail!("pg_ctl failed");
}
Ok(())
}
@@ -482,11 +479,7 @@ impl PostgresNode {
fs::write(&postgresql_conf_path, postgresql_conf)?;
// 3. Load basebackup
self.load_basebackup(auth_token)?;
if self.lsn.is_some() {
File::create(self.pgdata().join("standby.signal"))?;
}
self.load_basebackup()?;
// 4. Finally start the compute node postgres
println!("Starting postgres node at '{}'", self.connstr());
@@ -523,7 +516,7 @@ impl PostgresNode {
"host={} port={} user={} dbname={}",
self.address.ip(),
self.address.port(),
"cloud_admin",
"zenith_admin",
"postgres"
)
}
@@ -534,7 +527,9 @@ impl PostgresNode {
.output()
.expect("failed to execute whoami");
assert!(output.status.success(), "whoami failed");
if !output.status.success() {
panic!("whoami failed");
}
String::from_utf8(output.stdout).unwrap().trim().to_string()
}

View File

@@ -1,101 +0,0 @@
use std::{
fs,
path::PathBuf,
process::{Command, Stdio},
};
use anyhow::Context;
use nix::{
sys::signal::{kill, Signal},
unistd::Pid,
};
use crate::{local_env, read_pidfile};
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_broker = &env.etcd_broker;
println!(
"Starting etcd broker using {}",
etcd_broker.etcd_binary_path.display()
);
let etcd_data_dir = env.base_data_dir.join("etcd");
fs::create_dir_all(&etcd_data_dir).with_context(|| {
format!(
"Failed to create etcd data dir: {}",
etcd_data_dir.display()
)
})?;
let etcd_stdout_file =
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
format!(
"Failed to create etcd stout file in directory {}",
etcd_data_dir.display()
)
})?;
let etcd_stderr_file =
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
format!(
"Failed to create etcd stderr file in directory {}",
etcd_data_dir.display()
)
})?;
let client_urls = etcd_broker.comma_separated_endpoints();
let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
.args(&[
format!("--data-dir={}", etcd_data_dir.display()),
format!("--listen-client-urls={client_urls}"),
format!("--advertise-client-urls={client_urls}"),
// Set --quota-backend-bytes to keep the etcd virtual memory
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
// etcd doesn't compact (vacuum) with default settings,
// enable it to prevent space exhaustion.
"--auto-compaction-mode=revision".to_string(),
"--auto-compaction-retention=1".to_string(),
])
.stdout(Stdio::from(etcd_stdout_file))
.stderr(Stdio::from(etcd_stderr_file))
.spawn()
.context("Failed to spawn etcd subprocess")?;
let pid = etcd_process.id();
let etcd_pid_file_path = etcd_pid_file_path(env);
fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
format!(
"Failed to create etcd pid file at {}",
etcd_pid_file_path.display()
)
})?;
Ok(())
}
pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_path = &env.etcd_broker.etcd_binary_path;
println!("Stopping etcd broker at {}", etcd_path.display());
let etcd_pid_file_path = etcd_pid_file_path(env);
let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
format!(
"Failed to read etcd pid file at {}",
etcd_pid_file_path.display()
)
})?);
kill(pid, Signal::SIGTERM).with_context(|| {
format!(
"Failed to stop etcd with pid {pid} at {}",
etcd_pid_file_path.display()
)
})?;
Ok(())
}
fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
env.base_data_dir.join("etcd.pid")
}

View File

@@ -9,13 +9,9 @@
use anyhow::{anyhow, bail, Context, Result};
use std::fs;
use std::path::Path;
use std::process::Command;
pub mod compute;
pub mod etcd;
pub mod local_env;
pub mod postgresql_conf;
pub mod safekeeper;
pub mod storage;
/// Read a PID file
@@ -33,32 +29,3 @@ pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
}
Ok(pid)
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}
}
cmd
}
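
A hypothetical caller of the two helpers above, inside the same crate, spawning a storage binary with only the selected environment variables forwarded (binary path and arguments are illustrative):

use std::path::Path;
use std::process::{Child, Command};

fn spawn_with_forwarded_env(bin: &Path) -> anyhow::Result<Child> {
    let mut cmd = Command::new(bin);
    // env_clear() happens inside fill_rust_env_vars, so only the variables
    // explicitly forwarded by the helpers reach the child process.
    fill_rust_env_vars(&mut cmd); // RUST_BACKTRACE, RUST_LOG, LLVM_PROFILE_FILE
    fill_aws_secrets_vars(&mut cmd); // AWS_* credentials, when present
    Ok(cmd.arg("--version").spawn()?)
}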

View File

@@ -1,246 +1,74 @@
//! This module is responsible for locating and loading paths in a local setup.
//!
//! Now it also provides init method which acts like a stub for proper installation
//! script which will use local paths.
use anyhow::{bail, ensure, Context};
use reqwest::Url;
//
// This module is responsible for locating and loading paths in a local setup.
//
// Now it also provides init method which acts like a stub for proper installation
// script which will use local paths.
//
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::process::{Command, Stdio};
use utils::{
auth::{encode_from_key_file, Claims, Scope},
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
postgres_backend::AuthType,
};
use crate::safekeeper::SafekeeperNode;
pub const DEFAULT_PG_VERSION: u32 = 14;
use zenith_utils::auth::{encode_from_key_path, Claims, Scope};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
//
// This data structures represents neon_local CLI config
// This data structures represent deserialized zenith CLI config
//
// It is deserialized from the .neon/config file, or the config file passed
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
// Pageserver connection settings
pub pageserver_pg_port: u16,
pub pageserver_http_port: u16,
// Base directory for both pageserver and compute nodes
pub base_data_dir: PathBuf,
// Path to postgres distribution. It's expected that "bin", "include",
// "lib", "share" from postgres distribution are there. If at some point
// in time we will be able to run against vanilla postgres we may split that
// to four separate paths and match OS-specific installation layout.
#[serde(default)]
pub pg_distrib_dir: PathBuf,
// Path to pageserver binary.
#[serde(default)]
pub neon_distrib_dir: PathBuf,
pub zenith_distrib_dir: PathBuf,
// Default tenant ID to use with the 'neon_local' command line utility, when
// --tenant_id is not explicitly specified.
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub default_tenant_id: Option<TenantId>,
// keeping tenant id in config to reduce copy paste when running zenith locally with single tenant
#[serde(with = "hex")]
pub tenantid: ZTenantId,
// used to issue tokens during e.g pg start
#[serde(default)]
pub private_key_path: PathBuf,
pub etcd_broker: EtcdBroker,
pub pageserver: PageServerConf,
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
#[serde_as(as = "HashMap<_, Vec<(DisplayFromStr, DisplayFromStr)>>")]
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
/// Etcd broker config for cluster internal communication.
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct EtcdBroker {
/// A prefix to all to any key when pushing/polling etcd from a node.
#[serde(default)]
pub broker_etcd_prefix: Option<String>,
/// Broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
#[serde(default)]
#[serde_as(as = "Vec<DisplayFromStr>")]
pub broker_endpoints: Vec<Url>,
/// Etcd binary path to use.
#[serde(default)]
pub etcd_binary_path: PathBuf,
}
impl EtcdBroker {
pub fn locate_etcd() -> anyhow::Result<PathBuf> {
let which_output = Command::new("which")
.arg("etcd")
.output()
.context("Failed to run 'which etcd' command")?;
let stdout = String::from_utf8_lossy(&which_output.stdout);
ensure!(
which_output.status.success(),
"'which etcd' invocation failed. Status: {}, stdout: {stdout}, stderr: {}",
which_output.status,
String::from_utf8_lossy(&which_output.stderr)
);
let etcd_path = PathBuf::from(stdout.trim());
ensure!(
etcd_path.is_file(),
"'which etcd' invocation was successful, but the path it returned is not a file or does not exist: {}",
etcd_path.display()
);
Ok(etcd_path)
}
pub fn comma_separated_endpoints(&self) -> String {
self.broker_endpoints
.iter()
.map(|url| {
// URL by default adds a '/' path at the end, which is not what etcd CLI wants.
let url_string = url.as_str();
if url_string.ends_with('/') {
&url_string[0..url_string.len() - 1]
} else {
url_string
}
})
.fold(String::new(), |mut comma_separated_urls, url| {
if !comma_separated_urls.is_empty() {
comma_separated_urls.push(',');
}
comma_separated_urls.push_str(url);
comma_separated_urls
})
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct PageServerConf {
// node id
pub id: NodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
// jwt auth token used for communication with pageserver
pub auth_token: String,
// used to determine which auth type is used
pub auth_type: AuthType,
// jwt auth token used for communication with pageserver
pub auth_token: String,
}
impl Default for PageServerConf {
fn default() -> Self {
Self {
id: NodeId(0),
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
auth_type: AuthType::Trust,
auth_token: String::new(),
}
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
pub id: NodeId,
pub pg_port: u16,
pub http_port: u16,
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
}
impl Default for SafekeeperConf {
fn default() -> Self {
Self {
id: NodeId(0),
pg_port: 0,
http_port: 0,
sync: true,
remote_storage: None,
backup_threads: None,
auth_enabled: false,
}
}
// used to issue tokens during e.g pg start
pub private_key_path: PathBuf,
}
impl LocalEnv {
pub fn pg_distrib_dir_raw(&self) -> PathBuf {
self.pg_distrib_dir.clone()
// postgres installation paths
pub fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
}
pub fn pg_lib_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("lib")
}
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();
match pg_version {
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
Ok(self.neon_distrib_dir.join("pageserver"))
}
pub fn safekeeper_bin(&self) -> anyhow::Result<PathBuf> {
Ok(self.neon_distrib_dir.join("safekeeper"))
pub fn pageserver_bin(&self) -> Result<PathBuf> {
Ok(self.zenith_distrib_dir.join("pageserver"))
}
pub fn pg_data_dirs_path(&self) -> PathBuf {
self.base_data_dir.join("pgdatadirs").join("tenants")
}
pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf {
pub fn pg_data_dir(&self, tenantid: &ZTenantId, branch_name: &str) -> PathBuf {
self.pg_data_dirs_path()
.join(tenant_id.to_string())
.join(tenantid.to_string())
.join(branch_name)
}
@@ -248,280 +76,127 @@ impl LocalEnv {
pub fn pageserver_data_dir(&self) -> PathBuf {
self.base_data_dir.clone()
}
pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
self.base_data_dir.join("safekeepers").join(data_dir_name)
}
pub fn register_branch_mapping(
&mut self,
branch_name: String,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> anyhow::Result<()> {
let existing_values = self
.branch_name_mappings
.entry(branch_name.clone())
.or_default();
let existing_ids = existing_values
.iter()
.find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
if let Some((_, old_timeline_id)) = existing_ids {
if old_timeline_id == &timeline_id {
Ok(())
} else {
bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
}
} else {
existing_values.push((tenant_id, timeline_id));
Ok(())
}
}
pub fn get_branch_timeline_id(
&self,
branch_name: &str,
tenant_id: TenantId,
) -> Option<TimelineId> {
self.branch_name_mappings
.get(branch_name)?
.iter()
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
.map(|&(_, timeline_id)| timeline_id)
.map(TimelineId::from)
}
pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
self.branch_name_mappings
.iter()
.flat_map(|(name, tenant_timelines)| {
tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
(TenantTimelineId::new(tenant_id, timeline_id), name.clone())
})
})
.collect()
}
/// Create a LocalEnv from a config file.
///
/// Unlike 'load_config', this function fills in any defaults that are missing
/// from the config file.
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
let mut env: LocalEnv = toml::from_str(toml)?;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume, that distrib dirs follow the same pattern
// for all postgres versions.
if env.pg_distrib_dir == Path::new("") {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
env.pg_distrib_dir = postgres_bin.into();
} else {
let cwd = env::current_dir()?;
env.pg_distrib_dir = cwd.join("pg_install")
}
}
// Find neon binaries.
if env.neon_distrib_dir == Path::new("") {
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
}
// If no initial tenant ID was given, generate it.
if env.default_tenant_id.is_none() {
env.default_tenant_id = Some(TenantId::generate());
}
env.base_data_dir = base_path();
Ok(env)
}
/// Locate and load config
pub fn load_config() -> anyhow::Result<Self> {
let repopath = base_path();
if !repopath.exists() {
bail!(
"Neon config is not found in {}. You need to run 'neon_local init' first",
repopath.to_str().unwrap()
);
}
// TODO: check that it looks like a neon repository
// load and parse file
let config = fs::read_to_string(repopath.join("config"))?;
let mut env: LocalEnv = toml::from_str(config.as_str())?;
env.base_data_dir = repopath;
Ok(env)
}
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'neon_local' command-line
# utility.
"#
.to_string();
// Convert the LocalEnv to a toml file.
//
// This could be as simple as this:
//
// conf_content += &toml::to_string_pretty(env)?;
//
// But it results in a "values must be emitted before tables". I'm not sure
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
// Maybe rust reorders the fields to squeeze avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
let target_config_path = base_path.join("config");
fs::write(&target_config_path, conf_content).with_context(|| {
format!(
"Failed to write config file into path '{}'",
target_config_path.display()
)
})
}
// this function is used only for testing purposes in CLI e g generate tokens during init
pub fn generate_auth_token(&self, claims: &Claims) -> anyhow::Result<String> {
let private_key_path = if self.private_key_path.is_absolute() {
self.private_key_path.to_path_buf()
} else {
self.base_data_dir.join(&self.private_key_path)
};
let key_data = fs::read(private_key_path)?;
encode_from_key_file(claims, &key_data)
}
//
// Initialize a new Neon repository
//
pub fn init(&mut self, pg_version: u32) -> anyhow::Result<()> {
// check if config already exists
let base_path = &self.base_data_dir;
ensure!(
base_path != Path::new(""),
"repository base path is missing"
);
ensure!(
!base_path.exists(),
"directory '{}' already exists. Perhaps already initialized?",
base_path.display()
);
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_bin_dir(pg_version)?.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.neon_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{binary}' in neon distrib dir '{}'",
self.neon_distrib_dir.display()
);
}
}
fs::create_dir(&base_path)?;
// generate keys for jwt
// openssl genrsa -out private_key.pem 2048
let private_key_path;
if self.private_key_path == PathBuf::new() {
private_key_path = base_path.join("auth_private_key.pem");
let keygen_output = Command::new("openssl")
.arg("genrsa")
.args(&["-out", private_key_path.to_str().unwrap()])
.arg("2048")
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
self.private_key_path = PathBuf::from("auth_private_key.pem");
let public_key_path = base_path.join("auth_public_key.pem");
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
let keygen_output = Command::new("openssl")
.arg("rsa")
.args(&["-in", private_key_path.to_str().unwrap()])
.arg("-pubout")
.args(&["-outform", "PEM"])
.args(&["-out", public_key_path.to_str().unwrap()])
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
}
self.pageserver.auth_token =
self.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
fs::create_dir_all(self.pg_data_dirs_path())?;
for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
}
self.persist_config(base_path)
}
}
fn base_path() -> PathBuf {
match std::env::var_os("NEON_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".neon"),
match std::env::var_os("ZENITH_REPO_DIR") {
Some(val) => PathBuf::from(val.to_str().unwrap()),
None => ".zenith".into(),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn simple_conf_parsing() {
let simple_conf_toml = include_str!("../simple.conf");
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
assert!(
simple_conf_parse_result.is_ok(),
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
);
let string_to_replace = "broker_endpoints = ['http://127.0.0.1:2379']";
let spoiled_url_str = "broker_endpoints = ['!@$XOXO%^&']";
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
assert!(
spoiled_url_toml.contains(spoiled_url_str),
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
);
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
assert!(
spoiled_url_parse_result.is_err(),
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
//
// Initialize a new Zenith repository
//
pub fn init(
pageserver_pg_port: u16,
pageserver_http_port: u16,
tenantid: ZTenantId,
auth_type: AuthType,
) -> Result<()> {
// check if config already exists
let base_path = base_path();
if base_path.exists() {
anyhow::bail!(
"{} already exists. Perhaps already initialized?",
base_path.to_str().unwrap()
);
}
fs::create_dir(&base_path)?;
// ok, now check that expected binaries are present
// Find postgres binaries. Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
let pg_distrib_dir: PathBuf = {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
postgres_bin.into()
} else {
let cwd = env::current_dir()?;
cwd.join("tmp_install")
}
};
if !pg_distrib_dir.join("bin/postgres").exists() {
anyhow::bail!("Can't find postgres binary at {:?}", pg_distrib_dir);
}
// generate keys for jwt
// openssl genrsa -out private_key.pem 2048
let private_key_path = base_path.join("auth_private_key.pem");
let keygen_output = Command::new("openssl")
.arg("genrsa")
.args(&["-out", private_key_path.to_str().unwrap()])
.arg("2048")
.stdout(Stdio::null())
.output()
.with_context(|| "failed to generate auth private key")?;
if !keygen_output.status.success() {
anyhow::bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
let public_key_path = base_path.join("auth_public_key.pem");
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
let keygen_output = Command::new("openssl")
.arg("rsa")
.args(&["-in", private_key_path.to_str().unwrap()])
.arg("-pubout")
.args(&["-outform", "PEM"])
.args(&["-out", public_key_path.to_str().unwrap()])
.stdout(Stdio::null())
.output()
.with_context(|| "failed to generate auth private key")?;
if !keygen_output.status.success() {
anyhow::bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
let auth_token =
encode_from_key_path(&Claims::new(None, Scope::PageServerApi), &private_key_path)?;
// Find zenith binaries.
let zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
if !zenith_distrib_dir.join("pageserver").exists() {
anyhow::bail!("Can't find pageserver binary.",);
}
let conf = LocalEnv {
pageserver_pg_port,
pageserver_http_port,
pg_distrib_dir,
zenith_distrib_dir,
base_data_dir: base_path,
tenantid,
auth_token,
auth_type,
private_key_path,
};
fs::create_dir_all(conf.pg_data_dirs_path())?;
let toml = toml::to_string_pretty(&conf)?;
fs::write(conf.base_data_dir.join("config"), toml)?;
Ok(())
}
// Locate and load config
pub fn load_config() -> Result<LocalEnv> {
let repopath = base_path();
if !repopath.exists() {
anyhow::bail!(
"Zenith config is not found in {}. You need to run 'zenith init' first",
repopath.to_str().unwrap()
);
}
// TODO: check that it looks like a zenith repository
// load and parse file
let config = fs::read_to_string(repopath.join("config"))?;
toml::from_str(config.as_str()).map_err(|e| e.into())
}
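
The persist_config comment above refers to a toml-rs quirk ("values must be emitted before tables"). A small, self-contained illustration of the workaround (struct and field names are made up):

use serde::Serialize;

#[derive(Serialize)]
struct Sub {
    port: u16,
}

#[derive(Serialize)]
struct Conf {
    subs: Vec<Sub>, // serialized as [[subs]] tables
    name: String,   // a plain value declared after the table field
}

fn main() -> anyhow::Result<()> {
    let conf = Conf {
        subs: vec![Sub { port: 5454 }],
        name: "demo".to_string(),
    };
    // Serializing the struct directly can fail with
    // "values must be emitted before tables", depending on field order.
    let direct = toml::to_string_pretty(&conf);
    // Going through toml::Value first reorders values ahead of tables.
    let via_value = toml::to_string_pretty(&toml::Value::try_from(&conf)?)?;
    println!("direct ok: {}\n{}", direct.is_ok(), via_value);
    Ok(())
}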

View File

@@ -1,226 +0,0 @@
///
/// Module for parsing postgresql.conf file.
///
/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
/// enough to extract a few settings we need in Neon, assuming you don't do
/// funny stuff like include-directives or funny escaping.
use anyhow::{bail, Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;
use std::io::BufRead;
use std::str::FromStr;
/// In-memory representation of a postgresql.conf file
#[derive(Default)]
pub struct PostgresConf {
lines: Vec<String>,
hash: HashMap<String, String>,
}
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
impl PostgresConf {
pub fn new() -> PostgresConf {
PostgresConf::default()
}
/// Read file into memory
pub fn read(read: impl std::io::Read) -> Result<PostgresConf> {
let mut result = Self::new();
for line in std::io::BufReader::new(read).lines() {
let line = line?;
// Store each line in a vector, in original format
result.lines.push(line.clone());
// Also parse each line and insert key=value lines into a hash map.
//
// FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL.
// But it's close enough for our usage.
let line = line.trim();
if line.starts_with('#') {
// comment, ignore
continue;
} else if let Some(caps) = CONF_LINE_RE.captures(line) {
let name = caps.get(1).unwrap().as_str();
let raw_val = caps.get(2).unwrap().as_str();
if let Ok(val) = deescape_str(raw_val) {
// Note: if there's already an entry in the hash map for
// this key, this will replace it. That's the behavior what
// we want; when PostgreSQL reads the file, each line
// overrides any previous value for the same setting.
result.hash.insert(name.to_string(), val.to_string());
}
}
}
Ok(result)
}
/// Return the current value of 'option'
pub fn get(&self, option: &str) -> Option<&str> {
self.hash.get(option).map(|x| x.as_ref())
}
/// Return the current value of a field, parsed to the right datatype.
///
/// This calls the FromStr::parse() function on the value of the field. If
/// the field does not exist, or parsing fails, returns an error.
///
pub fn parse_field<T>(&self, field_name: &str, context: &str) -> Result<T>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
self.get(field_name)
.with_context(|| format!("could not find '{}' option {}", field_name, context))?
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
}
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
if let Some(val) = self.get(field_name) {
let result = val
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
Ok(Some(result))
} else {
Ok(None)
}
}
///
/// Note: if you call this multiple times for the same option, the config
/// file will a line for each call. It would be nice to have a function
/// to change an existing line, but that's a TODO.
///
pub fn append(&mut self, option: &str, value: &str) {
self.lines
.push(format!("{}={}\n", option, escape_str(value)));
self.hash.insert(option.to_string(), value.to_string());
}
/// Append an arbitrary non-setting line to the config file
pub fn append_line(&mut self, line: &str) {
self.lines.push(line.to_string());
}
}
impl fmt::Display for PostgresConf {
/// Return the whole configuration file as a string
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for line in self.lines.iter() {
f.write_str(line)?;
}
Ok(())
}
}
/// Escape a value for putting in postgresql.conf.
fn escape_str(s: &str) -> String {
// If the string doesn't contain anything that needs quoting or escaping, return it
// as it is.
//
// The first part of the regex, before the '|', matches the INTEGER rule in the
// PostgreSQL flex grammar (guc-file.l). It matches plain integers like "123" and
// "-123", and also accepts units like "10MB". The second part of the regex matches
// the UNQUOTED_STRING rule, and accepts strings that contain a single word, beginning
// with a letter. That covers words like "off" or "posix". Everything else is quoted.
//
// This regex is a bit more conservative than the rules in guc-file.l, so we quote some
// strings that PostgreSQL would accept without quoting, but that's OK.
static UNQUOTED_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
if UNQUOTED_RE.is_match(s) {
s.to_string()
} else {
// Otherwise escape and quote it
let s = s
.replace('\\', "\\\\")
.replace('\n', "\\n")
.replace('\'', "''");
"\'".to_owned() + &s + "\'"
}
}
/// De-escape a possibly-quoted value.
///
/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL
/// does this.
fn deescape_str(s: &str) -> Result<String> {
// If the string has a quote at the beginning and end, strip them out.
if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
let mut result = String::new();
let mut iter = s[1..(s.len() - 1)].chars().peekable();
while let Some(c) = iter.next() {
let newc = if c == '\\' {
match iter.next() {
Some('b') => '\x08',
Some('f') => '\x0c',
Some('n') => '\n',
Some('r') => '\r',
Some('t') => '\t',
Some('0'..='7') => {
// TODO
bail!("octal escapes not supported");
}
Some(n) => n,
None => break,
}
} else if c == '\'' && iter.peek() == Some(&'\'') {
// doubled quote becomes just one quote
iter.next().unwrap()
} else {
c
};
result.push(newc);
}
Ok(result)
} else {
Ok(s.to_string())
}
}
#[test]
fn test_postgresql_conf_escapes() -> Result<()> {
assert_eq!(escape_str("foo bar"), "'foo bar'");
// these don't need to be quoted
assert_eq!(escape_str("foo"), "foo");
assert_eq!(escape_str("123"), "123");
assert_eq!(escape_str("+123"), "+123");
assert_eq!(escape_str("-10"), "-10");
assert_eq!(escape_str("1foo"), "1foo");
assert_eq!(escape_str("foo1"), "foo1");
assert_eq!(escape_str("10MB"), "10MB");
assert_eq!(escape_str("-10kB"), "-10kB");
// these need quoting and/or escaping
assert_eq!(escape_str("foo bar"), "'foo bar'");
assert_eq!(escape_str("fo'o"), "'fo''o'");
assert_eq!(escape_str("fo\no"), "'fo\\no'");
assert_eq!(escape_str("fo\\o"), "'fo\\\\o'");
assert_eq!(escape_str("10 cats"), "'10 cats'");
// Test de-escaping
assert_eq!(deescape_str(&escape_str("foo"))?, "foo");
assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r");
assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t");
// octal-escapes are currently not supported
assert!(deescape_str("'foo\\7\\07\\007'").is_err());
Ok(())
}
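
A hypothetical round-trip with the PostgresConf helper removed above, exercising append, Display rendering, and read/parse together:

fn main() -> anyhow::Result<()> {
    let mut conf = PostgresConf::new();
    conf.append("port", "55432");
    conf.append("listen_addresses", "0.0.0.0"); // quoted on output by escape_str
    conf.append("shared_preload_libraries", "neon");

    // Display renders the accumulated lines; read() parses them back.
    let rendered = conf.to_string();
    let parsed = PostgresConf::read(rendered.as_bytes())?;

    assert_eq!(parsed.get("listen_addresses"), Some("0.0.0.0"));
    let port: u16 = parsed.parse_field("port", "in this example")?;
    assert_eq!(port, 55432);
    Ok(())
}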

View File

@@ -1,278 +0,0 @@
use std::io::Write;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
use std::time::Duration;
use std::{io, result, thread};
use anyhow::bail;
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use postgres::Config;
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
#[derive(Error, Debug)]
pub enum SafekeeperHttpError {
#[error("Reqwest error: {0}")]
Transport(#[from] reqwest::Error),
#[error("Error: {0}")]
Response(String),
}
type Result<T> = result::Result<T, SafekeeperHttpError>;
pub trait ResponseErrorMessageExt: Sized {
fn error_from_body(self) -> Result<Self>;
}
impl ResponseErrorMessageExt for Response {
fn error_from_body(self) -> Result<Self> {
let status = self.status();
if !(status.is_client_error() || status.is_server_error()) {
return Ok(self);
}
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = self.url().to_owned();
Err(SafekeeperHttpError::Response(
match self.json::<HttpErrorBody>() {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
},
))
}
}
//
// Control routines for safekeeper.
//
// Used in CLI and tests.
//
#[derive(Debug)]
pub struct SafekeeperNode {
pub id: NodeId,
pub conf: SafekeeperConf,
pub pg_connection_config: Config,
pub env: LocalEnv,
pub http_client: Client,
pub http_base_url: String,
pub pageserver: Arc<PageServerNode>,
}
impl SafekeeperNode {
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
let pageserver = Arc::new(PageServerNode::from_env(env));
SafekeeperNode {
id: conf.id,
conf: conf.clone(),
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
pageserver,
}
}
/// Construct libpq connection string for connecting to this safekeeper.
fn safekeeper_connection_config(port: u16) -> Config {
// TODO safekeeper authentication not implemented yet
format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
.parse()
.unwrap()
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
}
pub fn datadir_path(&self) -> PathBuf {
SafekeeperNode::datadir_path_by_id(&self.env, self.id)
}
pub fn pid_file(&self) -> PathBuf {
self.datadir_path().join("safekeeper.pid")
}
pub fn start(&self) -> anyhow::Result<()> {
print!(
"Starting safekeeper at '{}' in '{}'",
connection_address(&self.pg_connection_config),
self.datadir_path().display()
);
io::stdout().flush().unwrap();
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
let mut cmd = Command::new(self.env.safekeeper_bin()?);
fill_rust_env_vars(
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
.args(&["--id", self.id.to_string().as_ref()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.arg("--daemonize"),
);
if !self.conf.sync {
cmd.arg("--no-sync");
}
let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
if !comma_separated_endpoints.is_empty() {
cmd.args(&["--broker-endpoints", &comma_separated_endpoints]);
}
if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
cmd.args(&["--broker-etcd-prefix", prefix]);
}
if let Some(threads) = self.conf.backup_threads {
cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
}
if let Some(ref remote_storage) = self.conf.remote_storage {
cmd.args(&["--remote-storage", remote_storage]);
}
if self.conf.auth_enabled {
cmd.arg("--auth-validation-public-key-path");
// PathBuf is better be passed as is, not via `String`.
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
}
fill_aws_secrets_vars(&mut cmd);
if !cmd.status()?.success() {
bail!(
"Safekeeper failed to start. See '{}' for details.",
self.datadir_path().join("safekeeper.log").display()
);
}
// It takes a while for the safekeeper to start up. Wait until it is
// open for business.
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(_) => {
println!("\nSafekeeper started");
return Ok(());
}
Err(err) => {
match err {
SafekeeperHttpError::Transport(err) => {
if err.is_connect() && retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!(
"Safekeeper not responding yet, err {} retrying ({})...",
err, retries
);
}
}
SafekeeperHttpError::Response(msg) => {
bail!("safekeeper failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("safekeeper failed to start in {} seconds", RETRIES);
}
///
/// Stop the server.
///
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
/// Otherwise we use SIGTERM, triggering a clean shutdown
///
/// If the server is not running, returns success
///
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
let pid_file = self.pid_file();
if !pid_file.exists() {
println!("Safekeeper {} is already stopped", self.id);
return Ok(());
}
let pid = read_pidfile(&pid_file)?;
let pid = Pid::from_raw(pid);
let sig = if immediate {
print!("Stopping safekeeper {} immediately..", self.id);
Signal::SIGQUIT
} else {
print!("Stopping safekeeper {} gracefully..", self.id);
Signal::SIGTERM
};
io::stdout().flush().unwrap();
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!(
"Safekeeper with pid {} does not exist, but a PID file was found",
pid
);
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to safekeeper with pid {}: {}",
pid,
err.desc()
),
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
}
bail!("Failed to stop safekeeper with pid {}", pid);
}
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
// TODO: authentication
//if self.env.auth_type == AuthType::NeonJWT {
// builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
//}
self.http_client.request(method, url)
}
pub fn check_status(&self) -> Result<()> {
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
.send()?
.error_from_body()?;
Ok(())
}
}
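
A minimal sketch of the wait-for-exit pattern used in stop() above: probe the pid with no signal until ESRCH, bounded by a timeout. The helper name and timeout are illustrative, and it assumes a nix version whose error type is Errno, as in the file above.

use std::{thread, time::{Duration, Instant}};

use anyhow::bail;
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

fn wait_until_gone(pid: Pid, timeout: Duration) -> anyhow::Result<()> {
    let deadline = Instant::now() + timeout;
    loop {
        match kill(pid, None::<Signal>) {
            Ok(_) => {}                         // process still exists, keep polling
            Err(Errno::ESRCH) => return Ok(()), // process is gone
            Err(err) => bail!("failed to probe pid {}: {}", pid, err),
        }
        if Instant::now() >= deadline {
            bail!("process {} did not exit within {:?}", pid, timeout);
        }
        thread::sleep(Duration::from_millis(100));
    }
}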

View File

@@ -1,33 +1,26 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Write};
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
use std::{io, result, thread};
use anyhow::{bail, Context};
use nix::errno::Errno;
use anyhow::{anyhow, bail};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use pageserver_api::models::{
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
};
use pageserver::http::models::{BranchCreateRequest, TenantCreateRequest};
use postgres::{Config, NoTls};
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
id::{TenantId, TimelineId},
lsn::Lsn,
postgres_backend::AuthType,
};
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
use crate::local_env::LocalEnv;
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
use crate::read_pidfile;
use pageserver::branches::BranchInfo;
use zenith_utils::connstring::connection_address;
#[derive(Error, Debug)]
pub enum PageserverHttpError {
@@ -38,12 +31,6 @@ pub enum PageserverHttpError {
Response(String),
}
impl From<anyhow::Error> for PageserverHttpError {
fn from(e: anyhow::Error) -> Self {
Self::Response(e.to_string())
}
}
type Result<T> = result::Result<T, PageserverHttpError>;
pub trait ResponseErrorMessageExt: Sized {
@@ -57,7 +44,7 @@ impl ResponseErrorMessageExt for Response {
return Ok(self);
}
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
// reqwest do not export it's error construction utility functions, so lets craft the message ourselves
let url = self.url().to_owned();
Err(PageserverHttpError::Response(
match self.json::<HttpErrorBody>() {
@@ -75,6 +62,7 @@ impl ResponseErrorMessageExt for Response {
//
#[derive(Debug)]
pub struct PageServerNode {
pub kill_on_exit: bool,
pub pg_connection_config: Config,
pub env: LocalEnv,
pub http_client: Client,
@@ -83,113 +71,67 @@ pub struct PageServerNode {
impl PageServerNode {
pub fn from_env(env: &LocalEnv) -> PageServerNode {
let password = if env.pageserver.auth_type == AuthType::NeonJWT {
&env.pageserver.auth_token
let password = if env.auth_type == AuthType::ZenithJWT {
&env.auth_token
} else {
""
};
Self {
PageServerNode {
kill_on_exit: false,
pg_connection_config: Self::pageserver_connection_config(
password,
&env.pageserver.listen_pg_addr,
env.pageserver_pg_port,
),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://{}/v1", env.pageserver.listen_http_addr),
http_base_url: format!("http://localhost:{}/v1", env.pageserver_http_port),
}
}
/// Construct libpq connection string for connecting to the pageserver.
fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
format!("postgresql://no_user:{password}@{listen_addr}/no_db")
fn pageserver_connection_config(password: &str, port: u16) -> Config {
format!("postgresql://no_user:{}@localhost:{}/no_db", password, port)
.parse()
.unwrap()
}
pub fn initialize(
&self,
create_tenant: Option<TenantId>,
initial_timeline_id: Option<TimelineId>,
config_overrides: &[&str],
pg_version: u32,
) -> anyhow::Result<TimelineId> {
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let pg_distrib_dir_param = format!(
"pg_distrib_dir='{}'",
self.env.pg_distrib_dir_raw().display()
);
pub fn init(&self, create_tenant: Option<&str>, enable_auth: bool) -> anyhow::Result<()> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let listen_pg = format!("localhost:{}", self.env.pageserver_pg_port);
let listen_http = format!("localhost:{}", self.env.pageserver_http_port);
let mut args = vec![
"--init",
"-D",
self.env.base_data_dir.to_str().unwrap(),
"--postgres-distrib",
self.env.pg_distrib_dir.to_str().unwrap(),
"--listen-pg",
&listen_pg,
"--listen-http",
&listen_http,
];
let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
let listen_http_addr_param = format!(
"listen_http_addr='{}'",
self.env.pageserver.listen_http_addr
);
let listen_pg_addr_param =
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
let broker_endpoints_param = format!(
"broker_endpoints=[{}]",
self.env
.etcd_broker
.broker_endpoints
.iter()
.map(|url| format!("'{url}'"))
.collect::<Vec<_>>()
.join(",")
);
let broker_etcd_prefix_param = self
.env
.etcd_broker
.broker_etcd_prefix
.as_ref()
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
let mut init_config_overrides = config_overrides.to_vec();
init_config_overrides.push(&id);
init_config_overrides.push(&pg_distrib_dir_param);
init_config_overrides.push(&authg_type_param);
init_config_overrides.push(&listen_http_addr_param);
init_config_overrides.push(&listen_pg_addr_param);
init_config_overrides.push(&broker_endpoints_param);
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
init_config_overrides.push(broker_etcd_prefix_param);
if enable_auth {
args.extend(&["--auth-validation-public-key-path", "auth_public_key.pem"]);
args.extend(&["--auth-type", "ZenithJWT"]);
}
if self.env.pageserver.auth_type != AuthType::Trust {
init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
if let Some(tenantid) = create_tenant {
args.extend(&["--create-tenant", tenantid])
}
self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
let init_result = self
.try_init_timeline(create_tenant, initial_timeline_id, pg_version)
.context("Failed to create initial tenant and timeline for pageserver");
match &init_result {
Ok(initial_timeline_id) => {
println!("Successfully initialized timeline {initial_timeline_id}")
}
Err(e) => eprintln!("{e:#}"),
}
self.stop(false)?;
init_result
}
let status = cmd
.args(args)
.env_clear()
.env("RUST_BACKTRACE", "1")
.status()
.expect("pageserver init failed");
fn try_init_timeline(
&self,
new_tenant_id: Option<TenantId>,
new_timeline_id: Option<TimelineId>,
pg_version: u32,
) -> anyhow::Result<TimelineId> {
let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
let initial_timeline_info = self.timeline_create(
initial_tenant_id,
new_timeline_id,
None,
None,
Some(pg_version),
)?;
Ok(initial_timeline_info.timeline_id)
if status.success() {
Ok(())
} else {
Err(anyhow!("pageserver init failed"))
}
}
pub fn repo_path(&self) -> PathBuf {
@@ -200,49 +142,24 @@ impl PageServerNode {
self.repo_path().join("pageserver.pid")
}
pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
self.start_node(config_overrides, &self.repo_path(), false)
}
fn start_node(
&self,
config_overrides: &[&str],
datadir: &Path,
update_config: bool,
) -> anyhow::Result<()> {
println!(
pub fn start(&self) -> anyhow::Result<()> {
print!(
"Starting pageserver at '{}' in '{}'",
connection_address(&self.pg_connection_config),
datadir.display()
self.repo_path().display()
);
io::stdout().flush()?;
let mut args = vec![
"-D",
datadir.to_str().with_context(|| {
format!(
"Datadir path '{}' cannot be represented as a unicode string",
datadir.display()
)
})?,
];
if update_config {
args.push("--update-config");
}
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
io::stdout().flush().unwrap();
let mut cmd = Command::new(self.env.pageserver_bin()?);
let mut filled_cmd = fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
filled_cmd = fill_aws_secrets_vars(filled_cmd);
cmd.args(&["-D", self.repo_path().to_str().unwrap()])
.arg("-d")
.env_clear()
.env("RUST_BACKTRACE", "1");
if !filled_cmd.status()?.success() {
if !cmd.status()?.success() {
bail!(
"Pageserver failed to start. See console output and '{}' for details.",
datadir.join("pageserver.log").display()
"Pageserver failed to start. See '{}' for details.",
self.repo_path().join("pageserver.log").display()
);
}
@@ -251,7 +168,7 @@ impl PageServerNode {
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(()) => {
Ok(_) => {
println!("\nPageserver started");
return Ok(());
}
@@ -265,87 +182,49 @@ impl PageServerNode {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("Pageserver not responding yet, err {err} retrying ({retries})...");
println!(
"Pageserver not responding yet, err {} retrying ({})...",
err, retries
);
}
}
PageserverHttpError::Response(msg) => {
bail!("pageserver failed to start: {msg} ")
bail!("pageserver failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("pageserver failed to start in {RETRIES} seconds");
bail!("pageserver failed to start in {} seconds", RETRIES);
}
///
/// Stop the server.
///
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
/// Otherwise we use SIGTERM, triggering a clean shutdown
///
/// If the server is not running, returns success
///
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
let pid_file = self.pid_file();
if !pid_file.exists() {
println!("Pageserver is already stopped");
return Ok(());
pub fn stop(&self) -> anyhow::Result<()> {
let pid = read_pidfile(&self.pid_file())?;
let pid = Pid::from_raw(pid);
if kill(pid, Signal::SIGTERM).is_err() {
bail!("Failed to kill pageserver with pid {}", pid);
}
let pid = Pid::from_raw(read_pidfile(&pid_file)?);
let sig = if immediate {
print!("Stopping pageserver immediately..");
Signal::SIGQUIT
} else {
print!("Stopping pageserver gracefully..");
Signal::SIGTERM
};
io::stdout().flush().unwrap();
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!("Pageserver with pid {pid} does not exist, but a PID file was found");
// wait for pageserver stop
let address = connection_address(&self.pg_connection_config);
for _ in 0..5 {
let stream = TcpStream::connect(&address);
thread::sleep(Duration::from_secs(1));
if let Err(_e) = stream {
println!("Pageserver stopped");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {pid}: {}",
err.desc()
),
println!("Stopping pageserver on {}", address);
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
}
bail!("Failed to stop pageserver with pid {pid}");
bail!("Failed to stop pageserver with pid {}", pid);
}
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
println!("Pageserver query: '{sql}'");
println!("Pageserver query: '{}'", sql);
client.simple_query(sql).unwrap()
}
@@ -355,232 +234,87 @@ impl PageServerNode {
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
let mut builder = self.http_client.request(method, url);
if self.env.pageserver.auth_type == AuthType::NeonJWT {
builder = builder.bearer_auth(&self.env.pageserver.auth_token)
if self.env.auth_type == AuthType::ZenithJWT {
builder = builder.bearer_auth(&self.env.auth_token)
}
builder
}
pub fn check_status(&self) -> Result<()> {
self.http_request(Method::GET, format!("{}/status", self.http_base_url))
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
.send()?
.error_from_body()?;
Ok(())
}
pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
pub fn tenant_list(&self) -> Result<Vec<String>> {
Ok(self
.http_request(Method::GET, format!("{}/tenant", self.http_base_url))
.http_request(Method::GET, format!("{}/{}", self.http_base_url, "tenant"))
.send()?
.error_from_body()?
.json()?)
}
pub fn tenant_create(
&self,
new_tenant_id: Option<TenantId>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<TenantId> {
let mut settings = settings.clone();
let request = TenantCreateRequest {
new_tenant_id,
checkpoint_distance: settings
.remove("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()?,
checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
compaction_target_size: settings
.remove("compaction_target_size")
.map(|x| x.parse::<u64>())
.transpose()?,
compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.remove("compaction_threshold")
.map(|x| x.parse::<usize>())
.transpose()?,
gc_horizon: settings
.remove("gc_horizon")
.map(|x| x.parse::<u64>())
.transpose()?,
gc_period: settings.remove("gc_period").map(|x| x.to_string()),
image_creation_threshold: settings
.remove("image_creation_threshold")
.map(|x| x.parse::<usize>())
.transpose()?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings
.remove("lagging_wal_timeout")
.map(|x| x.to_string()),
max_lsn_wal_lag: settings
.remove("max_lsn_wal_lag")
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
}
self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
.json(&request)
pub fn tenant_create(&self, tenantid: ZTenantId) -> Result<()> {
Ok(self
.http_request(Method::POST, format!("{}/{}", self.http_base_url, "tenant"))
.json(&TenantCreateRequest {
tenant_id: tenantid,
})
.send()?
.error_from_body()?
.json::<Option<String>>()
.with_context(|| {
format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
})?
.context("No tenant id was found in the tenant creation response")
.and_then(|tenant_id_string| {
tenant_id_string.parse().with_context(|| {
format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
})
})
.json()?)
}
pub fn tenant_config(&self, tenant_id: TenantId, settings: HashMap<&str, &str>) -> Result<()> {
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))
.json(&TenantConfigRequest {
tenant_id,
checkpoint_distance: settings
.get("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'checkpoint_distance' as an integer")?,
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'compaction_target_size' as an integer")?,
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.get("compaction_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'compaction_threshold' as an integer")?,
gc_horizon: settings
.get("gc_horizon")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_horizon' as an integer")?,
gc_period: settings.get("gc_period").map(|x| x.to_string()),
image_creation_threshold: settings
.get("image_creation_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.get("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
max_lsn_wal_lag: settings
.get("max_lsn_wal_lag")
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
})
.send()?
.error_from_body()?;
Ok(())
}
pub fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
let timeline_infos: Vec<TimelineInfo> = self
pub fn branch_list(&self, tenantid: &ZTenantId) -> Result<Vec<BranchInfo>> {
Ok(self
.http_request(
Method::GET,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
format!("{}/branch/{}", self.http_base_url, tenantid),
)
.send()?
.error_from_body()?
.json()?;
Ok(timeline_infos)
.json()?)
}
pub fn timeline_create(
pub fn branch_create(
&self,
tenant_id: TenantId,
new_timeline_id: Option<TimelineId>,
ancestor_start_lsn: Option<Lsn>,
ancestor_timeline_id: Option<TimelineId>,
pg_version: Option<u32>,
) -> anyhow::Result<TimelineInfo> {
self.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
pg_version,
})
.send()?
.error_from_body()?
.json::<Option<TimelineInfo>>()
.with_context(|| {
format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
})?
.with_context(|| {
format!(
"No timeline id was found in the timeline creation response for tenant {tenant_id}"
branch_name: &str,
startpoint: &str,
tenantid: &ZTenantId,
) -> Result<BranchInfo> {
Ok(self
.http_request(Method::POST, format!("{}/branch", self.http_base_url))
.json(&BranchCreateRequest {
tenant_id: tenantid.to_owned(),
name: branch_name.to_owned(),
start_point: startpoint.to_owned(),
})
.send()?
.error_from_body()?
.json()?)
}
pub fn branch_get_by_name(
&self,
tenantid: &ZTenantId,
branch_name: &str,
) -> Result<BranchInfo> {
Ok(self
.http_request(
Method::GET,
format!("{}/branch/{}/{}", self.http_base_url, tenantid, branch_name),
)
})
}
/// Import a basebackup prepared using either:
/// a) `pg_basebackup -F tar`, or
/// b) The `fullbackup` pageserver endpoint
///
/// # Arguments
/// * `tenant_id` - tenant to import into. Created if not exists
/// * `timeline_id` - id to assign to imported timeline
/// * `base` - (start lsn of basebackup, path to `base.tar` file)
/// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
pub fn timeline_import(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
base: (Lsn, PathBuf),
pg_wal: Option<(Lsn, PathBuf)>,
pg_version: u32,
) -> anyhow::Result<()> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
// Init base reader
let (start_lsn, base_tarfile_path) = base;
let base_tarfile = File::open(base_tarfile_path)?;
let mut base_reader = BufReader::new(base_tarfile);
// Init wal reader if necessary
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
let wal_tarfile = File::open(wal_tarfile_path)?;
let wal_reader = BufReader::new(wal_tarfile);
(end_lsn, Some(wal_reader))
} else {
(start_lsn, None)
};
// Import base
let import_cmd = format!(
"import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
);
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut base_reader, &mut writer)?;
writer.finish()?;
// Import wal if necessary
if let Some(mut wal_reader) = wal_reader {
let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut wal_reader, &mut writer)?;
writer.finish()?;
}
Ok(())
.send()?
.error_for_status()?
.json()?)
}
}
impl Drop for PageServerNode {
fn drop(&mut self) {
if self.kill_on_exit {
let _ = self.stop();
}
}
}
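
To tie the pieces above together, here is a hedged usage sketch of the newer PageServerNode API from this diff: create a tenant and an initial timeline, then import a basebackup tarball. It assumes the surrounding control_plane crate types (LocalEnv, Lsn, PageServerNode) are in scope; the function name, the base.tar path, and PostgreSQL version 14 are illustrative only.

use std::collections::HashMap;
use std::path::PathBuf;

// `LocalEnv`, `Lsn`, and `PageServerNode` come from the crates shown above.
// Illustrative driver code, not part of the diff.
fn import_basebackup_example(env: &LocalEnv, start_lsn: Lsn) -> anyhow::Result<()> {
    let pageserver = PageServerNode::from_env(env);
    pageserver.start(&[])?;

    // Let the pageserver pick the tenant and timeline ids; use default tenant settings.
    let tenant_id = pageserver.tenant_create(None, HashMap::new())?;
    let timeline = pageserver.timeline_create(tenant_id, None, None, None, Some(14))?;

    // Import a `pg_basebackup -F tar` snapshot taken at `start_lsn`; no extra WAL here.
    pageserver.timeline_import(
        tenant_id,
        timeline.timeline_id,
        (start_lsn, PathBuf::from("base.tar")),
        None,
        14,
    )?;

    pageserver.stop(false)
}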


@@ -1,48 +0,0 @@
#!/bin/bash
set -eux
PG_VERSION=${PG_VERSION:-14}
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
SPEC_FILE=/tmp/spec.json
echo "Waiting pageserver become ready."
while ! nc -z pageserver 6400; do
sleep 1;
done
echo "Page server is ready."
echo "Create a tenant and timeline"
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{}"
http://pageserver:9898/v1/tenant/
)
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
echo "Overwrite tenant id and timeline id in spec file"
tenant_id=$(echo ${result} | jq -r .tenant_id)
timeline_id=$(echo ${result} | jq -r .timeline_id)
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
cat ${SPEC_FILE}
echo "Start compute node"
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
-C "postgresql://cloud_admin@localhost:55433/postgres" \
-b /usr/local/bin/postgres \
-S ${SPEC_FILE}
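
The script above only needs two pageserver HTTP calls. A rough Rust equivalent of those curl invocations, assuming reqwest with the blocking and json features plus serde_json; the pageserver:9898 address and PostgreSQL version come from the script, everything else is illustrative.

use serde_json::{json, Value};

// Illustrative sketch of the two HTTP calls made by compute.sh.
fn create_tenant_and_timeline() -> anyhow::Result<(String, String)> {
    let http = reqwest::blocking::Client::new();
    let base = "http://pageserver:9898/v1"; // address taken from the script

    // POST /v1/tenant/ with an empty body lets the pageserver pick a tenant id.
    let tenant_id: String = http
        .post(format!("{base}/tenant/"))
        .json(&json!({}))
        .send()?
        .error_for_status()?
        .json()?;

    // POST /v1/tenant/{id}/timeline/ creates the initial timeline for that tenant.
    let timeline: Value = http
        .post(format!("{base}/tenant/{tenant_id}/timeline/"))
        .json(&json!({ "tenant_id": tenant_id, "pg_version": 14 }))
        .send()?
        .error_for_status()?
        .json()?;

    let timeline_id = timeline["timeline_id"]
        .as_str()
        .unwrap_or_default()
        .to_string();
    Ok((tenant_id, timeline_id))
}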


@@ -1,141 +0,0 @@
{
"format_version": 1.0,
"timestamp": "2022-10-12T18:00:00.000Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
"cluster": {
"cluster_id": "docker_compose",
"name": "docker_compose_test",
"state": "restarted",
"roles": [
{
"name": "cloud_admin",
"encrypted_password": "b093c0d3b281ba6da1eacc608620abd8",
"options": null
}
],
"databases": [
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "port",
"value": "55433",
"vartype": "integer"
},
{
"name": "shared_buffers",
"value": "1MB",
"vartype": "string"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "wal_sender_timeout",
"value": "5s",
"vartype": "string"
},
{
"name": "wal_keep_size",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "restart_after_crash",
"value": "off",
"vartype": "bool"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"vartype": "string"
},
{
"name": "neon.safekeepers",
"value": "safekeeper1:5454,safekeeper2:5454,safekeeper3:5454",
"vartype": "string"
},
{
"name": "neon.timeline_id",
"value": "TIMELINE_ID",
"vartype": "string"
},
{
"name": "neon.tenant_id",
"value": "TENANT_ID",
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"value": "host=pageserver port=6400",
"vartype": "string"
},
{
"name": "max_replication_write_lag",
"value": "500MB",
"vartype": "string"
},
{
"name": "max_replication_flush_lag",
"value": "10GB",
"vartype": "string"
}
]
},
"delta_operations": [
]
}


@@ -1,200 +0,0 @@
version: '3'
services:
etcd:
image: quay.io/coreos/etcd:v3.5.4
ports:
- 2379:2379
- 2380:2380
environment:
# This significantly speeds up etcd, and we don't need data persistency there anyway.
ETCD_UNSAFE_NO_FSYNC: "1"
command:
- "etcd"
- "--auto-compaction-mode=revision"
- "--auto-compaction-retention=1"
- "--name=etcd-cluster"
- "--initial-cluster-state=new"
- "--initial-cluster-token=etcd-cluster-1"
- "--initial-cluster=etcd-cluster=http://etcd:2380"
- "--initial-advertise-peer-urls=http://etcd:2380"
- "--advertise-client-urls=http://etcd:2379"
- "--listen-client-urls=http://0.0.0.0:2379"
- "--listen-peer-urls=http://0.0.0.0:2380"
- "--quota-backend-bytes=134217728" # 128 MB
minio:
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
ports:
- 9000:9000
- 9001:9001
environment:
- MINIO_ROOT_USER=minio
- MINIO_ROOT_PASSWORD=password
command: server /data --address :9000 --console-address ":9001"
minio_create_buckets:
image: minio/mc
environment:
- MINIO_ROOT_USER=minio
- MINIO_ROOT_PASSWORD=password
entrypoint:
- "/bin/sh"
- "-c"
command:
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
echo 'Waiting to start minio...' && sleep 1;
done;
/usr/bin/mc mb minio/neon --region=eu-north-1;
exit 0;"
depends_on:
- minio
pageserver:
image: neondatabase/neon:${TAG:-latest}
environment:
- BROKER_ENDPOINT='http://etcd:2379'
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 6400:6400 # pg protocol handler
- 9898:9898 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "/usr/local/bin/pageserver -D /data/.neon/
-c \"broker_endpoints=[$$BROKER_ENDPOINT]\"
-c \"listen_pg_addr='0.0.0.0:6400'\"
-c \"listen_http_addr='0.0.0.0:9898'\"
-c \"remote_storage={endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/pageserver/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper1:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
- SAFEKEEPER_ID=1
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7676:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper2:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
- SAFEKEEPER_ID=2
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7677:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper3:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
- SAFEKEEPER_ID=3
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7678:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
compute:
build:
context: ./image/compute
args:
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
- http_proxy=$http_proxy
- https_proxy=$https_proxy
environment:
- PG_VERSION=${PG_VERSION:-14}
#- RUST_BACKTRACE=1
volumes:
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
- ./compute/shell/:/shell/
ports:
- 55433:55433 # pg protocol handler
- 3080:3080 # http endpoints
entrypoint:
- "/shell/compute.sh"
depends_on:
- safekeeper1
- safekeeper2
- safekeeper3
- pageserver
compute_is_ready:
image: postgres:latest
entrypoint:
- "/bin/bash"
- "-c"
command:
- "until pg_isready -h compute -p 55433 ; do
echo 'Waiting to start compute...' && sleep 1;
done"
depends_on:
- compute


@@ -1,10 +0,0 @@
ARG COMPUTE_IMAGE=compute-node-v14:latest
FROM neondatabase/${COMPUTE_IMAGE}
USER root
RUN apt-get update && \
apt-get install -y curl \
jq \
netcat
USER postgres

docker-entrypoint.sh Executable file

@@ -0,0 +1,13 @@
#!/bin/sh
set -eux
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data --postgres-distrib /usr/local
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -l 0.0.0.0:6400 -D /data
else
"$@"
fi

Some files were not shown because too many files have changed in this diff.