Compare commits

..

1 Commits

Author SHA1 Message Date
Anastasia Lubennikova
02f8650111 Add feature flag RemoteExtensionsUseLatest to compute_ctl.
This will allow us to test new versions of extensions, without waiting for main branch commit
2024-02-08 01:40:05 +00:00
919 changed files with 45687 additions and 149951 deletions

View File

@@ -23,30 +23,10 @@ platforms = [
]
[final-excludes]
workspace-members = [
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
# from depending on workspace-hack because most of the dependencies are not used.
"vm_monitor",
# All of these exist in libs and are not usually built independently.
# Putting workspace hack there adds a bottleneck for cargo builds.
"compute_api",
"consumption_metrics",
"desim",
"metrics",
"pageserver_api",
"postgres_backend",
"postgres_connection",
"postgres_ffi",
"pq_proto",
"remote_storage",
"safekeeper_api",
"tenant_size_model",
"tracing-utils",
"utils",
"wal_craft",
"walproposer",
]
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
# from depending on workspace-hack because most of the dependencies are not used.
workspace-members = ["vm_monitor"]
# Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true

View File

@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3 }
slow-timeout = { period = "20s", terminate-after = 3 }

View File

@@ -8,24 +8,20 @@
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh
# Directories
!.cargo/
!.config/
!compute/
!compute_tools/
!control_plane/
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!storage_scrubber/
!s3_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/
!trace/
!vendor/postgres-*/
!workspace_hack/

2
.gitattributes vendored
View File

@@ -1,2 +0,0 @@
# allows for nicer hunk headers with git show
*.rs diff=rust

View File

@@ -1,6 +0,0 @@
blank_issues_enabled: true
contact_links:
- name: Feature request
url: https://console.neon.tech/app/projects?modal=feedback
about: For feature requests in the Neon product, please submit via the feedback form on `https://console.neon.tech`

View File

@@ -16,9 +16,9 @@ assignees: ''
## Implementation ideas
## Tasks
```[tasklist]
- [ ] Example Task
### Tasks
```

View File

@@ -1,22 +1,14 @@
self-hosted-runner:
labels:
- arm64
- dev
- gen3
- large
- large-arm64
# Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
- macos-14
- small
- small-arm64
- us-east-2
config-variables:
- AZURE_DEV_CLIENT_ID
- AZURE_DEV_REGISTRY_NAME
- AZURE_DEV_SUBSCRIPTION_ID
- AZURE_PROD_CLIENT_ID
- AZURE_PROD_REGISTRY_NAME
- AZURE_PROD_SUBSCRIPTION_ID
- AZURE_TENANT_ID
- BENCHMARK_PROJECT_ID_PUB
- BENCHMARK_PROJECT_ID_SUB
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
- DEV_AWS_OIDC_ROLE_ARN

View File

@@ -39,7 +39,7 @@ runs:
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || [ "${GITHUB_REF_NAME}" = "release-proxy" ]; then
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for special branches
BRANCH_OR_PR=${GITHUB_REF_NAME}
else
@@ -59,7 +59,7 @@ runs:
BUCKET: neon-github-public-dev
# TODO: We can replace with a special docker image with Java and Allure pre-installed
- uses: actions/setup-java@v4
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '17'
@@ -76,8 +76,8 @@ runs:
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.27.0
ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777
ALLURE_VERSION: 2.24.0
ALLURE_ZIP_SHA256: 60b1d6ce65d9ef24b23cf9c2c19fd736a123487c38e54759f1ed1a7a77353c90
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
- name: Acquire lock
@@ -150,7 +150,7 @@ runs:
# Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
# and to keep files on the host to upload them to the database
time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/"
time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
# Generate redirect
cat <<EOF > ${WORKDIR}/index.html
@@ -180,10 +180,10 @@ runs:
fi
- name: Cache poetry deps
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Store Allure test stat in the DB (new)
if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}
@@ -215,7 +215,7 @@ runs:
rm -rf ${WORKDIR}
fi
- uses: actions/github-script@v7
- uses: actions/github-script@v6
if: always()
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}

View File

@@ -19,7 +19,7 @@ runs:
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || [ "${GITHUB_REF_NAME}" = "release-proxy" ]; then
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for special branches
BRANCH_OR_PR=${GITHUB_REF_NAME}
else

View File

@@ -26,7 +26,7 @@ runs:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)

View File

@@ -3,14 +3,14 @@ description: 'Create Branch using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to create Branch in'
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'

View File

@@ -3,17 +3,17 @@ description: 'Delete Branch using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project which should be deleted'
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
description: 'ID of the branch to delete'
desctiption: 'ID of the branch to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"

View File

@@ -3,19 +3,22 @@ description: 'Create Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
region_id:
description: 'Region ID, if not set the project will be created in the default region'
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
description: 'Postgres version; default is 16'
default: '16'
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
provisioner:
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units:
description: '[Min, Max] compute units'
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]'
outputs:
@@ -34,6 +37,10 @@ runs:
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
fi
project=$(curl \
"https://${API_HOST}/api/v2/projects" \
--fail \
@@ -45,7 +52,7 @@ runs:
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION},
\"region_id\": \"${REGION_ID}\",
\"provisioner\": \"k8s-neonvm\",
\"provisioner\": \"${PROVISIONER}\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": { }
@@ -68,5 +75,6 @@ runs:
API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }}
PROVISIONER: ${{ inputs.provisioner }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}

View File

@@ -3,14 +3,14 @@ description: 'Delete Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to delete'
desctiption: 'ID of the Project to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"

View File

@@ -43,7 +43,7 @@ inputs:
pg_version:
description: 'Postgres version to use for tests'
required: false
default: 'v16'
default: 'v14'
benchmark_durations:
description: 'benchmark durations JSON'
required: false
@@ -56,14 +56,14 @@ runs:
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
@@ -71,7 +71,7 @@ runs:
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
prefix: latest
# The lack of compatibility snapshot (for example, for the new Postgres version)
@@ -80,15 +80,16 @@ runs:
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v4
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
@@ -113,8 +114,6 @@ runs:
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${PG_VERSION#v}
export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
@@ -130,8 +129,8 @@ runs:
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n sets the number of parallel processes that pytest-xdist will run
EXTRA_PARAMS="-n12 $EXTRA_PARAMS"
# -n16 uses sixteen processes to run tests via pytest-xdist
EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist
@@ -169,28 +168,23 @@ runs:
EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
fi
if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
else
cov_prefix=()
fi
# Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
QUERIES=("SELECT version()")
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")
fi
for q in "${QUERIES[@]}"; do
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "${q}"
done
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
fi
# Run the tests.
#
# --alluredir saves test results in Allure format (in a specified directory)
# The junit.xml file allows CI tools to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
@@ -199,6 +193,7 @@ runs:
#
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \
--tb=short \
--verbose \
@@ -211,13 +206,13 @@ runs:
fi
- name: Upload compatibility snapshot
# Note, that we use `github.base_ref` which is a target branch for a PR
if: github.event_name == 'pull_request' && github.base_ref == 'release'
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
# Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
prefix: latest
- name: Upload test results
if: ${{ !cancelled() }}

View File

@@ -1,36 +0,0 @@
name: "Set custom docker config directory"
description: "Create a directory for docker config and set DOCKER_CONFIG"
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
runs:
using: "composite"
steps:
- name: Show warning on GitHub-hosted runners
if: runner.environment == 'github-hosted'
shell: bash -euo pipefail {0}
run: |
# Using the following environment variables to find a path to the workflow file
# ${GITHUB_WORKFLOW_REF} - octocat/hello-world/.github/workflows/my-workflow.yml@refs/heads/my_branch
# ${GITHUB_REPOSITORY} - octocat/hello-world
# ${GITHUB_REF} - refs/heads/my_branch
# From https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/variables
filename_with_ref=${GITHUB_WORKFLOW_REF#"$GITHUB_REPOSITORY/"}
filename=${filename_with_ref%"@$GITHUB_REF"}
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message
title='Unnecessary usage of `.github/actions/set-docker-config-dir`'
message='No need to use `.github/actions/set-docker-config-dir` action on GitHub-hosted runners'
echo "::warning file=${filename},title=${title}::${message}"
- uses: pyTooling/Actions/with-post-step@74afc5a42a17a046c90c68cb5cfa627e5c6c5b6b # v1.0.7
env:
DOCKER_CONFIG: .docker-custom-${{ github.run_id }}-${{ github.run_attempt }}
with:
main: |
mkdir -p "${DOCKER_CONFIG}"
echo DOCKER_CONFIG=${DOCKER_CONFIG} | tee -a $GITHUB_ENV
post: |
if [ -d "${DOCKER_CONFIG}" ]; then
rm -r "${DOCKER_CONFIG}"
fi

View File

@@ -8,7 +8,7 @@ inputs:
description: "A directory or file to upload"
required: true
prefix:
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
@@ -45,7 +45,7 @@ runs:
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)

View File

@@ -1,168 +0,0 @@
name: Prepare benchmarking databases by restoring dumps
on:
workflow_call:
# no inputs needed
defaults:
run:
shell: bash -euxo pipefail {0}
jobs:
setup-databases:
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
strategy:
fail-fast: false
matrix:
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
database: [ clickbench, tpch, userexample ]
env:
LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
PLATFORM: ${{ matrix.platform }}
PG_BINARIES: /tmp/neon/pg_install/v16/bin
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Set up Connection String
id: set-up-prep-connstr
run: |
case "${PLATFORM}" in
neon)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
aws-rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
aws-aurora-serverless-v2-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
- name: Create benchmark_restore_status table if it does not exist
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
# to avoid a race condition of multiple jobs trying to create the table at the same time,
# we use an advisory lock
run: |
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
SELECT pg_advisory_lock(4711);
CREATE TABLE IF NOT EXISTS benchmark_restore_status (
databasename text primary key,
restore_done boolean
);
SELECT pg_advisory_unlock(4711);
"
- name: Check if restore is already done
id: check-restore-done
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
skip=false
if ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM benchmark_restore_status WHERE databasename='${{ env.DATABASE_NAME }}' AND restore_done=true;" | grep -q 1; then
echo "Restore already done for database ${{ env.DATABASE_NAME }} on platform ${{ env.PLATFORM }}. Skipping this database."
skip=true
fi
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
- name: Check and create database if it does not exist
if: steps.check-restore-done.outputs.skip != 'true'
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
DB_EXISTS=$(${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM pg_database WHERE datname='${{ env.DATABASE_NAME }}'")
if [ "$DB_EXISTS" != "1" ]; then
echo "Database ${{ env.DATABASE_NAME }} does not exist. Creating it..."
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "CREATE DATABASE \"${{ env.DATABASE_NAME }}\";"
else
echo "Database ${{ env.DATABASE_NAME }} already exists."
fi
- name: Download dump from S3 to /tmp/dumps
if: steps.check-restore-done.outputs.skip != 'true'
env:
DATABASE_NAME: ${{ matrix.database }}
run: |
mkdir -p /tmp/dumps
aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
- name: Replace database name in connection string
if: steps.check-restore-done.outputs.skip != 'true'
id: replace-dbname
env:
DATABASE_NAME: ${{ matrix.database }}
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
run: |
# Extract the part before the database name
base_connstr="${BENCHMARK_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${BENCHMARK_CONNSTR#*\?}"
# Reconstruct the new connection string
if [ "$query_params" != "$BENCHMARK_CONNSTR" ]; then
new_connstr="${base_connstr}/${DATABASE_NAME}?${query_params}"
else
new_connstr="${base_connstr}/${DATABASE_NAME}"
fi
echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
- name: Restore dump
if: steps.check-restore-done.outputs.skip != 'true'
env:
DATABASE_NAME: ${{ matrix.database }}
DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
# the following works only with larger computes:
# PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
# we add the || true because:
# the dumps were created with Neon and contain neon extensions that are not
# available in RDS, so we will always report an error, but we can ignore it
run: |
${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
-d "${DATABASE_CONNSTR}" /tmp/dumps/${DATABASE_NAME}.pg_dump || true
- name: Update benchmark_restore_status table
if: steps.check-restore-done.outputs.skip != 'true'
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
INSERT INTO benchmark_restore_status (databasename, restore_done) VALUES ('${{ env.DATABASE_NAME }}', true)
ON CONFLICT (databasename) DO UPDATE SET restore_done = true;
"

View File

@@ -1,324 +0,0 @@
name: Build and Test Locally
on:
workflow_call:
inputs:
arch:
description: 'x64 or arm64'
required: true
type: string
build-tag:
description: 'build tag'
required: true
type: string
build-tools-image:
description: 'build-tools image'
required: true
type: string
build-type:
description: 'debug or release'
required: true
type: string
pg-versions:
description: 'a json array of postgres versions to run regression tests on'
required: true
type: string
defaults:
run:
shell: bash -euxo pipefail {0}
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs:
build-neon:
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# Raise locked memory limit for tokio-epoll-uring.
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
env:
BUILD_TYPE: ${{ inputs.build-type }}
GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
BUILD_TAG: ${{ inputs.build-tag }}
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16 17; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- uses: actions/checkout@v4
with:
submodules: true
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set pg 17 revision for caching
id: pg_v17_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
# corresponding Cargo.toml files for their descriptions.
- name: Set env variables
env:
ARCH: ${{ inputs.arch }}
run: |
CARGO_FEATURES="--features testing"
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked --release"
fi
{
echo "cov_prefix=${cov_prefix}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: mold -run make postgres-v17 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
- name: Install rust binaries
env:
ARCH: ${{ inputs.arch }}
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
- name: Run rust tests
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
export LD_LIBRARY_PATH
#nextest does not yet support running doctests
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
# run all non-pageserver tests
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
# run pageserver tests with different settings
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
- name: Install postgres binaries
run: |
# Use tar to copy files matching the pattern, preserving the paths in the destionation
tar c \
pg_install/v* \
pg_install/build/*/src/test/regress/*.so \
pg_install/build/*/src/test/regress/pg_regress \
pg_install/build/*/src/test/isolation/isolationtester \
pg_install/build/*/src/test/isolation/pg_isolation_regress \
| tar x -C /tmp/neon
- name: Upload Neon artifact
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
path: /tmp/neon
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'
uses: ./.github/actions/save-coverage-data
regress-tests:
# Don't run regression tests on debug arm64 builds
if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
needs: [ build-neon ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
pg_version: ${{ fromJson(inputs.pg-versions) }}
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Pytest regression tests
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
needs_postgres_source: true
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_flaky: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
- name: Merge and upload coverage data
if: |
false &&
inputs.build-type == 'debug' && matrix.pg_version == 'v16'
uses: ./.github/actions/save-coverage-data

View File

@@ -1,56 +0,0 @@
name: Push images to ACR
on:
workflow_call:
inputs:
client_id:
description: Client ID of Azure managed identity or Entra app
required: true
type: string
image_tag:
description: Tag for the container image
required: true
type: string
images:
description: Images to push
required: true
type: string
registry_name:
description: Name of the container registry
required: true
type: string
subscription_id:
description: Azure subscription ID
required: true
type: string
tenant_id:
description: Azure tenant ID
required: true
type: string
jobs:
push-to-acr:
runs-on: ubuntu-22.04
permissions:
contents: read # This is required for actions/checkout
id-token: write # This is required for Azure Login to work.
steps:
- name: Azure login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
with:
client-id: ${{ inputs.client_id }}
subscription-id: ${{ inputs.subscription_id }}
tenant-id: ${{ inputs.tenant_id }}
- name: Login to ACR
run: |
az acr login --name=${{ inputs.registry_name }}
- name: Copy docker images to ACR ${{ inputs.registry_name }}
run: |
images='${{ inputs.images }}'
for image in ${images}; do
docker buildx imagetools create \
-t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
neondatabase/${image}:${{ inputs.image_tag }}
done

View File

@@ -16,15 +16,8 @@ concurrency:
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name}}
actionlint:
needs: [ check-permissions ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: reviewdog/action-actionlint@v1
@@ -36,16 +29,3 @@ jobs:
fail_on_error: true
filter_mode: nofilter
level: error
- name: Disallow 'ubuntu-latest' runners
run: |
PAT='^\s*runs-on:.*-latest'
if grep -ERq $PAT .github/workflows; then
grep -ERl $PAT .github/workflows |\
while read -r f
do
l=$(grep -nE $PAT $f | awk -F: '{print $1}' | head -1)
echo "::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'"
done
exit 1
fi

View File

@@ -18,7 +18,6 @@ on:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: false
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -44,7 +43,7 @@ jobs:
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -60,50 +59,24 @@ jobs:
github.event.action == 'labeled' &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
ref: main
token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Look for existing PR
id: get-pr
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
- name: Get changed labels
id: get-labels
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
paste -sd , -)
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
- run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}"
- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED == ''
- name: Create a Pull Request for CI run (if required)
if: steps.get-pr.outputs.ALREADY_CREATED == ''
env:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
@@ -114,33 +87,16 @@ jobs:
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
EOF
LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \
grep -E '^run' | paste -sd , -)
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
if [ -z "${ALREADY_CREATED}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body-file "body.md" \
--head "${BRANCH}" \
--base "main" \
--label ${LABELS} \
--label "run-e2e-tests-in-draft" \
--draft
- name: Modify the existing pull request (if required)
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
run: |
ADD_CMD=
REMOVE_CMD=
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
fi
- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED != ''
cleanup:
# Close PRs and delete branchs if the original PR is closed.
@@ -152,7 +108,7 @@ jobs:
github.event.action == 'closed' &&
github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch

View File

@@ -12,6 +12,7 @@ on:
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 3 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
inputs:
region_id:
@@ -37,11 +38,6 @@ on:
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
required: false
default: false
run_only_pgvector_tests:
type: boolean
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
required: false
default: false
defaults:
run:
@@ -54,55 +50,28 @@ concurrency:
jobs:
bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
strategy:
fail-fast: false
matrix:
include:
- DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
- DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging"
region_id: 'azure-eastus2'
RUNNER: [ self-hosted, eastus2, x64 ]
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
PLATFORM: "neon-staging"
runs-on: ${{ matrix.RUNNER }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials # necessary on Azure runners
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
@@ -110,7 +79,7 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
@@ -121,18 +90,10 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params:
-m remote_cluster
--sparse-ordering
--timeout 14400
--ignore test_runner/performance/test_perf_olap.py
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -146,7 +107,6 @@ jobs:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
@@ -155,119 +115,22 @@ jobs:
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
replication-tests:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Run Logical Replication benchmarks
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_logical_replication.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}
- name: Run Physical Replication benchmarks
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_physical_replication.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
slack-message: |
Periodic replication testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:
# - neonvm-captest-new: Freshly created project (1 CU)
# - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region
# - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region
# - neonvm-captest-reuse: Reusing existing project
# - neon-captest-new: Freshly created project (1 CU)
# - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neon-captest-reuse: Reusing existing project
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
env:
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
@@ -277,37 +140,22 @@ jobs:
- name: Generate matrix for pgbench benchmark
id: pgbench-compare-matrix
run: |
region_id_default=${{ env.DEFAULT_REGION_ID }}
runner_default='["self-hosted", "us-east-2", "x64"]'
runner_azure='["self-hosted", "eastus2", "x64"]'
image_default="neondatabase/build-tools:pinned"
matrix='{
"pg_version" : [
16
],
"region_id" : [
"'"$region_id_default"'"
],
"platform": [
"neonvm-captest-new",
"neonvm-captest-reuse",
"neon-captest-new",
"neon-captest-reuse",
"neonvm-captest-new"
],
"db_size": [ "10gb" ],
"runner": ['"$runner_default"'],
"image": [ "'"$image_default"'" ],
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
"include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" },
{ "platform": "neonvm-captest-freetier", "db_size": "3gb" },
{ "platform": "neonvm-captest-new", "db_size": "50gb" }]
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-aurora", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]')
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "platform": "rds-aurora", "db_size": "50gb"}]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -317,13 +165,13 @@ jobs:
run: |
matrix='{
"platform": [
"neonvm-captest-reuse"
"neon-captest-reuse"
]
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -333,7 +181,7 @@ jobs:
run: |
matrix='{
"platform": [
"neonvm-captest-reuse"
"neon-captest-reuse"
],
"scale": [
"10"
@@ -342,22 +190,13 @@ jobs:
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]')
{ "platform": "rds-aurora", "scale": "10" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
prepare_AWS_RDS_databases:
uses: ./.github/workflows/_benchmarking_preparation.yml
secrets: inherit
pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
needs: [ generate-matrices, prepare_AWS_RDS_databases ]
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
needs: [ generate-matrices ]
strategy:
fail-fast: false
@@ -367,58 +206,54 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: ${{ matrix.runner }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ${{ matrix.image }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
# Increase timeout to 8h, default timeout is 6h
timeout-minutes: 480
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neonvm-captest-sharding-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
;;
neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
@@ -435,6 +270,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
@@ -443,7 +284,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -457,7 +297,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -471,7 +310,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -485,7 +323,6 @@ jobs:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
@@ -494,143 +331,7 @@ jobs:
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-pgvector:
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
strategy:
fail-fast: false
matrix:
include:
- PLATFORM: "neonvm-captest-pgvector"
RUNNER: [ self-hosted, us-east-2, x64 ]
- PLATFORM: "azure-captest-pgvector"
RUNNER: [ self-hosted, eastus2, x64 ]
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
runs-on: ${{ matrix.RUNNER }}
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- uses: actions/checkout@v4
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
# Just to make it easier to test things locally on macOS (with arm64)
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib
LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neonvm-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
;;
azure-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Benchmark pgvector queries
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_pgvector_queries.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -642,12 +343,8 @@ jobs:
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]
strategy:
fail-fast: false
@@ -655,7 +352,7 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
@@ -665,34 +362,29 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
@@ -702,13 +394,19 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
@@ -717,7 +415,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -727,7 +424,6 @@ jobs:
TEST_OLAP_SCALE: 10
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
@@ -736,10 +432,7 @@ jobs:
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -750,12 +443,8 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]
strategy:
fail-fast: false
@@ -763,7 +452,7 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -772,43 +461,38 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Get Connstring Secret Name
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
neon-captest-reuse)
ENV_PLATFORM=CAPTEST_TPCH
;;
rds-aurora)
ENV_PLATFORM=RDS_AURORA_TPCH
;;
rds-postgres)
ENV_PLATFORM=RDS_POSTGRES_TPCH
ENV_PLATFORM=RDS_AURORA_TPCH
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -823,6 +507,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
@@ -831,7 +521,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -839,7 +528,6 @@ jobs:
TEST_OLAP_SCALE: ${{ matrix.scale }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
@@ -848,20 +536,13 @@ jobs:
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]
strategy:
fail-fast: false
@@ -869,7 +550,7 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -877,34 +558,29 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
@@ -914,13 +590,19 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run user examples
uses: ./.github/actions/run-python-test-set
with:
@@ -929,14 +611,12 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
@@ -945,10 +625,6 @@ jobs:
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,103 +0,0 @@
name: Build build-tools image
on:
workflow_call:
inputs:
image-tag:
description: "build-tools image tag"
required: true
type: string
outputs:
image-tag:
description: "build-tools tag"
value: ${{ inputs.image-tag }}
image:
description: "build-tools image"
value: neondatabase/build-tools:${{ inputs.image-tag }}
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: build-build-tools-image-${{ inputs.image-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-image:
uses: ./.github/workflows/check-build-tools-image.yml
build-image:
needs: [ check-image ]
if: needs.check-image.outputs.found == 'false'
strategy:
matrix:
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
env:
IMAGE_TAG: ${{ inputs.image-tag }}
steps:
- name: Check `input.tag` is correct
env:
INPUTS_IMAGE_TAG: ${{ inputs.image-tag }}
CHECK_IMAGE_TAG : ${{ needs.check-image.outputs.image-tag }}
run: |
if [ "${INPUTS_IMAGE_TAG}" != "${CHECK_IMAGE_TAG}" ]; then
echo "'inputs.image-tag' (${INPUTS_IMAGE_TAG}) does not match the tag of the latest build-tools image 'inputs.image-tag' (${CHECK_IMAGE_TAG})"
exit 1
fi
- uses: actions/checkout@v4
- uses: ./.github/actions/set-docker-config-dir
- uses: docker/setup-buildx-action@v3
with:
cache-binary: false
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/login-action@v3
with:
registry: cache.neon.build
username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
- uses: docker/build-push-action@v6
with:
context: .
provenance: false
push: true
pull: true
file: Dockerfile.build-tools
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
merge-images:
needs: [ build-image ]
runs-on: ubuntu-22.04
env:
IMAGE_TAG: ${{ inputs.image-tag }}
steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Create multi-arch image
run: |
docker buildx imagetools create -t neondatabase/build-tools:${IMAGE_TAG} \
neondatabase/build-tools:${IMAGE_TAG}-x64 \
neondatabase/build-tools:${IMAGE_TAG}-arm64

View File

@@ -0,0 +1,124 @@
name: Build and Push Docker Image
on:
workflow_call:
inputs:
dockerfile-path:
required: true
type: string
image-name:
required: true
type: string
outputs:
build-tools-tag:
description: "tag generated for build tools"
value: ${{ jobs.tag.outputs.build-tools-tag }}
jobs:
check-if-build-tools-dockerfile-changed:
runs-on: ubuntu-latest
outputs:
docker_file_changed: ${{ steps.dockerfile.outputs.docker_file_changed }}
steps:
- name: Check if Dockerfile.buildtools has changed
id: dockerfile
run: |
if [[ "$GITHUB_EVENT_NAME" != "pull_request" ]]; then
echo "docker_file_changed=false" >> $GITHUB_OUTPUT
exit
fi
updated_files=$(gh pr --repo neondatabase/neon diff ${{ github.event.pull_request.number }} --name-only)
if [[ $updated_files == *"Dockerfile.buildtools"* ]]; then
echo "docker_file_changed=true" >> $GITHUB_OUTPUT
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
tag:
runs-on: ubuntu-latest
needs: [ check-if-build-tools-dockerfile-changed ]
outputs:
build-tools-tag: ${{steps.buildtools-tag.outputs.image_tag}}
steps:
- name: Get buildtools tag
env:
DOCKERFILE_CHANGED: ${{ needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed }}
run: |
if [[ "$GITHUB_EVENT_NAME" == "pull_request" ]] && [[ "${DOCKERFILE_CHANGED}" == "true" ]]; then
IMAGE_TAG=$GITHUB_RUN_ID
else
IMAGE_TAG=pinned
fi
echo "image_tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT
shell: bash
id: buildtools-tag
kaniko:
if: needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed == 'true'
needs: [ tag, check-if-build-tools-dockerfile-changed ]
runs-on: [ self-hosted, dev, x64 ]
container: gcr.io/kaniko-project/executor:v1.7.0-debug
steps:
- name: Checkout
uses: actions/checkout@v1
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build
run: |
/kaniko/executor \
--reproducible \
--snapshotMode=redo \
--skip-unused-stages \
--dockerfile ${{ inputs.dockerfile-path }} \
--cache=true \
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64
kaniko-arm:
if: needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed == 'true'
needs: [ tag, check-if-build-tools-dockerfile-changed ]
runs-on: [ self-hosted, dev, arm64 ]
container: gcr.io/kaniko-project/executor:v1.7.0-debug
steps:
- name: Checkout
uses: actions/checkout@v1
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build
run: |
/kaniko/executor \
--reproducible \
--snapshotMode=redo \
--skip-unused-stages \
--dockerfile ${{ inputs.dockerfile-path }} \
--cache=true \
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
manifest:
if: needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed == 'true'
name: 'manifest'
runs-on: [ self-hosted, dev, x64 ]
needs:
- tag
- kaniko
- kaniko-arm
- check-if-build-tools-dockerfile-changed
steps:
- name: Create manifest
run: |
docker manifest create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }} \
--amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64 \
--amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
- name: Push manifest
run: docker manifest push 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}

File diff suppressed because it is too large Load Diff

View File

@@ -1,51 +0,0 @@
name: Check build-tools image
on:
workflow_call:
outputs:
image-tag:
description: "build-tools image tag"
value: ${{ jobs.check-image.outputs.tag }}
found:
description: "Whether the image is found in the registry"
value: ${{ jobs.check-image.outputs.found }}
defaults:
run:
shell: bash -euo pipefail {0}
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-image:
runs-on: ubuntu-22.04
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}
steps:
- uses: actions/checkout@v4
- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('Dockerfile.build-tools',
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT
- name: Check if such tag found in the registry
id: check-image
env:
IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }}
run: |
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
found=true
else
found=false
fi
echo "found=${found}" | tee -a $GITHUB_OUTPUT

View File

@@ -1,36 +0,0 @@
name: Check Permissions
on:
workflow_call:
inputs:
github-event-name:
required: true
type: string
defaults:
run:
shell: bash -euo pipefail {0}
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-permissions:
runs-on: ubuntu-22.04
steps:
- name: Disallow CI runs on PRs from forks
if: |
inputs.github-event-name == 'pull_request' &&
github.event.pull_request.head.repo.full_name != github.repository
run: |
if [ "${{ contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association) }}" = "true" ]; then
MESSAGE="Please create a PR from a branch of ${GITHUB_REPOSITORY} instead of a fork"
else
MESSAGE="The PR should be reviewed and labelled with 'approved-for-ci-run' to trigger a CI run"
fi
# TODO: use actions/github-script to post this message as a PR comment
echo >&2 "We don't run CI for PRs from forks"
echo >&2 "${MESSAGE}"
exit 1

View File

@@ -1,32 +0,0 @@
# A workflow from
# https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries
name: cleanup caches by a branch
on:
pull_request:
types:
- closed
jobs:
cleanup:
runs-on: ubuntu-22.04
steps:
- name: Cleanup
run: |
gh extension install actions/gh-actions-cache
echo "Fetching list of cache key"
cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 )
## Setting this to not fail the workflow while deleting cache keys.
set +e
echo "Deleting caches..."
for cacheKey in $cacheKeysForPR
do
gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
done
echo "Done"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge

View File

@@ -1,102 +0,0 @@
name: Cloud Regression Test
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '45 1 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow
group: ${{ github.workflow }}
cancel-in-progress: true
jobs:
regress:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
runs-on: us-east-2
container:
image: neondatabase/build-tools:pinned
options: --init
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Patch the test
run: |
cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
- name: Generate a random password
id: pwgen
run: |
set +x
DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
echo "::add-mask::${DBPASS//\//}"
echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
- name: Change tests according to the generated password
env:
DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
run: |
cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
for fname in sql/*.sql expected/*.out; do
sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
done
for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
USER=$(echo "${ph}" | cut -c 22-)
MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
sed -i.bak "s/${ph}/${MD5}/" expected/password.out
done
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Run the regression tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: cloud_regress
pg_version: ${{ env.DEFAULT_PG_VERSION }}
extra_params: -m remote_cluster
env:
BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # on-call-staging-stream
slack-message: |
Periodic pg_regress on staging: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,78 +0,0 @@
name: Add `external` label to issues and PRs created by external users
on:
issues:
types:
- opened
pull_request_target:
types:
- opened
workflow_dispatch:
inputs:
github-actor:
description: 'GitHub username. If empty, the username of the current user will be used'
required: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
env:
LABEL: external
jobs:
check-user:
runs-on: ubuntu-22.04
outputs:
is-member: ${{ steps.check-user.outputs.is-member }}
steps:
- name: Check whether `${{ github.actor }}` is a member of `${{ github.repository_owner }}`
id: check-user
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
ACTOR: ${{ inputs.github-actor || github.actor }}
run: |
expected_error="User does not exist or is not a member of the organization"
output_file=output.txt
for i in $(seq 1 10); do
if gh api "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}" \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" > ${output_file}; then
is_member=true
break
elif grep -q "${expected_error}" ${output_file}; then
is_member=false
break
elif [ $i -eq 10 ]; then
title="Failed to get memmbership status for ${ACTOR}"
message="The latest GitHub API error message: '$(cat ${output_file})'"
echo "::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}"
exit 1
fi
sleep 1
done
echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
add-label:
if: needs.check-user.outputs.is-member == 'false'
needs: [ check-user ]
runs-on: ubuntu-22.04
permissions:
pull-requests: write # for `gh pr edit`
issues: write # for `gh issue edit`
steps:
- name: Add `${{ env.LABEL }}` label
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ITEM_NUMBER: ${{ github.event[github.event_name == 'pull_request_target' && 'pull_request' || 'issue'].number }}
GH_CLI_COMMAND: ${{ github.event_name == 'pull_request_target' && 'pr' || 'issue' }}
run: |
gh ${GH_CLI_COMMAND} --repo ${GITHUB_REPOSITORY} edit --add-label=${LABEL} ${ITEM_NUMBER}

View File

@@ -20,25 +20,7 @@ env:
COPT: '-Werror'
jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name}}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
check-macos-build:
needs: [ check-permissions ]
if: |
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
@@ -56,6 +38,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf icu4c pkg-config
@@ -72,45 +55,34 @@ jobs:
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set pg 17 revision for caching
id: pg_v17_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Cache cargo deps
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: |
~/.cargo/registry
@@ -131,10 +103,6 @@ jobs:
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
- name: Build neon extensions
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
@@ -147,22 +115,211 @@ jobs:
- name: Check that no warnings are produced
run: ./run_clippy.sh
check-linux-arm-build:
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
CARGO_FEATURES: --features testing
CARGO_FLAGS: --release
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set env variables
run: |
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v3
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure
check-codestyle-rust-arm:
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Run cargo clippy (release)
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check
gather-rust-build-stats:
needs: [ check-permissions, build-build-tools-image ]
if: |
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
runs-on: [ self-hosted, large ]
runs-on: [ self-hosted, gen3, large ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0
@@ -172,6 +329,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
@@ -181,7 +339,7 @@ jobs:
run: make walproposer-lib -j$(nproc)
- name: Produce the build stats
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
run: cargo build --all --release --timings
- name: Upload the build stats
id: upload-stats
@@ -196,7 +354,7 @@ jobs:
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Publish build stats report
uses: actions/github-script@v7
uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}

View File

@@ -1,155 +0,0 @@
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 18 * * *' # Runs at 6 PM UTC every day
workflow_dispatch: # Allows manual triggering of the workflow
inputs:
commit_hash:
type: string
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
required: false
default: ''
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false
jobs:
trigger_bench_on_ec2_machine_in_eu_central_1:
runs-on: [ self-hosted, small ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
timeout-minutes: 360 # Set the timeout to 6 hours
env:
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
RUN_ID: ${{ github.run_id }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
AWS_DEFAULT_REGION : "eu-central-1"
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
steps:
# we don't need the neon source code because we run everything remotely
# however we still need the local github actions to run the allure step below
- uses: actions/checkout@v4
- name: Show my own (github runner) external IP address - usefull for IP allowlisting
run: curl https://ifconfig.me
- name: Start EC2 instance and wait for the instance to boot up
run: |
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
sleep 60 # sleep some time to allow cloudinit and our API server to start up
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
run: |
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
echo "Public IP of the EC2 instance: $public_ip"
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
- name: Determine commit hash
env:
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
run: |
if [ -z "$INPUT_COMMIT_HASH" ]; then
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
else
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
fi
- name: Start Bench with run_id
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
- name: Poll Test Status
id: poll_step
run: |
status=""
while [[ "$status" != "failure" && "$status" != "success" ]]; do
response=$(curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY")
echo "Response: $response"
set +x
status=$(echo $response | jq -r '.status')
echo "Test status: $status"
if [[ "$status" == "failure" ]]; then
echo "Test failed"
exit 1 # Fail the job step if status is failure
elif [[ "$status" == "success" || "$status" == "null" ]]; then
break
elif [[ "$status" == "too_many_runs" ]]; then
echo "Too many runs already running"
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
exit 1
fi
sleep 60 # Poll every 60 seconds
done
- name: Retrieve Test Logs
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
cat "test_log_${GITHUB_RUN_ID}"
- name: Create Allure report
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d ''
- name: Stop EC2 instance and wait for the instance to be stopped
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID

View File

@@ -1,211 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'poetry.lock'
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
defaults:
run:
shell: bash -euxo pipefail {0}
env:
DEFAULT_PG_VERSION: 16
PLATFORM: neon-captest-new
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
AWS_DEFAULT_REGION: eu-central-1
jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name }}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
test-logical-replication:
needs: [ build-build-tools-image ]
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init --user root
services:
clickhouse:
image: clickhouse/clickhouse-server:24.6.3.64
ports:
- 9000:9000
- 8123:8123
zookeeper:
image: quay.io/debezium/zookeeper:2.7
ports:
- 2181:2181
kafka:
image: quay.io/debezium/kafka:2.7
env:
ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
KAFKA_BROKER_ID: 1
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_JMX_PORT: 9991
ports:
- 9092:9092
debezium:
image: quay.io/debezium/connect:2.7
env:
BOOTSTRAP_SERVERS: kafka:9092
GROUP_ID: 1
CONFIG_STORAGE_TOPIC: debezium-config
OFFSET_STORAGE_TOPIC: debezium-offset
STATUS_STORAGE_TOPIC: debezium-status
DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
ports:
- 8083:8083
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: logical_repl
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
- name: Delete Neon Project
if: always()
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: github.event.schedule && failure()
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
test-postgres-client-libs:
needs: [ build-build-tools-image ]
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init --user root
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: pg_clients
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
- name: Delete Neon Project
if: always()
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: github.event.schedule && failure()
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

98
.github/workflows/pg_clients.yml vendored Normal file
View File

@@ -0,0 +1,98 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,101 +0,0 @@
name: 'Pin build-tools image'
on:
workflow_dispatch:
inputs:
from-tag:
description: 'Source tag'
required: true
type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
workflow_call:
inputs:
from-tag:
description: 'Source tag'
required: true
type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: pin-build-tools-image-${{ inputs.from-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
env:
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned
jobs:
check-manifests:
runs-on: ubuntu-22.04
outputs:
skip: ${{ steps.check-manifests.outputs.skip }}
steps:
- name: Check if we really need to pin the image
id: check-manifests
run: |
docker manifest inspect neondatabase/build-tools:${FROM_TAG} > ${FROM_TAG}.json
docker manifest inspect neondatabase/build-tools:${TO_TAG} > ${TO_TAG}.json
if diff ${FROM_TAG}.json ${TO_TAG}.json; then
skip=true
else
skip=false
fi
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
tag-image:
needs: check-manifests
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
runs-on: ubuntu-22.04
permissions:
id-token: write # for `azure/login`
steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/login-action@v3
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- name: Azure login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
with:
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
- name: Login to ACR
run: |
az acr login --name=neoneastus2
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
-t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
-t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG}

View File

@@ -19,7 +19,7 @@ on:
jobs:
notify:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]
steps:
- uses: neondatabase/dev-actions/release-pr-notify@main

View File

@@ -2,31 +2,12 @@ name: Create Release Branch
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * MON' # Storage release
- cron: '0 6 * * THU' # Proxy release
- cron: '0 6 * * 1'
workflow_dispatch:
inputs:
create-storage-release-branch:
type: boolean
description: 'Create Storage release PR'
required: false
create-proxy-release-branch:
type: boolean
description: 'Create Proxy release PR'
required: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
defaults:
run:
shell: bash -euo pipefail {0}
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04
create_release_branch:
runs-on: [ ubuntu-latest ]
permissions:
contents: write # for `git push`
@@ -37,71 +18,27 @@ jobs:
with:
ref: main
- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
- name: Get current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Create release branch
run: git checkout -b $RELEASE_BRANCH
run: git checkout -b releases/${{ steps.date.outputs.date }}
- name: Push new branch
run: git push origin $RELEASE_BRANCH
run: git push origin releases/${{ steps.date.outputs.date }}
- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Storage & Compute release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
## Release ${{ steps.date.outputs.date }}
**Please merge this Pull Request using 'Create a merge commit' button**
**Please merge this PR using 'Create a merge commit'!**
EOF
gh pr create --title "${TITLE}" \
gh pr create --title "Release ${{ steps.date.outputs.date }}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--head "releases/${{ steps.date.outputs.date }}" \
--base "release"
create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
steps:
- name: Check out code
uses: actions/checkout@v4
with:
ref: main
- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
- name: Create release branch
run: git checkout -b $RELEASE_BRANCH
- name: Push new branch
run: git push origin $RELEASE_BRANCH
- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Proxy release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release-proxy"

View File

@@ -9,15 +9,17 @@ on:
defaults:
run:
shell: bash -euxo pipefail {0}
env:
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs:
cancel-previous-e2e-tests:
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Cancel previous e2e-tests runs for this PR
@@ -29,13 +31,13 @@ jobs:
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
tag:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}
steps:
# Need `fetch-depth: 0` to count the number of commits in the branch
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
@@ -49,8 +51,6 @@ jobs:
echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
@@ -60,88 +60,59 @@ jobs:
trigger-e2e-tests:
needs: [ tag ]
runs-on: ubuntu-22.04
runs-on: [ self-hosted, gen3, small ]
env:
EVENT_ACTION: ${{ github.event.action }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
TAG: ${{ needs.tag.outputs.build-tag }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
steps:
- name: Wait for `promote-images` job to finish
# It's important to have a timeout here, the script in the step can run infinitely
timeout-minutes: 60
- name: check if ecr image are present
run: |
if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then
exit 0
fi
# For PRs we use the run id as the tag
BUILD_AND_TEST_RUN_ID=${TAG}
while true; do
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
case "$conclusion" in
success)
break
;;
failure | cancelled | skipped)
echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
exit 1
;;
*)
echo "The 'promote-images' hasn't succeed yet. Waiting..."
sleep 60
;;
esac
for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
if [ "$OUTPUT" == "" ]; then
echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
exit 1
fi
done
- name: Set e2e-platforms
id: e2e-platforms
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Default set of platforms to run e2e tests on
platforms='["docker", "k8s"]'
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or compute/Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
# If the workflow run is not a pull request, add k8s-neonvm to the list.
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
case "$f" in
vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
;;
*)
# no-op
;;
esac
done
else
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
fi
echo "e2e-platforms=${platforms}" | tee -a $GITHUB_OUTPUT
- name: Set PR's status to pending and request a remote CI test
env:
E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
REMOTE_REPO="${GITHUB_REPOSITORY_OWNER}/cloud"
# For pull requests, GH Actions set "github.sha" variable to point at a fake merge commit
# but we need to use a real sha of a latest commit in the PR's branch for the e2e job,
# to place a job run status update later.
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
# For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
gh api "/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}" \
--method POST \
--raw-field "state=pending" \
--raw-field "description=[$REMOTE_REPO] Remote CI job is about to start" \
--raw-field "context=neon-cloud-e2e"
REMOTE_REPO="${{ github.repository_owner }}/cloud"
gh workflow --repo ${REMOTE_REPO} \
run testing.yml \
--ref "main" \
--raw-field "ci_job_name=neon-cloud-e2e" \
--raw-field "commit_hash=$COMMIT_SHA" \
--raw-field "remote_repo=${GITHUB_REPOSITORY}" \
--raw-field "storage_image_tag=${TAG}" \
--raw-field "compute_image_tag=${TAG}" \
--raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
--raw-field "e2e-platforms=${E2E_PLATFORMS}"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${TAG}\",
\"compute_image_tag\": \"${TAG}\",
\"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
}
}"

View File

@@ -0,0 +1,70 @@
name: 'Update build tools image tag'
# This workflow it used to update tag of build tools in ECR.
# The most common use case is adding/moving `pinned` tag to `${GITHUB_RUN_IT}` image.
on:
workflow_dispatch:
inputs:
from-tag:
description: 'Source tag'
required: true
type: string
to-tag:
description: 'Destination tag'
required: true
type: string
default: 'pinned'
defaults:
run:
shell: bash -euo pipefail {0}
permissions: {}
jobs:
tag-image:
runs-on: [ self-hosted, gen3, small ]
env:
ECR_IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools
DOCKER_HUB_IMAGE: docker.io/neondatabase/build-tools
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: ${{ inputs.to-tag }}
steps:
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker
- name: Set custom docker config directory
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV
- uses: docker/login-action@v2
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/login-action@v2
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- uses: actions/setup-go@v5
with:
go-version: '1.21'
- name: Install crane
run: |
go install github.com/google/go-containerregistry/cmd/crane@a0658aa1d0cc7a7f1bcc4a3af9155335b6943f40 # v0.18.0
- name: Copy images
run: |
crane copy "${ECR_IMAGE}:${FROM_TAG}" "${ECR_IMAGE}:${TO_TAG}"
crane copy "${ECR_IMAGE}:${FROM_TAG}" "${DOCKER_HUB_IMAGE}:${TO_TAG}"
- name: Remove custom docker config directory
if: always()
run: |
rm -rf .docker-custom

1
.gitignore vendored
View File

@@ -9,7 +9,6 @@ test_output/
neon.iml
/.neon
/integration_tests/.neon
compaction-suite-results.*
# Coverage
*.profraw

4
.gitmodules vendored
View File

@@ -10,7 +10,3 @@
path = vendor/postgres-v16
url = https://github.com/neondatabase/postgres.git
branch = REL_16_STABLE_neon
[submodule "vendor/postgres-v17"]
path = vendor/postgres-v17
url = https://github.com/neondatabase/postgres.git
branch = REL_17_STABLE_neon

View File

@@ -1,5 +1,4 @@
# * `-A unknown_lints` do not warn about unknown lint suppressions
# that people with newer toolchains might use
# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status)
# * `-D clippy::todo` - don't let `todo!()` slip into `main`
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo"
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"

View File

@@ -1,13 +1,12 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
/control_plane/ @neondatabase/compute @neondatabase/storage
/libs/pageserver_api/ @neondatabase/compute @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute
/libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/storage
/libs/vm_monitor/ @neondatabase/autoscaling
/libs/safekeeper_api/ @neondatabase/safekeepers
/libs/vm_monitor/ @neondatabase/autoscaling @neondatabase/compute
/pageserver/ @neondatabase/storage
/pgxn/ @neondatabase/compute
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
/proxy/ @neondatabase/proxy
/safekeeper/ @neondatabase/storage
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

View File

@@ -74,11 +74,16 @@ We're using the following approach to make it work:
For details see [`approved-for-ci-run.yml`](.github/workflows/approved-for-ci-run.yml)
## How do I make build-tools image "pinned"
## How do I add the "pinned" tag to an buildtools image?
We use the `pinned` tag for `Dockerfile.buildtools` build images in our CI/CD setup, currently adding the `pinned` tag is a manual operation.
It's possible to update the `pinned` tag of the `build-tools` image using the `pin-build-tools-image.yml` workflow.
You can call it from GitHub UI: https://github.com/neondatabase/neon/actions/workflows/update_build_tools_image.yml,
or using GitHub CLI:
```bash
gh workflow -R neondatabase/neon run pin-build-tools-image.yml \
-f from-tag=cc98d9b00d670f182c507ae3783342bd7e64c31e
```
gh workflow -R neondatabase/neon run update_build_tools_image.yml \
-f from-tag=6254913013 \
-f to-tag=pinned \
# Default `-f to-tag` is `pinned`, so the parameter can be omitted.
```

2871
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -3,24 +3,21 @@ resolver = "2"
members = [
"compute_tools",
"control_plane",
"control_plane/storcon_cli",
"control_plane/attachment_service",
"pageserver",
"pageserver/compaction",
"pageserver/ctl",
"pageserver/client",
"pageserver/pagebench",
"proxy",
"safekeeper",
"storage_broker",
"storage_controller",
"storage_controller/client",
"storage_scrubber",
"s3_scrubber",
"workspace_hack",
"trace",
"libs/compute_api",
"libs/pageserver_api",
"libs/postgres_ffi",
"libs/safekeeper_api",
"libs/desim",
"libs/utils",
"libs/consumption_metrics",
"libs/postgres_backend",
@@ -41,31 +38,26 @@ license = "Apache-2.0"
## All dependency versions, used in the project
[workspace.dependencies]
ahash = "0.8"
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_core = "0.18"
azure_identity = "0.18"
azure_storage = "0.18"
azure_storage_blobs = "0.18"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.3", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.26"
aws-sdk-iam = "1.15.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.9"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
aws-types = "1.2.0"
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.14"
aws-sdk-secretsmanager = { version = "1.14.0" }
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.4"
aws-credential-types = "1.1.4"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.70"
bit_field = "0.10.2"
bindgen = "0.65"
bstr = "1.0"
byteorder = "1.4"
bytes = "1.0"
@@ -73,78 +65,74 @@ camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive"] }
comfy-table = "7.1"
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-utils = "0.8.5"
dashmap = { version = "5.5.0", features = ["raw-api"] }
either = "1.8"
enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fallible-iterator = "0.2"
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
hashbrown = "0.14"
hashlink = "0.9.1"
hashbrown = "0.13"
hashlink = "0.8.1"
hdrhistogram = "7.5.2"
hex = "0.4"
hex-literal = "0.4"
hmac = "0.12.1"
hostname = "0.4"
http = {version = "1.1.0", features = ["std"]}
hostname = "0.3.1"
http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
tokio-tungstenite = "0.20.0"
indexmap = "2"
indoc = "2"
hyper-tungstenite = "0.11"
inotify = "0.10.2"
ipnet = "2.9.0"
itertools = "0.10"
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
memoffset = "0.8"
native-tls = "0.2"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.24"
opentelemetry_sdk = "0.24"
opentelemetry-otlp = { version = "0.17", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.16"
opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12"
parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "49.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
procfs = "0.14"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
redis = { version = "0.24.0", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_24"] }
reqwest-middleware = "0.3.0"
reqwest-retry = "0.5"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.2.0"
reqwest-retry = "0.2.2"
routerify = "3"
rpds = "0.13"
rustc-hash = "1.1.0"
rustls = "0.22"
rustls-pemfile = "2"
rustls = "0.21"
rustls-pemfile = "1"
rustls-split = "0.3"
scopeguard = "1.1"
sysinfo = "0.29.2"
sd-notify = "0.4.1"
send-future = "0.1.0"
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"
@@ -155,63 +143,52 @@ signal-hook = "0.3"
smallvec = "1.11"
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
strum = "0.26"
strum_macros = "0.26"
"subtle" = "2.5.0"
svg_fmt = "0.4.3"
strum = "0.24"
strum_macros = "0.24"
svg_fmt = "0.4.1"
sync_wrapper = "0.1.2"
tar = "0.4"
test-context = "0.3"
task-local-extensions = "0.1.4"
test-context = "0.1"
thiserror = "1.0"
tikv-jemallocator = "0.5"
tikv-jemalloc-ctl = "0.5"
tls-listener = { version = "0.7", features = ["rustls", "hyper-h1"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.11.0"
tokio-rustls = "0.25"
tokio-postgres-rustls = "0.10.0"
tokio-rustls = "0.24"
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tower-service = "0.3.2"
tracing = "0.1"
tracing-error = "0.2"
tracing-opentelemetry = "0.25"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
try-lock = "0.2.5"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.20.0"
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
twox-hash = { version = "1.6.3", default-features = false }
typed-json = "0.1"
url = "2.2"
urlencoding = "2.1"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"
rustls-native-certs = "0.7"
webpki-roots = "0.25"
x509-parser = "0.15"
whoami = "1.5.1"
## TODO replace this with tracing
env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
# We want to use the 'neon' branch for these, but there's currently one
# incompatible change on the branch. See:
#
# - PR #8076 which contained changes that depended on the new changes in
# the rust-postgres crate, and
# - PR #8654 which reverted those changes and made the code in proxy incompatible
# with the tip of the 'neon' branch again.
#
# When those proxy changes are re-applied (see PR #8747), we can switch using
# the tip of the 'neon' branch again.
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
@@ -219,16 +196,13 @@ consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
pageserver_client = { path = "./pageserver/client" }
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
desim = { version = "0.1", path = "./libs/desim" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_controller_client = { path = "./storage_controller/client" }
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
@@ -240,15 +214,20 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }
## Build dependencies
criterion = "0.5.1"
rcgen = "0.12"
rcgen = "0.11"
rstest = "0.18"
camino-tempfile = "1.0.2"
tonic-build = "0.9"
[patch.crates-io]
# Needed to get `tokio-postgres-rustls` to depend on our fork.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
# bug fixes for UUID
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
################# Binary contents sections

View File

@@ -5,8 +5,6 @@
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -15,12 +13,11 @@ WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE=release
ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
@@ -31,34 +28,40 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
ARG BUILD_TAG
ARG STABLE_PG_VERSION
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
COPY --chown=nonroot . .
ARG ADDITIONAL_RUSTFLAGS
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
&& mold -run cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
--bin safekeeper \
--bin storage_broker \
--bin storage_controller \
--bin attachment_service \
--bin proxy \
--bin neon_local \
--bin storage_scrubber \
--locked --release
--locked --release \
&& cachepot -s
# Build final image
#
FROM debian:bullseye-slim
ARG DEFAULT_PG_VERSION
WORKDIR /data
RUN set -e \
@@ -66,6 +69,8 @@ RUN set -e \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
libicu67 \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
@@ -76,38 +81,26 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/attachment_service /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && \
echo "id=1234" > "/data/.neon/identity.toml" && \
echo "broker_endpoint='http://storage_broker:50051'\n" \
"pg_distrib_dir='/usr/local/'\n" \
"listen_pg_addr='0.0.0.0:6400'\n" \
"listen_http_addr='0.0.0.0:9898'\n" \
"availability_zone='local'\n" \
> /data/.neon/pageserver.toml && \
chown -R neon:neon /data/.neon
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
# that want a particular postgres version will select it explicitly: this is just a default.
ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoint='http://storage_broker:50051'" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"
VOLUME ["/data"]
USER neon
EXPOSE 6400
EXPOSE 9898
CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]

View File

@@ -1,21 +1,10 @@
FROM debian:bullseye-slim
# Use ARG as a build-time environment variable here to allow.
# It's not supposed to be set outside.
# Alternatively it can be obtained using the following command
# ```
# . /etc/os-release && echo "${VERSION_CODENAME}"
# ```
ARG DEBIAN_VERSION_CODENAME=bullseye
# Add nonroot user
RUN useradd -ms /bin/bash nonroot -b /home
SHELL ["/bin/bash", "-c"]
# System deps
#
# 'gdb' is included so that we get backtraces of core dumps produced in
# regression tests
RUN set -e \
&& apt update \
&& apt install -y \
@@ -27,7 +16,6 @@ RUN set -e \
cmake \
curl \
flex \
gdb \
git \
gnupg \
gzip \
@@ -38,6 +26,7 @@ RUN set -e \
liblzma-dev \
libncurses5-dev \
libncursesw5-dev \
libpq-dev \
libreadline-dev \
libseccomp-dev \
libsqlite3-dev \
@@ -62,40 +51,29 @@ RUN set -e \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# protobuf-compiler (protoc)
ENV PROTOC_VERSION=25.1
ENV PROTOC_VERSION 25.1
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
&& unzip -q protoc.zip -d protoc \
&& mv protoc/bin/protoc /usr/local/bin/protoc \
&& mv protoc/include/google /usr/local/include/google \
&& rm -rf protoc.zip protoc
# s5cmd
ENV S5CMD_VERSION=2.2.2
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
&& chmod +x s5cmd \
&& mv s5cmd /usr/local/bin/s5cmd
# LLVM
ENV LLVM_VERSION=18
ENV LLVM_VERSION=17
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& apt update \
&& apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \
# PostgreSQL 14
RUN curl -fsSL 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' | apt-key add - \
&& echo 'deb http://apt.postgresql.org/pub/repos/apt bullseye-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
&& apt update \
&& apt install -y docker-ce docker-ce-cli \
&& apt install -y postgresql-client-14 \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Configure sudo & docker
RUN usermod -aG sudo nonroot && \
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
usermod -aG docker nonroot
# AWS CLI
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
&& unzip -q awscliv2.zip \
@@ -103,7 +81,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip
# Mold: A Modern Linker
ENV MOLD_VERSION=v2.33.0
ENV MOLD_VERSION v2.4.0
RUN set -e \
&& git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \
@@ -128,51 +106,12 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
&& make install \
&& rm -rf ../lcov.tar.gz
# Compile and install the static OpenSSL library
ENV OPENSSL_VERSION=1.1.1w
ENV OPENSSL_PREFIX=/usr/local/openssl
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
cd /tmp && \
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
cd /tmp/openssl-${OPENSSL_VERSION} && \
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
make -j "$(nproc)" && \
make install && \
cd /tmp && \
rm -rf /tmp/openssl-${OPENSSL_VERSION}
# Use the same version of libicu as the compute nodes so that
# clusters created using inidb on pageserver can be used by computes.
#
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
# Debian has a few patches on top of 67.1 that we're not adding here.
ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
pushd icu/source && \
./configure --prefix=${ICU_PREFIX} --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
make -j "$(nproc)" && \
make install && \
popd && \
rm -rf icu && \
rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
popd
# Switch to nonroot user
USER nonroot:nonroot
WORKDIR /home/nonroot
# Python
ENV PYTHON_VERSION=3.9.19 \
ENV PYTHON_VERSION=3.9.18 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \
@@ -196,14 +135,9 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.81.0
ENV RUSTC_VERSION=1.75.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.30
ARG CARGO_DENY_VERSION=0.16.1
ARG CARGO_HACK_VERSION=0.6.31
ARG CARGO_NEXTEST_VERSION=0.9.72
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -211,14 +145,16 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
export PATH="$HOME/.cargo/bin:$PATH" && \
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
rustup component add llvm-tools-preview rustfmt clippy && \
cargo install --git https://github.com/paritytech/cachepot && \
cargo install rustfilt && \
cargo install cargo-hakari && \
cargo install cargo-deny && \
cargo install cargo-hack && \
cargo install cargo-nextest && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git
ENV RUSTC_WRAPPER=cachepot
# Show versions
RUN whoami \
@@ -228,6 +164,3 @@ RUN whoami \
&& rustup --version --verbose \
&& rustc --version --verbose \
&& clang --version
# Set following flag to check in Makefile if its running in Docker
RUN touch /home/nonroot/.docker_build

File diff suppressed because it is too large Load Diff

32
Dockerfile.compute-tools Normal file
View File

@@ -0,0 +1,32 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
WORKDIR /home/nonroot
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --locked --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:bullseye-slim
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl

129
Makefile
View File

@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
OPENSSL_PREFIX_DIR := /usr/local/openssl
ICU_PREFIX_DIR := /usr/local/icu
#
# We differentiate between release / debug build types using the BUILD_TYPE
# environment variable.
@@ -23,31 +20,19 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
# Exclude static build openssl, icu for local build (MacOS, Linux)
# Only keep for build type release and debug
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
PG_CONFIGURE_OPTS += --with-icu
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
endif
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
ifndef DISABLE_HOMEBREW
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX := $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# Use -C option so that when PostgreSQL "make install" installs the
@@ -66,11 +51,9 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
# Set PQ_LIB_DIR to make sure `attachment_service` get linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
#
# Top level Makefile to build Neon and PostgreSQL
#
@@ -81,46 +64,32 @@ all: neon postgres neon-pg-ext
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers walproposer-lib cargo-target-dir
neon: postgres-headers walproposer-lib
+@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
.PHONY: cargo-target-dir
cargo-target-dir:
# https://github.com/rust-lang/cargo/issues/14281
mkdir -p target
test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
mkdir -p $(POSTGRES_INSTALL_DIR)
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+@echo "Configuring Postgres $* build"
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
exit 1; }
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
VERSION=$*; \
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v17
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
.PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15
@@ -150,8 +119,6 @@ postgres-%: postgres-configure-% \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+@echo "Compiling amcheck $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
+@echo "Compiling test_decoding $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
.PHONY: postgres-clean-%
postgres-clean-%:
@@ -192,8 +159,8 @@ neon-pg-ext-%: postgres-%
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-clean-ext-%
neon-pg-clean-ext-%:
.PHONY: neon-pg-ext-clean-%
neon-pg-ext-clean-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -217,31 +184,29 @@ neon-pg-clean-ext-%:
# they depend on openssl and other libraries that are not included in our
# Rust build.
.PHONY: walproposer-lib
walproposer-lib: neon-pg-ext-v17
walproposer-lib: neon-pg-ext-v16
+@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
pg_strong_random.o
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
checksum_helper.o \
cryptohash_openssl.o \
pg_crc32c.o \
hmac_openssl.o \
cryptohash_openssl.o \
scram-common.o \
md5_common.o \
parse_manifest.o \
scram-common.o
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
pg_crc32c.o
checksum_helper.o
endif
.PHONY: walproposer-lib-clean
walproposer-lib-clean:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -249,48 +214,42 @@ walproposer-lib-clean:
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15 \
neon-pg-ext-v16 \
neon-pg-ext-v17
neon-pg-ext-v16
.PHONY: neon-pg-clean-ext
neon-pg-clean-ext: \
neon-pg-clean-ext-v14 \
neon-pg-clean-ext-v15 \
neon-pg-clean-ext-v16 \
neon-pg-clean-ext-v17
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15 \
neon-pg-ext-clean-v16
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15 \
postgres-v16 \
postgres-v17
postgres-v16
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15 \
postgres-headers-v16 \
postgres-headers-v17
postgres-headers-v16
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15 \
postgres-clean-v16 \
postgres-clean-v17
postgres-clean-v16
.PHONY: postgres-check
postgres-check: \
postgres-check-v14 \
postgres-check-v15 \
postgres-check-v16 \
postgres-check-v17
postgres-check-v16
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-clean-ext
clean: postgres-clean neon-pg-ext-clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
@@ -331,13 +290,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
rm -f pg*.BAK
# Indent pxgn/neon.
.PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
.PHONY: pgindent
neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent

2
NOTICE
View File

@@ -1,5 +1,5 @@
Neon
Copyright 2022 - 2024 Neon Inc.
Copyright 2022 Neon Inc.
The PostgreSQL submodules in vendor/ are licensed under the PostgreSQL license.
See vendor/postgres-vX/COPYRIGHT for details.

View File

@@ -1,13 +1,11 @@
[![Neon](https://github.com/neondatabase/neon/assets/11527560/f15a17f0-836e-40c5-b35d-030606a6b660)](https://neon.tech)
[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
# Neon
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
## Quick start
Try the [Neon Free Tier](https://neon.tech/github) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
Alternatively, compile and run the project [locally](#running-local-installation).
@@ -64,12 +62,6 @@ brew install protobuf openssl flex bison icu4c pkg-config
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
```
If you get errors about missing `m4` you may have to install it manually:
```
brew install m4
brew link --force m4
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
```
# recommended approach from https://www.rust-lang.org/tools/install
@@ -132,7 +124,7 @@ make -j`sysctl -n hw.logicalcpu` -s
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
#### Running neon database
@@ -238,22 +230,8 @@ postgres=# select * from t;
> cargo neon stop
```
More advanced usages can be found at [Control Plane and Neon Local](./control_plane/README.md).
#### Handling build failures
If you encounter errors during setting up the initial tenant, it's best to stop everything (`cargo neon stop`) and remove the `.neon` directory. Then fix the problems, and start the setup again.
## Running tests
### Rust unit tests
We are using [`cargo-nextest`](https://nexte.st/) to run the tests in Github Workflows.
Some crates do not support running plain `cargo test` anymore, prefer `cargo nextest run` instead.
You can install `cargo-nextest` with `cargo install cargo-nextest`.
### Integration tests
Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
```sh
@@ -268,25 +246,9 @@ By default, this runs both debug and release modes, and all supported postgres v
testing locally, it is convenient to run just one set of permutations, like this:
```sh
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
```
## Flamegraphs
You may find yourself in need of flamegraphs for software in this repository.
You can use [`flamegraph-rs`](https://github.com/flamegraph-rs/flamegraph) or the original [`flamegraph.pl`](https://github.com/brendangregg/FlameGraph). Your choice!
>[!IMPORTANT]
> If you're using `lld` or `mold`, you need the `--no-rosegment` linker argument.
> It's a [general thing with Rust / lld / mold](https://crbug.com/919499#c16), not specific to this repository.
> See [this PR for further instructions](https://github.com/neondatabase/neon/pull/6764).
## Cleanup
For cleaning up the source tree from build artifacts, run `make clean` in the source directory.
For removing every artifact from build and configure steps, run `make distclean`, and also consider removing the cargo binaries in the `target` directory, as well as the database in the `.neon` directory. Note that removing the `.neon` directory will remove your database, with all data in it. You have been warned!
## Documentation
[docs](/docs) Contains a top-level overview of all available markdown documentation.

View File

@@ -2,13 +2,4 @@ disallowed-methods = [
"tokio::task::block_in_place",
# Allow this for now, to deny it later once we stop using Handle::block_on completely
# "tokio::runtime::Handle::block_on",
# use tokio_epoll_uring_ext instead
"tokio_epoll_uring::thread_local_system",
]
disallowed-macros = [
# use std::pin::pin
"futures::pin_mut",
# cannot disallow this, because clippy finds used from tokio macros
#"tokio::pin",
]

View File

@@ -1,21 +0,0 @@
This directory contains files that are needed to build the compute
images, or included in the compute images.
Dockerfile.compute-node
To build the compute image
vm-image-spec.yaml
Instructions for vm-builder, to turn the compute-node image into
corresponding vm-compute-node image.
etc/
Configuration files included in /etc in the compute image
patches/
Some extensions need to be patched to work with Neon. This
directory contains such patches. They are applied to the extension
sources in Dockerfile.compute-node
In addition to these, postgres itself, the neon postgres extension,
and compute_ctl are built and copied into the compute image by
Dockerfile.compute-node.

View File

@@ -1,331 +0,0 @@
collector_name: neon_collector
metrics:
- metric_name: lfc_misses
type: gauge
help: 'lfc_misses'
key_labels:
values: [lfc_misses]
query: |
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
type: gauge
help: 'LFC chunks used (chunk = 1MB)'
key_labels:
values: [lfc_used]
query: |
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
type: gauge
help: 'lfc_hits'
key_labels:
values: [lfc_hits]
query: |
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
type: gauge
help: 'lfc_writes'
key_labels:
values: [lfc_writes]
query: |
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
type: gauge
help: 'LFC cache size limit in bytes'
key_labels:
values: [lfc_cache_size_limit]
query: |
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: connection_counts
type: gauge
help: 'Connection counts'
key_labels:
- datname
- state
values: [count]
query: |
select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
- metric_name: pg_stats_userdb
type: gauge
help: 'Stats for several oldest non-system dbs'
key_labels:
- datname
value_label: kind
values:
- db_size
- deadlocks
# Rows
- inserted
- updated
- deleted
# We export stats for 10 non-system database. Without this limit
# it is too easy to abuse the system by creating lots of databases.
query: |
select pg_database_size(datname) as db_size, deadlocks,
tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
datname
from pg_stat_database
where datname IN (
select datname
from pg_database
where datname <> 'postgres' and not datistemplate
order by oid
limit 10
);
- metric_name: max_cluster_size
type: gauge
help: 'neon.max_cluster_size setting'
key_labels:
values: [max_cluster_size]
query: |
select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
- metric_name: db_total_size
type: gauge
help: 'Size of all databases'
key_labels:
values: [total]
query: |
select sum(pg_database_size(datname)) as total from pg_database;
- metric_name: getpage_wait_seconds_count
type: counter
help: 'Number of getpage requests'
values: [getpage_wait_seconds_count]
query_ref: neon_perf_counters
- metric_name: getpage_wait_seconds_sum
type: counter
help: 'Time spent in getpage requests'
values: [getpage_wait_seconds_sum]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_requests_total
type: counter
help: 'Number of getpage issued for prefetching'
values: [getpage_prefetch_requests_total]
query_ref: neon_perf_counters
- metric_name: getpage_sync_requests_total
type: counter
help: 'Number of synchronous getpage issued'
values: [getpage_sync_requests_total]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_misses_total
type: counter
help: 'Total number of readahead misses; consisting of either prefetches that don't satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read'
values: [getpage_prefetch_misses_total]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_discards_total
type: counter
help: 'Number of prefetch responses issued but not used'
values: [getpage_prefetch_discards_total]
query_ref: neon_perf_counters
- metric_name: pageserver_requests_sent_total
type: counter
help: 'Number of all requests sent to the pageserver (not just GetPage requests)'
values: [pageserver_requests_sent_total]
query_ref: neon_perf_counters
- metric_name: pageserver_disconnects_total
type: counter
help: 'Number of times that the connection to the pageserver was lost'
values: [pageserver_disconnects_total]
query_ref: neon_perf_counters
- metric_name: pageserver_send_flushes_total
type: counter
help: 'Number of flushes to the pageserver connection'
values: [pageserver_send_flushes_total]
query_ref: neon_perf_counters
- metric_name: getpage_wait_seconds_buckets
type: counter
help: 'Histogram buckets of getpage request latency'
key_labels:
- bucket_le
values: [value]
query_ref: getpage_wait_seconds_buckets
# DEPRECATED
- metric_name: lfc_approximate_working_set_size
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels:
values: [approximate_working_set_size]
query: |
select neon.approximate_working_set_size(false) as approximate_working_set_size;
- metric_name: lfc_approximate_working_set_size_windows
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels: [duration]
values: [size]
# NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
# of durations in a pretty-printed form.
query: |
select
x as duration,
neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
from
(values ('5m'),('15m'),('1h')) as t (x);
- metric_name: compute_current_lsn
type: gauge
help: 'Current LSN of the database'
key_labels:
values: [lsn]
query: |
select
case
when pg_catalog.pg_is_in_recovery()
then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
else (pg_current_wal_lsn() - '0/0')::FLOAT8
end as lsn;
- metric_name: compute_receive_lsn
type: gauge
help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
key_labels:
values: [lsn]
query: |
SELECT
CASE
WHEN pg_catalog.pg_is_in_recovery()
THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
ELSE 0
END AS lsn;
- metric_name: replication_delay_bytes
type: gauge
help: 'Bytes between received and replayed LSN'
key_labels:
values: [replication_delay_bytes]
# We use a GREATEST call here because this calculation can be negative.
# The calculation is not atomic, meaning after we've gotten the receive
# LSN, the replay LSN may have advanced past the receive LSN we
# are using for the calculation.
query: |
SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
- metric_name: replication_delay_seconds
type: gauge
help: 'Time since last LSN was replayed'
key_labels:
values: [replication_delay_seconds]
query: |
SELECT
CASE
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
END AS replication_delay_seconds;
- metric_name: checkpoints_req
type: gauge
help: 'Number of requested checkpoints'
key_labels:
values: [checkpoints_req]
query: |
SELECT checkpoints_req FROM pg_stat_bgwriter;
- metric_name: checkpoints_timed
type: gauge
help: 'Number of scheduled checkpoints'
key_labels:
values: [checkpoints_timed]
query: |
SELECT checkpoints_timed FROM pg_stat_bgwriter;
- metric_name: compute_logical_snapshot_files
type: gauge
help: 'Number of snapshot files in pg_logical/snapshot'
key_labels:
- timeline_id
values: [num_logical_snapshot_files]
query: |
SELECT
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
-- temporary snapshot files are renamed to the actual snapshot files after they are
-- completely built. We only WAL-log the completely built snapshot files.
(SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
- metric_name: logical_slot_restart_lsn
type: gauge
help: 'restart_lsn of logical slots'
key_labels:
- slot_name
values: [restart_lsn]
query: |
select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
from pg_replication_slots
where slot_type = 'logical';
- metric_name: compute_subscriptions_count
type: gauge
help: 'Number of logical replication subscriptions grouped by enabled/disabled'
key_labels:
- enabled
values: [subscriptions_count]
query: |
select subenabled::text as enabled, count(*) as subscriptions_count
from pg_subscription
group by subenabled;
- metric_name: retained_wal
type: gauge
help: 'Retained WAL in inactive replication slots'
key_labels:
- slot_name
values: [retained_wal]
query: |
SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
FROM pg_replication_slots
WHERE active = false;
- metric_name: wal_is_lost
type: gauge
help: 'Whether or not the replication slot wal_status is lost'
key_labels:
- slot_name
values: [wal_is_lost]
query: |
SELECT slot_name,
CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
FROM pg_replication_slots;
queries:
- query_name: neon_perf_counters
query: |
WITH c AS (
SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters
)
SELECT d.*
FROM pg_catalog.jsonb_to_record((select jb from c)) as d(
getpage_wait_seconds_count numeric,
getpage_wait_seconds_sum numeric,
getpage_prefetch_requests_total numeric,
getpage_sync_requests_total numeric,
getpage_prefetch_misses_total numeric,
getpage_prefetch_discards_total numeric,
pageserver_requests_sent_total numeric,
pageserver_disconnects_total numeric,
pageserver_send_flushes_total numeric
);
- query_name: getpage_wait_seconds_buckets
query: |
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';

View File

@@ -1,55 +0,0 @@
collector_name: neon_collector_autoscaling
metrics:
- metric_name: lfc_misses
type: gauge
help: 'lfc_misses'
key_labels:
values: [lfc_misses]
query: |
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
type: gauge
help: 'LFC chunks used (chunk = 1MB)'
key_labels:
values: [lfc_used]
query: |
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
type: gauge
help: 'lfc_hits'
key_labels:
values: [lfc_hits]
query: |
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
type: gauge
help: 'lfc_writes'
key_labels:
values: [lfc_writes]
query: |
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
type: gauge
help: 'LFC cache size limit in bytes'
key_labels:
values: [lfc_cache_size_limit]
query: |
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: lfc_approximate_working_set_size_windows
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels: [duration_seconds]
values: [size]
# NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
# size looking back 1..60 minutes, labeled with the number of minutes.
query: |
select
x::text as duration_seconds,
neon.approximate_working_set_size_seconds(x) as size
from
(select generate_series * 60 as x from generate_series(1, 60)) as t (x);

View File

@@ -1,17 +0,0 @@
[databases]
*=host=localhost port=5432 auth_user=cloud_admin
[pgbouncer]
listen_port=6432
listen_addr=0.0.0.0
auth_type=scram-sha-256
auth_user=cloud_admin
auth_dbname=postgres
client_tls_sslmode=disable
server_tls_sslmode=disable
pool_mode=transaction
max_client_conn=10000
default_pool_size=64
max_prepared_statements=0
admin_users=postgres
unix_socket_dir=/tmp/
unix_socket_mode=0777

View File

@@ -1,33 +0,0 @@
# Configuration for sql_exporter
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
# the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector]
# Collector files specifies a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector.yml"

View File

@@ -1,33 +0,0 @@
# Configuration for sql_exporter for autoscaling-agent
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
# the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector_autoscaling]
# Collector files specifies a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector_autoscaling.yml"

File diff suppressed because it is too large Load Diff

View File

@@ -1,223 +0,0 @@
commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
Author: Alexey Masterov <alexeymasterov@neon.tech>
Date: Fri May 31 06:34:26 2024 +0000
These alternative expected files were added to consider the neon features
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
new file mode 100644
index 0000000..2539cfd
--- /dev/null
+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
@@ -0,0 +1,101 @@
+BEGIN;
+CREATE EXTENSION anon CASCADE;
+NOTICE: installing required extension "pgcrypto"
+SELECT anon.init();
+ init
+------
+ t
+(1 row)
+
+CREATE ROLE mallory_the_masked_user;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
+CREATE TABLE t1(i INT);
+ALTER TABLE t1 ADD COLUMN t TEXT;
+SECURITY LABEL FOR anon ON COLUMN t1.t
+IS 'MASKED WITH VALUE NULL';
+INSERT INTO t1 VALUES (1,'test');
+--
+-- We're checking the owner's permissions
+--
+-- see
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
+--
+SET ROLE mallory_the_masked_user;
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+ PERFORM anon.init();
+ EXCEPTION WHEN insufficient_privilege
+ THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE: insufficient_privilege
+-- SHOULD FAIL
+DO $$
+BEGIN
+ PERFORM anon.anonymize_table('t1');
+ EXCEPTION WHEN insufficient_privilege
+ THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE: insufficient_privilege
+-- SHOULD FAIL
+SAVEPOINT fail_start_engine;
+SELECT anon.start_dynamic_masking();
+ERROR: Only supersusers can start the dynamic masking engine.
+CONTEXT: PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
+ROLLBACK TO fail_start_engine;
+RESET ROLE;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking
+-----------------------
+ t
+(1 row)
+
+SET ROLE mallory_the_masked_user;
+SELECT * FROM mask.t1;
+ i | t
+---+---
+ 1 |
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+ SELECT * FROM public.t1;
+ EXCEPTION WHEN insufficient_privilege
+ THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE: insufficient_privilege
+-- SHOULD FAIL
+SAVEPOINT fail_stop_engine;
+SELECT anon.stop_dynamic_masking();
+ERROR: Only supersusers can stop the dynamic masking engine.
+CONTEXT: PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
+ROLLBACK TO fail_stop_engine;
+RESET ROLE;
+SELECT anon.stop_dynamic_masking();
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
+ stop_dynamic_masking
+----------------------
+ t
+(1 row)
+
+SET ROLE mallory_the_masked_user;
+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
+ ?column?
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+SAVEPOINT fail_seclabel_on_role;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
+ERROR: permission denied
+DETAIL: The current user must have the CREATEROLE attribute.
+ROLLBACK TO fail_seclabel_on_role;
+ROLLBACK;
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
new file mode 100644
index 0000000..8b090fe
--- /dev/null
+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
@@ -0,0 +1,104 @@
+BEGIN;
+CREATE EXTENSION anon CASCADE;
+NOTICE: installing required extension "pgcrypto"
+SELECT anon.init();
+ init
+------
+ t
+(1 row)
+
+CREATE ROLE oscar_the_owner;
+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
+CREATE ROLE mallory_the_masked_user;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
+--
+-- We're checking the owner's permissions
+--
+-- see
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
+--
+SET ROLE oscar_the_owner;
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+ PERFORM anon.init();
+ EXCEPTION WHEN insufficient_privilege
+ THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE: insufficient_privilege
+CREATE TABLE t1(i INT);
+ALTER TABLE t1 ADD COLUMN t TEXT;
+SECURITY LABEL FOR anon ON COLUMN t1.t
+IS 'MASKED WITH VALUE NULL';
+INSERT INTO t1 VALUES (1,'test');
+SELECT anon.anonymize_table('t1');
+ anonymize_table
+-----------------
+ t
+(1 row)
+
+SELECT * FROM t1;
+ i | t
+---+---
+ 1 |
+(1 row)
+
+UPDATE t1 SET t='test' WHERE i=1;
+-- SHOULD FAIL
+SAVEPOINT fail_start_engine;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking
+-----------------------
+ t
+(1 row)
+
+ROLLBACK TO fail_start_engine;
+RESET ROLE;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking
+-----------------------
+ t
+(1 row)
+
+SET ROLE oscar_the_owner;
+SELECT * FROM t1;
+ i | t
+---+------
+ 1 | test
+(1 row)
+
+--SELECT * FROM mask.t1;
+-- SHOULD FAIL
+SAVEPOINT fail_stop_engine;
+SELECT anon.stop_dynamic_masking();
+ERROR: permission denied for schema mask
+CONTEXT: SQL statement "DROP VIEW mask.t1;"
+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
+SQL statement "SELECT anon.mask_drop_view(oid)
+ FROM pg_catalog.pg_class
+ WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
+ AND relkind IN ('r','p','f')"
+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
+ROLLBACK TO fail_stop_engine;
+RESET ROLE;
+SELECT anon.stop_dynamic_masking();
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
+ stop_dynamic_masking
+----------------------
+ t
+(1 row)
+
+SET ROLE oscar_the_owner;
+-- SHOULD FAIL
+SAVEPOINT fail_seclabel_on_role;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
+ERROR: permission denied
+DETAIL: The current user must have the CREATEROLE attribute.
+ROLLBACK TO fail_seclabel_on_role;
+ROLLBACK;

View File

@@ -1,19 +0,0 @@
commit b3ea51ee158f113f2f82d0b97c12c54343c9a695 (HEAD -> master)
Author: Alexey Masterov <alexeymasterov@neon.tech>
Date: Fri Jun 7 19:23:42 2024 +0000
Disable REGRESS_OPTIONS causing initdb
diff --git a/ext-src/pg_cron-src/Makefile b/ext-src/pg_cron-src/Makefile
index 053314c..fbd5fb5 100644
--- a/ext-src/pg_cron-src/Makefile
+++ b/ext-src/pg_cron-src/Makefile
@@ -5,7 +5,7 @@ EXTENSION = pg_cron
DATA_built = $(EXTENSION)--1.0.sql
DATA = $(wildcard $(EXTENSION)--*--*.sql)
-REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check
+#REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check
REGRESS = pg_cron-test
# compilation configuration

View File

@@ -1,53 +0,0 @@
diff --git a/expected/ut-A.out b/expected/ut-A.out
index da723b8..5328114 100644
--- a/expected/ut-A.out
+++ b/expected/ut-A.out
@@ -9,13 +9,16 @@ SET search_path TO public;
----
-- No.A-1-1-3
CREATE EXTENSION pg_hint_plan;
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
-- No.A-1-2-3
DROP EXTENSION pg_hint_plan;
-- No.A-1-1-4
CREATE SCHEMA other_schema;
CREATE EXTENSION pg_hint_plan SCHEMA other_schema;
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan"
CREATE EXTENSION pg_hint_plan;
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
DROP SCHEMA other_schema;
----
---- No. A-5-1 comment pattern
@@ -3175,6 +3178,7 @@ SELECT s.query, s.calls
FROM public.pg_stat_statements s
JOIN pg_catalog.pg_database d
ON (s.dbid = d.oid)
+ WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'
ORDER BY 1;
query | calls
--------------------------------------+-------
diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out
index d372459..6282afe 100644
--- a/expected/ut-fdw.out
+++ b/expected/ut-fdw.out
@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on;
SET client_min_messages TO LOG;
SET pg_hint_plan.enable_hint TO on;
CREATE EXTENSION file_fdw;
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
CREATE USER MAPPING FOR PUBLIC SERVER file_server;
CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename');
diff --git a/sql/ut-A.sql b/sql/ut-A.sql
index 7c7d58a..4fd1a07 100644
--- a/sql/ut-A.sql
+++ b/sql/ut-A.sql
@@ -963,6 +963,7 @@ SELECT s.query, s.calls
FROM public.pg_stat_statements s
JOIN pg_catalog.pg_database d
ON (s.dbid = d.oid)
+ WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'
ORDER BY 1;
----

View File

@@ -1,62 +0,0 @@
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index dcfb2bd..d5189ee 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -860,9 +860,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(RelationGetSmgr(indexRel));
+#endif
+
/* Perform inserts */
HnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea, false);
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(indexRel));
+#endif
+
/* Close relations within worker */
index_close(indexRel, indexLockmode);
table_close(heapRel, heapLockmode);
@@ -1117,12 +1125,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
SeedRandom(42);
#endif
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(RelationGetSmgr(index));
+#endif
+
InitBuildState(buildstate, heap, index, indexInfo, forkNum);
BuildGraph(buildstate, forkNum);
- if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
+#endif
+
+ if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) {
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
+#ifdef NEON_SMGR
+ {
+#if PG_VERSION_NUM >= 160000
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
+#else
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
+#endif
+
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator,
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
+ }
+#endif
+ }
+
+#ifdef NEON_SMGR
+ smgr_end_unlogged_build(RelationGetSmgr(index));
+#endif
FreeBuildState(buildstate);
}

View File

@@ -1,54 +0,0 @@
commit 68f3b3b0d594f08aacc4a082ee210749ed5677eb
Author: Anastasia Lubennikova <anastasia@neon.tech>
Date: Mon Jul 15 12:31:56 2024 +0100
Neon: fix unlogged index build patch
diff --git a/src/ruminsert.c b/src/ruminsert.c
index e8b209d..e89bf2a 100644
--- a/src/ruminsert.c
+++ b/src/ruminsert.c
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(index->rd_smgr);
+#endif
+
initRumState(&buildstate.rumstate, index);
buildstate.rumstate.isBuild = true;
buildstate.indtuples = 0;
@@ -693,6 +697,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
rumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(index->rd_smgr);
+#endif
+
/*
* Write index to xlog
*/
@@ -713,6 +721,21 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
UnlockReleaseBuffer(buffer);
}
+#ifdef NEON_SMGR
+ {
+#if PG_VERSION_NUM >= 160000
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
+#else
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
+#endif
+
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
+
+ smgr_end_unlogged_build(index->rd_smgr);
+ }
+#endif
+
/*
* Return statistics
*/

View File

@@ -1,117 +0,0 @@
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
---
commands:
- name: cgconfigparser
user: root
sysvInitAction: sysinit
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
# running it as root.
- name: chmod-resize-swap
user: root
sysvInitAction: sysinit
shell: 'chmod 711 /neonvm/bin/resize-swap'
- name: chmod-set-disk-quota
user: root
sysvInitAction: sysinit
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
- name: pgbouncer
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
- name: sql-exporter
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
- name: sql-exporter-autoscaling
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
shutdownHook: |
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
files:
- filename: compute_ctl-sudoers
content: |
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
- filename: cgconfig.conf
content: |
# Configuration for cgroups in VM compute nodes
group neon-postgres {
perm {
admin {
uid = postgres;
}
task {
gid = users;
}
}
memory {}
}
build: |
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ENV LIBCGROUP_VERSION=v2.0.3
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
merge: |
# tweak nofile limits
RUN set -e \
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
&& test ! -e /etc/security || ( \
echo '* - nofile 1048576' >>/etc/security/limits.conf \
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
)
# Allow postgres user (compute_ctl) to run swap resizer.
# Need to install sudo in order to allow this.
#
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
RUN set -e \
&& apt update \
&& apt install --no-install-recommends -y \
sudo \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
COPY cgconfig.conf /etc/cgconfig.conf
RUN set -e \
&& chmod 0644 /etc/cgconfig.conf
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/

View File

@@ -4,13 +4,9 @@ version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
# Enables test specific features.
testing = []
[dependencies]
anyhow.workspace = true
async-compression.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true
@@ -21,9 +17,9 @@ nix.workspace = true
notify.workspace = true
num_cpus.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
postgres.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
tar.workspace = true
@@ -31,20 +27,18 @@ reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true
toml_edit.workspace = true
remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
zstd = "0.13"
bytes = "1.0"
rust-ini = "0.20.0"
rlimit = "0.10.1"

View File

@@ -32,29 +32,6 @@ compute_ctl -D /var/db/postgres/compute \
-b /usr/local/bin/postgres
```
## State Diagram
Computes can be in various states. Below is a diagram that details how a
compute moves between states.
```mermaid
%% https://mermaid.js.org/syntax/stateDiagram.html
stateDiagram-v2
[*] --> Empty : Compute spawned
Empty --> ConfigurationPending : Waiting for compute spec
ConfigurationPending --> Configuration : Received compute spec
Configuration --> Failed : Failed to configure the compute
Configuration --> Running : Compute has been configured
Empty --> Init : Compute spec is immediately available
Empty --> TerminationPending : Requested termination
Init --> Failed : Failed to start Postgres
Init --> Running : Started Postgres
Running --> TerminationPending : Requested termination
TerminationPending --> Terminated : Terminated compute
Failed --> [*] : Compute exited
Terminated --> [*] : Compute exited
```
## Tests
Cargo formatter:

View File

@@ -6,7 +6,7 @@
//! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run.
//! - If remote_extension_config is provided, it will be used to fetch extensions list
//! and download `shared_preload_libraries` from the remote storage.
//! and download `shared_preload_libraries` from the remote storage.
//! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN.
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
@@ -33,6 +33,7 @@
//! -b /usr/local/bin/postgres \
//! -r http://pg-ext-s3-gateway \
//! ```
//!
use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
@@ -44,19 +45,15 @@ use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use compute_tools::disk_quota::set_disk_quota;
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
use nix::sys::signal::{kill, Signal};
use signal_hook::consts::{SIGQUIT, SIGTERM};
use signal_hook::{consts::SIGINT, iterator::Signals};
use tracing::{error, info, warn};
use tracing::{error, info};
use url::Url;
use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeSpec;
use compute_tools::compute::{
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
};
use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec, PG_PID, SYNC_SAFEKEEPERS_PID};
use compute_tools::configurator::launch_configurator;
use compute_tools::extension_server::get_pg_version;
use compute_tools::http::api::launch_http_server;
@@ -64,45 +61,12 @@ use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::spec::*;
use compute_tools::swap::resize_swap;
use rlimit::{setrlimit, Resource};
// this is an arbitrary build tag. Fine as a default / for testing purposes
// in-case of not-set environment var
const BUILD_TAG_DEFAULT: &str = "latest";
fn main() -> Result<()> {
let (build_tag, clap_args) = init()?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
let (pg_handle, start_pg_result) = {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();
let cli_args = process_cli(&clap_args)?;
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
start_postgres(&clap_args, wait_spec_result)?
// Startup is finished, exit the startup tracing span
};
// PostgreSQL is now running, if startup was successful. Wait until it exits.
let wait_pg_result = wait_postgres(pg_handle)?;
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
maybe_delay_exit(delay_exit);
deinit_and_exit(wait_pg_result);
}
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -117,15 +81,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
.to_string();
info!("build_tag: {build_tag}");
Ok((build_tag, cli().get_matches()))
}
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);
let matches = cli().get_matches();
let pgbin_default = String::from("postgres");
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
let ext_remote_storage = matches
.get_one::<String>("remote-ext-config")
@@ -151,35 +109,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
.expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
set_disk_quota_for_fs,
})
}
struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<&'clap String>,
}
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
@@ -216,9 +146,9 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val);
}
if !startup_tracing_carrier.is_empty() {
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new()
.extract(&startup_tracing_carrier)
.attach();
@@ -226,17 +156,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
Some(guard)
} else {
None
}
}
};
fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
@@ -277,35 +198,6 @@ fn try_spec_from_cli(
}
};
Ok(CliSpecParams {
spec,
live_config_allowed,
})
}
struct CliSpecParams {
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
spec: Option<ComputeSpec>,
live_config_allowed: bool,
}
fn wait_spec(
build_tag: String,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
set_disk_quota_for_fs,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new();
let spec_set;
@@ -333,17 +225,19 @@ fn wait_spec(
// If this is a pooled VM, prewarm before starting HTTP server and becoming
// available for binding. Prewarming helps Postgres start quicker later,
// because QEMU will already have its memory allocated from the host, and
// because QEMU will already have it's memory allocated from the host, and
// the necessary binaries will already be cached.
if !spec_set {
compute.prewarm_postgres()?;
}
// Launch http service first, so that we can serve control-plane requests
// while configuration is still in progress.
// Launch http service first, so we were able to serve control-plane
// requests, while configuration is still in progress.
let _http_handle =
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
let extension_server_port: u16 = http_port;
if !spec_set {
// No spec provided, hang waiting for it.
info!("no compute spec provided, waiting");
@@ -358,50 +252,21 @@ fn wait_spec(
break;
}
}
// Record for how long we slept waiting for the spec.
let now = Utc::now();
state.metrics.wait_for_spec_ms = now
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time, so that the total startup time that is calculated later will
// not include the time that we waited for the spec.
state.start_time = now;
}
launch_lsn_lease_bg_task_for_static(&compute);
Ok(WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
})
}
struct WaitSpecResult {
compute: Arc<ComputeNode>,
// passed through from ProcessCliResult
http_port: u16,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<String>,
}
fn start_postgres(
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
set_disk_quota_for_fs,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
// Record for how long we slept waiting for the spec.
state.metrics.wait_for_spec_ms = Utc::now()
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time to the actual start of the configuration, so that
// total startup time was properly measured at the end.
state.start_time = Utc::now();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
@@ -409,83 +274,33 @@ fn start_postgres(
"running compute with features: {:?}",
state.pspec.as_ref().unwrap().spec.features
);
// before we release the mutex, fetch the swap size (if any) for later.
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
drop(state);
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute);
let _configurator_handle = launch_configurator(&compute);
let mut prestartup_failed = false;
let mut delay_exit = false;
// Resize swap to the desired size if the compute spec says so
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
// *before* starting postgres.
//
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
// OOM-killed during startup because swap wasn't available yet.
match resize_swap(size_bytes) {
Ok(()) => {
let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%size_bytes, %size_mib, "resized swap");
}
Err(err) => {
let err = err.context("failed to resize swap");
error!("{err:#}");
// Mark compute startup as failed; don't try to start postgres, and report this
// error to the control plane when it next asks.
prestartup_failed = true;
compute.set_failed_status(err);
delay_exit = true;
}
}
}
// Set disk quota if the compute spec says so
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
(disk_quota_bytes, set_disk_quota_for_fs)
{
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
Ok(()) => {
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%disk_quota_bytes, %size_mib, "set disk quota");
}
Err(err) => {
let err = err.context("failed to set disk quota");
error!("{err:#}");
// Mark compute startup as failed; don't try to start postgres, and report this
// error to the control plane when it next asks.
prestartup_failed = true;
compute.set_failed_status(err);
delay_exit = true;
}
}
}
let extension_server_port: u16 = http_port;
// Start Postgres
let mut pg = None;
if !prestartup_failed {
pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:#}", err);
compute.set_failed_status(err);
delay_exit = true;
None
}
};
} else {
warn!("skipping postgres startup because pre-startup step failed");
}
let mut delay_exit = false;
let mut exit_code = None;
let pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:#}", err);
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed;
// Notify others that Postgres failed to start. In case of configuring the
// empty compute, it's likely that API handler is still waiting for compute
// state change. With this we will notify it that compute is in Failed state,
// so control plane will know about it earlier and record proper error instead
// of timeout.
compute.state_changed.notify_all();
drop(state); // unlock
delay_exit = true;
None
}
};
// Start the vm-monitor if directed to. The vm-monitor only runs on linux
// because it requires cgroups.
@@ -518,7 +333,7 @@ fn start_postgres(
// This token is used internally by the monitor to clean up all threads
let token = CancellationToken::new();
let vm_monitor = rt.as_ref().map(|rt| {
let vm_monitor = &rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args {
cgroup: cgroup.cloned(),
@@ -531,41 +346,12 @@ fn start_postgres(
}
}
Ok((
pg,
StartPostgresResult {
delay_exit,
compute,
#[cfg(target_os = "linux")]
rt,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
vm_monitor,
},
))
}
type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
struct StartPostgresResult {
delay_exit: bool,
// passed through from WaitSpecResult
compute: Arc<ComputeNode>,
#[cfg(target_os = "linux")]
rt: Option<tokio::runtime::Runtime>,
#[cfg(target_os = "linux")]
token: tokio_util::sync::CancellationToken,
#[cfg(target_os = "linux")]
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
}
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
let mut exit_code = None;
if let Some((mut pg, logs_handle)) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
@@ -580,25 +366,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
exit_code = ecode.code()
}
Ok(WaitPostgresResult { exit_code })
}
struct WaitPostgresResult {
exit_code: Option<i32>,
}
fn cleanup_after_postgres_exit(
StartPostgresResult {
mut delay_exit,
compute,
#[cfg(target_os = "linux")]
vm_monitor,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
rt,
}: StartPostgresResult,
) -> Result<bool> {
// Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -627,32 +394,17 @@ fn cleanup_after_postgres_exit(
info!("synced safekeepers at lsn {lsn}");
}
let mut state = compute.state.lock().unwrap();
if state.status == ComputeStatus::TerminationPending {
state.status = ComputeStatus::Terminated;
compute.state_changed.notify_all();
// we were asked to terminate gracefully, don't exit to avoid restart
delay_exit = true
}
drop(state);
if let Err(err) = compute.check_for_core_dumps() {
error!("error while checking for core dumps: {err:?}");
}
Ok(delay_exit)
}
fn maybe_delay_exit(delay_exit: bool) {
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
}
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example:
@@ -760,20 +512,10 @@ fn cli() -> clap::Command {
Arg::new("filecache-connstr")
.long("filecache-connstr")
.default_value(
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("resize-swap-on-bind")
.long("resize-swap-on-bind")
.action(clap::ArgAction::SetTrue),
)
.arg(
Arg::new("set-disk-quota-for-fs")
.long("set-disk-quota-for-fs")
.value_name("SET_DISK_QUOTA_FOR_FS")
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
@@ -781,7 +523,16 @@ fn cli() -> clap::Command {
/// wait for termination which would be easy then.
fn handle_exit_signal(sig: i32) {
info!("received {sig} termination signal");
forward_termination_signal();
let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
if ss_pid != 0 {
let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
kill(ss_pid, Signal::SIGTERM).ok();
}
let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
kill(pg_pid, Signal::SIGTERM).ok();
}
exit(1);
}

View File

@@ -1,116 +0,0 @@
use compute_api::{
responses::CatalogObjects,
spec::{Database, Role},
};
use futures::Stream;
use postgres::{Client, NoTls};
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::Command,
task,
};
use tokio_stream::{self as stream, StreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tracing::warn;
use crate::{
compute::ComputeNode,
pg_helpers::{get_existing_dbs, get_existing_roles},
};
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
let connstr = compute.connstr.clone();
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let roles: Vec<Role>;
{
let mut xact = client.transaction()?;
roles = get_existing_roles(&mut xact)?;
}
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
Ok(CatalogObjects { roles, databases })
})
.await?
}
#[derive(Debug, thiserror::Error)]
pub enum SchemaDumpError {
#[error("Database does not exist.")]
DatabaseDoesNotExist,
#[error("Failed to execute pg_dump.")]
IO(#[from] std::io::Error),
}
// It uses the pg_dump utility to dump the schema of the specified database.
// The output is streamed back to the caller and supposed to be streamed via HTTP.
//
// Before return the result with the output, it checks that pg_dump produced any output.
// If not, it tries to parse the stderr output to determine if the database does not exist
// and special error is returned.
//
// To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.
pub async fn get_database_schema(
compute: &Arc<ComputeNode>,
dbname: &str,
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
let pgbin = &compute.pgbin;
let basepath = Path::new(pgbin).parent().unwrap();
let pgdump = basepath.join("pg_dump");
let mut connstr = compute.connstr.clone();
connstr.set_path(dbname);
let mut cmd = Command::new(pgdump)
.arg("--schema-only")
.arg(connstr.as_str())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true)
.spawn()?;
let stdout = cmd.stdout.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
})?;
let stderr = cmd.stderr.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
})?;
let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
let stderr_reader = BufReader::new(stderr);
let first_chunk = match stdout_reader.next().await {
Some(Ok(bytes)) if !bytes.is_empty() => bytes,
Some(Err(e)) => {
return Err(SchemaDumpError::IO(e));
}
_ => {
let mut lines = stderr_reader.lines();
if let Some(line) = lines.next_line().await? {
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
return Err(SchemaDumpError::DatabaseDoesNotExist);
}
warn!("pg_dump stderr: {}", line)
}
tokio::spawn(async move {
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
return Err(SchemaDumpError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
"failed to start pg_dump",
)));
}
};
let initial_stream = stream::once(Ok(first_chunk.freeze()));
// Consume stderr and log warnings
tokio::spawn(async move {
let mut lines = stderr_reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
}

View File

@@ -2,7 +2,7 @@ use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::BufRead;
use std::os::unix::fs::{symlink, PermissionsExt};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
@@ -10,7 +10,6 @@ use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use std::sync::{Condvar, Mutex, RwLock};
use std::thread;
use std::time::Duration;
use std::time::Instant;
use anyhow::{Context, Result};
@@ -18,9 +17,9 @@ use chrono::{DateTime, Utc};
use futures::future::join_all;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use nix::unistd::Pid;
use postgres::error::SqlState;
use postgres::{Client, NoTls};
use tokio;
use tokio_postgres;
use tracing::{debug, error, info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
@@ -29,8 +28,6 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec};
use utils::measured_stream::MeasuredReader;
use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data;
@@ -57,7 +54,6 @@ pub struct ComputeNode {
/// - we push new spec and it does reconfiguration
/// - but then something happens and compute pod / VM is destroyed,
/// so k8s controller starts it again with the **old** spec
///
/// and the same for empty computes:
/// - we started compute without any spec
/// - we push spec and it does configuration
@@ -306,13 +302,6 @@ impl ComputeNode {
self.state_changed.notify_all();
}
pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap();
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
}
pub fn get_status(&self) -> ComputeStatus {
self.state.lock().unwrap().status
}
@@ -335,8 +324,7 @@ impl ComputeNode {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Instant::now();
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let mut config = postgres::Config::from_str(shard0_connstr)?;
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
@@ -406,17 +394,9 @@ impl ComputeNode {
// Gets the basebackup in a retry loop
#[instrument(skip_all, fields(%lsn))]
pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let mut retry_period_ms = 500.0;
let mut retry_period_ms = 500;
let mut attempts = 0;
const DEFAULT_ATTEMPTS: u16 = 10;
#[cfg(feature = "testing")]
let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
u16::from_str(&v).unwrap()
} else {
DEFAULT_ATTEMPTS
};
#[cfg(not(feature = "testing"))]
let max_attempts = DEFAULT_ATTEMPTS;
let max_attempts = 5;
loop {
let result = self.try_get_basebackup(compute_state, lsn);
match result {
@@ -428,8 +408,8 @@ impl ComputeNode {
"Failed to get basebackup: {} (attempt {}/{})",
e, attempts, max_attempts
);
std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
retry_period_ms *= 1.5;
std::thread::sleep(std::time::Duration::from_millis(retry_period_ms));
retry_period_ms *= 2;
}
Err(_) => {
return result;
@@ -654,48 +634,6 @@ impl ComputeNode {
// Update pg_hba.conf received with basebackup.
update_pg_hba(pgdata_path)?;
// Place pg_dynshmem under /dev/shm. This allows us to use
// 'dynamic_shared_memory_type = mmap' so that the files are placed in
// /dev/shm, similar to how 'dynamic_shared_memory_type = posix' works.
//
// Why on earth don't we just stick to the 'posix' default, you might
// ask. It turns out that making large allocations with 'posix' doesn't
// work very well with autoscaling. The behavior we want is that:
//
// 1. You can make large DSM allocations, larger than the current RAM
// size of the VM, without errors
//
// 2. If the allocated memory is really used, the VM is scaled up
// automatically to accommodate that
//
// We try to make that possible by having swap in the VM. But with the
// default 'posix' DSM implementation, we fail step 1, even when there's
// plenty of swap available. PostgreSQL uses posix_fallocate() to create
// the shmem segment, which is really just a file in /dev/shm in Linux,
// but posix_fallocate() on tmpfs returns ENOMEM if the size is larger
// than available RAM.
//
// Using 'dynamic_shared_memory_type = mmap' works around that, because
// the Postgres 'mmap' DSM implementation doesn't use
// posix_fallocate(). Instead, it uses repeated calls to write(2) to
// fill the file with zeros. It's weird that that differs between
// 'posix' and 'mmap', but we take advantage of it. When the file is
// filled slowly with write(2), the kernel allows it to grow larger, as
// long as there's swap available.
//
// In short, using 'dynamic_shared_memory_type = mmap' allows us one DSM
// segment to be larger than currently available RAM. But because we
// don't want to store it on a real file, which the kernel would try to
// flush to disk, so symlink pg_dynshm to /dev/shm.
//
// We don't set 'dynamic_shared_memory_type = mmap' here, we let the
// control plane control that option. If 'mmap' is not used, this
// symlink doesn't affect anything.
//
// See https://github.com/neondatabase/autoscaling/issues/800
std::fs::remove_dir(pgdata_path.join("pg_dynshmem"))?;
symlink("/dev/shm/", pgdata_path.join("pg_dynshmem"))?;
match spec.mode {
ComputeMode::Primary => {}
ComputeMode::Replica | ComputeMode::Static(..) => {
@@ -718,7 +656,7 @@ impl ComputeNode {
info!("running initdb");
let initdb_bin = Path::new(&self.pgbin).parent().unwrap().join("initdb");
Command::new(initdb_bin)
.args(["--pgdata", pgdata])
.args(["-D", pgdata])
.output()
.expect("cannot start initdb process");
@@ -740,12 +678,8 @@ impl ComputeNode {
// Stop it when it's ready
info!("waiting for postgres");
wait_for_postgres(&mut pg, Path::new(pgdata))?;
// SIGQUIT orders postgres to exit immediately. We don't want to SIGKILL
// it to avoid orphaned processes prowling around while datadir is
// wiped.
let pm_pid = Pid::from_raw(pg.id() as i32);
kill(pm_pid, Signal::SIGQUIT)?;
info!("sent SIGQUIT signal");
pg.kill()?;
info!("sent kill signal");
pg.wait()?;
info!("done prewarming");
@@ -786,26 +720,6 @@ impl ComputeNode {
Ok((pg, logs_handle))
}
/// Do post configuration of the already started Postgres. This function spawns a background thread to
/// configure the database after applying the compute spec. Currently, it upgrades the neon extension
/// version. In the future, it may upgrade all 3rd-party extensions.
#[instrument(skip_all)]
pub fn post_apply_config(&self) -> Result<()> {
let connstr = self.connstr.clone();
thread::spawn(move || {
let func = || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_neon_extension_upgrade(&mut client)
.context("handle_neon_extension_upgrade")?;
Ok::<_, anyhow::Error>(())
};
if let Err(err) = func() {
error!("error while post_apply_config: {err:#}");
}
});
Ok(())
}
/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
@@ -815,92 +729,61 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut connstr = self.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "apply_config");
let connstr = self.connstr.clone();
let mut client = match Client::connect(connstr.as_str(), NoTls) {
Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD)
| Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {
// connect with zenith_admin if cloud_admin could not authenticate
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = connstr.clone();
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client =
Client::connect(zenith_admin_connstr.as_str(), NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
// Disable forwarding so that users don't get a cloud_admin role
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
// Disable forwarding so that users don't get a cloud_admin role
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
let mut func = || {
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
Ok::<_, anyhow::Error>(())
};
func().context("apply_config setup cloud_admin")?;
drop(client);
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
}
_ => return Err(e.into()),
},
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
// Disable DDL forwarding because control plane already knows about these roles/databases.
client
.simple_query("SET neon.forward_ddl = false")
.context("apply_config SET neon.forward_ddl = false")?;
client.simple_query("SET neon.forward_ddl = false")?;
// Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
handle_roles(spec, &mut client).context("apply_config handle_roles")?;
handle_databases(spec, &mut client).context("apply_config handle_databases")?;
handle_role_deletions(spec, connstr.as_str(), &mut client)
.context("apply_config handle_role_deletions")?;
handle_grants(
spec,
&mut client,
connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)
.context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?;
create_neon_superuser(spec, &mut client)?;
cleanup_instance(&mut client)?;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, connstr.as_str())?;
handle_extensions(spec, &mut client)?;
handle_extension_neon(&mut client)?;
create_availability_check_data(&mut client)?;
// 'Close' connection
drop(client);
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "migrations");
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client).context("apply_config handle_migrations")
handle_migrations(&mut client)
});
Ok(())
}
// Wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop.
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
// have opened connection to Postgres and superuser access.
#[instrument(skip_all)]
fn pg_reload_conf(&self) -> Result<()> {
let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
@@ -943,39 +826,33 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
self.pg_reload_conf()?;
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, &mut client, self.connstr.as_str())?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection
drop(client);
Ok(())
})?;
// 'Close' connection
drop(client);
// reset max_cluster_size in config back to original value and reload config
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
let unknown_op = "unknown".to_string();
@@ -1060,26 +937,18 @@ impl ComputeNode {
let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;
let config_time = Utc::now();
if pspec.spec.mode == ComputeMode::Primary {
if !pspec.spec.skip_pg_catalog_updates {
let pgdata_path = Path::new(&self.pgdata);
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(
pgdata_path,
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
let pgdata_path = Path::new(&self.pgdata);
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
self.apply_config(&compute_state)?;
Ok(())
},
)?;
self.pg_reload_conf()?;
}
self.post_apply_config()?;
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
}
let startup_end_time = Utc::now();
@@ -1133,14 +1002,11 @@ impl ComputeNode {
// EKS worker nodes have following core dump settings:
// /proc/sys/kernel/core_pattern -> core
// /proc/sys/kernel/core_uses_pid -> 1
// ulimit -c -> unlimited
// ulimint -c -> unlimited
// which results in core dumps being written to postgres data directory as core.<pid>.
//
// Use that as a default location and pattern, except macos where core dumps are written
// to /cores/ directory by default.
//
// With default Linux settings, the core dump file is called just "core", so check for
// that too.
pub fn check_for_core_dumps(&self) -> Result<()> {
let core_dump_dir = match std::env::consts::OS {
"macos" => Path::new("/cores/"),
@@ -1152,17 +1018,8 @@ impl ComputeNode {
let files = fs::read_dir(core_dump_dir)?;
let cores = files.filter_map(|entry| {
let entry = entry.ok()?;
let is_core_dump = match entry.file_name().to_str()? {
n if n.starts_with("core.") => true,
"core" => true,
_ => false,
};
if is_core_dump {
Some(entry.path())
} else {
None
}
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
Some(entry.path())
});
// Print backtrace for each core dump
@@ -1316,12 +1173,10 @@ LIMIT 100",
.await
.map_err(DownloadError::Other);
if download_size.is_ok() {
self.ext_download_progress
.write()
.expect("bad lock")
.insert(ext_archive_name.to_string(), (download_start, true));
}
self.ext_download_progress
.write()
.expect("bad lock")
.insert(ext_archive_name.to_string(), (download_start, true));
download_size
}
@@ -1380,10 +1235,19 @@ LIMIT 100",
info!("Downloading to shared preload libraries: {:?}", &libs_vec);
let build_tag_str = if spec
.features
.contains(&ComputeFeature::RemoteExtensionsUseLatest)
{
"latest"
} else {
&self.build_tag
};
let mut download_tasks = Vec::new();
for library in &libs_vec {
let (ext_name, ext_path) =
remote_extensions.get_ext(library, true, &self.build_tag, &self.pgversion)?;
remote_extensions.get_ext(library, true, build_tag_str, &self.pgversion)?;
download_tasks.push(self.download_extension(ext_name, ext_path));
}
let results = join_all(download_tasks).await;
@@ -1413,50 +1277,4 @@ LIMIT 100",
}
Ok(remote_ext_metrics)
}
/// Waits until current thread receives a state changed notification and
/// the pageserver connection strings has changed.
///
/// The operation will time out after a specified duration.
pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
let state = self.state.lock().unwrap();
let old_pageserver_connstr = state
.pspec
.as_ref()
.expect("spec must be set")
.pageserver_connstr
.clone();
let mut unchanged = true;
let _ = self
.state_changed
.wait_timeout_while(state, duration, |s| {
let pageserver_connstr = &s
.pspec
.as_ref()
.expect("spec must be set")
.pageserver_connstr;
unchanged = pageserver_connstr == &old_pageserver_connstr;
unchanged
})
.unwrap();
if !unchanged {
info!("Pageserver config changed");
}
}
}
pub fn forward_termination_signal() {
let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
if ss_pid != 0 {
let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
kill(ss_pid, Signal::SIGTERM).ok();
}
let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
// Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for
// ROs to get a list of running xacts faster instead of going through the CLOG.
// See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
kill(pg_pid, Signal::SIGINT).ok();
}
}

View File

@@ -6,8 +6,8 @@ use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::escape_conf_value;
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::{ComputeMode, ComputeSpec};
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -17,7 +17,6 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
.write(true)
.create(true)
.append(false)
.truncate(false)
.open(path)?;
let buf = io::BufReader::new(&file);
let mut count: usize = 0;
@@ -52,9 +51,6 @@ pub fn write_postgres_conf(
if let Some(s) = &spec.pageserver_connstring {
writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
}
if let Some(stripe_size) = spec.shard_stripe_size {
writeln!(file, "neon.stripe_size={stripe_size}")?;
}
if !spec.safekeeper_connstrings.is_empty() {
writeln!(
file,
@@ -86,27 +82,6 @@ pub fn write_postgres_conf(
}
}
if cfg!(target_os = "linux") {
// Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is
// disabled), then the control plane has enabled swap and we should set
// dynamic_shared_memory_type = 'mmap'.
//
// This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
// ignore any errors - they may be expected to occur under certain situations (e.g. when
// not running in Linux).
.unwrap_or_else(|_| String::new());
if overcommit_memory_contents.trim() == "2" {
let opt = GenericOption {
name: "dynamic_shared_memory_type".to_owned(),
value: Some("mmap".to_owned()),
vartype: "enum".to_owned(),
};
write!(file, "{}", opt.to_pg_setting())?;
}
}
// If there are any extra options in the 'settings' field, append those
if spec.cluster.settings.is_some() {
writeln!(file, "# Managed by compute_ctl: begin")?;
@@ -125,17 +100,18 @@ pub fn write_postgres_conf(
Ok(())
}
pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>
where
F: FnOnce() -> Result<()>,
{
/// create file compute_ctl_temp_override.conf in pgdata_dir
/// add provided options to this file
pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{}", options)?;
let res = exec();
file.set_len(0)?;
res
Ok(())
}
/// remove file compute_ctl_temp_override.conf in pgdata_dir
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
std::fs::remove_file(path)?;
Ok(())
}

View File

@@ -11,17 +11,9 @@ use crate::compute::ComputeNode;
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
info!("waiting for reconfiguration requests");
loop {
let mut state = compute.state.lock().unwrap();
let state = compute.state.lock().unwrap();
let mut state = compute.state_changed.wait(state).unwrap();
// We have to re-check the status after re-acquiring the lock because it could be that
// the status has changed while we were waiting for the lock, and we might not need to
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
// we are waiting for a condition variable that will never be signaled.
if state.status != ComputeStatus::ConfigurationPending {
state = compute.state_changed.wait(state).unwrap();
}
// Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.status = ComputeStatus::Configuration;

View File

@@ -1,25 +0,0 @@
use anyhow::Context;
pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
let size_kb = size_bytes / 1024;
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
let child_result = std::process::Command::new("/usr/bin/sudo")
.arg(DISK_QUOTA_BIN)
.arg(size_kb.to_string())
.arg(fs_mountpoint)
.spawn();
child_result
.context("spawn() failed")
.and_then(|mut child| child.wait().context("wait() failed"))
.and_then(|status| match status.success() {
true => Ok(()),
false => Err(anyhow::anyhow!("process exited with {status}")),
})
// wrap any prior error with the overall context that we couldn't run the command
.with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
}

View File

@@ -71,7 +71,7 @@ More specifically, here is an example ext_index.json
}
}
*/
use anyhow::Result;
use anyhow::{self, Result};
use anyhow::{bail, Context};
use bytes::Bytes;
use compute_api::spec::RemoteExtSpec;
@@ -124,7 +124,6 @@ fn parse_pg_version(human_version: &str) -> &str {
"14" => return "v14",
"15" => return "v15",
"16" => return "v16",
"17" => return "v17",
_ => {}
},
_ => {}

View File

@@ -5,21 +5,19 @@ use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use crate::catalog::SchemaDumpError;
use crate::catalog::{get_database_schema, get_dbs_and_roles};
use crate::compute::forward_termination_signal;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use compute_api::spec::ComputeFeature;
use anyhow::Result;
use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use num_cpus;
use serde_json;
use tokio::task;
use tracing::{debug, error, info, warn};
use tracing::{error, info, warn};
use tracing_utils::http::OtelName;
use utils::http::request::must_get_query_param;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
ComputeStatusResponse {
@@ -48,7 +46,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
match (req.method(), req.uri().path()) {
// Serialized compute state.
(&Method::GET, "/status") => {
debug!("serving /status GET request");
info!("serving /status GET request");
let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
@@ -126,45 +124,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
}
(&Method::POST, "/terminate") => {
info!("serving /terminate POST request");
match handle_terminate_request(compute).await {
Ok(()) => Response::new(Body::empty()),
Err((msg, code)) => {
error!("error handling /terminate request: {msg}");
render_json_error(&msg, code)
}
}
}
(&Method::GET, "/dbs_and_roles") => {
info!("serving /dbs_and_roles GET request",);
match get_dbs_and_roles(compute).await {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(_) => {
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
(&Method::GET, "/database_schema") => {
let database = match must_get_query_param(&req, "database") {
Err(e) => return e.into_response(),
Ok(database) => database,
};
info!("serving /database_schema GET request with database: {database}",);
match get_database_schema(compute, &database).await {
Ok(res) => render_plain(Body::wrap_stream(res)),
Err(SchemaDumpError::DatabaseDoesNotExist) => {
render_json_error("database does not exist", StatusCode::NOT_FOUND)
}
Err(e) => {
error!("can't get schema dump: {}", e);
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
// download extension files from remote extension storage on demand
(&Method::POST, route) if route.starts_with("/extension_server/") => {
info!("serving {:?} POST request", route);
@@ -213,12 +172,16 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
};
remote_extensions.get_ext(
&filename,
is_library,
&compute.build_tag,
&compute.pgversion,
)
let build_tag_str = if spec
.features
.contains(&ComputeFeature::RemoteExtensionsUseLatest)
{
"latest"
} else {
&compute.build_tag
};
remote_extensions.get_ext(&filename, is_library, build_tag_str, &compute.pgversion)
};
match ext {
@@ -335,68 +298,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
};
Response::builder()
.status(status)
.header(CONTENT_TYPE, "application/json")
.body(Body::from(serde_json::to_string(&error).unwrap()))
.unwrap()
}
fn render_json(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "application/json")
.body(body)
.unwrap()
}
fn render_plain(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "text/plain")
.body(body)
.unwrap()
}
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
{
let mut state = compute.state.lock().unwrap();
if state.status == ComputeStatus::Terminated {
return Ok(());
}
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for termination request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.status = ComputeStatus::TerminationPending;
compute.state_changed.notify_all();
drop(state);
}
forward_termination_signal();
info!("sent signal and notified waiters");
// Spawn a blocking thread to wait for compute to become Terminated.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Terminated, current status: {:?}",
state.status
);
}
Ok(())
})
.await
.unwrap()?;
info!("terminated Postgres");
Ok(())
}
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main]
async fn serve(port: u16, state: Arc<ComputeNode>) {

View File

@@ -68,51 +68,6 @@ paths:
schema:
$ref: "#/components/schemas/Info"
/dbs_and_roles:
get:
tags:
- Info
summary: Get databases and roles in the catalog.
description: ""
operationId: getDbsAndRoles
responses:
200:
description: Compute schema objects
content:
application/json:
schema:
$ref: "#/components/schemas/DbsAndRoles"
/database_schema:
get:
tags:
- Info
summary: Get schema dump
parameters:
- name: database
in: query
description: Database name to dump.
required: true
schema:
type: string
example: "postgres"
description: Get schema dump in SQL format.
operationId: getDatabaseSchema
responses:
200:
description: Schema dump
content:
text/plain:
schema:
type: string
description: Schema dump in SQL format.
404:
description: Non existing database.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/check_writability:
post:
tags:
@@ -213,29 +168,6 @@ paths:
schema:
$ref: "#/components/schemas/GenericError"
/terminate:
post:
tags:
- Terminate
summary: Terminate Postgres and wait for it to exit
description: ""
operationId: terminate
responses:
200:
description: Result
412:
description: "wrong state"
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: "Unexpected error"
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
components:
securitySchemes:
JWT:
@@ -274,73 +206,6 @@ components:
num_cpus:
type: integer
DbsAndRoles:
type: object
description: Databases and Roles
required:
- roles
- databases
properties:
roles:
type: array
items:
$ref: "#/components/schemas/Role"
databases:
type: array
items:
$ref: "#/components/schemas/Database"
Database:
type: object
description: Database
required:
- name
- owner
- restrict_conn
- invalid
properties:
name:
type: string
owner:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
restrict_conn:
type: boolean
invalid:
type: boolean
Role:
type: object
description: Role
required:
- name
properties:
name:
type: string
encrypted_password:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
GenericOption:
type: object
description: Schema Generic option
required:
- name
- vartype
properties:
name:
type: string
value:
type: string
vartype:
type: string
ComputeState:
type: object
required:

View File

@@ -8,15 +8,10 @@ pub mod configurator;
pub mod http;
#[macro_use]
pub mod logger;
pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
pub mod lsn_lease;
mod migration;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;
pub mod swap;
pub mod sync_sk;

View File

@@ -1,3 +1,4 @@
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::*;
@@ -22,7 +23,8 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
.with_writer(std::io::stderr);
// Initialize OpenTelemetry
let otlp_layer = tracing_utils::init_tracing_without_runtime("compute_ctl");
let otlp_layer =
tracing_utils::init_tracing_without_runtime("compute_ctl").map(OpenTelemetryLayer::new);
// Put it all together
tracing_subscriber::registry()

View File

@@ -1,185 +0,0 @@
use anyhow::bail;
use anyhow::Result;
use postgres::{NoTls, SimpleQueryMessage};
use std::time::SystemTime;
use std::{str::FromStr, sync::Arc, thread, time::Duration};
use utils::id::TenantId;
use utils::id::TimelineId;
use compute_api::spec::ComputeMode;
use tracing::{info, warn};
use utils::{
lsn::Lsn,
shard::{ShardCount, ShardNumber, TenantShardId},
};
use crate::compute::ComputeNode;
/// Spawns a background thread to periodically renew LSN leases for static compute.
/// Do nothing if the compute is not in static mode.
pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {
let (tenant_id, timeline_id, lsn) = {
let state = compute.state.lock().unwrap();
let spec = state.pspec.as_ref().expect("Spec must be set");
match spec.spec.mode {
ComputeMode::Static(lsn) => (spec.tenant_id, spec.timeline_id, lsn),
_ => return,
}
};
let compute = compute.clone();
let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id, %lsn);
thread::spawn(move || {
let _entered = span.entered();
if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) {
// TODO: might need stronger error feedback than logging an warning.
warn!("Exited with error: {e}");
}
});
}
/// Renews lsn lease periodically so static compute are not affected by GC.
fn lsn_lease_bg_task(
compute: Arc<ComputeNode>,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<()> {
loop {
let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?;
let valid_duration = valid_until
.duration_since(SystemTime::now())
.unwrap_or(Duration::ZERO);
// Sleep for 60 seconds less than the valid duration but no more than half of the valid duration.
let sleep_duration = valid_duration
.saturating_sub(Duration::from_secs(60))
.max(valid_duration / 2);
info!(
"Request succeeded, sleeping for {} seconds",
sleep_duration.as_secs()
);
compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);
}
}
/// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted.
/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests.
fn acquire_lsn_lease_with_retry(
compute: &Arc<ComputeNode>,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<SystemTime> {
let mut attempts = 0usize;
let mut retry_period_ms: f64 = 500.0;
const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;
loop {
// Note: List of pageservers is dynamic, need to re-read configs before each attempt.
let configs = {
let state = compute.state.lock().unwrap();
let spec = state.pspec.as_ref().expect("spec must be set");
let conn_strings = spec.pageserver_connstr.split(',');
conn_strings
.map(|connstr| {
let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
if let Some(storage_auth_token) = &spec.storage_auth_token {
config.password(storage_auth_token.clone());
}
config
})
.collect::<Vec<_>>()
};
let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
match result {
Ok(Some(res)) => {
return Ok(res);
}
Ok(None) => {
bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
}
Err(e) => {
warn!("Failed to acquire lsn lease: {e} (attempt {attempts})");
compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(
retry_period_ms as u64,
));
retry_period_ms *= 1.5;
retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
}
}
attempts += 1;
}
}
/// Tries to acquire an LSN lease through PS page_service API.
fn try_acquire_lsn_lease(
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
configs: &[postgres::Config],
) -> Result<Option<SystemTime>> {
fn get_valid_until(
config: &postgres::Config,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let mut client = config.connect(NoTls)?;
let cmd = format!("lease lsn {} {} {} ", tenant_shard_id, timeline_id, lsn);
let res = client.simple_query(&cmd)?;
let msg = match res.first() {
Some(msg) => msg,
None => bail!("empty response"),
};
let row = match msg {
SimpleQueryMessage::Row(row) => row,
_ => bail!("error parsing lsn lease response"),
};
// Note: this will be None if a lease is explicitly not granted.
let valid_until_str = row.get("valid_until");
let valid_until = valid_until_str.map(|s| {
SystemTime::UNIX_EPOCH
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
.expect("Time larger than max SystemTime could handle")
});
Ok(valid_until)
}
let shard_count = configs.len();
let valid_until = if shard_count > 1 {
configs
.iter()
.enumerate()
.map(|(shard_number, config)| {
let tenant_shard_id = TenantShardId {
tenant_id,
shard_count: ShardCount::new(shard_count as u8),
shard_number: ShardNumber(shard_number as u8),
};
get_valid_until(config, tenant_shard_id, timeline_id, lsn)
})
.collect::<Result<Vec<Option<SystemTime>>>>()?
.into_iter()
.min()
.unwrap()
} else {
get_valid_until(
&configs[0],
TenantShardId::unsharded(tenant_id),
timeline_id,
lsn,
)?
};
Ok(valid_until)
}

View File

@@ -1,105 +0,0 @@
use anyhow::{Context, Result};
use postgres::Client;
use tracing::info;
pub(crate) struct MigrationRunner<'m> {
client: &'m mut Client,
migrations: &'m [&'m str],
}
impl<'m> MigrationRunner<'m> {
pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
assert!(migrations.len() + 1 < i64::MAX as usize);
Self { client, migrations }
}
fn get_migration_id(&mut self) -> Result<i64> {
let query = "SELECT id FROM neon_migration.migration_id";
let row = self
.client
.query_one(query, &[])
.context("run_migrations get migration_id")?;
Ok(row.get::<&str, i64>("id"))
}
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
self.client
.simple_query(&setval)
.context("run_migrations update id")?;
Ok(())
}
fn prepare_migrations(&mut self) -> Result<()> {
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
self.client.simple_query(query)?;
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
self.client.simple_query(query)?;
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
self.client.simple_query(query)?;
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
self.client.simple_query(query)?;
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
self.client.simple_query(query)?;
Ok(())
}
pub fn run_migrations(mut self) -> Result<()> {
self.prepare_migrations()?;
let mut current_migration = self.get_migration_id()? as usize;
while current_migration < self.migrations.len() {
macro_rules! migration_id {
($cm:expr) => {
($cm + 1) as i64
};
}
let migration = self.migrations[current_migration];
if migration.starts_with("-- SKIP") {
info!("Skipping migration id={}", migration_id!(current_migration));
} else {
info!(
"Running migration id={}:\n{}\n",
migration_id!(current_migration),
migration
);
self.client
.simple_query("BEGIN")
.context("begin migration")?;
self.client.simple_query(migration).with_context(|| {
format!(
"run_migrations migration id={}",
migration_id!(current_migration)
)
})?;
// Migration IDs start at 1
self.update_migration_id(migration_id!(current_migration))?;
self.client
.simple_query("COMMIT")
.context("commit migration")?;
info!("Finished migration id={}", migration_id!(current_migration));
}
current_migration += 1;
}
Ok(())
}
}

View File

@@ -1 +0,0 @@
ALTER ROLE neon_superuser BYPASSRLS;

View File

@@ -1,18 +0,0 @@
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;

View File

@@ -1,6 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END $$;

View File

@@ -1 +0,0 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;

View File

@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;

View File

@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;

View File

@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;

View File

@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;

View File

@@ -1,13 +0,0 @@
-- SKIP: The original goal of this migration was to prevent creating
-- subscriptions, but this migration was insufficient.
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END $$;

View File

@@ -1,7 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
END IF;
END $$;

View File

@@ -1 +0,0 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;

View File

@@ -17,11 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// should be handled gracefully.
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let mut connstr = compute.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "compute_activity_monitor");
let connstr = connstr.as_str();
let connstr = compute.connstr.as_str();
// During startup and configuration we connect to every Postgres database,
// but we don't want to count this as some user activity. So wait until

View File

@@ -22,10 +22,9 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
/// Escape a string for including it in a SQL literal.
///
/// Wrapping the result with `E'{}'` or `'{}'` is not required,
/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
/// Escape a string for including it in a SQL literal. Wrapping the result
/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
/// for the original implementation.
pub fn escape_literal(s: &str) -> String {
@@ -45,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
format!("'{}'", res)
}
pub trait GenericOptionExt {
trait GenericOptionExt {
fn to_pg_option(&self) -> String;
fn to_pg_setting(&self) -> String;
}
@@ -265,10 +264,9 @@ pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
// case we miss some events for some reason. Not strictly necessary, but
// better safe than sorry.
let (tx, rx) = std::sync::mpsc::channel();
let watcher_res = notify::recommended_watcher(move |res| {
let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
let _ = tx.send(res);
});
let (mut watcher, rx): (Box<dyn Watcher>, _) = match watcher_res {
}) {
Ok(watcher) => (Box::new(watcher), rx),
Err(e) => {
match e.kind {
@@ -490,7 +488,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
/// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
/// - next line starts with timestamp
/// - EOF
/// - no new lines were written for the last 100 milliseconds
/// - no new lines were written for the last second
async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
let mut lines = tokio::io::BufReader::new(stderr).lines();
let timeout_duration = Duration::from_millis(100);

View File

@@ -2,7 +2,7 @@ use std::fs::File;
use std::path::Path;
use std::str::FromStr;
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{anyhow, bail, Result};
use postgres::config::Config;
use postgres::{Client, NoTls};
use reqwest::StatusCode;
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
use crate::config;
use crate::logger::inlinify;
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -491,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
if existing_dbs.contains_key(&op.name) {
if existing_dbs.get(&op.name).is_some() {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
@@ -582,12 +581,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(
spec: &ComputeSpec,
client: &mut Client,
connstr: &str,
enable_anon_extension: bool,
) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client, connstr: &str) -> Result<()> {
info!("modifying database permissions");
let existing_dbs = get_existing_dbs(client)?;
@@ -656,9 +650,6 @@ pub fn handle_grants(
// remove this code if possible. The worst thing that could happen is that
// user won't be able to use public schema in NEW databases created in the
// very OLD project.
//
// Also, alter default permissions so that relations created by extensions can be
// used by neon_superuser without permission issues.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
@@ -677,15 +668,6 @@ pub fn handle_grants(
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\
END IF;\n\
END\n\
$$;"
.to_string();
@@ -696,12 +678,6 @@ pub fn handle_grants(
inlinify(&grant_query)
);
db_client.simple_query(&grant_query)?;
// it is important to run this after all grants
if enable_anon_extension {
handle_extension_anon(spec, &db.owner, &mut db_client, false)
.context("handle_grants handle_extension_anon")?;
}
}
Ok(())
@@ -746,22 +722,7 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
// - extension was just installed
// - extension was already installed and is up to date
let query = "ALTER EXTENSION neon UPDATE";
info!("update neon extension version with query: {}", query);
if let Err(e) = client.simple_query(query) {
error!(
"failed to upgrade neon extension during `handle_extension_neon`: {}",
e
);
}
Ok(())
}
#[instrument(skip_all)]
pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
info!("handle neon extension upgrade");
let query = "ALTER EXTENSION neon UPDATE";
info!("update neon extension version with query: {}", query);
info!("update neon extension schema with query: {}", query);
client.simple_query(query)?;
Ok(())
@@ -775,149 +736,78 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Add new migrations in numerical order.
let migrations = [
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
include_str!("./migrations/0002-alter_roles.sql"),
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
include_str!(
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
),
include_str!(
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
),
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
),
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
),
"ALTER ROLE neon_superuser BYPASSRLS",
r#"
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
r#"
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END
$$;"#,
];
MigrationRunner::new(client, &migrations).run_migrations()?;
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
Ok(())
}
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
/// Connect to the database as superuser and pre-create anon extension
/// if it is present in shared_preload_libraries
#[instrument(skip_all)]
pub fn handle_extension_anon(
spec: &ComputeSpec,
db_owner: &str,
db_client: &mut Client,
grants_only: bool,
) -> Result<()> {
info!("handle extension anon");
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("anon") {
if !grants_only {
// check if extension is already initialized using anon.is_initialized()
let query = "SELECT anon.is_initialized()";
match db_client.query(query, &[]) {
Ok(rows) => {
if !rows.is_empty() {
let is_initialized: bool = rows[0].get(0);
if is_initialized {
info!("anon extension is already initialized");
return Ok(());
}
}
}
Err(e) => {
warn!(
"anon extension is_installed check failed with expected error: {}",
e
);
}
};
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
// Create anon extension if this compute needs it
// Users cannot create it themselves, because superuser is required.
let mut query = "CREATE EXTENSION IF NOT EXISTS anon CASCADE";
info!("creating anon extension with query: {}", query);
match db_client.query(query, &[]) {
Ok(_) => {}
Err(e) => {
error!("anon extension creation failed with error: {}", e);
return Ok(());
}
}
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
// check that extension is installed
query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
let rows = db_client.query(query, &[])?;
if rows.is_empty() {
error!("anon extension is not installed");
return Ok(());
}
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
// Initialize anon extension
// This also requires superuser privileges, so users cannot do it themselves.
query = "SELECT anon.init()";
match db_client.query(query, &[]) {
Ok(_) => {}
Err(e) => {
error!("anon.init() failed with error: {}", e);
return Ok(());
}
}
}
query = "BEGIN";
client.simple_query(query)?;
// check that extension is installed, if not bail early
let query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
match db_client.query(query, &[]) {
Ok(rows) => {
if rows.is_empty() {
error!("anon extension is not installed");
return Ok(());
}
}
Err(e) => {
error!("anon extension check failed with error: {}", e);
return Ok(());
}
};
let query = format!("GRANT ALL ON SCHEMA anon TO {}", db_owner);
info!("granting anon extension permissions with query: {}", query);
db_client.simple_query(&query)?;
// Grant permissions to db_owner to use anon extension functions
let query = format!("GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", db_owner);
info!("granting anon extension permissions with query: {}", query);
db_client.simple_query(&query)?;
// This is needed, because some functions are defined as SECURITY DEFINER.
// In Postgres SECURITY DEFINER functions are executed with the privileges
// of the owner.
// In anon extension this it is needed to access some GUCs, which are only accessible to
// superuser. But we've patched postgres to allow db_owner to access them as well.
// So we need to change owner of these functions to db_owner.
let query = format!("
SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};'
from pg_proc p
join pg_namespace nsp ON p.pronamespace = nsp.oid
where nsp.nspname = 'anon';", db_owner);
info!("change anon extension functions owner to db owner");
db_client.simple_query(&query)?;
// affects views as well
let query = format!("GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", db_owner);
info!("granting anon extension permissions with query: {}", query);
db_client.simple_query(&query)?;
let query = format!("GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", db_owner);
info!("granting anon extension permissions with query: {}", query);
db_client.simple_query(&query)?;
}
while current_migration < migrations.len() {
info!("Running migration:\n{}\n", migrations[current_migration]);
client.simple_query(migrations[current_migration])?;
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(())
}

View File

@@ -1,45 +0,0 @@
use std::path::Path;
use anyhow::{anyhow, Context};
use tracing::warn;
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
// run `/neonvm/bin/resize-swap --once {size_bytes}`
//
// Passing '--once' causes resize-swap to delete itself after successful completion, which
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
// postgres is running.
//
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
let child_result = std::process::Command::new("/usr/bin/sudo")
.arg(RESIZE_SWAP_BIN)
.arg("--once")
.arg(size_bytes.to_string())
.spawn();
child_result
.context("spawn() failed")
.and_then(|mut child| child.wait().context("wait() failed"))
.and_then(|status| match status.success() {
true => Ok(()),
false => {
// The command failed. Maybe it was because the resize-swap file doesn't exist?
// The --once flag causes it to delete itself on success so we don't disable swap
// while postgres is running; maybe this is fine.
match Path::new(RESIZE_SWAP_BIN).try_exists() {
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
// The path doesn't exist; we're actually ok
Ok(false) => {
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
Ok(())
},
}
}
})
// wrap any prior error with the overall context that we couldn't run the command
.with_context(|| {
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
})
}

View File

@@ -6,23 +6,28 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
camino.workspace = true
clap.workspace = true
comfy-table.workspace = true
diesel = { version = "2.1.4", features = ["postgres"]}
diesel_migrations = { version = "2.1.0", features = ["postgres"]}
futures.workspace = true
humantime.workspace = true
git-version.workspace = true
nix.workspace = true
once_cell.workspace = true
humantime-serde.workspace = true
postgres.workspace = true
hex.workspace = true
hyper.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] }
scopeguard.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
tar.workspace = true
thiserror.workspace = true
toml.workspace = true
toml_edit.workspace = true
tokio.workspace = true
tokio-postgres.workspace = true
tokio-util.workspace = true
@@ -34,7 +39,6 @@ safekeeper_api.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true
whoami.workspace = true
compute_api.workspace = true
workspace_hack.workspace = true

View File

@@ -1,26 +0,0 @@
# Control Plane and Neon Local
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command.
## Example: Start with Postgres 16
To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 3 of the start-up commands.
```shell
cargo neon init --pg-version 16
cargo neon start
cargo neon tenant create --set-default --pg-version 16
cargo neon endpoint create main --pg-version 16
cargo neon endpoint start main
```
## Example: Create Test User and Database
By default, `cargo neon` starts an endpoint with `cloud_admin` and `postgres` database. If you want to have a role and a database similar to what we have on the cloud service, you can do it with the following commands when starting an endpoint.
```shell
cargo neon endpoint create main --pg-version 16 --update-catalog true
cargo neon endpoint start main --create-test-user true
```
The first command creates `neon_superuser` and necessary roles. The second command creates `test` user and `neondb` database. You will see a connection string that connects you to the test user after running the second command.

Some files were not shown because too many files have changed in this diff Show More