Mirror of https://github.com/neondatabase/neon.git

Compare commits: jcsp/rfc-h ... alexk/comp (1 commit, abb7c54f24)
@@ -14,7 +14,6 @@
!compute/
!compute_tools/
!control_plane/
!docker-compose/ext-src
!libs/
!pageserver/
!pgxn/
.github/actionlint.yml (vendored, 5 changes)
@@ -28,8 +28,3 @@ config-variables:
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID
- SLACK_STORAGE_CHANNEL_ID
- NEON_DEV_AWS_ACCOUNT_ID
- NEON_PROD_AWS_ACCOUNT_ID
- AWS_ECR_REGION
- BENCHMARK_LARGE_OLTP_PROJECTID
@@ -38,11 +38,9 @@ runs:
#
- name: Set variables
shell: bash -euxo pipefail {0}
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BUCKET: neon-github-public-dev
run: |
if [ -n "${PR_NUMBER}" ]; then
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \
[ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then
@@ -61,6 +59,8 @@ runs:
echo "LOCK_FILE=${LOCK_FILE}" >> $GITHUB_ENV
echo "WORKDIR=${WORKDIR}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
env:
BUCKET: neon-github-public-dev

# TODO: We can replace with a special docker image with Java and Allure pre-installed
- uses: actions/setup-java@v4
@@ -80,8 +80,8 @@ runs:
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.32.2
ALLURE_ZIP_SHA256: 3f28885e2118f6317c92f667eaddcc6491400af1fb9773c1f3797a5fa5174953
ALLURE_VERSION: 2.27.0
ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777

- uses: aws-actions/configure-aws-credentials@v4
if: ${{ !cancelled() }}
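The env block above pins the Allure version together with a SHA-256 checksum of its zip archive; the download and verification itself lies outside this hunk. A minimal sketch of how such a pinned checksum is typically verified before the archive is removed (the `ALLURE_ZIP` file name here is an assumption, not taken from the diff):

```bash
# Sketch only: verify the downloaded archive against the pinned checksum, then clean up.
ALLURE_ZIP="allure-${ALLURE_VERSION}.zip"                       # assumed file name
echo "${ALLURE_ZIP_SHA256}  ${ALLURE_ZIP}" | sha256sum --check  # fails the step on mismatch
rm -f "${ALLURE_ZIP}"
```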
@@ -18,11 +18,9 @@ runs:
steps:
- name: Set variables
shell: bash -euxo pipefail {0}
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REPORT_DIR: ${{ inputs.report-dir }}
run: |
if [ -n "${PR_NUMBER}" ]; then
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \
[ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then
@@ -34,6 +32,8 @@ runs:

echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
echo "REPORT_DIR=${REPORT_DIR}" >> $GITHUB_ENV
env:
REPORT_DIR: ${{ inputs.report-dir }}

- uses: aws-actions/configure-aws-credentials@v4
if: ${{ !cancelled() }}
.github/actions/neon-branch-create/action.yml (vendored, 12 changes)
@@ -84,13 +84,7 @@ runs:
--header "Authorization: Bearer ${API_KEY}"
)

role_name=$(echo "$roles" | jq --raw-output '
(.roles | map(select(.protected == false))) as $roles |
if any($roles[]; .name == "neondb_owner")
then "neondb_owner"
else $roles[0].name
end
')
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
@@ -113,13 +107,13 @@ runs:
)

if [ -z "${reset_password}" ]; then
sleep $i
sleep 1
continue
fi

password=$(echo $reset_password | jq --raw-output '.role.password')
if [ "${password}" == "null" ]; then
sleep $i # increasing backoff
sleep 1
continue
fi
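The two jq filters in the hunk above differ in how they pick `role_name` from the branch's role list. A minimal sketch of the behavioural difference, run against a made-up API response (the role names here are invented for illustration):

```bash
roles='{"roles":[{"name":"web_access","protected":true},{"name":"alice","protected":false},{"name":"neondb_owner","protected":false}]}'

# One-line filter: prints every unprotected role, one per line ("alice" and "neondb_owner"),
# so role_name can end up holding more than one value.
echo "$roles" | jq --raw-output '.roles[] | select(.protected == false) | .name'

# Multi-line filter: prefers "neondb_owner" when it exists among the unprotected roles,
# otherwise falls back to the first unprotected role, and always yields exactly one value.
echo "$roles" | jq --raw-output '
  (.roles | map(select(.protected == false))) as $roles |
  if any($roles[]; .name == "neondb_owner") then "neondb_owner" else $roles[0].name end
'
```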
.github/actions/neon-project-create/action.yml (vendored, 22 changes)
@@ -19,11 +19,7 @@ inputs:
default: '[1, 1]'
# settings below only needed if you want the project to be sharded from the beginning
shard_split_project:
description: 'by default new projects are not shard-split initiailly, but only when shard-split threshold is reached, specify true to explicitly shard-split initially'
required: false
default: 'false'
disable_sharding:
description: 'by default new projects use storage controller default policy to shard-split when shard-split threshold is reached, specify true to explicitly disable sharding'
description: 'by default new projects are not shard-split, specify true to shard-split'
required: false
default: 'false'
admin_api_key:
@@ -111,21 +107,6 @@ runs:
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
fi
if [ "${DISABLE_SHARDING}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`

echo "Explicitly disabling shard-splitting for project ${project_id} with tenant_id ${TENANT_ID}"

echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy"
echo "with body {\"scheduling\": \"Essential\"}"

# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
fi

env:
API_HOST: ${{ inputs.api_host }}
@@ -135,7 +116,6 @@ runs:
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
DISABLE_SHARDING: ${{ inputs.disable_sharding }}
ADMIN_API_KEY: ${{ inputs.admin_api_key }}
SHARD_COUNT: ${{ inputs.shard_count }}
STRIPE_SIZE: ${{ inputs.stripe_size }}
.github/actions/run-python-test-set/action.yml (vendored, 10 changes)
@@ -44,11 +44,6 @@ inputs:
description: 'Postgres version to use for tests'
required: false
default: 'v16'
sanitizers:
description: 'enabled or disabled'
required: false
default: 'disabled'
type: string
benchmark_durations:
description: 'benchmark durations JSON'
required: false
@@ -64,7 +59,7 @@ runs:
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

@@ -117,7 +112,6 @@ runs:
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FAILED: ${{ inputs.rerun_failed }}
PG_VERSION: ${{ inputs.pg_version }}
SANITIZERS: ${{ inputs.sanitizers }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
@@ -242,5 +236,5 @@ runs:
uses: ./.github/actions/allure-report-store
with:
report-dir: /tmp/test_output/allure/results
unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}
unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
.github/scripts/previous-releases.jq (vendored, 25 changes)
@@ -1,25 +0,0 @@
# Expects response from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases as input,
# with tag names `release` for storage, `release-compute` for compute and `release-proxy` for proxy releases.
# Extract only the `tag_name` field from each release object
[ .[].tag_name ]

# Transform each tag name into a structured object using regex capture
| reduce map(
capture("^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\d+))$")
| {
component: (.component // "storage"), # Default to "storage" if no component is specified
version: (.version | tonumber), # Convert the version number to an integer
full: .full # Store the full tag name for final output
}
)[] as $entry # Loop over the transformed list

# Accumulate the latest (highest-numbered) version for each component
({};
.[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))

# Convert the resulting object into an array of formatted strings
| to_entries
| map("\(.key)=\(.value.full)")

# Output each string separately
| .[]
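The jq program above boils the release list down to the newest tag per component. A small, hypothetical example of its input and output (the tag numbers are made up; the real input comes from the GitHub releases API call shown in `_meta.yml` below):

```bash
# Sketch only: feed a hand-written releases payload through the script.
cat > /tmp/releases.json <<'EOF'
[
  {"tag_name": "release-7001"},
  {"tag_name": "release-6990"},
  {"tag_name": "release-proxy-6950"},
  {"tag_name": "release-compute-6800"},
  {"tag_name": "release-compute-6799"}
]
EOF

# Prints one line per component, keeping only the highest-numbered tag:
#   storage=release-7001
#   proxy=release-proxy-6950
#   compute=release-compute-6800
jq -r -f .github/scripts/previous-releases.jq /tmp/releases.json
```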
@@ -280,7 +280,7 @@ jobs:
- name: Upload Neon artifact
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

@@ -337,7 +337,7 @@ jobs:
- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }}
timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 60 || 180 }}
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
@@ -347,7 +347,6 @@ jobs:
real_s3_region: eu-central-1
rerun_failed: true
pg_version: ${{ matrix.pg_version }}
sanitizers: ${{ inputs.sanitizers }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
# Attempt to stop tests gracefully to generate test reports
@@ -360,6 +359,7 @@ jobs:
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
SANITIZERS: ${{ inputs.sanitizers }}

# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
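The comment in the hunk above defines `--session-timeout` as (timeout-minutes - 10 minutes) * 60 seconds, so the test session is asked to stop ten minutes before the job itself is killed. A quick worked example for the timeout values that appear in this hunk:

```bash
# Sketch only: the --session-timeout values implied by the rule in the comment above.
for timeout_minutes in 60 75 180; do
  echo "timeout-minutes=${timeout_minutes} -> --session-timeout=$(( (timeout_minutes - 10) * 60 ))"
done
# timeout-minutes=60  -> --session-timeout=3000
# timeout-minutes=75  -> --session-timeout=3900
# timeout-minutes=180 -> --session-timeout=10200
```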
.github/workflows/_meta.yml (vendored, 103 changes)
@@ -1,103 +0,0 @@
name: Generate run metadata
on:
workflow_call:
inputs:
github-event-name:
type: string
required: true
outputs:
build-tag:
description: "Tag for the current workflow run"
value: ${{ jobs.tags.outputs.build-tag }}
previous-storage-release:
description: "Tag of the last storage release"
value: ${{ jobs.tags.outputs.storage }}
previous-proxy-release:
description: "Tag of the last proxy release"
value: ${{ jobs.tags.outputs.proxy }}
previous-compute-release:
description: "Tag of the last compute release"
value: ${{ jobs.tags.outputs.compute }}
run-kind:
description: "The kind of run we're currently in. Will be one of `pr`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`"
value: ${{ jobs.tags.outputs.run-kind }}

permissions: {}

jobs:
tags:
runs-on: ubuntu-22.04
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}
compute: ${{ steps.previous-releases.outputs.compute }}
proxy: ${{ steps.previous-releases.outputs.proxy }}
storage: ${{ steps.previous-releases.outputs.storage }}
run-kind: ${{ steps.run-kind.outputs.run-kind }}
permissions:
contents: read
steps:
# Need `fetch-depth: 0` to count the number of commits in the branch
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get run kind
id: run-kind
env:
RUN_KIND: >-
${{
false
|| (inputs.github-event-name == 'push' && github.ref_name == 'main') && 'push-main'
|| (inputs.github-event-name == 'push' && github.ref_name == 'release') && 'storage-release'
|| (inputs.github-event-name == 'push' && github.ref_name == 'release-compute') && 'compute-release'
|| (inputs.github-event-name == 'push' && github.ref_name == 'release-proxy') && 'proxy-release'
|| (inputs.github-event-name == 'pull_request' && github.base_ref == 'release') && 'storage-rc-pr'
|| (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr'
|| (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr'
|| (inputs.github-event-name == 'pull_request') && 'pr'
|| 'unknown'
}}
run: |
echo "run-kind=$RUN_KIND" | tee -a $GITHUB_OUTPUT

- name: Get build tag
id: build-tag
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
RUN_KIND: ${{ steps.run-kind.outputs.run-kind }}
run: |
case $RUN_KIND in
push-main)
echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
;;
storage-release)
echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
;;
proxy-release)
echo "tag=release-proxy-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
;;
compute-release)
echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
;;
pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)
BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
;;
*)
echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!"
exit 1
esac

- name: Get the previous release-tags
id: previous-releases
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api --paginate \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${GITHUB_REPOSITORY}/releases" \
| jq -f .github/scripts/previous-releases.jq -r \
| tee -a "${GITHUB_OUTPUT}"
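The `run-kind` step above encodes a branch/event switch as one chained GitHub Actions expression. A rough bash equivalent, for readability only (the workflow itself evaluates the expression directly; the environment variable names below are the standard GitHub-provided ones, not taken from the diff):

```bash
# Sketch only: the same decision table as the RUN_KIND expression, written as shell.
run_kind=unknown
if [ "$GITHUB_EVENT_NAME" = "push" ]; then
  case "$GITHUB_REF_NAME" in
    main)            run_kind=push-main ;;
    release)         run_kind=storage-release ;;
    release-compute) run_kind=compute-release ;;
    release-proxy)   run_kind=proxy-release ;;
  esac
elif [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
  case "$GITHUB_BASE_REF" in
    release)         run_kind=storage-rc-pr ;;
    release-compute) run_kind=compute-rc-pr ;;
    release-proxy)   run_kind=proxy-rc-pr ;;
    *)               run_kind=pr ;;
  esac
fi
echo "run-kind=${run_kind}"
```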
@@ -2,7 +2,7 @@ name: Push images to Container Registry
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
# Example: {"docker.io/neondatabase/neon:13196061314":["${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}
|
||||
# Example: {"docker.io/neondatabase/neon:13196061314":["369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}
|
||||
image-map:
|
||||
description: JSON map of images, mapping from a source image to an array of target images that should be pushed.
|
||||
required: true
|
||||
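The `image-map` input above is a JSON object keyed by source image, with an array of target references per key (see the example near the top of this hunk). A minimal sketch of how such a map can be consumed; the actual `push_with_image_map.py` script referenced later in this workflow may well do this differently:

```bash
# Sketch only: copy every source image in the map to each of its targets.
IMAGE_MAP='{"docker.io/neondatabase/neon:13196061314":["369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}'

echo "$IMAGE_MAP" \
  | jq -r 'to_entries[] | .key as $src | .value[] | "\($src) \(.)"' \
  | while read -r src dst; do
      docker buildx imagetools create -t "$dst" "$src"
    done
```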
@@ -11,12 +11,8 @@ on:
|
||||
description: AWS region to log in to. Required when pushing to ECR.
|
||||
required: false
|
||||
type: string
|
||||
aws-account-id:
|
||||
description: AWS account ID to log in to for pushing to ECR. Required when pushing to ECR.
|
||||
required: false
|
||||
type: string
|
||||
aws-role-to-assume:
|
||||
description: AWS role to assume to for pushing to ECR. Required when pushing to ECR.
|
||||
aws-account-ids:
|
||||
description: Comma separated AWS account IDs to log in to for pushing to ECR. Required when pushing to ECR.
|
||||
required: false
|
||||
type: string
|
||||
azure-client-id:
|
||||
@@ -35,6 +31,16 @@ on:
|
||||
description: ACR registry name. Required when pushing to ACR.
|
||||
required: false
|
||||
type: string
|
||||
secrets:
|
||||
docker-hub-username:
|
||||
description: Docker Hub username. Required when pushing to Docker Hub.
|
||||
required: false
|
||||
docker-hub-password:
|
||||
description: Docker Hub password. Required when pushing to Docker Hub.
|
||||
required: false
|
||||
aws-role-to-assume:
|
||||
description: AWS role to assume. Required when pushing to ECR.
|
||||
required: false
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -47,11 +53,10 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
sparse-checkout: .github/scripts/push_with_image_map.py
|
||||
sparse-checkout: scripts/push_with_image_map.py
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
- name: Print image-map
|
||||
@@ -62,14 +67,14 @@ jobs:
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: "${{ inputs.aws-region }}"
|
||||
role-to-assume: "arn:aws:iam::${{ inputs.aws-account-id }}:role/${{ inputs.aws-role-to-assume }}"
|
||||
role-to-assume: "${{ secrets.aws-role-to-assume }}"
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to ECR
|
||||
if: contains(inputs.image-map, 'amazonaws.com/')
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
with:
|
||||
registries: "${{ inputs.aws-account-id }}"
|
||||
registries: "${{ inputs.aws-account-ids }}"
|
||||
|
||||
- name: Configure Azure credentials
|
||||
if: contains(inputs.image-map, 'azurecr.io/')
|
||||
@@ -84,21 +89,13 @@ jobs:
|
||||
run: |
|
||||
az acr login --name=${{ inputs.acr-registry-name }}
|
||||
|
||||
- name: Login to GHCR
|
||||
if: contains(inputs.image-map, 'ghcr.io/')
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ secrets.docker-hub-username }}
|
||||
password: ${{ secrets.docker-hub-password }}
|
||||
|
||||
- name: Copy docker images to target registries
|
||||
run: python3 .github/scripts/push_with_image_map.py
|
||||
run: python scripts/push_with_image_map.py
|
||||
env:
|
||||
IMAGE_MAP: ${{ inputs.image-map }}
|
||||
|
||||
.github/workflows/benchmarking.yml (vendored, 59 changes)
@@ -140,7 +140,6 @@ jobs:
|
||||
--ignore test_runner/performance/test_logical_replication.py
|
||||
--ignore test_runner/performance/test_physical_replication.py
|
||||
--ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
|
||||
--ignore test_runner/performance/test_cumulative_statistics_persistence.py
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -172,61 +171,6 @@ jobs:
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
cumstats-test:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 17
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: "neon-staging"
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Verify that cumulative statistics are preserved
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_cumulative_statistics_persistence.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 3600
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
replication-tests:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
@@ -454,9 +398,6 @@ jobs:
|
||||
runs-on: ${{ matrix.runner }}
|
||||
container:
|
||||
image: ${{ matrix.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
|
||||
.github/workflows/build_and_test.yml (vendored, 423 changes)
@@ -65,11 +65,38 @@ jobs:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
filters: .github/file-filters.yaml
|
||||
|
||||
meta:
|
||||
tag:
|
||||
needs: [ check-permissions ]
|
||||
uses: ./.github/workflows/_meta.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
steps:
|
||||
# Need `fetch-depth: 0` to count the number of commits in the branch
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
echo run:$GITHUB_RUN_ID
|
||||
echo ref:$GITHUB_REF_NAME
|
||||
echo rev:$(git rev-list --count HEAD)
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
|
||||
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
|
||||
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
|
||||
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
shell: bash
|
||||
id: build-tag
|
||||
|
||||
build-build-tools-image:
|
||||
needs: [ check-permissions ]
|
||||
@@ -172,7 +199,7 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
needs: [ tag, build-build-tools-image ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -186,7 +213,7 @@ jobs:
|
||||
with:
|
||||
arch: ${{ matrix.arch }}
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.meta.outputs.build-tag }}
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
|
||||
# Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
|
||||
@@ -470,24 +497,13 @@ jobs:
|
||||
})
|
||||
|
||||
trigger-e2e-tests:
|
||||
# Depends on jobs that can get skipped
|
||||
if: >-
|
||||
${{
|
||||
(
|
||||
!github.event.pull_request.draft
|
||||
|| contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
|
||||
|| contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind)
|
||||
) && !failure() && !cancelled()
|
||||
}}
|
||||
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
|
||||
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
|
||||
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ]
|
||||
uses: ./.github/workflows/trigger-e2e-tests.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
secrets: inherit
|
||||
|
||||
neon-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ check-permissions, build-build-tools-image, tag ]
|
||||
strategy:
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
@@ -523,7 +539,7 @@ jobs:
|
||||
build-args: |
|
||||
ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }}
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
BUILD_TAG=${{ needs.meta.outputs.build-tag }}
|
||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm
|
||||
DEBIAN_VERSION=bookworm
|
||||
provenance: false
|
||||
@@ -533,11 +549,10 @@ jobs:
|
||||
cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}
|
||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }}
|
||||
|
||||
neon-image:
|
||||
needs: [ neon-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ neon-image-arch, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
@@ -552,14 +567,13 @@ jobs:
|
||||
|
||||
- name: Create multi-arch image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
||||
docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \
|
||||
-t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \
|
||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
|
||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64
|
||||
|
||||
compute-node-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ check-permissions, build-build-tools-image, tag ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
@@ -617,7 +631,7 @@ jobs:
|
||||
build-args: |
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PG_VERSION=${{ matrix.version.pg }}
|
||||
BUILD_TAG=${{ needs.meta.outputs.build-tag }}
|
||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||
provenance: false
|
||||
@@ -627,7 +641,7 @@ jobs:
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
- name: Build neon extensions test image
|
||||
if: matrix.version.pg >= 'v16'
|
||||
@@ -637,7 +651,7 @@ jobs:
|
||||
build-args: |
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PG_VERSION=${{ matrix.version.pg }}
|
||||
BUILD_TAG=${{ needs.meta.outputs.build-tag }}
|
||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||
provenance: false
|
||||
@@ -647,11 +661,10 @@ jobs:
|
||||
target: extension-tests
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
tags: |
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ compute-node-image-arch, tag ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
@@ -679,28 +692,27 @@ jobs:
|
||||
|
||||
- name: Create multi-arch compute-node image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
-t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- name: Create multi-arch neon-test-extensions image
|
||||
if: matrix.version.pg >= 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
-t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
vm-compute-node-image-arch:
|
||||
needs: [ check-permissions, meta, compute-node-image ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
vm-compute-node-image:
|
||||
needs: [ check-permissions, tag, compute-node-image ]
|
||||
runs-on: [ self-hosted, large ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [ amd64, arm64 ]
|
||||
version:
|
||||
# see the comment for `compute-node-image-arch` job
|
||||
- pg: v14
|
||||
debian: bullseye
|
||||
- pg: v15
|
||||
@@ -710,14 +722,14 @@ jobs:
|
||||
- pg: v17
|
||||
debian: bookworm
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.42.2
|
||||
VM_BUILDER_VERSION: v0.37.1
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Downloading vm-builder
|
||||
run: |
|
||||
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-${{ matrix.arch }} -o vm-builder
|
||||
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
|
||||
chmod +x vm-builder
|
||||
|
||||
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
|
||||
@@ -730,50 +742,22 @@ jobs:
|
||||
# it won't have the proper authentication (written at v0.6.0)
|
||||
- name: Pulling compute-node image
|
||||
run: |
|
||||
docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}
|
||||
docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
- name: Build vm image
|
||||
run: |
|
||||
./vm-builder \
|
||||
-size=2G \
|
||||
-spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
|
||||
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \
|
||||
-target-arch=linux/${{ matrix.arch }}
|
||||
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
- name: Pushing vm-compute-node image
|
||||
run: |
|
||||
docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}
|
||||
|
||||
vm-compute-node-image:
|
||||
needs: [ vm-compute-node-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
version:
|
||||
# see the comment for `compute-node-image-arch` job
|
||||
- pg: v14
|
||||
- pg: v15
|
||||
- pg: v16
|
||||
- pg: v17
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Create multi-arch compute-node image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \
|
||||
neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64
|
||||
|
||||
docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
test-images:
|
||||
needs: [ check-permissions, meta, neon-image, compute-node-image ]
|
||||
# Depends on jobs that can get skipped
|
||||
if: "!failure() && !cancelled()"
|
||||
needs: [ check-permissions, tag, neon-image, compute-node-image ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -791,6 +775,17 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Get the last compute release tag
|
||||
id: get-last-compute-release-tag
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"/repos/${{ github.repository }}/releases")
|
||||
echo tag=${tag} >> ${GITHUB_OUTPUT}
|
||||
|
||||
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
|
||||
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
|
||||
# Regular pageserver version string looks like
|
||||
@@ -800,9 +795,8 @@ jobs:
|
||||
# Ensure that we don't have bad versions.
|
||||
- name: Verify image versions
|
||||
shell: bash # ensure no set -e for better error messages
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
run: |
|
||||
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
|
||||
echo "Pageserver version string: $pageserver_version"
|
||||
|
||||
@@ -819,24 +813,7 @@ jobs:
|
||||
- name: Verify docker-compose example and test extensions
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
TAG: >-
|
||||
${{
|
||||
contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.previous-storage-release
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
COMPUTE_TAG: >-
|
||||
${{
|
||||
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.previous-compute-release
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
TEST_EXTENSIONS_TAG: >-
|
||||
${{
|
||||
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& 'latest'
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
TAG: ${{needs.tag.outputs.build-tag}}
|
||||
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
|
||||
run: ./docker-compose/docker_compose_test.sh
|
||||
|
||||
@@ -848,17 +825,10 @@ jobs:
|
||||
|
||||
- name: Test extension upgrade
|
||||
timeout-minutes: 20
|
||||
if: ${{ contains(fromJSON('["pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ needs.tag.outputs.build-tag == github.run_id }}
|
||||
env:
|
||||
TAG: >-
|
||||
${{
|
||||
false
|
||||
|| needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag
|
||||
|| needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release
|
||||
}}
|
||||
TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }}
|
||||
NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }}
|
||||
OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }}
|
||||
NEWTAG: ${{ needs.tag.outputs.build-tag }}
|
||||
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
|
||||
run: ./docker-compose/test_extensions_upgrade.sh
|
||||
|
||||
- name: Print logs and clean up
|
||||
@@ -868,7 +838,7 @@ jobs:
|
||||
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
generate-image-maps:
|
||||
needs: [ meta ]
|
||||
needs: [ tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
neon-dev: ${{ steps.generate.outputs.neon-dev }}
|
||||
@@ -878,111 +848,101 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
sparse-checkout: .github/scripts/generate_image_maps.py
|
||||
sparse-checkout: scripts/generate_image_maps.py
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
- name: Generate Image Maps
|
||||
id: generate
|
||||
run: python3 .github/scripts/generate_image_maps.py
|
||||
run: python scripts/generate_image_maps.py
|
||||
env:
|
||||
BUILD_TAG: "${{ needs.meta.outputs.build-tag }}"
|
||||
BUILD_TAG: "${{ needs.tag.outputs.build-tag }}"
|
||||
BRANCH: "${{ github.ref_name }}"
|
||||
DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
|
||||
PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
|
||||
DEV_AWS: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
|
||||
PROD_AWS: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
|
||||
AWS_REGION: "${{ vars.AWS_ECR_REGION }}"
|
||||
|
||||
push-neon-image-dev:
|
||||
needs: [ meta, generate-image-maps, neon-image ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ generate-image-maps, neon-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
with:
|
||||
image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}'
|
||||
aws-region: ${{ vars.AWS_ECR_REGION }}
|
||||
aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
|
||||
aws-role-to-assume: "gha-oidc-neon-admin"
|
||||
aws-region: eu-central-1
|
||||
aws-account-ids: "369495373322"
|
||||
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
|
||||
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
|
||||
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
secrets:
|
||||
aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
|
||||
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
push-compute-image-dev:
|
||||
needs: [ meta, generate-image-maps, vm-compute-node-image ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ generate-image-maps, vm-compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
with:
|
||||
image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}'
|
||||
aws-region: ${{ vars.AWS_ECR_REGION }}
|
||||
aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
|
||||
aws-role-to-assume: "gha-oidc-neon-admin"
|
||||
aws-region: eu-central-1
|
||||
aws-account-ids: "369495373322"
|
||||
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
|
||||
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
|
||||
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
secrets:
|
||||
aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
|
||||
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
push-neon-image-prod:
|
||||
needs: [ meta, generate-image-maps, neon-image, test-images ]
|
||||
# Depends on jobs that can get skipped
|
||||
if: ${{ !failure() && !cancelled() && contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }}
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
needs: [ generate-image-maps, neon-image, test-images ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
with:
|
||||
image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}'
|
||||
aws-region: ${{ vars.AWS_ECR_REGION }}
|
||||
aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
|
||||
aws-role-to-assume: "gha-oidc-neon-admin"
|
||||
aws-region: eu-central-1
|
||||
aws-account-ids: "093970136003"
|
||||
azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
|
||||
azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
|
||||
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
|
||||
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
secrets:
|
||||
aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
|
||||
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
push-compute-image-prod:
|
||||
needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ]
|
||||
# Depends on jobs that can get skipped
|
||||
if: ${{ !failure() && !cancelled() && needs.meta.outputs.run-kind == 'compute-release' }}
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
needs: [ generate-image-maps, vm-compute-node-image, test-images ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
with:
|
||||
image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}'
|
||||
aws-region: ${{ vars.AWS_ECR_REGION }}
|
||||
aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
|
||||
aws-role-to-assume: "gha-oidc-neon-admin"
|
||||
aws-region: eu-central-1
|
||||
aws-account-ids: "093970136003"
|
||||
azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
|
||||
azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
|
||||
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
|
||||
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
secrets:
|
||||
aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
|
||||
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
# This is a bit of a special case so we're not using a generated image map.
|
||||
add-latest-tag-to-neon-extensions-test-image:
|
||||
if: github.ref_name == 'main'
|
||||
needs: [ meta, compute-node-image ]
|
||||
needs: [ tag, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
|
||||
}
|
||||
secrets: inherit
|
||||
secrets:
|
||||
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
trigger-custom-extensions-build-and-wait:
|
||||
needs: [ check-permissions, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ check-permissions, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
@@ -1017,7 +977,7 @@ jobs:
|
||||
\"ci_job_name\": \"build-and-upload-extensions\",
|
||||
\"commit_hash\": \"$COMMIT_SHA\",
|
||||
\"remote_repo\": \"${{ github.repository }}\",
|
||||
\"compute_image_tag\": \"${{ needs.meta.outputs.build-tag }}\",
|
||||
\"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
|
||||
\"remote_branch_name\": \"${{ github.ref_name }}\"
|
||||
}
|
||||
}"
|
||||
@@ -1061,116 +1021,121 @@ jobs:
|
||||
exit 1
|
||||
|
||||
deploy:
|
||||
needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
|
||||
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
|
||||
if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
|
||||
needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
|
||||
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
|
||||
if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/ansible:latest
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Create git tag and GitHub release
|
||||
if: ${{ contains(fromJSON('["storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) }}
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
uses: actions/github-script@v7
|
||||
env:
|
||||
TAG: "${{ needs.meta.outputs.build-tag }}"
|
||||
BRANCH: "${{ github.ref_name }}"
|
||||
PREVIOUS_RELEASE: >-
|
||||
${{
|
||||
false
|
||||
|| needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release
|
||||
|| needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release
|
||||
|| needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release
|
||||
|| 'unknown'
|
||||
}}
|
||||
with:
|
||||
retries: 5
|
||||
script: |
|
||||
const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env
|
||||
const tag = "${{ needs.tag.outputs.build-tag }}";
|
||||
const branch = "${{ github.ref_name }}";
|
||||
|
||||
try {
|
||||
const existingRef = await github.rest.git.getRef({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
ref: `tags/${TAG}`,
|
||||
ref: `tags/${tag}`,
|
||||
});
|
||||
|
||||
if (existingRef.data.object.sha !== context.sha) {
|
||||
throw new Error(`Tag ${TAG} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
}

console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`);
console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`);
} catch (error) {
if (error.status !== 404) {
throw error;
}

console.log(`Tag ${TAG} does not exist. Creating it...`);
console.log(`Tag ${tag} does not exist. Creating it...`);
await github.rest.git.createRef({
owner: context.repo.owner,
repo: context.repo.repo,
ref: `refs/tags/${TAG}`,
ref: `refs/tags/${tag}`,
sha: context.sha,
});
console.log(`Tag ${TAG} created successfully.`);
console.log(`Tag ${tag} created successfully.`);
}

try {
const existingRelease = await github.rest.repos.getReleaseByTag({
owner: context.repo.owner,
repo: context.repo.repo,
tag: TAG,
tag: tag,
});

console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`);
console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`);
} catch (error) {
if (error.status !== 404) {
throw error;
}

console.log(`Release for tag ${TAG} does not exist. Creating it...`);
console.log(`Release for tag ${tag} does not exist. Creating it...`);

// Find the PR number using the commit SHA
const pullRequests = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'closed',
base: BRANCH,
base: branch,
});

const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
const prNumber = pr ? pr.number : null;

// Find the previous release on the branch
const releases = await github.rest.repos.listReleases({
owner: context.repo.owner,
repo: context.repo.repo,
per_page: 100,
});

const branchReleases = releases.data
.filter((release) => {
const regex = new RegExp(`^${branch}-\\d+$`);
return regex.test(release.tag_name) && !release.draft && !release.prerelease;
})
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));

const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;

const releaseNotes = [
prNumber
? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
: 'Release PR not found.',
`Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.`
previousTag
? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
: `No previous release found on branch ${branch}.`,
].join('\n\n');

await github.rest.repos.createRelease({
owner: context.repo.owner,
repo: context.repo.repo,
tag_name: TAG,
tag_name: tag,
body: releaseNotes,
});
console.log(`Release for tag ${TAG} created successfully.`);
console.log(`Release for tag ${tag} created successfully.`);
}
- name: Trigger deploy workflow
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
RUN_KIND: ${{ needs.meta.outputs.run-kind }}
run: |
case ${RUN_KIND} in
push-main)
gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false
;;
storage-release)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
-f deployPgSniRouter=false \
-f deployProxy=false \
@@ -1178,7 +1143,7 @@ jobs:
-f deployStorageBroker=true \
-f deployStorageController=true \
-f branch=main \
-f dockerTag=${{needs.meta.outputs.build-tag}} \
-f dockerTag=${{needs.tag.outputs.build-tag}} \
-f deployPreprodRegion=true

gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \
@@ -1186,9 +1151,8 @@ jobs:
-f deployStorageBroker=true \
-f deployStorageController=true \
-f branch=main \
-f dockerTag=${{needs.meta.outputs.build-tag}}
;;
proxy-release)
-f dockerTag=${{needs.tag.outputs.build-tag}}
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
-f deployPgSniRouter=true \
-f deployProxy=true \
@@ -1196,7 +1160,7 @@ jobs:
-f deployStorageBroker=false \
-f deployStorageController=false \
-f branch=main \
-f dockerTag=${{needs.meta.outputs.build-tag}} \
-f dockerTag=${{needs.tag.outputs.build-tag}} \
-f deployPreprodRegion=true

gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \
@@ -1206,32 +1170,13 @@ jobs:
-f deployProxyScram=true \
-f deployProxyAuthBroker=true \
-f branch=main \
-f dockerTag=${{needs.meta.outputs.build-tag}}
;;
compute-release)
gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}}
;;
*)
echo "RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'"
-f dockerTag=${{needs.tag.outputs.build-tag}}
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}}
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'"
exit 1
;;
esac

notify-storage-release-deploy-failure:
needs: [ deploy ]
# We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
runs-on: ubuntu-22.04
steps:
- name: Post release-deploy failure to team-storage slack channel
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
text: |
🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
fi

# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
promote-compatibility-data:
@@ -1240,7 +1185,7 @@ jobs:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: read
# `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
# `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
if: github.ref_name == 'release' && !failure() && !cancelled()

runs-on: ubuntu-22.04
@@ -1329,9 +1274,8 @@ jobs:
done

pin-build-tools-image:
needs: [ build-build-tools-image, test-images, build-and-test-locally ]
# `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped
if: github.ref_name == 'main' && !failure() && !cancelled()
needs: [ build-build-tools-image, push-compute-image-prod, push-neon-image-prod, build-and-test-locally ]
if: github.ref_name == 'main'
uses: ./.github/workflows/pin-build-tools-image.yml
with:
from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }}
@@ -1350,7 +1294,6 @@ jobs:
# Format `needs` differently to make the list more readable.
# Usually we do `needs: [...]`
needs:
- meta
- build-and-test-locally
- check-codestyle-python
- check-codestyle-rust
@@ -1374,7 +1317,7 @@ jobs:
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|| (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
|| needs.push-compute-image-dev.result == 'skipped'
|| needs.push-neon-image-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'

@@ -27,7 +27,7 @@ env:
jobs:
tag:
runs-on: [ self-hosted, small ]
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
3 .github/workflows/cloud-regress.yml vendored
@@ -38,9 +38,6 @@ jobs:
runs-on: us-east-2
container:
image: neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:

@@ -52,9 +52,8 @@ jobs:
- name: Test extension upgrade
timeout-minutes: 20
env:
NEW_COMPUTE_TAG: latest
OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
TEST_EXTENSIONS_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
NEWTAG: latest
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
PG_VERSION: ${{ matrix.pg-version }}
FORCE_ALL_UPGRADE_TESTS: true
run: ./docker-compose/test_extensions_upgrade.sh
10 .github/workflows/ingest_benchmark.yml vendored
@@ -32,27 +32,18 @@ jobs:
- target_project: new_empty_project_stripe_size_2048
stripe_size: 2048 # 16 MiB
postgres_version: 16
disable_sharding: false
- target_project: new_empty_project_stripe_size_32768
stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
# while here it is sharded from the beginning with a shard size of 256 MiB
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: false
postgres_version: 17
- target_project: large_existing_project
stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project_unsharded
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: true
postgres_version: 16
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
permissions:
@@ -105,7 +96,6 @@ jobs:
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
shard_count: 8
stripe_size: ${{ matrix.stripe_size }}
disable_sharding: ${{ matrix.disable_sharding }}

- name: Initialize Neon project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
147 .github/workflows/large_oltp_benchmark.yml vendored
@@ -1,147 +0,0 @@
|
||||
name: large oltp benchmark
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
push:
|
||||
branches: [ bodobolero/synthetic_oltp_workload ]
|
||||
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: large-oltp-bench-workflow
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
oltp:
|
||||
strategy:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
- target: new_branch
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
- target: reuse_branch
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
|
||||
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
|
||||
PLATFORM: ${{ matrix.target }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
timeout-minutes: 480
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials # necessary to download artefacts
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Branch for large tenant
|
||||
if: ${{ matrix.target == 'new_branch' }}
|
||||
id: create-neon-branch-oltp-target
|
||||
uses: ./.github/actions/neon-branch-create
|
||||
with:
|
||||
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${{ matrix.target }}" in
|
||||
new_branch)
|
||||
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
||||
;;
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Delete Neon Branch for large tenant
|
||||
if: ${{ always() && matrix.target == 'new_branch' }}
|
||||
uses: ./.github/actions/neon-branch-delete
|
||||
with:
|
||||
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
|
||||
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
||||
slack-message: |
|
||||
Periodic large oltp perf testing: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
2 .github/workflows/neon_extra_builds.yml vendored
@@ -71,7 +71,7 @@ jobs:
uses: ./.github/workflows/build-macos.yml
with:
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }}
rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}

gather-rust-build-stats:
96 .github/workflows/pin-build-tools-image.yml vendored
@@ -33,6 +33,10 @@ concurrency:
|
||||
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
TO_TAG: pinned
|
||||
|
||||
jobs:
|
||||
check-manifests:
|
||||
runs-on: ubuntu-22.04
|
||||
@@ -42,14 +46,11 @@ jobs:
|
||||
steps:
|
||||
- name: Check if we really need to pin the image
|
||||
id: check-manifests
|
||||
env:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
TO_TAG: pinned
|
||||
run: |
|
||||
docker manifest inspect "docker.io/neondatabase/build-tools:${FROM_TAG}" > "${FROM_TAG}.json"
|
||||
docker manifest inspect "docker.io/neondatabase/build-tools:${TO_TAG}" > "${TO_TAG}.json"
|
||||
docker manifest inspect neondatabase/build-tools:${FROM_TAG} > ${FROM_TAG}.json
|
||||
docker manifest inspect neondatabase/build-tools:${TO_TAG} > ${TO_TAG}.json
|
||||
|
||||
if diff "${FROM_TAG}.json" "${TO_TAG}.json"; then
|
||||
if diff ${FROM_TAG}.json ${TO_TAG}.json; then
|
||||
skip=true
|
||||
else
|
||||
skip=false
|
||||
@@ -63,36 +64,55 @@ jobs:
|
||||
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
|
||||
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
|
||||
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
packages: write # required for pushing to GHCR
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
|
||||
"docker.io/neondatabase/build-tools:pinned-bullseye",
|
||||
"ghcr.io/neondatabase/build-tools:pinned-bullseye",
|
||||
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye",
|
||||
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye"
|
||||
],
|
||||
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
|
||||
"docker.io/neondatabase/build-tools:pinned-bookworm",
|
||||
"docker.io/neondatabase/build-tools:pinned",
|
||||
"ghcr.io/neondatabase/build-tools:pinned-bookworm",
|
||||
"ghcr.io/neondatabase/build-tools:pinned",
|
||||
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm",
|
||||
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned",
|
||||
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm",
|
||||
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned"
|
||||
]
|
||||
}
|
||||
aws-region: ${{ vars.AWS_ECR_REGION }}
|
||||
aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
|
||||
aws-role-to-assume: "gha-oidc-neon-admin"
|
||||
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
|
||||
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
|
||||
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
permissions:
|
||||
id-token: write # for `azure/login` and aws auth
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Azure login
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
with:
|
||||
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
|
||||
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
|
||||
- name: Login to ACR
|
||||
run: |
|
||||
az acr login --name=neoneastus2
|
||||
|
||||
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
|
||||
env:
|
||||
DEFAULT_DEBIAN_VERSION: bookworm
|
||||
run: |
|
||||
for debian_version in bullseye bookworm; do
|
||||
tags=()
|
||||
|
||||
tags+=("-t" "neondatabase/build-tools:${TO_TAG}-${debian_version}")
|
||||
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}-${debian_version}")
|
||||
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}-${debian_version}")
|
||||
|
||||
if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
|
||||
tags+=("-t" "neondatabase/build-tools:${TO_TAG}")
|
||||
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}")
|
||||
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}")
|
||||
fi
|
||||
|
||||
docker buildx imagetools create "${tags[@]}" \
|
||||
neondatabase/build-tools:${FROM_TAG}-${debian_version}
|
||||
done
|
||||
|
||||
59 .github/workflows/trigger-e2e-tests.yml vendored
@@ -5,10 +5,6 @@ on:
types:
- ready_for_review
workflow_call:
inputs:
github-event-name:
type: string
required: true

defaults:
run:
@@ -23,7 +19,7 @@ jobs:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ inputs.github-event-name || github.event_name }}
github-event-name: ${{ github.event_name }}

cancel-previous-e2e-tests:
needs: [ check-permissions ]
@@ -39,29 +35,46 @@ jobs:
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

meta:
uses: ./.github/workflows/_meta.yml
with:
github-event-name: ${{ inputs.github-event-name || github.event_name }}
tag:
needs: [ check-permissions ]
runs-on: ubuntu-22.04
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}

steps:
# Need `fetch-depth: 0` to count the number of commits in the branch
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get build tag
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
fi
id: build-tag

trigger-e2e-tests:
needs: [ meta ]
needs: [ tag ]
runs-on: ubuntu-22.04
env:
EVENT_ACTION: ${{ github.event.action }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
TAG: >-
${{
contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
&& needs.meta.outputs.previous-storage-release
|| needs.meta.outputs.build-tag
}}
COMPUTE_TAG: >-
${{
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
&& needs.meta.outputs.previous-compute-release
|| needs.meta.outputs.build-tag
}}
TAG: ${{ needs.tag.outputs.build-tag }}
steps:
- name: Wait for `push-{neon,compute}-image-dev` job to finish
# It's important to have a timeout here, the script in the step can run infinitely
@@ -144,6 +157,6 @@ jobs:
--raw-field "commit_hash=$COMMIT_SHA" \
--raw-field "remote_repo=${GITHUB_REPOSITORY}" \
--raw-field "storage_image_tag=${TAG}" \
--raw-field "compute_image_tag=${COMPUTE_TAG}" \
--raw-field "compute_image_tag=${TAG}" \
--raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
--raw-field "e2e-platforms=${E2E_PLATFORMS}"
162 Cargo.lock generated
@@ -984,9 +984,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.71.1"
|
||||
version = "0.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
|
||||
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
"cexpr",
|
||||
@@ -997,7 +997,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash 2.1.1",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
@@ -1316,6 +1316,7 @@ dependencies = [
|
||||
"flate2",
|
||||
"futures",
|
||||
"http 1.1.0",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
"notify",
|
||||
@@ -1325,6 +1326,7 @@ dependencies = [
|
||||
"opentelemetry_sdk",
|
||||
"postgres",
|
||||
"postgres_initdb",
|
||||
"prometheus",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
@@ -1342,7 +1344,6 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
"tower-http",
|
||||
"tower-otel",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
@@ -1548,17 +1549,6 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cron"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5877d3fbf742507b66bc2a1945106bd30dd8504019d596901ddd012a4dd01740"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"once_cell",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.8"
|
||||
@@ -1887,12 +1877,6 @@ dependencies = [
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "difflib"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
@@ -2410,9 +2394,9 @@ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
|
||||
|
||||
[[package]]
|
||||
name = "futures-timer"
|
||||
version = "3.0.3"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
|
||||
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
@@ -2515,27 +2499,6 @@ version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "governor"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "842dc78579ce01e6a1576ad896edc92fca002dd60c9c3746b7fc2bec6fb429d0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"dashmap 6.1.0",
|
||||
"futures-sink",
|
||||
"futures-timer",
|
||||
"futures-util",
|
||||
"no-std-compat",
|
||||
"nonzero_ext",
|
||||
"parking_lot 0.12.1",
|
||||
"portable-atomic",
|
||||
"quanta",
|
||||
"rand 0.8.5",
|
||||
"smallvec",
|
||||
"spinning_top",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "group"
|
||||
version = "0.12.1"
|
||||
@@ -3371,17 +3334,6 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-structural-diff"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf"
|
||||
dependencies = [
|
||||
"difflib",
|
||||
"regex",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonwebtoken"
|
||||
version = "9.2.0"
|
||||
@@ -3558,7 +3510,7 @@ dependencies = [
|
||||
"measured-derive",
|
||||
"memchr",
|
||||
"parking_lot 0.12.1",
|
||||
"rustc-hash 1.1.0",
|
||||
"rustc-hash",
|
||||
"ryu",
|
||||
]
|
||||
|
||||
@@ -3746,12 +3698,6 @@ dependencies = [
|
||||
"memoffset 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "no-std-compat"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
@@ -3762,12 +3708,6 @@ dependencies = [
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nonzero_ext"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
|
||||
|
||||
[[package]]
|
||||
name = "notify"
|
||||
version = "8.0.0"
|
||||
@@ -4218,6 +4158,7 @@ dependencies = [
|
||||
"pageserver_client",
|
||||
"pageserver_compaction",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
"postgres_backend",
|
||||
@@ -4304,6 +4245,7 @@ dependencies = [
|
||||
"futures",
|
||||
"http-utils",
|
||||
"pageserver_api",
|
||||
"postgres",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
@@ -4519,18 +4461,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.9"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d"
|
||||
checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.9"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
|
||||
checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -4624,12 +4566,6 @@ dependencies = [
|
||||
"never-say-never",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
|
||||
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.7"
|
||||
@@ -4724,6 +4660,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.10.5",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"tokio-postgres",
|
||||
"url",
|
||||
]
|
||||
@@ -5051,7 +4988,7 @@ dependencies = [
|
||||
"reqwest-tracing",
|
||||
"rsa",
|
||||
"rstest",
|
||||
"rustc-hash 1.1.0",
|
||||
"rustc-hash",
|
||||
"rustls 0.23.18",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
@@ -5091,21 +5028,6 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quanta"
|
||||
version = "0.12.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"raw-cpuid",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
@@ -5236,15 +5158,6 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "11.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
@@ -5693,12 +5606,6 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
@@ -5895,6 +5802,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
@@ -6458,15 +6366,6 @@ version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||
|
||||
[[package]]
|
||||
name = "spinning_top"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spki"
|
||||
version = "0.6.0"
|
||||
@@ -6537,19 +6436,16 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"control_plane",
|
||||
"cron",
|
||||
"diesel",
|
||||
"diesel-async",
|
||||
"diesel_migrations",
|
||||
"fail",
|
||||
"futures",
|
||||
"governor",
|
||||
"hex",
|
||||
"http-utils",
|
||||
"humantime",
|
||||
"hyper 0.14.30",
|
||||
"itertools 0.10.5",
|
||||
"json-structural-diff",
|
||||
"lasso",
|
||||
"measured",
|
||||
"metrics",
|
||||
@@ -6572,7 +6468,6 @@ dependencies = [
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
@@ -7126,11 +7021,14 @@ dependencies = [
|
||||
name = "tokio-postgres2"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"futures-util",
|
||||
"log",
|
||||
"parking_lot 0.12.1",
|
||||
"percent-encoding",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol2",
|
||||
@@ -7375,20 +7273,6 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||
|
||||
[[package]]
|
||||
name = "tower-otel"
|
||||
version = "0.2.0"
|
||||
source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd"
|
||||
dependencies = [
|
||||
"http 1.1.0",
|
||||
"opentelemetry",
|
||||
"pin-project",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-service"
|
||||
version = "0.3.3"
|
||||
@@ -7731,13 +7615,13 @@ dependencies = [
|
||||
"hex",
|
||||
"hex-literal",
|
||||
"humantime",
|
||||
"inferno 0.12.0",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"postgres_connection",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -8245,9 +8129,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.26"
|
||||
version = "0.6.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e90edd2ac1aa278a5c4599b1d89cf03074b610800f866d4026dc199d7929a28"
|
||||
checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
11
Cargo.toml
11
Cargo.toml
@@ -43,7 +43,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
## All dependency versions, used in the project
|
||||
@@ -70,14 +70,13 @@ aws-types = "1.3"
|
||||
axum = { version = "0.8.1", features = ["ws"] }
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
bindgen = "0.71"
|
||||
bindgen = "0.70"
|
||||
bit_field = "0.10.2"
|
||||
bstr = "1.0"
|
||||
byteorder = "1.4"
|
||||
bytes = "1.9"
|
||||
camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
cron = "0.15"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive", "env"] }
|
||||
clashmap = { version = "1.0", features = ["raw-api"] }
|
||||
@@ -95,7 +94,6 @@ futures = "0.3"
|
||||
futures-core = "0.3"
|
||||
futures-util = "0.3"
|
||||
git-version = "0.3"
|
||||
governor = "0.8"
|
||||
hashbrown = "0.14"
|
||||
hashlink = "0.9.1"
|
||||
hdrhistogram = "7.5.2"
|
||||
@@ -194,10 +192,6 @@ toml_edit = "0.22"
|
||||
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
|
||||
tower = { version = "0.5.2", default-features = false }
|
||||
tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
|
||||
|
||||
# This revision uses opentelemetry 0.27. There's no tag for it.
|
||||
tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" }
|
||||
|
||||
tower-service = "0.3.3"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2"
|
||||
@@ -216,7 +210,6 @@ rustls-native-certs = "0.8"
|
||||
x509-parser = "0.16"
|
||||
whoami = "1.5.1"
|
||||
zerocopy = { version = "0.7", features = ["derive"] }
|
||||
json-structural-diff = { version = "0.2.0" }
|
||||
|
||||
## TODO replace this with tracing
|
||||
env_logger = "0.10"
|
||||
|
||||
7
Makefile
7
Makefile
@@ -11,16 +11,15 @@ ICU_PREFIX_DIR := /usr/local/icu
|
||||
#
|
||||
BUILD_TYPE ?= debug
|
||||
WITH_SANITIZERS ?= no
|
||||
PG_CFLAGS = -fsigned-char
|
||||
ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||
PG_CFLAGS += -O2 -g3 $(CFLAGS)
|
||||
PG_CFLAGS = -O2 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||
PG_CFLAGS += -O0 -g3 $(CFLAGS)
|
||||
PG_CFLAGS = -O0 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
@@ -160,8 +159,6 @@ postgres-%: postgres-configure-% \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
|
||||
+@echo "Compiling pageinspect $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
+@echo "Compiling pg_trgm $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
|
||||
@@ -292,7 +292,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.85.0
|
||||
ENV RUSTC_VERSION=1.84.1
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
|
||||
@@ -162,7 +162,7 @@ FROM build-deps AS pg-build
|
||||
ARG PG_VERSION
|
||||
COPY vendor/postgres-${PG_VERSION:?} postgres
|
||||
RUN cd postgres && \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
--with-icu --with-libxml --with-libxslt --with-lz4" && \
|
||||
if [ "${PG_VERSION:?}" != "v14" ]; then \
|
||||
# zstd is available only from PG15
|
||||
@@ -395,22 +395,15 @@ RUN case "${PG_VERSION:?}" in \
|
||||
cd plv8-src && \
|
||||
if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi
|
||||
|
||||
# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use
|
||||
# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds.
|
||||
# (The V8 engine takes a very long time to build)
|
||||
FROM build-deps AS plv8-build
|
||||
FROM pg-build AS plv8-build
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src/plv8-src
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
ninja-build python3-dev libncurses5 binutils clang \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
COPY --from=plv8-src /ext-src/ /ext-src/
|
||||
RUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8
|
||||
|
||||
# Step 2: Build the PostgreSQL-dependent parts
|
||||
COPY --from=pg-build /usr/local/pgsql /usr/local/pgsql
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
COPY --from=plv8-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/plv8-src
|
||||
RUN \
|
||||
# generate and copy upgrade scripts
|
||||
make generate_upgrades && \
|
||||
@@ -1458,11 +1451,9 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
FROM build-deps AS pg_mooncake-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/duckdb_v113.patch .
|
||||
RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \
|
||||
echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \
|
||||
cd third_party/duckdb && patch -p1 < /ext-src/duckdb_v113.patch && cd ../.. && \
|
||||
echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \
|
||||
chmod a+x neon-test.sh
|
||||
|
||||
@@ -1482,7 +1473,6 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
FROM build-deps AS pg_duckdb-src
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/pg_duckdb_v031.patch .
|
||||
COPY compute/patches/duckdb_v120.patch .
|
||||
# pg_duckdb build requires source dir to be a git repo to get submodules
|
||||
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
|
||||
# - extension management function duckdb.install_extension()
|
||||
@@ -1490,9 +1480,7 @@ COPY compute/patches/duckdb_v120.patch .
|
||||
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
|
||||
cd pg_duckdb-src && \
|
||||
git submodule update --init --recursive && \
|
||||
patch -p1 < /ext-src/pg_duckdb_v031.patch && \
|
||||
cd third_party/duckdb && \
|
||||
patch -p1 < /ext-src/duckdb_v120.patch
|
||||
patch -p1 < /ext-src/pg_duckdb_v031.patch
|
||||
|
||||
FROM pg-build AS pg_duckdb-build
|
||||
ARG PG_VERSION
|
||||
@@ -1521,73 +1509,6 @@ WORKDIR /ext-src/pg_repack-src
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pgaudit"
|
||||
# compile pgaudit extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pgaudit-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14") \
|
||||
export PGAUDIT_VERSION=1.6.2 \
|
||||
export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
|
||||
;; \
|
||||
"v15") \
|
||||
export PGAUDIT_VERSION=1.7.0 \
|
||||
export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
|
||||
;; \
|
||||
"v16") \
|
||||
export PGAUDIT_VERSION=16.0 \
|
||||
export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
|
||||
;; \
|
||||
"v17") \
|
||||
export PGAUDIT_VERSION=17.0 \
|
||||
export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
|
||||
;; \
|
||||
*) \
|
||||
echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
|
||||
echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
|
||||
mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pgaudit-build
|
||||
COPY --from=pgaudit-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pgaudit-src
|
||||
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pgauditlogtofile"
|
||||
# compile pgauditlogtofile extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pgauditlogtofile-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PGAUDITLOGTOFILE_VERSION=v1.6.4 \
|
||||
export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \
|
||||
;; \
|
||||
*) \
|
||||
echo "pgauditlogtofile is not supported on this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \
|
||||
echo "${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz" | sha256sum --check && \
|
||||
mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pgauditlogtofile-build
|
||||
COPY --from=pgauditlogtofile-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pgauditlogtofile-src
|
||||
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-ext-build"
|
||||
@@ -1683,8 +1604,6 @@ COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1818,7 +1737,7 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute
|
||||
|
||||
FROM pg-build AS extension-tests
|
||||
ARG PG_VERSION
|
||||
COPY docker-compose/ext-src/ /ext-src/
|
||||
RUN mkdir /ext-src
|
||||
|
||||
COPY --from=pg-build /postgres /postgres
|
||||
#COPY --from=postgis-src /ext-src/ /ext-src/
|
||||
@@ -1856,20 +1775,14 @@ COPY --from=pg_semver-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_ivm-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_partman-src /ext-src/ /ext-src/
|
||||
#COPY --from=pg_mooncake-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_repack-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY compute/patches/pg_repack.patch /ext-src
|
||||
RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch
|
||||
#COPY --from=pg_repack-src /ext-src/ /ext-src/
|
||||
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
|
||||
&& apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
ENV PGUSER=cloud_admin
|
||||
ENV PGDATABASE=postgres
|
||||
ENV PG_VERSION=${PG_VERSION:?}
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
diff --git a/libduckdb.map b/libduckdb.map
|
||||
new file mode 100644
|
||||
index 0000000000..3b56f00cd7
|
||||
--- /dev/null
|
||||
+++ b/libduckdb.map
|
||||
@@ -0,0 +1,6 @@
|
||||
+DUCKDB_1.1.3 {
|
||||
+ global:
|
||||
+ *duckdb*;
|
||||
+ local:
|
||||
+ *;
|
||||
+};
|
||||
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
|
||||
index 3e757a4bcc..88ab4005b9 100644
|
||||
--- a/src/CMakeLists.txt
|
||||
+++ b/src/CMakeLists.txt
|
||||
@@ -135,6 +135,8 @@ else()
|
||||
target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})
|
||||
link_threads(duckdb)
|
||||
link_extension_libraries(duckdb)
|
||||
+ target_link_options(duckdb PRIVATE
|
||||
+ -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb.map)
|
||||
|
||||
add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})
|
||||
target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})
|
||||
@@ -1,67 +0,0 @@
|
||||
diff --git a/libduckdb_pg_duckdb.map b/libduckdb_pg_duckdb.map
|
||||
new file mode 100644
|
||||
index 0000000000..0872978b48
|
||||
--- /dev/null
|
||||
+++ b/libduckdb_pg_duckdb.map
|
||||
@@ -0,0 +1,6 @@
|
||||
+DUCKDB_1.2.0 {
|
||||
+ global:
|
||||
+ *duckdb*;
|
||||
+ local:
|
||||
+ *;
|
||||
+};
|
||||
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
|
||||
index 58adef3fc0..2c522f91be 100644
|
||||
--- a/src/CMakeLists.txt
|
||||
+++ b/src/CMakeLists.txt
|
||||
@@ -59,7 +59,7 @@ endfunction()
|
||||
|
||||
if(AMALGAMATION_BUILD)
|
||||
|
||||
- add_library(duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp")
|
||||
+ add_library(duckdb_pg_duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp")
|
||||
target_link_libraries(duckdb ${DUCKDB_SYSTEM_LIBS})
|
||||
link_threads(duckdb)
|
||||
link_extension_libraries(duckdb)
|
||||
@@ -109,7 +109,7 @@ else()
|
||||
duckdb_yyjson
|
||||
duckdb_zstd)
|
||||
|
||||
- add_library(duckdb SHARED ${ALL_OBJECT_FILES})
|
||||
+ add_library(duckdb_pg_duckdb SHARED ${ALL_OBJECT_FILES})
|
||||
|
||||
if(WIN32 AND NOT MINGW)
|
||||
ensure_variable_is_number(DUCKDB_MAJOR_VERSION RC_MAJOR_VERSION)
|
||||
@@ -131,9 +131,11 @@ else()
|
||||
target_sources(duckdb PRIVATE version.rc)
|
||||
endif()
|
||||
|
||||
- target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})
|
||||
- link_threads(duckdb)
|
||||
- link_extension_libraries(duckdb)
|
||||
+ target_link_libraries(duckdb_pg_duckdb ${DUCKDB_LINK_LIBS})
|
||||
+ link_threads(duckdb_pg_duckdb)
|
||||
+ link_extension_libraries(duckdb_pg_duckdb)
|
||||
+ target_link_options(duckdb_pg_duckdb PRIVATE
|
||||
+ -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb_pg_duckdb.map)
|
||||
|
||||
add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})
|
||||
target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})
|
||||
@@ -141,7 +143,7 @@ else()
|
||||
link_extension_libraries(duckdb_static)
|
||||
|
||||
target_include_directories(
|
||||
- duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
+ duckdb_pg_duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
|
||||
target_include_directories(
|
||||
@@ -161,7 +163,7 @@ else()
|
||||
endif()
|
||||
|
||||
install(
|
||||
- TARGETS duckdb duckdb_static
|
||||
+ TARGETS duckdb_pg_duckdb duckdb_static
|
||||
EXPORT "${DUCKDB_EXPORT_SET}"
|
||||
LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
|
||||
ARCHIVE DESTINATION "${INSTALL_LIB_DIR}"
|
||||
@@ -1,25 +1,3 @@
|
||||
diff --git a/Makefile b/Makefile
|
||||
index 3235cc8..6b892bc 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -32,7 +32,7 @@ else
|
||||
DUCKDB_BUILD_TYPE = release
|
||||
endif
|
||||
|
||||
-DUCKDB_LIB = libduckdb$(DLSUFFIX)
|
||||
+DUCKDB_LIB = libduckdb_pg_duckdb$(DLSUFFIX)
|
||||
FULL_DUCKDB_LIB = third_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src/$(DUCKDB_LIB)
|
||||
|
||||
ERROR_ON_WARNING ?=
|
||||
@@ -54,7 +54,7 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} -
|
||||
# changes to the vendored code in one place.
|
||||
override PG_CFLAGS += -Wno-declaration-after-statement
|
||||
|
||||
-SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4
|
||||
+SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb_pg_duckdb -lstdc++ -llz4
|
||||
|
||||
include Makefile.global
|
||||
|
||||
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
index d777d76..af60106 100644
|
||||
--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
|
||||
@@ -1,72 +0,0 @@
diff --git a/regress/Makefile b/regress/Makefile
index bf6edcb..89b4c7f 100644
--- a/regress/Makefile
+++ b/regress/Makefile
@@ -17,7 +17,7 @@ INTVERSION := $(shell echo $$(($$(echo $(VERSION).0 | sed 's/\([[:digit:]]\{1,\}
# Test suite
#

-REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper tablespace get_order_by trigger
+REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger

USE_PGXS = 1 # use pgxs if not in contrib directory
PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out
index 8d0a94e..63b68bf 100644
--- a/regress/expected/nosuper.out
+++ b/regress/expected/nosuper.out
@@ -4,22 +4,22 @@
SET client_min_messages = error;
DROP ROLE IF EXISTS nosuper;
SET client_min_messages = warning;
-CREATE ROLE nosuper WITH LOGIN;
+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';
-- => OK
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check
INFO: repacking table "public.tbl_cluster"
-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
ERROR: pg_repack failed with error: You must be a superuser to use pg_repack
-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
ERROR: pg_repack failed with error: ERROR: permission denied for schema repack
LINE 1: select repack.version(), repack.version_sql()
^
GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;
GRANT USAGE ON SCHEMA repack TO nosuper;
-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
INFO: repacking table "public.tbl_cluster"
ERROR: query failed: ERROR: current transaction is aborted, commands ignored until end of transaction block
DETAIL: query was: RESET lock_timeout
diff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql
index 072f0fa..dbe60f8 100644
--- a/regress/sql/nosuper.sql
+++ b/regress/sql/nosuper.sql
@@ -4,19 +4,19 @@
SET client_min_messages = error;
DROP ROLE IF EXISTS nosuper;
SET client_min_messages = warning;
-CREATE ROLE nosuper WITH LOGIN;
+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';
-- => OK
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check
-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check

GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;
GRANT USAGE ON SCHEMA repack TO nosuper;

-- => ERROR
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check

REVOKE ALL ON ALL TABLES IN SCHEMA repack FROM nosuper;
REVOKE USAGE ON SCHEMA repack FROM nosuper;
@@ -44,11 +44,6 @@ shutdownHook: |
files:
- filename: compute_ctl-sudoers
content: |
# Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn

# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)

@@ -44,11 +44,6 @@ shutdownHook: |
files:
- filename: compute_ctl-sudoers
content: |
# Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn

# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)
@@ -1,7 +1,7 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition = "2024"
edition.workspace = true
license.workspace = true

[features]
@@ -25,6 +25,7 @@ fail.workspace = true
flate2.workspace = true
futures.workspace = true
http.workspace = true
jsonwebtoken.workspace = true
metrics.workspace = true
nix.workspace = true
notify.workspace = true
@@ -46,7 +47,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tower-otel.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
@@ -54,6 +54,7 @@ tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
uuid.workspace = true
prometheus.workspace = true
walkdir.workspace = true

postgres_initdb.workspace = true
@@ -33,28 +33,42 @@
|
||||
//! -b /usr/local/bin/postgres \
|
||||
//! -r http://pg-ext-s3-gateway \
|
||||
//! ```
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
|
||||
use std::time::SystemTime;
|
||||
use std::{thread, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::http::server::Server;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
use tracing::{error, info, warn};
|
||||
use url::Url;
|
||||
|
||||
use compute_api::responses::{ComputeCtlConfig, ComputeStatus};
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use compute_tools::compute::{ComputeNode, ComputeNodeParams, forward_termination_signal};
|
||||
|
||||
use compute_tools::compute::{
|
||||
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
|
||||
};
|
||||
use compute_tools::configurator::launch_configurator;
|
||||
use compute_tools::extension_server::get_pg_version_string;
|
||||
use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use rlimit::{Resource, setrlimit};
|
||||
use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
|
||||
use signal_hook::iterator::Signals;
|
||||
use tracing::{error, info};
|
||||
use url::Url;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
use utils::failpoint_support;
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
@@ -72,6 +86,19 @@ fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a compute ID if one is not supplied. This exists to keep forward
|
||||
/// compatibility tests working, but will be removed in a future iteration.
|
||||
fn generate_compute_id() -> String {
|
||||
let now = SystemTime::now();
|
||||
|
||||
format!(
|
||||
"compute-{}",
|
||||
now.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
@@ -85,13 +112,16 @@ struct Cli {
|
||||
/// outside the compute will talk to the compute through this port. Keep
|
||||
/// the previous name for this argument around for a smoother release
|
||||
/// with the control plane.
|
||||
#[arg(long, default_value_t = 3080)]
|
||||
///
|
||||
/// TODO: Remove the alias after the control plane release which teaches the
|
||||
/// control plane about the renamed argument.
|
||||
#[arg(long, alias = "http-port", default_value_t = 3080)]
|
||||
pub external_http_port: u16,
|
||||
|
||||
/// The port to bind the internal listening HTTP server to. Clients include
|
||||
/// The port to bind the internal listening HTTP server to. Clients like
|
||||
/// the neon extension (for installing remote extensions) and local_proxy.
|
||||
#[arg(long, default_value_t = 3081)]
|
||||
pub internal_http_port: u16,
|
||||
#[arg(long)]
|
||||
pub internal_http_port: Option<u16>,
|
||||
|
||||
#[arg(short = 'D', long, value_name = "DATADIR")]
|
||||
pub pgdata: String,
|
||||
@@ -126,7 +156,7 @@ struct Cli {
|
||||
#[arg(short = 'S', long, group = "spec-path")]
|
||||
pub spec_path: Option<OsString>,
|
||||
|
||||
#[arg(short = 'i', long, group = "compute-id")]
|
||||
#[arg(short = 'i', long, group = "compute-id", default_value = generate_compute_id())]
|
||||
pub compute_id: String,
|
||||
|
||||
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], value_name = "CONTROL_PLANE_API_BASE_URL")]
|
||||
@@ -136,8 +166,6 @@ struct Cli {
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
// For historical reasons, the main thread that processes the spec and launches postgres
|
||||
// is synchronous, but we always have this tokio runtime available and we "enter" it so
|
||||
// that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...)
|
||||
@@ -149,43 +177,34 @@ fn main() -> Result<()> {
|
||||
|
||||
let build_tag = runtime.block_on(init())?;
|
||||
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
|
||||
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
|
||||
let (pg_handle, start_pg_result) = {
|
||||
// Enter startup tracing context
|
||||
let _startup_context_guard = startup_context_from_env();
|
||||
|
||||
let cli_spec = try_spec_from_cli(&cli)?;
|
||||
let cli_spec = try_spec_from_cli(&cli)?;
|
||||
|
||||
let compute_node = ComputeNode::new(
|
||||
ComputeNodeParams {
|
||||
compute_id: cli.compute_id,
|
||||
connstr,
|
||||
pgdata: cli.pgdata.clone(),
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
external_http_port: cli.external_http_port,
|
||||
internal_http_port: cli.internal_http_port,
|
||||
ext_remote_storage: cli.remote_ext_config.clone(),
|
||||
resize_swap_on_bind: cli.resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
|
||||
#[cfg(target_os = "linux")]
|
||||
filecache_connstr: cli.filecache_connstr,
|
||||
#[cfg(target_os = "linux")]
|
||||
cgroup: cli.cgroup,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor_addr: cli.vm_monitor_addr,
|
||||
build_tag,
|
||||
let compute = wait_spec(build_tag, &cli, cli_spec)?;
|
||||
|
||||
live_config_allowed: cli_spec.live_config_allowed,
|
||||
},
|
||||
cli_spec.spec,
|
||||
)?;
|
||||
start_postgres(&cli, compute)?
|
||||
|
||||
let exit_code = compute_node.run()?;
|
||||
// Startup is finished, exit the startup tracing span
|
||||
};
|
||||
|
||||
// PostgreSQL is now running, if startup was successful. Wait until it exits.
|
||||
let wait_pg_result = wait_postgres(pg_handle)?;
|
||||
|
||||
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
|
||||
|
||||
maybe_delay_exit(delay_exit);
|
||||
|
||||
scenario.teardown();
|
||||
|
||||
deinit_and_exit(exit_code);
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
async fn init() -> Result<String> {
|
||||
@@ -206,6 +225,56 @@ async fn init() -> Result<String> {
|
||||
Ok(build_tag)
|
||||
}
|
||||
|
||||
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
// Extract OpenTelemetry context for the startup actions from the
|
||||
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
|
||||
// tracing context.
|
||||
//
|
||||
// This is used to propagate the context for the 'start_compute' operation
|
||||
// from the neon control plane. This allows linking together the wider
|
||||
// 'start_compute' operation that creates the compute container, with the
|
||||
// startup actions here within the container.
|
||||
//
|
||||
// There is no standard for passing context in env variables, but a lot of
|
||||
// tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
|
||||
// https://github.com/open-telemetry/opentelemetry-specification/issues/740
|
||||
//
|
||||
// Switch to the startup context here, and exit it once the startup has
|
||||
// completed and Postgres is up and running.
|
||||
//
|
||||
// If this pod is pre-created without binding it to any particular endpoint
|
||||
// yet, this isn't the right place to enter the startup context. In that
|
||||
// case, the control plane should pass the tracing context as part of the
|
||||
// /configure API call.
|
||||
//
|
||||
// NOTE: This is supposed to only cover the *startup* actions. Once
|
||||
// postgres is configured and up-and-running, we exit this span. Any other
|
||||
// actions that are performed on incoming HTTP requests, for example, are
|
||||
// performed in separate spans.
|
||||
//
|
||||
// XXX: If the pod is restarted, we perform the startup actions in the same
|
||||
// context as the original startup actions, which probably doesn't make
|
||||
// sense.
|
||||
let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
|
||||
if let Ok(val) = std::env::var("TRACEPARENT") {
|
||||
startup_tracing_carrier.insert("traceparent".to_string(), val);
|
||||
}
|
||||
if let Ok(val) = std::env::var("TRACESTATE") {
|
||||
startup_tracing_carrier.insert("tracestate".to_string(), val);
|
||||
}
|
||||
if !startup_tracing_carrier.is_empty() {
|
||||
use opentelemetry::propagation::TextMapPropagator;
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
let guard = TraceContextPropagator::new()
|
||||
.extract(&startup_tracing_carrier)
|
||||
.attach();
|
||||
info!("startup tracing context attached");
|
||||
Some(guard)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
if let Some(ref spec_json) = cli.spec_json {
|
||||
@@ -256,7 +325,342 @@ struct CliSpecParams {
|
||||
live_config_allowed: bool,
|
||||
}
|
||||
|
||||
fn deinit_and_exit(exit_code: Option<i32>) -> ! {
|
||||
fn wait_spec(
|
||||
build_tag: String,
|
||||
cli: &Cli,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
compute_ctl_config: _,
|
||||
}: CliSpecParams,
|
||||
) -> Result<Arc<ComputeNode>> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
if let Some(spec) = spec {
|
||||
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
info!("new pspec.spec: {:?}", pspec.spec);
|
||||
new_state.pspec = Some(pspec);
|
||||
spec_set = true;
|
||||
} else {
|
||||
spec_set = false;
|
||||
}
|
||||
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
|
||||
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
|
||||
.context("cannot build postgres config from connstr")?;
|
||||
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
|
||||
.context("cannot build tokio postgres config from connstr")?;
|
||||
let compute_node = ComputeNode {
|
||||
compute_id: cli.compute_id.clone(),
|
||||
connstr,
|
||||
conn_conf,
|
||||
tokio_conn_conf,
|
||||
pgdata: cli.pgdata.clone(),
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
external_http_port: cli.external_http_port,
|
||||
internal_http_port: cli.internal_http_port.unwrap_or(cli.external_http_port + 1),
|
||||
live_config_allowed,
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
ext_remote_storage: cli.remote_ext_config.clone(),
|
||||
ext_download_progress: RwLock::new(HashMap::new()),
|
||||
build_tag,
|
||||
};
|
||||
let compute = Arc::new(compute_node);
|
||||
|
||||
// If this is a pooled VM, prewarm before starting HTTP server and becoming
|
||||
// available for binding. Prewarming helps Postgres start quicker later,
|
||||
// because QEMU will already have its memory allocated from the host, and
|
||||
// the necessary binaries will already be cached.
|
||||
if !spec_set {
|
||||
compute.prewarm_postgres()?;
|
||||
}
|
||||
|
||||
// Launch the external HTTP server first, so that we can serve control plane
|
||||
// requests while configuration is still in progress.
|
||||
Server::External(cli.external_http_port).launch(&compute);
|
||||
|
||||
// The internal HTTP server could be launched later, but there isn't much
|
||||
// sense in waiting.
|
||||
Server::Internal(cli.internal_http_port.unwrap_or(cli.external_http_port + 1)).launch(&compute);
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
info!("no compute spec provided, waiting");
|
||||
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::ConfigurationPending {
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
|
||||
if state.status == ComputeStatus::ConfigurationPending {
|
||||
info!("got spec, continue configuration");
|
||||
// Spec is already set by the http server handler.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Record for how long we slept waiting for the spec.
|
||||
let now = Utc::now();
|
||||
state.metrics.wait_for_spec_ms = now
|
||||
.signed_duration_since(state.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64;
|
||||
|
||||
// Reset start time, so that the total startup time that is calculated later will
|
||||
// not include the time that we waited for the spec.
|
||||
state.start_time = now;
|
||||
}
|
||||
|
||||
launch_lsn_lease_bg_task_for_static(&compute);
|
||||
|
||||
Ok(compute)
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
cli: &Cli,
|
||||
compute: Arc<ComputeNode>,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.set_status(ComputeStatus::Init, &compute.state_changed);
|
||||
|
||||
info!(
|
||||
"running compute with features: {:?}",
|
||||
state.pspec.as_ref().unwrap().spec.features
|
||||
);
|
||||
// before we release the mutex, fetch some parameters for later.
|
||||
let &ComputeSpec {
|
||||
swap_size_bytes,
|
||||
disk_quota_bytes,
|
||||
#[cfg(target_os = "linux")]
|
||||
disable_lfc_resizing,
|
||||
..
|
||||
} = &state.pspec.as_ref().unwrap().spec;
|
||||
drop(state);
|
||||
|
||||
// Launch remaining service threads
|
||||
let _monitor_handle = launch_monitor(&compute);
|
||||
let _configurator_handle = launch_configurator(&compute);
|
||||
|
||||
let mut prestartup_failed = false;
|
||||
let mut delay_exit = false;
|
||||
|
||||
// Resize swap to the desired size if the compute spec says so
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
|
||||
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
|
||||
// *before* starting postgres.
|
||||
//
|
||||
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
|
||||
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
|
||||
// OOM-killed during startup because swap wasn't available yet.
|
||||
match resize_swap(size_bytes) {
|
||||
Ok(()) => {
|
||||
let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%size_bytes, %size_mib, "resized swap");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to resize swap");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to set disk quota");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Start Postgres
|
||||
let mut pg = None;
|
||||
if !prestartup_failed {
|
||||
pg = match compute.start_compute() {
|
||||
Ok(pg) => {
|
||||
info!(postmaster_pid = %pg.0.id(), "Postgres was started");
|
||||
Some(pg)
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
};
|
||||
} else {
|
||||
warn!("skipping postgres startup because pre-startup step failed");
|
||||
}
|
||||
|
||||
// Start the vm-monitor if directed to. The vm-monitor only runs on linux
|
||||
// because it requires cgroups.
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
use std::env;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
// This token is used internally by the monitor to clean up all threads
|
||||
let token = CancellationToken::new();
|
||||
|
||||
// don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
|
||||
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
|
||||
None
|
||||
} else {
|
||||
Some(cli.filecache_connstr.clone())
|
||||
};
|
||||
|
||||
let vm_monitor = if env::var_os("AUTOSCALING").is_some() {
|
||||
let vm_monitor = tokio::spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: Some(cli.cgroup.clone()),
|
||||
pgconnstr,
|
||||
addr: cli.vm_monitor_addr.clone(),
|
||||
})),
|
||||
token.clone(),
|
||||
));
|
||||
Some(vm_monitor)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
pg,
|
||||
StartPostgresResult {
|
||||
delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
type PostgresHandle = (std::process::Child, tokio::task::JoinHandle<Result<()>>);
|
||||
|
||||
struct StartPostgresResult {
|
||||
delay_exit: bool,
|
||||
// passed through from WaitSpecResult
|
||||
compute: Arc<ComputeNode>,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
token: tokio_util::sync::CancellationToken,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
|
||||
}
|
||||
|
||||
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
// Wait for the child Postgres process forever. In this state Ctrl+C will
|
||||
// propagate to Postgres and it will be shut down as well.
|
||||
let mut exit_code = None;
|
||||
if let Some((mut pg, logs_handle)) = pg {
|
||||
info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
|
||||
|
||||
let ecode = pg
|
||||
.wait()
|
||||
.expect("failed to start waiting on Postgres process");
|
||||
PG_PID.store(0, Ordering::SeqCst);
|
||||
|
||||
// Process has exited. Wait for the log collecting task to finish.
|
||||
let _ = tokio::runtime::Handle::current()
|
||||
.block_on(logs_handle)
|
||||
.map_err(|e| tracing::error!("log task panicked: {:?}", e));
|
||||
|
||||
info!("Postgres exited with code {}, shutting down", ecode);
|
||||
exit_code = ecode.code()
|
||||
}
|
||||
|
||||
Ok(WaitPostgresResult { exit_code })
|
||||
}
|
||||
|
||||
struct WaitPostgresResult {
|
||||
exit_code: Option<i32>,
|
||||
}
|
||||
|
||||
fn cleanup_after_postgres_exit(
|
||||
StartPostgresResult {
|
||||
mut delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
}: StartPostgresResult,
|
||||
) -> Result<bool> {
|
||||
// Terminate the vm_monitor so it releases the file watcher on
|
||||
// /sys/fs/cgroup/neon-postgres.
|
||||
// Note: the vm-monitor only runs on linux because it requires cgroups.
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
if let Some(handle) = vm_monitor {
|
||||
// Kills all threads spawned by the monitor
|
||||
token.cancel();
|
||||
// Kills the actual task running the monitor
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Maybe sync safekeepers again, to speed up next startup
|
||||
let compute_state = compute.state.lock().unwrap().clone();
|
||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
|
||||
info!("syncing safekeepers on shutdown");
|
||||
let storage_auth_token = pspec.storage_auth_token.clone();
|
||||
let lsn = compute.sync_safekeepers(storage_auth_token)?;
|
||||
info!("synced safekeepers at lsn {lsn}");
|
||||
}
|
||||
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status == ComputeStatus::TerminationPending {
|
||||
state.status = ComputeStatus::Terminated;
|
||||
compute.state_changed.notify_all();
|
||||
// we were asked to terminate gracefully, don't exit to avoid restart
|
||||
delay_exit = true
|
||||
}
|
||||
drop(state);
|
||||
|
||||
if let Err(err) = compute.check_for_core_dumps() {
|
||||
error!("error while checking for core dumps: {err:?}");
|
||||
}
|
||||
|
||||
Ok(delay_exit)
|
||||
}
|
||||
|
||||
fn maybe_delay_exit(delay_exit: bool) {
|
||||
// If launch failed, keep serving HTTP requests for a while, so the cloud
|
||||
// control plane can get the actual error.
|
||||
if delay_exit {
|
||||
info!("giving control plane 30s to collect the error before shutdown");
|
||||
thread::sleep(Duration::from_secs(30));
|
||||
}
|
||||
}
|
||||
|
||||
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
|
||||
// Shutdown trace pipeline gracefully, so that it has a chance to send any
|
||||
// pending traces before we exit. Shutting down OTEL tracing provider may
|
||||
// hang for quite some time, see, for example:
|
||||
|
||||
@@ -25,13 +25,13 @@
|
||||
//! docker push localhost:3030/localregistry/compute-node-v14:latest
|
||||
//! ```
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use anyhow::{bail, Context};
|
||||
use aws_config::BehaviorVersion;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::{Parser, Subcommand};
|
||||
use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
|
||||
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{Instrument, error, info, info_span, warn};
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/aws_s3_sync.rs"]
|
||||
@@ -361,14 +361,6 @@ async fn run_dump_restore(
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -402,14 +394,6 @@ async fn run_dump_restore(
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -558,9 +542,7 @@ async fn cmd_dumprestore(
|
||||
decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
|
||||
.await?
|
||||
} else {
|
||||
bail!(
|
||||
"destination connection string must be provided in spec for dump_restore command"
|
||||
);
|
||||
bail!("destination connection string must be provided in spec for dump_restore command");
|
||||
};
|
||||
|
||||
(source, dest)
|
||||
|
||||
@@ -1,10 +1,11 @@
use camino::{Utf8Path, Utf8PathBuf};
use tokio::task::JoinSet;
use tracing::{info, warn};
use walkdir::WalkDir;

use super::s3_uri::S3Uri;

use tracing::{info, warn};

const MAX_PARALLEL_UPLOADS: usize = 10;

/// Upload all files from 'local' to 'remote'

@@ -1,6 +1,5 @@
use std::str::FromStr;

use anyhow::Result;
use std::str::FromStr;

/// Struct to hold parsed S3 components
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -1,20 +1,18 @@
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use std::result::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use compute_api::responses::CatalogObjects;
|
||||
use futures::Stream;
|
||||
use postgres::NoTls;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
use tokio::spawn;
|
||||
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
|
||||
use tokio::{
|
||||
io::{AsyncBufReadExt, BufReader},
|
||||
process::Command,
|
||||
spawn,
|
||||
};
|
||||
use tokio_stream::{self as stream, StreamExt};
|
||||
use tokio_util::codec::{BytesCodec, FramedRead};
|
||||
use tracing::warn;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db};
|
||||
use compute_api::responses::CatalogObjects;
|
||||
|
||||
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
|
||||
let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles"));
|
||||
@@ -57,15 +55,15 @@ pub enum SchemaDumpError {
|
||||
pub async fn get_database_schema(
|
||||
compute: &Arc<ComputeNode>,
|
||||
dbname: &str,
|
||||
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>> + use<>, SchemaDumpError> {
|
||||
let pgbin = &compute.params.pgbin;
|
||||
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
|
||||
let pgbin = &compute.pgbin;
|
||||
let basepath = Path::new(pgbin).parent().unwrap();
|
||||
let pgdump = basepath.join("pg_dump");
|
||||
|
||||
// Replace the DB in the connection string and disable it to parts.
|
||||
// This is the only option to handle DBs with special characters.
|
||||
let conf = postgres_conf_for_db(&compute.params.connstr, dbname)
|
||||
.map_err(|_| SchemaDumpError::Unexpected)?;
|
||||
let conf =
|
||||
postgres_conf_for_db(&compute.connstr, dbname).map_err(|_| SchemaDumpError::Unexpected)?;
|
||||
let host = conf
|
||||
.get_hosts()
|
||||
.first()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use anyhow::{Ok, Result, anyhow};
|
||||
use anyhow::{anyhow, Ok, Result};
|
||||
use tokio_postgres::NoTls;
|
||||
use tracing::{error, instrument, warn};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
@@ -1,14 +1,13 @@
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Result;
|
||||
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value};
|
||||
use crate::pg_helpers::escape_conf_value;
|
||||
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
|
||||
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
/// Check that `line` is inside a text file and put it there if it is not.
|
||||
/// Create file if it doesn't exist.
|
||||
@@ -57,20 +56,10 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "neon.stripe_size={stripe_size}")?;
|
||||
}
|
||||
if !spec.safekeeper_connstrings.is_empty() {
|
||||
let mut neon_safekeepers_value = String::new();
|
||||
tracing::info!(
|
||||
"safekeepers_connstrings is not zero, gen: {:?}",
|
||||
spec.safekeepers_generation
|
||||
);
|
||||
// If generation is given, prepend sk list with g#number:
|
||||
if let Some(generation) = spec.safekeepers_generation {
|
||||
write!(neon_safekeepers_value, "g#{}:", generation)?;
|
||||
}
|
||||
neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(","));
|
||||
writeln!(
|
||||
file,
|
||||
"neon.safekeepers={}",
|
||||
escape_conf_value(&neon_safekeepers_value)
|
||||
escape_conf_value(&spec.safekeeper_connstrings.join(","))
|
||||
)?;
|
||||
}
|
||||
if let Some(s) = &spec.tenant_id {
|
||||
|
||||
@@ -1,9 +1,10 @@
use std::sync::Arc;
use std::thread;

use compute_api::responses::ComputeStatus;
use tracing::{error, info, instrument};

use compute_api::responses::ComputeStatus;

use crate::compute::ComputeNode;

#[instrument(skip_all)]

@@ -1,11 +1,9 @@
use anyhow::Context;
use tracing::instrument;

pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";

/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
#[instrument]
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
let size_kb = size_bytes / 1024;
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
@@ -71,15 +71,15 @@ More specifically, here is an example ext_index.json
|
||||
}
|
||||
}
|
||||
*/
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use anyhow::{Context, Result, bail};
|
||||
use anyhow::Result;
|
||||
use anyhow::{bail, Context};
|
||||
use bytes::Bytes;
|
||||
use compute_api::spec::RemoteExtSpec;
|
||||
use regex::Regex;
|
||||
use remote_storage::*;
|
||||
use reqwest::StatusCode;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
use tar::Archive;
|
||||
use tracing::info;
|
||||
use tracing::log::warn;
|
||||
@@ -244,10 +244,7 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
|
||||
info!("writing file {:?}{:?}", control_path, control_content);
|
||||
std::fs::write(control_path, control_content).unwrap();
|
||||
} else {
|
||||
warn!(
|
||||
"control file {:?} exists both locally and remotely. ignoring the remote version.",
|
||||
control_path
|
||||
);
|
||||
warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::rejection::JsonRejection;
|
||||
use axum::extract::{FromRequest, Request};
|
||||
use axum::extract::{rejection::JsonRejection, FromRequest, Request};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::FromRequestParts;
|
||||
use axum::extract::rejection::PathRejection;
|
||||
use axum::extract::{rejection::PathRejection, FromRequestParts};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
use http::request::Parts;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
/// Custom `Path` extractor, so that we can format errors into
|
||||
/// `JsonResponse<GenericAPIError>`.
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::FromRequestParts;
|
||||
use axum::extract::rejection::QueryRejection;
|
||||
use axum::extract::{rejection::QueryRejection, FromRequestParts};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
use http::request::Parts;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
/// Custom `Query` extractor, so that we can format errors into
|
||||
/// `JsonResponse<GenericAPIError>`.
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use axum::body::Body;
|
||||
use axum::response::Response;
|
||||
use axum::{body::Body, response::Response};
|
||||
use compute_api::responses::{ComputeStatus, GenericAPIError};
|
||||
use http::StatusCode;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use http::{header::CONTENT_TYPE, StatusCode};
|
||||
use serde::Serialize;
|
||||
use tracing::error;
|
||||
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::checker::check_writability;
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Check that the compute is currently running.
|
||||
pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::ConfigurationRequest,
|
||||
responses::{ComputeStatus, ComputeStatusResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
|
||||
use crate::compute::{ComputeNode, ParsedSpec};
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::{
|
||||
compute::{ComputeNode, ParsedSpec},
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If anything
|
||||
// goes wrong after we set the compute status to `ConfigurationPending` and
|
||||
@@ -22,7 +24,7 @@ pub(in crate::http) async fn configure(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<ConfigurationRequest>,
|
||||
) -> Response {
|
||||
if !compute.params.live_config_allowed {
|
||||
if !compute.live_config_allowed {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"live configuration is not allowed for this compute node".to_string(),
|
||||
@@ -45,18 +47,13 @@ pub(in crate::http) async fn configure(
|
||||
return JsonResponse::invalid_status(state.status);
|
||||
}
|
||||
|
||||
// Pass the tracing span to the main thread that performs the startup,
|
||||
// so that the start_compute operation is considered a child of this
|
||||
// configure request for tracing purposes.
|
||||
state.startup_span = Some(tracing::Span::current());
|
||||
|
||||
state.pspec = Some(pspec);
|
||||
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
}
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Running. This is
|
||||
// needed to not block the main pool of workers and to be able to serve
|
||||
// needed to do not block the main pool of workers and be able to serve
|
||||
// other requests while some particular request is waiting for compute to
|
||||
// finish configuration.
|
||||
let c = compute.clone();
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use http::StatusCode;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use axum::{body::Body, extract::State, response::Response};
|
||||
use http::{header::CONTENT_TYPE, StatusCode};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::catalog::{SchemaDumpError, get_database_schema};
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Query;
|
||||
use crate::{
|
||||
catalog::{get_database_schema, SchemaDumpError},
|
||||
compute::ComputeNode,
|
||||
http::{extract::Query, JsonResponse},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(in crate::http) struct DatabaseSchemaParams {
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use axum::{extract::State, response::Response};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::catalog::get_dbs_and_roles;
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Get the databases and roles from the compute.
|
||||
pub(in crate::http) async fn get_catalog_objects(
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::{
|
||||
extract::State,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use http::StatusCode;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::{Path, Query};
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{
|
||||
extract::{Path, Query},
|
||||
JsonResponse,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(in crate::http) struct ExtensionServerParams {
|
||||
@@ -18,11 +24,11 @@ pub(in crate::http) struct ExtensionServerParams {
|
||||
/// Download a remote extension.
|
||||
pub(in crate::http) async fn download_extension(
|
||||
Path(filename): Path<String>,
|
||||
ext_server_params: Query<ExtensionServerParams>,
|
||||
params: Query<ExtensionServerParams>,
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
// Don't even try to download extensions if no remote storage is configured
|
||||
if compute.params.ext_remote_storage.is_none() {
|
||||
if compute.ext_remote_storage.is_none() {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"remote storage is not configured",
|
||||
@@ -46,9 +52,9 @@ pub(in crate::http) async fn download_extension(
|
||||
|
||||
remote_extensions.get_ext(
|
||||
&filename,
|
||||
ext_server_params.is_library,
|
||||
&compute.params.build_tag,
|
||||
&compute.params.pgversion,
|
||||
params.is_library,
|
||||
&compute.build_tag,
|
||||
&compute.pgversion,
|
||||
)
|
||||
};
|
||||
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use compute_api::requests::ExtensionInstallRequest;
|
||||
use compute_api::responses::{ComputeStatus, ExtensionInstallResponse};
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::ExtensionInstallRequest,
|
||||
responses::{ComputeStatus, ExtensionInstallResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
/// Install a extension.
|
||||
pub(in crate::http) async fn install_extension(
|
||||
|
||||
@@ -17,8 +17,7 @@ pub struct FailpointConfig {
|
||||
pub actions: String,
|
||||
}
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::http::{extract::Json, JsonResponse};
|
||||
|
||||
/// Configure failpoints for testing purposes.
|
||||
pub(in crate::http) async fn configure_failpoints(
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use compute_api::requests::SetRoleGrantsRequest;
|
||||
use compute_api::responses::{ComputeStatus, SetRoleGrantsResponse};
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::SetRoleGrantsRequest,
|
||||
responses::{ComputeStatus, SetRoleGrantsResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
/// Add grants for a role.
|
||||
pub(in crate::http) async fn add_grant(
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Collect current Postgres usage insights.
|
||||
pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
use axum::body::Body;
|
||||
use axum::response::Response;
|
||||
use http::StatusCode;
|
||||
use axum::{body::Body, response::Response};
|
||||
use http::header::CONTENT_TYPE;
|
||||
use http::StatusCode;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{Encoder, TextEncoder};
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
use crate::metrics::collect;
|
||||
use crate::{http::JsonResponse, metrics::collect};
|
||||
|
||||
/// Expose Prometheus metrics.
|
||||
pub(in crate::http) async fn get_metrics() -> Response {
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use axum::{extract::State, response::Response};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Get startup metrics.
|
||||
pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
use std::{ops::Deref, sync::Arc};
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Response;
|
||||
use axum::{extract::State, http::StatusCode, response::Response};
|
||||
use compute_api::responses::ComputeStatusResponse;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Retrieve the state of the compute.
|
||||
pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::{
|
||||
extract::State,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
|
||||
use crate::compute::{ComputeNode, forward_termination_signal};
|
||||
use crate::http::JsonResponse;
|
||||
use crate::{
|
||||
compute::{forward_termination_signal, ComputeNode},
|
||||
http::JsonResponse,
|
||||
};
|
||||
|
||||
/// Terminate the compute.
|
||||
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
|
||||
@@ -1,20 +1,25 @@
|
||||
use std::fmt::Display;
|
||||
use std::net::{IpAddr, Ipv6Addr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::{
|
||||
fmt::Display,
|
||||
net::{IpAddr, Ipv6Addr, SocketAddr},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::Router;
|
||||
use axum::extract::Request;
|
||||
use axum::middleware::{self, Next};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::routing::{get, post};
|
||||
use axum::{
|
||||
body::Body,
|
||||
extract::Request,
|
||||
middleware::{self, Next},
|
||||
response::{IntoResponse, Response},
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use http::StatusCode;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::request_id::PropagateRequestIdLayer;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing::{Span, debug, error, info};
|
||||
use tower_http::classify::ServerErrorsFailureClass;
|
||||
use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
|
||||
use tracing::{debug, error, info, Span};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::routes::{
|
||||
@@ -81,47 +86,85 @@ impl From<Server> for Router<Arc<ComputeNode>> {
|
||||
.route("/terminate", post(terminate::terminate)),
|
||||
};
|
||||
|
||||
router.fallback(Server::handle_404).method_not_allowed_fallback(Server::handle_405).layer(
|
||||
ServiceBuilder::new()
|
||||
// Add this middleware since we assume the request ID exists
|
||||
.layer(middleware::from_fn(maybe_add_request_id_header))
|
||||
.layer(
|
||||
TraceLayer::new_for_http()
|
||||
.on_request(|request: &http::Request<_>, _span: &Span| {
|
||||
let request_id = request
|
||||
.headers()
|
||||
.get(X_REQUEST_ID)
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap();
|
||||
|
||||
match request.uri().path() {
|
||||
"/metrics" => {
|
||||
debug!(%request_id, "{} {}", request.method(), request.uri())
|
||||
}
|
||||
_ => info!(%request_id, "{} {}", request.method(), request.uri()),
|
||||
};
|
||||
})
|
||||
.on_response(
|
||||
|response: &http::Response<_>, latency: Duration, _span: &Span| {
|
||||
let request_id = response
|
||||
router
|
||||
.fallback(Server::handle_404)
|
||||
.method_not_allowed_fallback(Server::handle_405)
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
// Add this middleware since we assume the request ID exists
|
||||
.layer(middleware::from_fn(maybe_add_request_id_header))
|
||||
.layer(
|
||||
TraceLayer::new_for_http()
|
||||
.make_span_with(|request: &Request<Body>| {
|
||||
let request_id = request
|
||||
.headers()
|
||||
.get(X_REQUEST_ID)
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
%request_id,
|
||||
code = response.status().as_u16(),
|
||||
latency = latency.as_millis()
|
||||
)
|
||||
},
|
||||
),
|
||||
)
|
||||
.layer(PropagateRequestIdLayer::x_request_id()),
|
||||
)
|
||||
.layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO))
|
||||
match request.uri().path() {
|
||||
"/metrics" => {
|
||||
tracing::span!(
|
||||
tracing::Level::DEBUG,
|
||||
"",
|
||||
method = tracing::field::display(request.method()),
|
||||
uri = tracing::field::display(request.uri()),
|
||||
request_id = tracing::field::display(request_id)
|
||||
)
|
||||
}
|
||||
_ => tracing::span!(
|
||||
tracing::Level::INFO,
|
||||
"",
|
||||
method = tracing::field::display(request.method()),
|
||||
uri = tracing::field::display(request.uri()),
|
||||
request_id = tracing::field::display(request_id)
|
||||
),
|
||||
}
|
||||
})
|
||||
.on_request(|request: &http::Request<_>, _span: &Span| {
|
||||
match request.uri().path() {
|
||||
"/metrics" => debug!("incoming request"),
|
||||
_ => info!("incoming request"),
|
||||
};
|
||||
})
|
||||
.on_response(
|
||||
|response: &http::Response<_>, latency: Duration, _span: &Span| {
|
||||
// All errors will be logged in the on_failure handler
|
||||
if let 200..=399 = response.status().as_u16() {
|
||||
info!(
|
||||
message = "request finished",
|
||||
code = %response.status().as_u16(),
|
||||
latency_ms = %latency.as_millis()
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
.on_failure(
|
||||
|error: ServerErrorsFailureClass,
|
||||
latency: Duration,
|
||||
_span: &Span| {
|
||||
match error {
|
||||
ServerErrorsFailureClass::StatusCode(code) => {
|
||||
error!(
|
||||
message = "request failed",
|
||||
code = %code,
|
||||
latency_ms = %latency.as_millis()
|
||||
);
|
||||
}
|
||||
ServerErrorsFailureClass::Error(error) => {
|
||||
error!(
|
||||
message = "request failed unexpectedly",
|
||||
error = %error,
|
||||
latency_ms = %latency.as_millis()
|
||||
);
|
||||
}
|
||||
}
|
||||
},
|
||||
),
|
||||
)
|
||||
.layer(PropagateRequestIdLayer::x_request_id()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
@@ -44,50 +42,3 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result
|
||||
pub fn inlinify(s: &str) -> String {
|
||||
s.replace('\n', "\u{200B}")
|
||||
}
|
||||
|
||||
pub fn startup_context_from_env() -> Option<opentelemetry::Context> {
|
||||
// Extract OpenTelemetry context for the startup actions from the
|
||||
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
|
||||
// tracing context.
|
||||
//
|
||||
// This is used to propagate the context for the 'start_compute' operation
|
||||
// from the neon control plane. This allows linking together the wider
|
||||
// 'start_compute' operation that creates the compute container, with the
|
||||
// startup actions here within the container.
|
||||
//
|
||||
// There is no standard for passing context in env variables, but a lot of
|
||||
// tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
|
||||
// https://github.com/open-telemetry/opentelemetry-specification/issues/740
|
||||
//
|
||||
// Switch to the startup context here, and exit it once the startup has
|
||||
// completed and Postgres is up and running.
|
||||
//
|
||||
// If this pod is pre-created without binding it to any particular endpoint
|
||||
// yet, this isn't the right place to enter the startup context. In that
|
||||
// case, the control plane should pass the tracing context as part of the
|
||||
// /configure API call.
|
||||
//
|
||||
// NOTE: This is supposed to only cover the *startup* actions. Once
|
||||
// postgres is configured and up-and-running, we exit this span. Any other
|
||||
// actions that are performed on incoming HTTP requests, for example, are
|
||||
// performed in separate spans.
|
||||
//
|
||||
// XXX: If the pod is restarted, we perform the startup actions in the same
|
||||
// context as the original startup actions, which probably doesn't make
|
||||
// sense.
|
||||
let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
|
||||
if let Ok(val) = std::env::var("TRACEPARENT") {
|
||||
startup_tracing_carrier.insert("traceparent".to_string(), val);
|
||||
}
|
||||
if let Ok(val) = std::env::var("TRACESTATE") {
|
||||
startup_tracing_carrier.insert("tracestate".to_string(), val);
|
||||
}
|
||||
if !startup_tracing_carrier.is_empty() {
|
||||
use opentelemetry::propagation::TextMapPropagator;
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
info!("got startup tracing context from env variables");
|
||||
Some(TraceContextPropagator::new().extract(&startup_tracing_carrier))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use anyhow::{Result, bail};
|
||||
use compute_api::spec::ComputeMode;
|
||||
use anyhow::bail;
|
||||
use anyhow::Result;
|
||||
use postgres::{NoTls, SimpleQueryMessage};
|
||||
use std::time::SystemTime;
|
||||
use std::{str::FromStr, sync::Arc, thread, time::Duration};
|
||||
use utils::id::TenantId;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use compute_api::spec::ComputeMode;
|
||||
use tracing::{info, warn};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::shard::{ShardCount, ShardNumber, TenantShardId};
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
shard::{ShardCount, ShardNumber, TenantShardId},
|
||||
};
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
use metrics::core::Collector;
use metrics::proto::MetricFamily;
use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
use once_cell::sync::Lazy;

pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
@@ -1,14 +1,13 @@
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use std::{thread, time::Duration};

use chrono::{DateTime, Utc};
use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeFeature;
use postgres::{Client, NoTls};
use tracing::{debug, error, info, warn};

use crate::compute::ComputeNode;
use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeFeature;

const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);

@@ -18,7 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// should be handled gracefully.
fn watch_compute_activity(compute: &ComputeNode) {
    // Suppose that `connstr` doesn't change
    let connstr = compute.params.connstr.clone();
    let connstr = compute.connstr.clone();
    let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));

    // During startup and configuration we connect to every Postgres database,

@@ -9,8 +9,7 @@ use std::process::Child;
use std::str::FromStr;
use std::time::{Duration, Instant};

use anyhow::{Result, bail};
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
use anyhow::{bail, Result};
use futures::StreamExt;
use ini::Ini;
use notify::{RecursiveMode, Watcher};
@@ -22,6 +21,8 @@ use tokio_postgres;
use tokio_postgres::NoTls;
use tracing::{debug, error, info, instrument};

use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};

const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

/// Escape a string for including it in a SQL literal.

@@ -1,21 +1,21 @@
use anyhow::{anyhow, bail, Result};
use reqwest::StatusCode;
use std::fs::File;
use std::path::Path;

use anyhow::{Result, anyhow, bail};
use compute_api::responses::{
    ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
};
use compute_api::spec::ComputeSpec;
use reqwest::StatusCode;
use tokio_postgres::Client;
use tracing::{error, info, instrument, warn};

use crate::config;
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;

use compute_api::responses::{
    ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
};
use compute_api::spec::ComputeSpec;

// Do control plane request and return response if any. In case of error it
// returns a bool flag indicating whether it makes sense to retry the request
// and a string with error message.
@@ -141,6 +141,7 @@ pub fn get_spec_from_control_plane(
/// Check `pg_hba.conf` and update if needed to allow external connections.
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
    info!("checking pg_hba.conf");
    let pghba_path = pgdata_path.join("pg_hba.conf");

    if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
@@ -155,11 +156,12 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
/// Create a standby.signal file
pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
    info!("adding standby.signal");
    let signalfile = pgdata_path.join("standby.signal");

    if !signalfile.exists() {
        File::create(signalfile)?;
        info!("created standby.signal");
        File::create(signalfile)?;
    } else {
        info!("reused pre-existing standby.signal");
    }
@@ -168,6 +170,7 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {

#[instrument(skip_all)]
pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
    info!("handle neon extension upgrade");
    let query = "ALTER EXTENSION neon UPDATE";
    info!("update neon extension version with query: {}", query);
    client.simple_query(query).await?;

@@ -1,416 +1,18 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::future::Future;
|
||||
use std::iter::{empty, once};
|
||||
use std::iter::empty;
|
||||
use std::iter::once;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use crate::compute::construct_superuser_query;
|
||||
use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
|
||||
use anyhow::{bail, Result};
|
||||
use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
use tokio_postgres::error::SqlState;
|
||||
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
|
||||
|
||||
use crate::compute::{ComputeNode, ComputeState, construct_superuser_query};
|
||||
use crate::pg_helpers::{
|
||||
DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async,
|
||||
get_existing_roles_async,
|
||||
};
|
||||
use crate::spec_apply::ApplySpecPhase::{
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
|
||||
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
|
||||
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
|
||||
RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
|
||||
};
|
||||
|
||||
impl ComputeNode {
|
||||
/// Apply the spec to the running PostgreSQL instance.
|
||||
/// The caller can decide to run with multiple clients in parallel, or
|
||||
/// single mode. Either way, the commands executed will be the same, and
|
||||
/// only commands run in different databases are parallelized.
|
||||
#[instrument(skip_all)]
|
||||
pub fn apply_spec_sql(
|
||||
&self,
|
||||
spec: Arc<ComputeSpec>,
|
||||
conf: Arc<tokio_postgres::Config>,
|
||||
concurrency: usize,
|
||||
) -> Result<()> {
|
||||
info!("Applying config with max {} concurrency", concurrency);
|
||||
debug!("Config: {:?}", spec);
|
||||
|
||||
let rt = tokio::runtime::Handle::current();
|
||||
rt.block_on(async {
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
let client = Self::get_maintenance_client(&conf).await?;
|
||||
let spec = spec.clone();
|
||||
|
||||
let databases = get_existing_dbs_async(&client).await?;
|
||||
let roles = get_existing_roles_async(&client)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|role| (role.name.clone(), role))
|
||||
.collect::<HashMap<String, Role>>();
|
||||
|
||||
// Check if we need to drop subscriptions before starting the endpoint.
|
||||
//
|
||||
// It is important to do this operation exactly once when endpoint starts on a new branch.
|
||||
// Otherwise, we may drop newly created subscriptions rather than just the inherited ones.
|
||||
//
|
||||
// We cannot rely only on spec.drop_subscriptions_before_start flag,
|
||||
// because if for some reason compute restarts inside VM,
|
||||
// it will start again with the same spec and flag value.
|
||||
//
|
||||
// To handle this, we save the fact of the operation in the database
|
||||
// in the neon.drop_subscriptions_done table.
|
||||
// If the table does not exist, we assume that the operation was never performed, so we must do it.
|
||||
// If the table exists, we check whether the operation was performed on the current timeline.
|
||||
//
|
||||
let mut drop_subscriptions_done = false;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
|
||||
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
|
||||
|
||||
info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
|
||||
|
||||
drop_subscriptions_done = match
|
||||
client.simple_query(&query).await {
|
||||
Ok(result) => {
|
||||
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
|
||||
},
|
||||
Err(e) =>
|
||||
{
|
||||
match e.code() {
|
||||
Some(&SqlState::UNDEFINED_TABLE) => false,
|
||||
_ => {
|
||||
// We don't expect any other error here, except for the schema/table not existing
|
||||
error!("Error checking if drop subscription operation was already performed: {}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
let jwks_roles = Arc::new(
|
||||
spec.as_ref()
|
||||
.local_proxy_config
|
||||
.iter()
|
||||
.flat_map(|it| &it.jwks)
|
||||
.flatten()
|
||||
.flat_map(|setting| &setting.role_names)
|
||||
.cloned()
|
||||
.collect::<HashSet<_>>(),
|
||||
);
|
||||
|
||||
let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
|
||||
roles,
|
||||
dbs: databases,
|
||||
}));
|
||||
|
||||
// Apply special pre drop database phase.
|
||||
// NOTE: we use the code of RunInEachDatabase phase for parallelism
|
||||
// and connection management, but we don't really run it in *each* database,
|
||||
// only in the databases we're about to drop.
|
||||
info!("Applying PerDatabase (pre-dropdb) phase");
|
||||
let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
|
||||
|
||||
// Run the phase for each database that we're about to drop.
|
||||
let db_processes = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter_map(move |op| {
|
||||
if op.action.as_str() == "delete_db" {
|
||||
Some(op.name.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|dbname| {
|
||||
let spec = spec.clone();
|
||||
let ctx = ctx.clone();
|
||||
let jwks_roles = jwks_roles.clone();
|
||||
let mut conf = conf.as_ref().clone();
|
||||
let concurrency_token = concurrency_token.clone();
|
||||
// We only need the dbname field for this phase, so set other fields to dummy values
|
||||
let db = DB::UserDB(Database {
|
||||
name: dbname.clone(),
|
||||
owner: "cloud_admin".to_string(),
|
||||
options: None,
|
||||
restrict_conn: false,
|
||||
invalid: false,
|
||||
});
|
||||
|
||||
debug!("Applying per-database phases for Database {:?}", &db);
|
||||
|
||||
match &db {
|
||||
DB::SystemDB => {}
|
||||
DB::UserDB(db) => {
|
||||
conf.dbname(db.name.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
let conf = Arc::new(conf);
|
||||
let fut = Self::apply_spec_sql_db(
|
||||
spec.clone(),
|
||||
conf,
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
[DropLogicalSubscriptions].to_vec(),
|
||||
);
|
||||
|
||||
Ok(tokio::spawn(fut))
|
||||
})
|
||||
.collect::<Vec<Result<_, anyhow::Error>>>();
|
||||
|
||||
for process in db_processes.into_iter() {
|
||||
let handle = process?;
|
||||
if let Err(e) = handle.await? {
|
||||
// Handle the error case where the database does not exist
|
||||
// We do not check whether the DB exists or not in the deletion phase,
|
||||
// so we shouldn't be strict about it in pre-deletion cleanup as well.
|
||||
if e.to_string().contains("does not exist") {
|
||||
warn!("Error dropping subscription: {}", e);
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for phase in [
|
||||
CreateSuperUser,
|
||||
DropInvalidDatabases,
|
||||
RenameRoles,
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
] {
|
||||
info!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
phase,
|
||||
|| async { Ok(&client) },
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
info!("Applying RunInEachDatabase2 phase");
|
||||
let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
|
||||
|
||||
let db_processes = spec
|
||||
.cluster
|
||||
.databases
|
||||
.iter()
|
||||
.map(|db| DB::new(db.clone()))
|
||||
// include
|
||||
.chain(once(DB::SystemDB))
|
||||
.map(|db| {
|
||||
let spec = spec.clone();
|
||||
let ctx = ctx.clone();
|
||||
let jwks_roles = jwks_roles.clone();
|
||||
let mut conf = conf.as_ref().clone();
|
||||
let concurrency_token = concurrency_token.clone();
|
||||
let db = db.clone();
|
||||
|
||||
debug!("Applying per-database phases for Database {:?}", &db);
|
||||
|
||||
match &db {
|
||||
DB::SystemDB => {}
|
||||
DB::UserDB(db) => {
|
||||
conf.dbname(db.name.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
let conf = Arc::new(conf);
|
||||
let mut phases = vec![
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
];
|
||||
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(DropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
let fut = Self::apply_spec_sql_db(
|
||||
spec.clone(),
|
||||
conf,
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
phases,
|
||||
);
|
||||
|
||||
Ok(tokio::spawn(fut))
|
||||
})
|
||||
.collect::<Vec<Result<_, anyhow::Error>>>();
|
||||
|
||||
for process in db_processes.into_iter() {
|
||||
let handle = process?;
|
||||
handle.await??;
|
||||
}
|
||||
|
||||
let mut phases = vec![
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension, // This step depends on CreateSchemaNeon
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
];
|
||||
|
||||
// This step depends on CreateSchemaNeon
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(FinalizeDropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
for phase in phases {
|
||||
debug!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
phase,
|
||||
|| async { Ok(&client) },
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply SQL migrations of the RunInEachDatabase phase.
|
||||
///
|
||||
/// May opt to not connect to databases that don't have any scheduled
|
||||
/// operations. The function is concurrency-controlled with the provided
|
||||
/// semaphore. The caller has to make sure the semaphore isn't exhausted.
|
||||
async fn apply_spec_sql_db(
|
||||
spec: Arc<ComputeSpec>,
|
||||
conf: Arc<tokio_postgres::Config>,
|
||||
ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
|
||||
jwks_roles: Arc<HashSet<String>>,
|
||||
concurrency_token: Arc<tokio::sync::Semaphore>,
|
||||
db: DB,
|
||||
subphases: Vec<PerDatabasePhase>,
|
||||
) -> Result<()> {
|
||||
let _permit = concurrency_token.acquire().await?;
|
||||
|
||||
let mut client_conn = None;
|
||||
|
||||
for subphase in subphases {
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
RunInEachDatabase {
|
||||
db: db.clone(),
|
||||
subphase,
|
||||
},
|
||||
// Only connect if apply_operation actually wants a connection.
|
||||
// It's quite possible this database doesn't need any queries,
|
||||
// so by not connecting we save time and effort connecting to
|
||||
// that database.
|
||||
|| async {
|
||||
if client_conn.is_none() {
|
||||
let db_client = Self::get_maintenance_client(&conf).await?;
|
||||
client_conn.replace(db_client);
|
||||
}
|
||||
let client = client_conn.as_ref().unwrap();
|
||||
Ok(client)
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
drop(client_conn);
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
}
|
||||
|
||||
/// Choose how many concurrent connections to use for applying the spec changes.
|
||||
pub fn max_service_connections(
|
||||
&self,
|
||||
compute_state: &ComputeState,
|
||||
spec: &ComputeSpec,
|
||||
) -> usize {
|
||||
// If the cluster is in Init state we don't have to deal with user connections,
|
||||
// and can thus use all `max_connections` connection slots. However, that's generally not
|
||||
// very efficient, so we generally still limit it to a smaller number.
|
||||
if compute_state.status == ComputeStatus::Init {
|
||||
// If the settings contain 'max_connections', use that as template
|
||||
if let Some(config) = spec.cluster.settings.find("max_connections") {
|
||||
config.parse::<usize>().ok()
|
||||
} else {
|
||||
// Otherwise, try to find the setting in the postgresql_conf string
|
||||
spec.cluster
|
||||
.postgresql_conf
|
||||
.iter()
|
||||
.flat_map(|conf| conf.split("\n"))
|
||||
.filter_map(|line| {
|
||||
if !line.contains("max_connections") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (key, value) = line.split_once("=")?;
|
||||
let key = key
|
||||
.trim_start_matches(char::is_whitespace)
|
||||
.trim_end_matches(char::is_whitespace);
|
||||
|
||||
let value = value
|
||||
.trim_start_matches(char::is_whitespace)
|
||||
.trim_end_matches(char::is_whitespace);
|
||||
|
||||
if key != "max_connections" {
|
||||
return None;
|
||||
}
|
||||
|
||||
value.parse::<usize>().ok()
|
||||
})
|
||||
.next()
|
||||
}
|
||||
// If max_connections is present, use at most 1/3rd of that.
|
||||
// When max_connections is lower than 30, try to use at least 10 connections, but
|
||||
// never more than max_connections.
|
||||
.map(|limit| match limit {
|
||||
0..10 => limit,
|
||||
10..30 => 10,
|
||||
30.. => limit / 3,
|
||||
})
|
||||
// If we didn't find max_connections, default to 10 concurrent connections.
|
||||
.unwrap_or(10)
|
||||
} else {
|
||||
// state == Running
|
||||
// Because the cluster is already in the Running state, we should assume users are
|
||||
// already connected to the cluster, and high concurrency could negatively
|
||||
// impact user connectivity. Therefore, we can limit concurrency to the number of
|
||||
// reserved superuser connections, which users wouldn't be able to use anyway.
|
||||
spec.cluster
|
||||
.settings
|
||||
.find("superuser_reserved_connections")
|
||||
.iter()
|
||||
.filter_map(|val| val.parse::<usize>().ok())
|
||||
.map(|val| if val > 1 { val - 1 } else { 1 })
|
||||
.last()
|
||||
.unwrap_or(3)
|
||||
}
|
||||
}
|
||||
}
|
||||
use tracing::{debug, info_span, Instrument};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DB {
|
||||
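To make the max_service_connections() sizing rule shown earlier in this hunk concrete, here is the Init-phase policy restated as a standalone function with a few worked values; this is an illustrative restatement, not code from the branch:

// Illustrative restatement of the Init-phase concurrency rule described above.
// max_connections = 8   -> 8   (below 10, use every slot)
// max_connections = 20  -> 10  (between 10 and 29, cap at 10)
// max_connections = 100 -> 33  (30 and above, use one third)
// not configured        -> 10  (default)
fn init_phase_concurrency(max_connections: Option<usize>) -> usize {
    max_connections
        .map(|limit| match limit {
            0..=9 => limit,
            10..=29 => 10,
            _ => limit / 3,
        })
        .unwrap_or(10)
}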
@@ -445,11 +47,6 @@ pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
/// This is a shared phase, used for both i) dropping dangling LR subscriptions
|
||||
/// before dropping the DB, and ii) dropping all subscriptions after creating
|
||||
/// a fresh branch.
|
||||
/// N.B. we will skip all DBs that are not present in Postgres, invalid, or
|
||||
/// have `datallowconn = false` (`restrict_conn`).
|
||||
DropLogicalSubscriptions,
|
||||
}
|
||||
|
||||
@@ -571,7 +168,7 @@ where
///
/// In the future we may generate a single stream of changes and then
/// sort/merge/batch execution, but for now this is a nice way to improve
/// batching behavior of the commands.
/// batching behaviour of the commands.
async fn get_operations<'a>(
    spec: &'a ComputeSpec,
    ctx: &'a RwLock<MutableApplyContext>,
@@ -854,41 +451,6 @@ async fn get_operations<'a>(
|
||||
)),
|
||||
}))),
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
// Do some checks that user DB exists and we can access it.
|
||||
//
|
||||
// During the phases like DropLogicalSubscriptions, DeleteDBRoleReferences,
|
||||
// which happen before dropping the DB, the current run could be a retry,
|
||||
// so it's a valid case when DB is absent already. The case of
|
||||
// `pg_database.datallowconn = false`/`restrict_conn` is a bit tricky, as
|
||||
// in theory user can have some dangling objects there, so we will fail at
|
||||
// the actual drop later. Yet, to fix that in the current code we would need
|
||||
// to ALTER DATABASE, and then check back, but that is even more invasive, so
|
||||
// that's not what we really want to do here.
|
||||
//
|
||||
// For ChangeSchemaPerms, skipping DBs we cannot access is totally fine.
|
||||
if let DB::UserDB(db) = db {
|
||||
let databases = &ctx.read().await.dbs;
|
||||
|
||||
let edb = match databases.get(&db.name) {
|
||||
Some(edb) => edb,
|
||||
None => {
|
||||
warn!(
|
||||
"skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL",
|
||||
subphase, db.name
|
||||
);
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
};
|
||||
|
||||
if edb.restrict_conn || edb.invalid {
|
||||
warn!(
|
||||
"skipping RunInEachDatabase phase {:?}, database {} is (restrict_conn={}, invalid={})",
|
||||
subphase, db.name, edb.restrict_conn, edb.invalid
|
||||
);
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
}
|
||||
|
||||
match subphase {
|
||||
PerDatabasePhase::DropLogicalSubscriptions => {
|
||||
match &db {
|
||||
@@ -968,12 +530,25 @@ async fn get_operations<'a>(
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
PerDatabasePhase::ChangeSchemaPerms => {
|
||||
let ctx = ctx.read().await;
|
||||
let databases = &ctx.dbs;
|
||||
|
||||
let db = match &db {
|
||||
// ignore schema permissions on the system database
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
|
||||
if databases.get(&db.name).is_none() {
|
||||
bail!("database {} doesn't exist in PostgreSQL", db.name);
|
||||
}
|
||||
|
||||
let edb = databases.get(&db.name).unwrap();
|
||||
|
||||
if edb.restrict_conn || edb.invalid {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: format!(
|
||||
@@ -991,7 +566,6 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
// TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
|
||||
PerDatabasePhase::HandleAnonExtension => {
|
||||
// Only install Anon into user databases
|
||||
let db = match &db {
|
||||
|
||||
@@ -2,7 +2,6 @@ DO $$
DECLARE
    subname TEXT;
BEGIN
    LOCK TABLE pg_subscription IN ACCESS EXCLUSIVE MODE;
    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);

@@ -1,11 +1,10 @@
use std::path::Path;

use anyhow::{Context, anyhow};
use tracing::{instrument, warn};
use anyhow::{anyhow, Context};
use tracing::warn;

pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";

#[instrument]
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
    // run `/neonvm/bin/resize-swap --once {size_bytes}`
    //

@@ -1,7 +1,7 @@
#[cfg(test)]
mod config_tests {

    use std::fs::{File, remove_file};
    use std::fs::{remove_file, File};
    use std::io::{Read, Write};
    use std::path::Path;

@@ -25,7 +25,7 @@ use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use nix::errno::Errno;
use nix::fcntl::{FcntlArg, FdFlag};
use nix::sys::signal::{Signal, kill};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use utils::pid_file::{self, PidFileRead};

@@ -5,16 +5,7 @@
|
||||
//! easier to work with locally. The python tests in `test_runner`
|
||||
//! rely on `neon_local` to set up the environment for each test.
|
||||
//!
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::fs::File;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::path::PathBuf;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use clap::Parser;
|
||||
use compute_api::spec::ComputeMode;
|
||||
use control_plane::endpoint::ComputeControlPlane;
|
||||
@@ -28,7 +19,7 @@ use control_plane::storage_controller::{
|
||||
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
|
||||
};
|
||||
use control_plane::{broker, local_env};
|
||||
use nix::fcntl::{FlockArg, flock};
|
||||
use nix::fcntl::{flock, FlockArg};
|
||||
use pageserver_api::config::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
@@ -40,18 +31,27 @@ use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInf
|
||||
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::parse_host_port;
|
||||
use safekeeper_api::membership::SafekeeperGeneration;
|
||||
use safekeeper_api::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::fs::File;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::path::PathBuf;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
|
||||
use tokio::task::JoinSet;
|
||||
use url::Host;
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::project_git_version;
|
||||
use utils::{
|
||||
auth::{Claims, Scope},
|
||||
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
|
||||
lsn::Lsn,
|
||||
project_git_version,
|
||||
};
|
||||
|
||||
// Default id of a safekeeper node, if not specified on the command line.
|
||||
const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
|
||||
@@ -597,15 +597,7 @@ struct EndpointStartCmdArgs {
|
||||
#[clap(long = "pageserver-id")]
|
||||
endpoint_pageserver_id: Option<NodeId>,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
|
||||
)]
|
||||
safekeepers_generation: Option<u32>,
|
||||
#[clap(
|
||||
long,
|
||||
help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
|
||||
)]
|
||||
#[clap(long)]
|
||||
safekeepers: Option<String>,
|
||||
|
||||
#[clap(
|
||||
@@ -626,9 +618,9 @@ struct EndpointStartCmdArgs {
|
||||
)]
|
||||
allow_multiple: bool,
|
||||
|
||||
#[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
|
||||
#[arg(default_value = "90s")]
|
||||
start_timeout: Duration,
|
||||
#[clap(short = 't', long, help = "timeout until we fail the command")]
|
||||
#[arg(default_value = "10s")]
|
||||
start_timeout: humantime::Duration,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -895,6 +887,20 @@ fn print_timeline(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns a map of timeline IDs to timeline_id@lsn strings.
|
||||
/// Connects to the pageserver to query this information.
|
||||
async fn get_timeline_infos(
|
||||
env: &local_env::LocalEnv,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> Result<HashMap<TimelineId, TimelineInfo>> {
|
||||
Ok(get_default_pageserver(env)
|
||||
.timeline_list(tenant_shard_id)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|timeline_info| (timeline_info.timeline_id, timeline_info))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Helper function to get tenant id from an optional --tenant_id option or from the config file
|
||||
fn get_tenant_id(
|
||||
tenant_id_arg: Option<TenantId>,
|
||||
@@ -929,9 +935,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config {
|
||||
// User (likely the Python test suite) provided a description of the environment.
|
||||
if args.num_pageservers.is_some() {
|
||||
bail!(
|
||||
"Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead"
|
||||
);
|
||||
bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
|
||||
}
|
||||
// load and parse the file
|
||||
let contents = std::fs::read_to_string(config_path).with_context(|| {
|
||||
@@ -1247,6 +1251,12 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
|
||||
// where shard 0 is attached, and query there.
|
||||
let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;
|
||||
let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load timeline info: {}", e);
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
let timeline_name_mappings = env.timeline_name_mappings();
|
||||
|
||||
@@ -1275,9 +1285,12 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
lsn.to_string()
|
||||
}
|
||||
_ => {
|
||||
// As the LSN here refers to the one that the compute is started with,
|
||||
// we display nothing as it is a primary/hot standby compute.
|
||||
"---".to_string()
|
||||
// -> primary endpoint or hot replica
|
||||
// Use the LSN at the end of the timeline.
|
||||
timeline_infos
|
||||
.get(&endpoint.timeline_id)
|
||||
.map(|bi| bi.last_record_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1325,14 +1338,10 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
|
||||
match (mode, args.hot_standby) {
|
||||
(ComputeMode::Static(_), true) => {
|
||||
bail!(
|
||||
"Cannot start a node in hot standby mode when it is already configured as a static replica"
|
||||
)
|
||||
bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
|
||||
}
|
||||
(ComputeMode::Primary, true) => {
|
||||
bail!(
|
||||
"Cannot start a node as a hot standby replica, it is already configured as primary node"
|
||||
)
|
||||
bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -1359,7 +1368,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
let pageserver_id = args.endpoint_pageserver_id;
|
||||
let remote_ext_config = &args.remote_ext_config;
|
||||
|
||||
let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new);
|
||||
// If --safekeepers argument is given, use only the listed
|
||||
// safekeeper nodes; otherwise all from the env.
|
||||
let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
|
||||
@@ -1435,13 +1443,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
endpoint
|
||||
.start(
|
||||
&auth_token,
|
||||
safekeepers_generation,
|
||||
safekeepers,
|
||||
pageservers,
|
||||
remote_ext_config.as_ref(),
|
||||
stripe_size.0 as usize,
|
||||
args.create_test_user,
|
||||
args.start_timeout,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
use std::time::Duration;

use anyhow::Context;

use camino::Utf8PathBuf;

use crate::{background_process, local_env};

@@ -37,25 +37,28 @@
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::BTreeMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
|
||||
use std::net::IpAddr;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::{
|
||||
Cluster, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, RemoteExtSpec, Role,
|
||||
};
|
||||
use nix::sys::signal::{Signal, kill};
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use compute_api::spec::Database;
|
||||
use compute_api::spec::PgIdent;
|
||||
use compute_api::spec::RemoteExtSpec;
|
||||
use compute_api::spec::Role;
|
||||
use nix::sys::signal::kill;
|
||||
use nix::sys::signal::Signal;
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use safekeeper_api::membership::SafekeeperGeneration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
use url::Host;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
@@ -63,6 +66,9 @@ use crate::local_env::LocalEnv;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
use crate::storage_controller::StorageController;
|
||||
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
|
||||
|
||||
// contents of a endpoint.json file
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct EndpointConf {
|
||||
@@ -75,10 +81,8 @@ pub struct EndpointConf {
|
||||
internal_http_port: u16,
|
||||
pg_version: u32,
|
||||
skip_pg_catalog_updates: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
cluster: Option<Cluster>,
|
||||
}
|
||||
|
||||
//
|
||||
@@ -175,9 +179,7 @@ impl ComputeControlPlane {
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
});
|
||||
|
||||
ep.create_endpoint_dir()?;
|
||||
@@ -194,9 +196,7 @@ impl ComputeControlPlane {
|
||||
pg_version,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
})?,
|
||||
)?;
|
||||
std::fs::write(
|
||||
@@ -228,9 +228,7 @@ impl ComputeControlPlane {
|
||||
});
|
||||
|
||||
if let Some((key, _)) = duplicates.next() {
|
||||
bail!(
|
||||
"attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported."
|
||||
);
|
||||
bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -263,11 +261,8 @@ pub struct Endpoint {
|
||||
skip_pg_catalog_updates: bool,
|
||||
|
||||
drop_subscriptions_before_start: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
// Feature flags
|
||||
features: Vec<ComputeFeature>,
|
||||
// Cluster settings
|
||||
cluster: Option<Cluster>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
@@ -307,8 +302,6 @@ impl Endpoint {
|
||||
let conf: EndpointConf =
|
||||
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
|
||||
|
||||
debug!("serialized endpoint conf: {:?}", conf);
|
||||
|
||||
Ok(Endpoint {
|
||||
pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port),
|
||||
external_http_address: SocketAddr::new(
|
||||
@@ -326,10 +319,8 @@ impl Endpoint {
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
reconfigure_concurrency: conf.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
cluster: conf.cluster,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -577,17 +568,14 @@ impl Endpoint {
|
||||
Ok(safekeeper_connstrings)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn start(
|
||||
&self,
|
||||
auth_token: &Option<String>,
|
||||
safekeepers_generation: Option<SafekeeperGeneration>,
|
||||
safekeepers: Vec<NodeId>,
|
||||
pageservers: Vec<(Host, u16)>,
|
||||
remote_ext_config: Option<&String>,
|
||||
shard_stripe_size: usize,
|
||||
create_test_user: bool,
|
||||
start_timeout: Duration,
|
||||
) -> Result<()> {
|
||||
if self.status() == EndpointStatus::Running {
|
||||
anyhow::bail!("The endpoint is already running");
|
||||
@@ -619,7 +607,7 @@ impl Endpoint {
|
||||
};
|
||||
|
||||
// Create spec file
|
||||
let mut spec = ComputeSpec {
|
||||
let spec = ComputeSpec {
|
||||
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
|
||||
format_version: 1.0,
|
||||
operation_uuid: None,
|
||||
@@ -652,49 +640,22 @@ impl Endpoint {
|
||||
Vec::new()
|
||||
},
|
||||
settings: None,
|
||||
postgresql_conf: Some(postgresql_conf.clone()),
|
||||
postgresql_conf: Some(postgresql_conf),
|
||||
},
|
||||
delta_operations: None,
|
||||
tenant_id: Some(self.tenant_id),
|
||||
timeline_id: Some(self.timeline_id),
|
||||
mode: self.mode,
|
||||
pageserver_connstring: Some(pageserver_connstring),
|
||||
safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
|
||||
safekeeper_connstrings,
|
||||
storage_auth_token: auth_token.clone(),
|
||||
remote_extensions,
|
||||
pgbouncer_settings: None,
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: self.reconfigure_concurrency,
|
||||
reconfigure_concurrency: 1,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
};
|
||||
|
||||
// this strange code is needed to support respec() in tests
|
||||
if self.cluster.is_some() {
|
||||
debug!("Cluster is already set in the endpoint spec, using it");
|
||||
spec.cluster = self.cluster.clone().unwrap();
|
||||
|
||||
debug!("spec.cluster {:?}", spec.cluster);
|
||||
|
||||
// fill missing fields again
|
||||
if create_test_user {
|
||||
spec.cluster.roles.push(Role {
|
||||
name: PgIdent::from_str("test").unwrap(),
|
||||
encrypted_password: None,
|
||||
options: None,
|
||||
});
|
||||
spec.cluster.databases.push(Database {
|
||||
name: PgIdent::from_str("neondb").unwrap(),
|
||||
owner: PgIdent::from_str("test").unwrap(),
|
||||
options: None,
|
||||
restrict_conn: false,
|
||||
invalid: false,
|
||||
});
|
||||
}
|
||||
spec.cluster.postgresql_conf = Some(postgresql_conf);
|
||||
}
|
||||
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
@@ -712,14 +673,18 @@ impl Endpoint {
|
||||
println!("Also at '{}'", conn_str);
|
||||
}
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
//cmd.args([
|
||||
// "--external-http-port",
|
||||
// &self.external_http_address.port().to_string(),
|
||||
//])
|
||||
//.args([
|
||||
// "--internal-http-port",
|
||||
// &self.internal_http_address.port().to_string(),
|
||||
//])
|
||||
cmd.args([
|
||||
"--external-http-port",
|
||||
"--http-port",
|
||||
&self.external_http_address.port().to_string(),
|
||||
])
|
||||
.args([
|
||||
"--internal-http-port",
|
||||
&self.internal_http_address.port().to_string(),
|
||||
])
|
||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||
.args(["--connstr", &conn_str])
|
||||
.args([
|
||||
@@ -736,16 +701,20 @@ impl Endpoint {
|
||||
])
|
||||
// TODO: It would be nice if we generated compute IDs with the same
|
||||
// algorithm as the real control plane.
|
||||
.args([
|
||||
"--compute-id",
|
||||
&format!(
|
||||
"compute-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
),
|
||||
])
|
||||
//
|
||||
// TODO: Add this back when
|
||||
// https://github.com/neondatabase/neon/pull/10747 is merged.
|
||||
//
|
||||
//.args([
|
||||
// "--compute-id",
|
||||
// &format!(
|
||||
// "compute-{}",
|
||||
// SystemTime::now()
|
||||
// .duration_since(UNIX_EPOCH)
|
||||
// .unwrap()
|
||||
// .as_secs()
|
||||
// ),
|
||||
//])
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stderr(logfile.try_clone()?)
|
||||
.stdout(logfile);
|
||||
@@ -775,18 +744,17 @@ impl Endpoint {
|
||||
std::fs::write(pidfile_path, pid.to_string())?;
|
||||
|
||||
// Wait for it to start
|
||||
let mut attempt = 0;
|
||||
const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
|
||||
let start_at = Instant::now();
|
||||
const MAX_ATTEMPTS: u32 = 10 * 90; // Wait up to 1.5 min
|
||||
loop {
|
||||
attempt += 1;
|
||||
match self.get_status().await {
|
||||
Ok(state) => {
|
||||
match state.status {
|
||||
ComputeStatus::Init => {
|
||||
if Instant::now().duration_since(start_at) > start_timeout {
|
||||
bail!(
|
||||
"compute startup timed out {:?}; still in Init state",
|
||||
start_timeout
|
||||
);
|
||||
if attempt == MAX_ATTEMPTS {
|
||||
bail!("compute startup timed out; still in Init state");
|
||||
}
|
||||
// keep retrying
|
||||
}
|
||||
@@ -813,11 +781,8 @@ impl Endpoint {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if Instant::now().duration_since(start_at) > start_timeout {
|
||||
return Err(e).context(format!(
|
||||
"timed out {:?} waiting to connect to compute_ctl HTTP",
|
||||
start_timeout,
|
||||
));
|
||||
if attempt == MAX_ATTEMPTS {
|
||||
return Err(e).context("timed out waiting to connect to compute_ctl HTTP");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,22 +3,28 @@
|
||||
//! Now it also provides init method which acts like a stub for proper installation
|
||||
//! script which will use local paths.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
use std::{env, fs};
|
||||
use anyhow::{bail, Context};
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use clap::ValueEnum;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::auth::{Claims, encode_from_key_file};
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::net::IpAddr;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims},
|
||||
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
|
||||
};
|
||||
|
||||
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
||||
use crate::pageserver::PageServerNode;
|
||||
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
@@ -165,8 +171,6 @@ pub struct NeonStorageControllerConf {
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub long_reconcile_threshold: Option<Duration>,
|
||||
|
||||
pub load_safekeepers: bool,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -190,7 +194,6 @@ impl Default for NeonStorageControllerConf {
|
||||
max_secondary_lag_bytes: None,
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
load_safekeepers: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -462,9 +465,7 @@ impl LocalEnv {
|
||||
if old_timeline_id == &timeline_id {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(
|
||||
"branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}"
|
||||
);
|
||||
bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
|
||||
}
|
||||
} else {
|
||||
existing_values.push((tenant_id, timeline_id));
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::num::NonZeroU64;
|
||||
@@ -14,19 +15,22 @@ use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use anyhow::{bail, Context};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{PgConnectionConfig, parse_host_port};
|
||||
use postgres_connection::{parse_host_port, PgConnectionConfig};
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::id::NodeId;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::background_process;
|
||||
use crate::local_env::{LocalEnv, NeonLocalInitPageserverConf, PageServerConf};
|
||||
use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
|
||||
/// Directory within .neon which will be used by default for LocalFs remote storage.
|
||||
pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver";
|
||||
@@ -77,11 +81,7 @@ impl PageServerNode {
|
||||
&self,
|
||||
conf: NeonLocalInitPageserverConf,
|
||||
) -> anyhow::Result<toml_edit::DocumentMut> {
|
||||
assert_eq!(
|
||||
&PageServerConf::from(&conf),
|
||||
&self.conf,
|
||||
"during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully"
|
||||
);
|
||||
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
|
||||
|
||||
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
|
||||
|
||||
@@ -335,21 +335,13 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_distance' as an integer")?,
|
||||
checkpoint_timeout: settings
|
||||
.remove("checkpoint_timeout")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_timeout' as duration")?,
|
||||
checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
|
||||
compaction_target_size: settings
|
||||
.remove("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_target_size' as an integer")?,
|
||||
compaction_period: settings
|
||||
.remove("compaction_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_period' as duration")?,
|
||||
compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
|
||||
compaction_threshold: settings
|
||||
.remove("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
@@ -395,10 +387,7 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_horizon' as an integer")?,
|
||||
gc_period: settings.remove("gc_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_period' as duration")?,
|
||||
gc_period: settings.remove("gc_period").map(|x| x.to_string()),
|
||||
image_creation_threshold: settings
|
||||
.remove("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
@@ -414,20 +403,13 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
|
||||
pitr_interval: settings.remove("pitr_interval")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'pitr_interval' as duration")?,
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.remove("walreceiver_connect_timeout")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'walreceiver_connect_timeout' as duration")?,
|
||||
.map(|x| x.to_string()),
|
||||
lagging_wal_timeout: settings
|
||||
.remove("lagging_wal_timeout")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lagging_wal_timeout' as duration")?,
|
||||
.map(|x| x.to_string()),
|
||||
max_lsn_wal_lag: settings
|
||||
.remove("max_lsn_wal_lag")
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
@@ -445,14 +427,8 @@ impl PageServerNode {
|
||||
.context("Failed to parse 'min_resident_size_override' as integer")?,
|
||||
evictions_low_residence_duration_metric_threshold: settings
|
||||
.remove("evictions_low_residence_duration_metric_threshold")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration")?,
|
||||
heatmap_period: settings
|
||||
.remove("heatmap_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'heatmap_period' as duration")?,
|
||||
.map(|x| x.to_string()),
|
||||
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
|
||||
lazy_slru_download: settings
|
||||
.remove("lazy_slru_download")
|
||||
.map(|x| x.parse::<bool>())
|
||||
@@ -463,15 +439,10 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lsn_lease_length' as duration")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lsn_lease_length_for_ts' as duration")?,
|
||||
.map(|x| x.to_string()),
|
||||
timeline_offloading: settings
|
||||
.remove("timeline_offloading")
|
||||
.map(|x| x.parse::<bool>())
|
||||
|
||||
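The tenant-config hunk above moves several settings from raw strings to humantime-parsed durations. As a small illustrative sketch (assuming the humantime and anyhow crates), the parsing pattern it repeats for each setting is roughly:

// Illustrative sketch of the parsing pattern used above; not code from the branch.
use std::time::Duration;

use anyhow::Context;

fn parse_duration_setting(raw: Option<String>, name: &str) -> anyhow::Result<Option<Duration>> {
    raw.as_deref()
        .map(humantime::parse_duration) // accepts strings like "500ms", "20m", "1h 30m"
        .transpose()
        .with_context(|| format!("Failed to parse '{name}' as duration"))
}

// parse_duration_setting(Some("20m".to_string()), "compaction_period")
//     == Ok(Some(Duration::from_secs(1200)))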
@@ -1,6 +1,3 @@
use std::collections::HashMap;
use std::fmt;

///
/// Module for parsing postgresql.conf file.
///
@@ -9,6 +6,8 @@ use std::fmt;
/// funny stuff like include-directives or funny escaping.
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;

/// In-memory representation of a postgresql.conf file
#[derive(Default, Debug)]

@@ -14,15 +14,18 @@ use std::{io, result};

use anyhow::Context;
use camino::Utf8PathBuf;
use http_utils::error::HttpErrorBody;
use postgres_connection::PgConnectionConfig;
use reqwest::{IntoUrl, Method};
use thiserror::Error;

use http_utils::error::HttpErrorBody;
use utils::auth::{Claims, Scope};
use utils::id::NodeId;

use crate::background_process;
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::{
    background_process,
    local_env::{LocalEnv, SafekeeperConf},
};

#[derive(Error, Debug)]
pub enum SafekeeperHttpError {

@@ -1,39 +1,44 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use std::str::FromStr;
|
||||
use std::sync::OnceLock;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use crate::{
|
||||
background_process,
|
||||
local_env::{LocalEnv, NeonStorageControllerConf},
|
||||
};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hyper0::Uri;
|
||||
use nix::unistd::Pid;
|
||||
use pageserver_api::controller_api::{
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||
TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||
TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
},
|
||||
models::{
|
||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||
},
|
||||
shard::{ShardStripeSize, TenantShardId},
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
use std::{
|
||||
ffi::OsStr,
|
||||
fs,
|
||||
net::SocketAddr,
|
||||
path::PathBuf,
|
||||
process::ExitStatus,
|
||||
str::FromStr,
|
||||
sync::OnceLock,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use tokio::process::Command;
|
||||
use tracing::instrument;
|
||||
use url::Url;
|
||||
use utils::auth::{Claims, Scope, encode_from_key_file};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims, Scope},
|
||||
id::{NodeId, TenantId},
|
||||
};
|
||||
use whoami::username;
|
||||
|
||||
use crate::background_process;
|
||||
use crate::local_env::{LocalEnv, NeonStorageControllerConf};
|
||||
|
||||
pub struct StorageController {
|
||||
env: LocalEnv,
|
||||
private_key: Option<Vec<u8>>,
|
||||
@@ -91,8 +96,7 @@ pub struct AttachHookRequest {

#[derive(Serialize, Deserialize)]
pub struct AttachHookResponse {
    #[serde(rename = "gen")]
    pub generation: Option<u32>,
    pub gen: Option<u32>,
}

#[derive(Serialize, Deserialize)]
@@ -537,10 +541,6 @@ impl StorageController {
            args.push("--start-as-candidate".to_string());
        }

        if self.config.load_safekeepers {
            args.push("--load-safekeepers".to_string());
        }

        if let Some(private_key) = &self.private_key {
            let claims = Claims::new(None, Scope::PageServerApi);
            let jwt_token =
@@ -779,7 +779,7 @@ impl StorageController {
        )
        .await?;

        Ok(response.generation)
        Ok(response.gen)
    }

    #[instrument(skip(self))]

@@ -1,27 +1,34 @@
use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use std::time::Duration;
use futures::StreamExt;
use std::{
    collections::{HashMap, HashSet},
    str::FromStr,
    time::Duration,
};

use clap::{Parser, Subcommand};
use futures::StreamExt;
use pageserver_api::controller_api::{
    AvailabilityZone, NodeAvailabilityWrapper, NodeConfigureRequest, NodeDescribeResponse,
    NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, PlacementPolicy,
    SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
    ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest,
    TenantDescribeResponse, TenantPolicyRequest, TenantShardMigrateRequest,
    TenantShardMigrateResponse,
use pageserver_api::{
    controller_api::{
        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
        SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
        ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest,
        TenantShardSplitRequest, TenantShardSplitResponse,
    },
    shard::{ShardStripeSize, TenantShardId},
};
use pageserver_api::models::{
    EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters,
    TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
    TenantShardSplitResponse,
};
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
use pageserver_client::mgmt_api::{self};
use reqwest::{Method, StatusCode, Url};
use utils::id::{NodeId, TenantId};

use pageserver_api::controller_api::{
    NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
    TenantShardMigrateRequest, TenantShardMigrateResponse,
};
use storage_controller_client::control_api::Client;
use utils::id::{NodeId, TenantId, TimelineId};

#[derive(Subcommand, Debug)]
enum Command {
@@ -40,9 +47,6 @@ enum Command {
        listen_http_addr: String,
        #[arg(long)]
        listen_http_port: u16,
        #[arg(long)]
        listen_https_port: Option<u16>,

        #[arg(long)]
        availability_zone_id: String,
    },
@@ -235,19 +239,6 @@ enum Command {
        #[arg(long)]
        scheduling_policy: SkSchedulingPolicyArg,
    },
    /// Downloads any missing heatmap layers for all shard for a given timeline
    DownloadHeatmapLayers {
        /// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,
        /// the operation is performed on all shards. When a sharded tenant ID is
        /// specified, the operation is only performed on the specified shard.
        #[arg(long)]
        tenant_shard_id: TenantShardId,
        #[arg(long)]
        timeline_id: TimelineId,
        /// Optional: Maximum download concurrency (default is 16)
        #[arg(long)]
        concurrency: Option<usize>,
    },
}

#[derive(Parser)]
@@ -390,7 +381,6 @@ async fn main() -> anyhow::Result<()> {
            listen_pg_port,
            listen_http_addr,
            listen_http_port,
            listen_https_port,
            availability_zone_id,
        } => {
            storcon_client
@@ -403,7 +393,6 @@ async fn main() -> anyhow::Result<()> {
                    listen_pg_port,
                    listen_http_addr,
                    listen_http_port,
                    listen_https_port,
                    availability_zone_id: AvailabilityZone(availability_zone_id),
                }),
            )
@@ -914,9 +903,7 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::TenantDrop { tenant_id, unclean } => {
            if !unclean {
                anyhow::bail!(
                    "This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed."
                )
                anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed.")
            }
            storcon_client
                .dispatch::<(), ()>(
@@ -928,9 +915,7 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::NodeDrop { node_id, unclean } => {
            if !unclean {
                anyhow::bail!(
                    "This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed."
                )
                anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed.")
            }
            storcon_client
                .dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
@@ -956,7 +941,7 @@ async fn main() -> anyhow::Result<()> {
                    threshold: threshold.into(),
                },
            )),
            heatmap_period: Some(Duration::from_secs(300)),
            heatmap_period: Some("300s".to_string()),
            ..Default::default()
        },
    })
@@ -1262,24 +1247,6 @@ async fn main() -> anyhow::Result<()> {
                String::from(scheduling_policy)
            );
        }
        Command::DownloadHeatmapLayers {
            tenant_shard_id,
            timeline_id,
            concurrency,
        } => {
            let mut path = format!(
                "/v1/tenant/{}/timeline/{}/download_heatmap_layers",
                tenant_shard_id, timeline_id,
            );

            if let Some(c) = concurrency {
                path = format!("{path}?concurrency={c}");
            }

            storcon_client
                .dispatch::<(), ()>(Method::POST, path, None)
                .await?;
        }
    }

    Ok(())
@@ -77,5 +77,4 @@ echo "Start compute node"
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
    -C "postgresql://cloud_admin@localhost:55433/postgres" \
    -b /usr/local/bin/postgres \
    --compute-id "compute-$RANDOM" \
    -S ${SPEC_FILE}
@@ -186,7 +186,7 @@ services:

  neon-test-extensions:
    profiles: ["test-extensions"]
    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
    environment:
      - PGPASSWORD=cloud_admin
    entrypoint:
@@ -51,6 +51,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
  done

  if [ $pg_version -ge 16 ]; then
    docker cp ext-src $TEST_CONTAINER_NAME:/
    docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl"
    # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
    # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
    echo Adding dummy config
@@ -79,8 +81,15 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    [ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
    [ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
    for d in $FAILED $CONTRIB_FAILED; do
      docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ]
      dn="$(basename $d)"
      rm -rf $dn
      mkdir $dn
      docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ]
      docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ]
      cat $dn/regression.out $dn/regression.diffs || true
      rm -rf $dn
    done
    rm -rf $FAILED
    exit 1
    fi
  fi
@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./regress --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger
@@ -1,24 +0,0 @@
diff --git a/test/sql/base.sql b/test/sql/base.sql
index 53adb30..2eed91b 100644
--- a/test/sql/base.sql
+++ b/test/sql/base.sql
@@ -2,7 +2,6 @@
 BEGIN;

 \i test/pgtap-core.sql
-CREATE EXTENSION semver;

 SELECT plan(334);
 --SELECT * FROM no_plan();
diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
index c0fe98e..39cdd2e 100644
--- a/test/sql/corpus.sql
+++ b/test/sql/corpus.sql
@@ -4,7 +4,6 @@ BEGIN;
 -- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.

 \i test/pgtap-core.sql
-CREATE EXTENSION semver;

 SELECT plan(76);
 --SELECT * FROM no_plan();
@@ -1,7 +1,6 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade-${PG_VERSION}.patch
psql -d contrib_regression -c "DROP EXTENSION IF EXISTS pgtap"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus
@@ -1,16 +1,3 @@
diff --git a/Makefile b/Makefile
index f255fe6..0a0fa65 100644
--- a/Makefile
+++ b/Makefile
@@ -346,7 +346,7 @@ test: test-serial test-parallel
 TB_DIR = test/build
 GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests
 REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe
-REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
+REGRESS_OPTS = --use-existing --dbname=contrib_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
 SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!
 IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))
 PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))
diff --git a/test/schedule/create.sql b/test/schedule/create.sql
index ba355ed..7e250f5 100644
--- a/test/schedule/create.sql
@@ -2,4 +2,5 @@
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
make installcheck
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --max-connections=86 --schedule test/schedule/main.sch --schedule test/build/run.sch --dbname contrib_regression --use-existing
@@ -2,5 +2,4 @@
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension")+15);}')"
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression ${REGRESS}
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure
@@ -6,16 +6,12 @@ generate_id() {
  local -n resvar=$1
  printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
}
echo "${OLD_COMPUTE_TAG}"
echo "${NEW_COMPUTE_TAG}"
echo "${TEST_EXTENSIONS_TAG}"
if [ -z "${OLD_COMPUTE_TAG:-}" ] || [ -z "${NEW_COMPUTE_TAG:-}" ] || [ -z "${TEST_EXTENSIONS_TAG:-}" ]; then
  echo OLD_COMPUTE_TAG, NEW_COMPUTE_TAG and TEST_EXTENSIONS_TAG must be set
if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
  echo OLDTAG and NEWTAG must be defined
  exit 1
fi
export PG_VERSION=${PG_VERSION:-16}
export PG_TEST_VERSION=${PG_VERSION}
# Waits for compute node is ready
function wait_for_ready {
  TIME=0
  while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
@@ -27,45 +23,11 @@ function wait_for_ready {
    exit 2
  fi
}
# Creates extensions. Gets a string with space-separated extensions as a parameter
function create_extensions() {
  for ext in ${1}; do
    docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
  done
}
# Creates a new timeline. Gets the parent ID and an extension name as parameters.
# Saves the timeline ID in the variable EXT_TIMELINE
function create_timeline() {
  generate_id new_timeline_id

  PARAMS=(
    -sbf
    -X POST
    -H "Content-Type: application/json"
    -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${1}\"}"
    "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
  )
  result=$(curl "${PARAMS[@]}")
  echo $result | jq .
  EXT_TIMELINE[${2}]=${new_timeline_id}
}
# Checks if the timeline ID of the compute node is expected. Gets the timeline ID as a parameter
function check_timeline() {
  TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
  if [ "${TID}" != "${1}" ]; then
    echo Timeline mismatch
    exit 1
  fi
}
# Restarts the compute node with the required compute tag and timeline.
# Accepts the tag for the compute node and the timeline as parameters.
function restart_compute() {
  docker compose down compute compute_is_ready
  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
  wait_for_ready
  check_timeline ${2}
}
declare -A EXT_TIMELINE
EXTENSIONS='[
{"extname": "plv8", "extdir": "plv8-src"},
{"extname": "vector", "extdir": "pgvector-src"},
@@ -81,11 +43,10 @@ EXTENSIONS='[
{"extname": "semver", "extdir": "pg_semver-src"},
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
{"extname": "pgjwt", "extdir": "pgjwt-src"},
{"extname": "pgtap", "extdir": "pgtap-src"},
{"extname": "pg_repack", "extdir": "pg_repack-src"}
{"extname": "pgtap", "extdir": "pgtap-src"}
]'
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
wait_for_ready
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
@@ -93,14 +54,11 @@ create_extensions "${EXTNAMES}"
query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
docker compose --profile test-extensions down
COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
wait_for_ready
docker compose cp ext-src neon-test-extensions:/
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
EXT_TIMELINE["main"]=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
create_timeline "${EXT_TIMELINE["main"]}" init
restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE["init"]}"
create_extensions "${EXTNAMES}"
if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then
  exts="${EXTNAMES}"
@@ -111,13 +69,29 @@ fi
if [ -z "${exts}" ]; then
  echo "No extensions were upgraded"
else
  tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
  timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
  for ext in ${exts}; do
    echo Testing ${ext}...
    create_timeline "${EXT_TIMELINE["main"]}" ${ext}
    EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
    restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
    docker compose exec neon-test-extensions psql -d contrib_regression -c "CREATE EXTENSION ${ext} CASCADE"
    restart_compute "${NEW_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
    generate_id new_timeline_id
    PARAMS=(
      -sbf
      -X POST
      -H "Content-Type: application/json"
      -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
      "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
    )
    result=$(curl "${PARAMS[@]}")
    echo $result | jq .
    TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
    COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
    wait_for_ready
    TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
    if [ ${TID} != ${new_timeline_id} ]; then
      echo Timeline mismatch
      exit 1
    fi
    docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
    if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then
      docker compose exec neon-test-extensions cat /ext-src/${EXTDIR}/regression.diffs
@@ -1,164 +0,0 @@

# Pageserver Hot Secondaries

## Summary

It is proposed to add a new mode for pageserver tenant shard locations,
called "hot secondary", which is able to serve page_service requests but
does not do all the same housekeeping as an attached location, and does
not store any additional data in S3.

There is a stark tradeoff between resource cost and complexity: a very simple solution would be to have multiple full attached locations doing independent I/O, but this RFC proposes some additional complexity to
reduce cost.

## Background

In the [pageserver migration RFC](028-pageserver-migration.md), we introduced the concept of "warm secondaries". These are pageserver locations that poll remote storage for a _heatmap_ describing which layers they should hold, and then download those layers from S3. This enables them to rapidly transition into a usable attached location with a warm cache.

Combined with the storage controller's detection of pageserver failures, warm
secondaries enabled high availability of pageservers with a recovery time
objective (RTO) measured in seconds (depending on the configured heartbeat frequency) -- occasional cloud instance failures are typically recovered
in well under a minute, without human intervention.

## Purpose

We aim to provide a sub-second RTO for pageserver failures, for mission
critical workloads. To do this, we should enable the postgres client
to make its own decision about cutting over to a secondary, rather than
waiting for the controller to detect a failure and instruct it to
use a different pageserver. These secondaries should be maintained
in a continuously readable state rather than requiring explicit activation.

Because low-RTO failover is intrinsically vulnerable to "flapping"/false
positives, reads from such a hot secondary will not "promote" the secondary: we don't want to flap back and forth at millisecond timescales. Rather, reads will be served by hot secondaries at any time,
but their transition to an attached (primary) location will still be
managed by the storage controller.

## Design of Hot Location Mode

At a high level, hot locations are basically the same `Tenant` and `Timeline` types as an attached location, but with some behavioral tweaks. This RFC won't get into code structure details: these changes
may be expressed as different types (more robust) or as different modes
for existing types (less code churn, more complexity).

### Load and ingest

Initially, we may start in the same way as a normal attached location:
by discovering the latest metadata in remote storage and constructing
a LayerMap.

We should also do ingest as normal: subscribing to safekeeper and streaming
writes into ephemeral layer files that are then frozen into L0s. However,
we do not want to wastefully upload these to S3 (they duplicate what the
attached location is already writing).

### "Virtual" compaction

Clearly ingesting but never uploading or compacting will generate an unbounded stack of L0 layers, unless we do something about it.

To solve this, we may add a special type of compaction that re-reads
from remote storage, updates the layer map to contain all L1
and image layers from the remote metadata, and triggers download of these.

We do not download remote L0s during virtual compaction, because the hot secondary has also been ingesting and generating these, so it would be wasteful. We just trim any local L0s which are now covered by the L1 high watermark of the remote metadata, and retain any that are still needed to serve reads.

Note that this process is expected to generate some overlaps in LSN space: we might have an L0 that we generated locally which overlaps with an L1 from remote storage. getpage@lsn logic must handle this, and avoid assuming non-overlapping layers (i.e. having read some deltas from an L0, we must not read the same deltas again in an L1; we must remember what LSN we have already passed).
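For illustration, here is a minimal sketch of that bookkeeping with invented types (`Layer` and `plan_overlapping_reads` are placeholders, not the pageserver's real read path): walk layers newest-first and clamp each one to the LSN range not yet covered, so no delta is applied twice.

```rust
/// Hypothetical, simplified layer descriptor; the real pageserver types differ.
struct Layer {
    lsn_start: u64, // inclusive
    lsn_end: u64,   // exclusive
}

/// Plan which LSN sub-range to read from each layer, newest first, so that an
/// overlapping local L0 and remote L1 never contribute the same deltas twice.
/// Simplification: assumes the walk covers LSN space contiguously downwards.
fn plan_overlapping_reads(mut layers: Vec<Layer>) -> Vec<(Layer, (u64, u64))> {
    // Visit layers from the highest end-LSN downwards.
    layers.sort_by(|a, b| b.lsn_end.cmp(&a.lsn_end));

    let mut plan = Vec::new();
    let mut covered_down_to = u64::MAX; // lowest LSN already read via newer layers

    for layer in layers {
        // Clamp this layer to the part of LSN space not yet covered.
        let end = layer.lsn_end.min(covered_down_to);
        if layer.lsn_start >= end {
            continue; // fully shadowed by newer layers
        }
        let start = layer.lsn_start;
        covered_down_to = covered_down_to.min(start);
        plan.push((layer, (start, end)));
    }
    plan
}
```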
The average total network download bandwidth of the hot secondary is equal to the rate at which the attached location generates L1 and image layers, plus the rate at which WAL is generated.

The average total disk write bandwidth is the sum of WAL generation rate plus L1/image generation rate: this is about the same as a normal attached location. The average disk _read_ bandwidth of a hot secondary is far lower than an attached location because it is not reading back layers to compact them -- layers are only read in periods where the attached location was unavailable and computes had started reading from the hot secondary.

The trigger for virtual compaction can be similar to the existing trigger
for L1 compaction on attached locations: once we build up a deep stack of L0s, then we do virtual compaction to trim it. This assumes that the attached location has kept up with compaction. The hot secondary can be
more tolerant of a deeper L0 stack because it is less often serving
reads: for example it might make sense to trigger normal L1 compaction at 10 L0 layers, and trigger shallow compaction at 15 L0 layers, giving a good chance that by the time the hot secondary does compaction, the attached location has already written out some layer files for it to read.
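For a sense of how the two thresholds could sit relative to each other, a sketch with assumed constants (the names and numbers below are illustrative, not settled configuration):

```rust
// Illustrative thresholds only; real values would come from tenant/pageserver config.
const L0_COMPACTION_THRESHOLD: usize = 10; // attached locations: normal L1 compaction
const L0_VIRTUAL_COMPACTION_THRESHOLD: usize = 15; // hot secondaries: virtual compaction

/// Decide whether a hot secondary should run virtual compaction now.
/// Keeping its threshold above the attached location's threshold gives the
/// attached location a head start to publish the L1/image layers we will download.
fn should_virtual_compact(local_l0_count: usize) -> bool {
    local_l0_count >= L0_VIRTUAL_COMPACTION_THRESHOLD
}
```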
To avoid an availability gap while downloading data from S3, it is important that the hot
secondary downloads new layer files before updating its layer map to de-reference replaced
layers.

### Handling missing layers/timelines

If an incoming request references a timeline that the hot secondary is
unaware of, it must go read from S3 to determine if the timeline exists, and if so then load it.

The hot secondary should also be tipped off by the storage controller when
timelines are created, so that in normal operation it is aware of timelines
immediately rather than having to load on demand (loading on demand could
have much higher latency for reads).

Hot secondaries may also experience 404s reading layers from remote storage, because the layer might have been deleted by the attached location
during compaction or GC. If the hot secondary finds such a 404, it should
trigger a re-download of the timeline index.
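A rough sketch of that recovery path, using a made-up `RemoteStore` interface (the real remote storage client and error types differ):

```rust
/// Hypothetical minimal interface for the example only.
trait RemoteStore {
    fn download_layer(&self, layer_name: &str) -> Result<Vec<u8>, RemoteError>;
    fn refresh_timeline_index(&self) -> Result<(), RemoteError>;
}

#[derive(Debug)]
enum RemoteError {
    NotFound, // e.g. S3 404: the attached location's compaction/GC deleted the layer
    Other(String),
}

/// Outcome of one layer download attempt on a hot secondary.
enum DownloadOutcome {
    Done(Vec<u8>),
    /// The layer is gone; the timeline index was re-downloaded and the caller
    /// should re-resolve its read against the refreshed layer map.
    IndexRefreshed,
}

fn download_or_refresh(
    store: &dyn RemoteStore,
    layer_name: &str,
) -> Result<DownloadOutcome, RemoteError> {
    match store.download_layer(layer_name) {
        Ok(bytes) => Ok(DownloadOutcome::Done(bytes)),
        Err(RemoteError::NotFound) => {
            store.refresh_timeline_index()?;
            Ok(DownloadOutcome::IndexRefreshed)
        }
        Err(other) => Err(other),
    }
}
```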
### Transition from Hot Secondary to Attached

While a hot secondary can serve writes independently for a short period of time (until
too many L0s build up to efficiently serve reads), it needs to be promoted to be the attached
location if the last attached location becomes unavailable (or if the storage controller
determines that the tenant should be migrated).

This can be done trivially by shutting down and starting up again in attached mode (on startup
the layer map will be reset to the content of remote storage), but this can impose an availability gap, because:
- After unexpected failure of an attached location, the hot secondary's local L0s may be
  further ahead in WAL ingest than the contents of remote storage, so resetting to what's
  in remote storage will make recent data unavailable until it is re-ingested.
- Even if the remote data is up to date with latest WAL, it may take some time to download
  layers.

To avoid an availability gap while re-ingesting WAL, it is necessary to stitch the local L0s with remote storage state. We may do this at startup, by making an exception to our
usual policy of only respecting remote storage state at startup. This exception can
be specific to L0 files, and perhaps also specific to when we can detect that these
were written by a hot secondary (perhaps by marking these files with a suffix or magic 0xffff generation?)
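As a sketch of what that startup exception could look like if a marker suffix were used (the suffix and helper below are assumptions, not a decided format):

```rust
use std::collections::HashSet;

// Illustrative marker only; the RFC leaves the exact suffix / "magic generation" open.
const HOT_SECONDARY_SUFFIX: &str = "-ffffffff";

/// At startup after promotion, keep local layer files that either appear in the
/// remote index or are L0s written by this hot secondary (the proposed exception),
/// and drop everything else.
fn files_to_keep(local_files: &[String], remote_index: &HashSet<String>) -> Vec<String> {
    local_files
        .iter()
        .filter(|name| remote_index.contains(*name) || name.ends_with(HOT_SECONDARY_SUFFIX))
        .cloned()
        .collect()
}
```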
We should also only do this cutover once we're reasonably sure the old attached location
isn't still uploading, so that on startup we do not see a whole new layer map with lots
of layers that need downloading.

We may still tolerate some availability gap in the <1s range while reloading the tenant
in a different mode. We should aim for this to be under 100ms under usual circumstances,
as it should only require long enough to:
- Flush the ephemeral layer to L0 on shutdown (writing 128MB takes of the order of 100ms)
- Load the remote index on startup (reading from S3 takes of the order of 10ms)

Doing many such cutovers concurrently may result in worse availability, so the controller
should be tuned to understand that when cutting over multiple hot secondaries to attached,
it is best not to rush it (as they are already in a readable state, it is less urgent
than when activating warm secondaries).

## Summary of a failover

To summarize the order of operations when a pageserver instance fails while holding a tenant
that has a hot secondary location:
- after some short timeout (100s of ms), compute gives up on getpage requests to the primary and sends
  them to the hot secondary.
- after some much longer timeout (e.g. ~30s), the controller decides that the hot secondary should
  become attached, so that it can do its own compaction.
- Hot secondary is instructed to do a compaction before being shut down, so that during
  its restart into attached mode it will not have to deal with any remote storage change.
- Hot secondary shuts down, flushing its ephemeral layer to L0.
- Previously-secondary location starts up in attached mode with a new generation. Downloads
  the index from remote storage, and identifies which L0 files to retain. Adds these to the LayerMap
  and enqueues them for upload.
- Now fully available for reads and able to proceed with compaction etc. as normal.

## Optimisations/details

- We should add a read-only mode to RemoteTimelineClient

## Alternatives considered

### Full mirror

We could make hot secondary locations do all compaction, GC, etc. operations
independently, and maintain their own set of layer files in S3. These would essentially be separate tenants in pageserver terms, but consuming the same safekeeper timelines.

These locations would no longer be anything special in pageserver terms; they'd simply be attached locations that use some modified path like `<tenant_id>.secondary` to avoid colliding with the primary data.

The storage controller could have some `AttachedHotSecondary` placement
policy that configures the hot secondary location with some flag to indicate that the alternative storage path should be used.

Clearly the advantage of this approach is code simplicity. However, the
downsides are substantial:
- Double object storage costs
- Compaction costs are doubled (CPU & disk read I/O), whereas the proposed
  implementation of hot secondaries only pays twice for the compaction _write_ IO as it writes compacted layers to local disk.
@@ -1,7 +1,7 @@
[package]
name = "compute_api"
version = "0.1.0"
edition = "2024"
edition.workspace = true
license.workspace = true

[dependencies]
@@ -1,10 +1,11 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use crate::{
    privilege::Privilege,
    responses::ComputeCtlConfig,
    spec::{ComputeSpec, ExtVersion, PgIdent},
};
use serde::{Deserialize, Serialize};

use crate::privilege::Privilege;
use crate::responses::ComputeCtlConfig;
use crate::spec::{ComputeSpec, ExtVersion, PgIdent};

/// Request of the /configure API
///
/// We now pass only `spec` in the configuration request, but later we can
@@ -6,8 +6,10 @@ use chrono::{DateTime, Utc};
use jsonwebtoken::jwk::JwkSet;
use serde::{Deserialize, Serialize, Serializer};

use crate::privilege::Privilege;
use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};
use crate::{
    privilege::Privilege,
    spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role},
};

#[derive(Serialize, Debug, Deserialize)]
pub struct GenericAPIError {
@@ -5,12 +5,13 @@
//! and connect it to the storage nodes.
use std::collections::HashMap;

use regex::Regex;
use remote_storage::RemotePath;
use serde::{Deserialize, Serialize};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;

use regex::Regex;
use remote_storage::RemotePath;

/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
pub type PgIdent = String;
@@ -101,17 +102,6 @@ pub struct ComputeSpec {
    pub timeline_id: Option<TimelineId>,
    pub pageserver_connstring: Option<String>,

    /// Safekeeper membership config generation. It is put in
    /// neon.safekeepers GUC and serves two purposes:
    /// 1) Non zero value forces walproposer to use membership configurations.
    /// 2) If walproposer wants to update list of safekeepers to connect to
    ///    taking them from some safekeeper mconf, it should check what value
    ///    is newer by comparing the generation.
    ///
    /// Note: it could be SafekeeperGeneration, but this needs linking
    /// compute_ctl with postgres_ffi.
    #[serde(default)]
    pub safekeepers_generation: Option<u32>,
    #[serde(default)]
    pub safekeeper_connstrings: Vec<String>,
@@ -262,7 +252,7 @@ pub enum ComputeMode {
    Replica,
}

#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Cluster {
    pub cluster_id: Option<String>,
    pub name: Option<String>,
@@ -293,7 +283,7 @@ pub struct DeltaOp {

/// Rust representation of Postgres role info with only those fields
/// that matter for us.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Role {
    pub name: PgIdent,
    pub encrypted_password: Option<String>,
@@ -302,7 +292,7 @@ pub struct Role {

/// Rust representation of Postgres database info with only those fields
/// that matter for us.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Database {
    pub name: PgIdent,
    pub owner: PgIdent,
@@ -318,7 +308,7 @@ pub struct Database {
/// Common type representing both SQL statement params with or without value,
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
/// options like `wal_level = logical`.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GenericOption {
    pub name: String,
    pub value: Option<String>,
@@ -349,9 +339,8 @@ pub struct JwksSettings {

#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::*;
    use std::fs::File;

    #[test]
    fn allow_installing_remote_extensions() {
@@ -1,7 +1,7 @@
[package]
name = "consumption_metrics"
version = "0.1.0"
edition = "2024"
edition = "2021"
license = "Apache-2.0"

[dependencies]
Some files were not shown because too many files have changed in this diff