mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-19 06:00:38 +00:00
Compare commits
2 Commits
neon_ances
...
neon_expla
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f748aa8f0 | ||
|
|
546e57781c |
21
.github/PULL_REQUEST_TEMPLATE/release-pr.md
vendored
Normal file
21
.github/PULL_REQUEST_TEMPLATE/release-pr.md
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
## Release 202Y-MM-DD
|
||||
|
||||
**NB: this PR must be merged only by 'Create a merge commit'!**
|
||||
|
||||
### Checklist when preparing for release
|
||||
- [ ] Read or refresh [the release flow guide](https://www.notion.so/neondatabase/Release-general-flow-61f2e39fd45d4d14a70c7749604bd70b)
|
||||
- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers?
|
||||
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
|
||||
|
||||
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
|
||||
|
||||
### Checklist after release
|
||||
- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them).
|
||||
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
|
||||
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
|
||||
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
|
||||
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
|
||||
- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1)
|
||||
- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time)
|
||||
|
||||
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->
|
||||
2
.github/actionlint.yml
vendored
2
.github/actionlint.yml
vendored
@@ -33,5 +33,3 @@ config-variables:
|
||||
- NEON_PROD_AWS_ACCOUNT_ID
|
||||
- AWS_ECR_REGION
|
||||
- BENCHMARK_LARGE_OLTP_PROJECTID
|
||||
- SLACK_ON_CALL_DEVPROD_STREAM
|
||||
- SLACK_RUST_CHANNEL_ID
|
||||
|
||||
43
.github/scripts/generate_image_maps.py
vendored
43
.github/scripts/generate_image_maps.py
vendored
@@ -1,16 +1,14 @@
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
source_tag = os.getenv("SOURCE_TAG")
|
||||
target_tag = os.getenv("TARGET_TAG")
|
||||
branch = os.getenv("BRANCH")
|
||||
dev_acr = os.getenv("DEV_ACR")
|
||||
prod_acr = os.getenv("PROD_ACR")
|
||||
dev_aws = os.getenv("DEV_AWS")
|
||||
prod_aws = os.getenv("PROD_AWS")
|
||||
aws_region = os.getenv("AWS_REGION")
|
||||
build_tag = os.environ["BUILD_TAG"]
|
||||
branch = os.environ["BRANCH"]
|
||||
dev_acr = os.environ["DEV_ACR"]
|
||||
prod_acr = os.environ["PROD_ACR"]
|
||||
dev_aws = os.environ["DEV_AWS"]
|
||||
prod_aws = os.environ["PROD_AWS"]
|
||||
aws_region = os.environ["AWS_REGION"]
|
||||
|
||||
components = {
|
||||
"neon": ["neon"],
|
||||
@@ -41,23 +39,24 @@ registries = {
|
||||
|
||||
outputs: dict[str, dict[str, list[str]]] = {}
|
||||
|
||||
target_tags = [target_tag, "latest"] if branch == "main" else [target_tag]
|
||||
target_stages = (
|
||||
["dev", "prod"] if branch in ["release", "release-proxy", "release-compute"] else ["dev"]
|
||||
)
|
||||
target_tags = [build_tag, "latest"] if branch == "main" else [build_tag]
|
||||
target_stages = ["dev", "prod"] if branch.startswith("release") else ["dev"]
|
||||
|
||||
for component_name, component_images in components.items():
|
||||
for stage in target_stages:
|
||||
outputs[f"{component_name}-{stage}"] = {
|
||||
f"docker.io/neondatabase/{component_image}:{source_tag}": [
|
||||
f"{registry}/{component_image}:{tag}"
|
||||
for registry, tag in itertools.product(registries[stage], target_tags)
|
||||
if not (registry == "docker.io/neondatabase" and tag == source_tag)
|
||||
outputs[f"{component_name}-{stage}"] = dict(
|
||||
[
|
||||
(
|
||||
f"docker.io/neondatabase/{component_image}:{build_tag}",
|
||||
[
|
||||
f"{combo[0]}/{component_image}:{combo[1]}"
|
||||
for combo in itertools.product(registries[stage], target_tags)
|
||||
],
|
||||
)
|
||||
for component_image in component_images
|
||||
]
|
||||
for component_image in component_images
|
||||
}
|
||||
)
|
||||
|
||||
with open(os.getenv("GITHUB_OUTPUT", "/dev/null"), "a") as f:
|
||||
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
|
||||
for key, value in outputs.items():
|
||||
f.write(f"{key}={json.dumps(value)}\n")
|
||||
print(f"Image map for {key}:\n{json.dumps(value, indent=2)}\n\n", file=sys.stderr)
|
||||
|
||||
110
.github/scripts/lint-release-pr.sh
vendored
110
.github/scripts/lint-release-pr.sh
vendored
@@ -1,110 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DOCS_URL="https://docs.neon.build/overview/repositories/neon.html"
|
||||
|
||||
message() {
|
||||
if [[ -n "${GITHUB_PR_NUMBER:-}" ]]; then
|
||||
gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --edit-last --body "$1" \
|
||||
|| gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --body "$1"
|
||||
fi
|
||||
echo "$1"
|
||||
}
|
||||
|
||||
report_error() {
|
||||
message "❌ $1
|
||||
For more details, see the documentation: ${DOCS_URL}"
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
case "$RELEASE_BRANCH" in
|
||||
"release") COMPONENT="Storage" ;;
|
||||
"release-proxy") COMPONENT="Proxy" ;;
|
||||
"release-compute") COMPONENT="Compute" ;;
|
||||
*)
|
||||
report_error "Unknown release branch: ${RELEASE_BRANCH}"
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
# Identify main and release branches
|
||||
MAIN_BRANCH="origin/main"
|
||||
REMOTE_RELEASE_BRANCH="origin/${RELEASE_BRANCH}"
|
||||
|
||||
# Find merge base
|
||||
MERGE_BASE=$(git merge-base "${MAIN_BRANCH}" "${REMOTE_RELEASE_BRANCH}")
|
||||
echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}"
|
||||
|
||||
# Get the HEAD commit (last commit in PR, expected to be the merge commit)
|
||||
LAST_COMMIT=$(git rev-parse HEAD)
|
||||
|
||||
MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}")
|
||||
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$"
|
||||
|
||||
if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then
|
||||
report_error "Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD'
|
||||
Expected component: ${COMPONENT}
|
||||
Found: '${MERGE_COMMIT_MESSAGE}'"
|
||||
fi
|
||||
echo "✅ Merge commit message is correctly formatted: '${MERGE_COMMIT_MESSAGE}'"
|
||||
|
||||
LAST_COMMIT_PARENTS=$(git cat-file -p "${LAST_COMMIT}" | jq -sR '[capture("parent (?<parent>[0-9a-f]{40})"; "g") | .parent]')
|
||||
|
||||
if [[ "$(echo "${LAST_COMMIT_PARENTS}" | jq 'length')" -ne 2 ]]; then
|
||||
report_error "Last commit must be a merge commit with exactly two parents"
|
||||
fi
|
||||
|
||||
EXPECTED_RELEASE_HEAD=$(git rev-parse "${REMOTE_RELEASE_BRANCH}")
|
||||
if echo "${LAST_COMMIT_PARENTS}" | jq -e --arg rel "${EXPECTED_RELEASE_HEAD}" 'index($rel) != null' > /dev/null; then
|
||||
LINEAR_HEAD=$(echo "${LAST_COMMIT_PARENTS}" | jq -r '[.[] | select(. != $rel)][0]' --arg rel "${EXPECTED_RELEASE_HEAD}")
|
||||
else
|
||||
report_error "Last commit must merge the release branch (${RELEASE_BRANCH})"
|
||||
fi
|
||||
echo "✅ Last commit correctly merges the previous commit and the release branch"
|
||||
echo "Top commit of linear history: ${LINEAR_HEAD}"
|
||||
|
||||
MERGE_COMMIT_TREE=$(git rev-parse "${LAST_COMMIT}^{tree}")
|
||||
LINEAR_HEAD_TREE=$(git rev-parse "${LINEAR_HEAD}^{tree}")
|
||||
|
||||
if [[ "${MERGE_COMMIT_TREE}" != "${LINEAR_HEAD_TREE}" ]]; then
|
||||
report_error "Tree of merge commit (${MERGE_COMMIT_TREE}) does not match tree of linear history head (${LINEAR_HEAD_TREE})
|
||||
This indicates that the merge of ${RELEASE_BRANCH} into this branch was not performed using the merge strategy 'ours'"
|
||||
fi
|
||||
echo "✅ Merge commit tree matches the linear history head"
|
||||
|
||||
EXPECTED_PREVIOUS_COMMIT="${LINEAR_HEAD}"
|
||||
|
||||
# Now traverse down the history, ensuring each commit has exactly one parent
|
||||
CURRENT_COMMIT="${EXPECTED_PREVIOUS_COMMIT}"
|
||||
while [[ "${CURRENT_COMMIT}" != "${MERGE_BASE}" && "${CURRENT_COMMIT}" != "${EXPECTED_RELEASE_HEAD}" ]]; do
|
||||
CURRENT_COMMIT_PARENTS=$(git cat-file -p "${CURRENT_COMMIT}" | jq -sR '[capture("parent (?<parent>[0-9a-f]{40})"; "g") | .parent]')
|
||||
|
||||
if [[ "$(echo "${CURRENT_COMMIT_PARENTS}" | jq 'length')" -ne 1 ]]; then
|
||||
report_error "Commit ${CURRENT_COMMIT} must have exactly one parent"
|
||||
fi
|
||||
|
||||
NEXT_COMMIT=$(echo "${CURRENT_COMMIT_PARENTS}" | jq -r '.[0]')
|
||||
|
||||
if [[ "${NEXT_COMMIT}" == "${MERGE_BASE}" ]]; then
|
||||
echo "✅ Reached merge base (${MERGE_BASE})"
|
||||
PR_BASE="${MERGE_BASE}"
|
||||
elif [[ "${NEXT_COMMIT}" == "${EXPECTED_RELEASE_HEAD}" ]]; then
|
||||
echo "✅ Reached release branch (${EXPECTED_RELEASE_HEAD})"
|
||||
PR_BASE="${EXPECTED_RELEASE_HEAD}"
|
||||
elif [[ -z "${NEXT_COMMIT}" ]]; then
|
||||
report_error "Unexpected end of commit history before reaching merge base"
|
||||
fi
|
||||
|
||||
# Move to the next commit in the chain
|
||||
CURRENT_COMMIT="${NEXT_COMMIT}"
|
||||
done
|
||||
|
||||
echo "✅ All commits are properly ordered and linear"
|
||||
echo "✅ Release PR structure is valid"
|
||||
|
||||
echo
|
||||
|
||||
message "Commits that are part of this release:
|
||||
$(git log --oneline "${PR_BASE}..${LINEAR_HEAD}")"
|
||||
6
.github/scripts/previous-releases.jq
vendored
6
.github/scripts/previous-releases.jq
vendored
@@ -17,12 +17,6 @@
|
||||
({};
|
||||
.[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))
|
||||
|
||||
# Ensure that each component exists, or fail
|
||||
| (["storage", "compute", "proxy"] - (keys)) as $missing
|
||||
| if ($missing | length) > 0 then
|
||||
"Error: Found no release for \($missing | join(", "))!\n" | halt_error(1)
|
||||
else . end
|
||||
|
||||
# Convert the resulting object into an array of formatted strings
|
||||
| to_entries
|
||||
| map("\(.key)=\(.value.full)")
|
||||
|
||||
47
.github/workflows/_create-release-pr.yml
vendored
47
.github/workflows/_create-release-pr.yml
vendored
@@ -7,8 +7,8 @@ on:
|
||||
description: 'Component name'
|
||||
required: true
|
||||
type: string
|
||||
source-branch:
|
||||
description: 'Source branch'
|
||||
release-branch:
|
||||
description: 'Release branch'
|
||||
required: true
|
||||
type: string
|
||||
secrets:
|
||||
@@ -30,25 +30,17 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ inputs.source-branch }}
|
||||
fetch-depth: 0
|
||||
ref: main
|
||||
|
||||
- name: Set variables
|
||||
id: vars
|
||||
env:
|
||||
COMPONENT_NAME: ${{ inputs.component-name }}
|
||||
RELEASE_BRANCH: >-
|
||||
${{
|
||||
false
|
||||
|| inputs.component-name == 'Storage' && 'release'
|
||||
|| inputs.component-name == 'Proxy' && 'release-proxy'
|
||||
|| inputs.component-name == 'Compute' && 'release-compute'
|
||||
}}
|
||||
RELEASE_BRANCH: ${{ inputs.release-branch }}
|
||||
run: |
|
||||
today=$(date +'%Y-%m-%d')
|
||||
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
|
||||
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}
|
||||
echo "release-branch=${RELEASE_BRANCH}" | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
@@ -57,36 +49,31 @@ jobs:
|
||||
|
||||
- name: Create RC branch
|
||||
env:
|
||||
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
|
||||
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
|
||||
TITLE: ${{ steps.vars.outputs.title }}
|
||||
run: |
|
||||
git switch -c "${RC_BRANCH}"
|
||||
git checkout -b "${RC_BRANCH}"
|
||||
|
||||
# Manually create a merge commit on the current branch, keeping the
|
||||
# tree and setting the parents to the current HEAD and the HEAD of the
|
||||
# release branch. This commit is what we'll fast-forward the release
|
||||
# branch to when merging the release branch.
|
||||
# For details on why, look at
|
||||
# https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs
|
||||
current_tree=$(git rev-parse 'HEAD^{tree}')
|
||||
release_head=$(git rev-parse "origin/${RELEASE_BRANCH}")
|
||||
current_head=$(git rev-parse HEAD)
|
||||
merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}")
|
||||
|
||||
# Fast-forward the current branch to the newly created merge_commit
|
||||
git merge --ff-only ${merge_commit}
|
||||
# create an empty commit to distinguish workflow runs
|
||||
# from other possible releases from the same commit
|
||||
git commit --allow-empty -m "${TITLE}"
|
||||
|
||||
git push origin "${RC_BRANCH}"
|
||||
|
||||
- name: Create a PR into ${{ steps.vars.outputs.release-branch }}
|
||||
- name: Create a PR into ${{ inputs.release-branch }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ci-access-token }}
|
||||
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
|
||||
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
|
||||
RELEASE_BRANCH: ${{ inputs.release-branch }}
|
||||
TITLE: ${{ steps.vars.outputs.title }}
|
||||
run: |
|
||||
cat << EOF > body.md
|
||||
## ${TITLE}
|
||||
|
||||
**Please merge this Pull Request using 'Create a merge commit' button**
|
||||
EOF
|
||||
|
||||
gh pr create --title "${TITLE}" \
|
||||
--body "" \
|
||||
--body-file "body.md" \
|
||||
--head "${RC_BRANCH}" \
|
||||
--base "${RELEASE_BRANCH}"
|
||||
|
||||
30
.github/workflows/_meta.yml
vendored
30
.github/workflows/_meta.yml
vendored
@@ -19,18 +19,11 @@ on:
|
||||
description: "Tag of the last compute release"
|
||||
value: ${{ jobs.tags.outputs.compute }}
|
||||
run-kind:
|
||||
description: "The kind of run we're currently in. Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`, `proxy-rc-pr`, `pr`, or `workflow-dispatch`"
|
||||
description: "The kind of run we're currently in. Will be one of `pr`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`"
|
||||
value: ${{ jobs.tags.outputs.run-kind }}
|
||||
release-pr-run-id:
|
||||
description: "Only available if `run-kind in [storage-release, proxy-release, compute-release]`. Contains the run ID of the `Build and Test` workflow, assuming one with the current commit can be found."
|
||||
value: ${{ jobs.tags.outputs.release-pr-run-id }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
jobs:
|
||||
tags:
|
||||
runs-on: ubuntu-22.04
|
||||
@@ -40,7 +33,6 @@ jobs:
|
||||
proxy: ${{ steps.previous-releases.outputs.proxy }}
|
||||
storage: ${{ steps.previous-releases.outputs.storage }}
|
||||
run-kind: ${{ steps.run-kind.outputs.run-kind }}
|
||||
release-pr-run-id: ${{ steps.release-pr-run-id.outputs.release-pr-run-id }}
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
@@ -63,7 +55,6 @@ jobs:
|
||||
|| (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr'
|
||||
|| (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr'
|
||||
|| (inputs.github-event-name == 'pull_request') && 'pr'
|
||||
|| (inputs.github-event-name == 'workflow_dispatch') && 'workflow-dispatch'
|
||||
|| 'unknown'
|
||||
}}
|
||||
run: |
|
||||
@@ -91,16 +82,9 @@ jobs:
|
||||
echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
|
||||
;;
|
||||
pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)
|
||||
BUILD_AND_TEST_RUN_ID=$(gh api --paginate \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${CURRENT_SHA}&branch=${CURRENT_BRANCH}" \
|
||||
| jq '[.workflow_runs[] | select(.name == "Build and Test")][0].id // ("Error: No matching workflow run found." | halt_error(1))')
|
||||
BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
|
||||
echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
|
||||
;;
|
||||
workflow-dispatch)
|
||||
echo "tag=$GITHUB_RUN_ID" | tee -a $GITHUB_OUTPUT
|
||||
;;
|
||||
*)
|
||||
echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!"
|
||||
exit 1
|
||||
@@ -117,13 +101,3 @@ jobs:
|
||||
"/repos/${GITHUB_REPOSITORY}/releases" \
|
||||
| jq -f .github/scripts/previous-releases.jq -r \
|
||||
| tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Get the release PR run ID
|
||||
id: release-pr-run-id
|
||||
if: ${{ contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
run: |
|
||||
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Falied to find Build and Test run from RC PR!" | halt_error(1))')
|
||||
echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
135
.github/workflows/build_and_test.yml
vendored
135
.github/workflows/build_and_test.yml
vendored
@@ -476,7 +476,7 @@ jobs:
|
||||
(
|
||||
!github.event.pull_request.draft
|
||||
|| contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
|
||||
|| needs.meta.outputs.run-kind == 'push-main'
|
||||
|| contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind)
|
||||
) && !failure() && !cancelled()
|
||||
}}
|
||||
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
|
||||
@@ -487,7 +487,7 @@ jobs:
|
||||
|
||||
neon-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
strategy:
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
@@ -537,7 +537,7 @@ jobs:
|
||||
|
||||
neon-image:
|
||||
needs: [ neon-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
@@ -559,7 +559,7 @@ jobs:
|
||||
|
||||
compute-node-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
@@ -651,7 +651,7 @@ jobs:
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
@@ -694,7 +694,7 @@ jobs:
|
||||
|
||||
vm-compute-node-image-arch:
|
||||
needs: [ check-permissions, meta, compute-node-image ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -747,7 +747,7 @@ jobs:
|
||||
|
||||
vm-compute-node-image:
|
||||
needs: [ vm-compute-node-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -773,12 +773,7 @@ jobs:
|
||||
test-images:
|
||||
needs: [ check-permissions, meta, neon-image, compute-node-image ]
|
||||
# Depends on jobs that can get skipped
|
||||
if: >-
|
||||
${{
|
||||
!failure()
|
||||
&& !cancelled()
|
||||
&& contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
}}
|
||||
if: "!failure() && !cancelled()"
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -805,7 +800,7 @@ jobs:
|
||||
# Ensure that we don't have bad versions.
|
||||
- name: Verify image versions
|
||||
shell: bash # ensure no set -e for better error messages
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
run: |
|
||||
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
|
||||
@@ -826,19 +821,19 @@ jobs:
|
||||
env:
|
||||
TAG: >-
|
||||
${{
|
||||
needs.meta.outputs.run-kind == 'compute-rc-pr'
|
||||
contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.previous-storage-release
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
COMPUTE_TAG: >-
|
||||
${{
|
||||
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.previous-compute-release
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
TEST_EXTENSIONS_TAG: >-
|
||||
${{
|
||||
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
&& 'latest'
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
@@ -890,13 +885,7 @@ jobs:
|
||||
id: generate
|
||||
run: python3 .github/scripts/generate_image_maps.py
|
||||
env:
|
||||
SOURCE_TAG: >-
|
||||
${{
|
||||
contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.release-pr-run-id
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
TARGET_TAG: ${{ needs.meta.outputs.build-tag }}
|
||||
BUILD_TAG: "${{ needs.meta.outputs.build-tag }}"
|
||||
BRANCH: "${{ github.ref_name }}"
|
||||
DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
|
||||
PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
|
||||
@@ -906,7 +895,7 @@ jobs:
|
||||
|
||||
push-neon-image-dev:
|
||||
needs: [ meta, generate-image-maps, neon-image ]
|
||||
if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
@@ -924,7 +913,7 @@ jobs:
|
||||
|
||||
push-compute-image-dev:
|
||||
needs: [ meta, generate-image-maps, vm-compute-node-image ]
|
||||
if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
id-token: write # Required for aws/azure login
|
||||
@@ -978,55 +967,16 @@ jobs:
|
||||
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
|
||||
push-neon-test-extensions-image-ghcr:
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
# This is a bit of a special case so we're not using a generated image map.
|
||||
add-latest-tag-to-neon-extensions-test-image:
|
||||
if: github.ref_name == 'main'
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
add-latest-tag-to-neon-test-extensions-image:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'push-main' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:latest"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:latest"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
add-release-tag-to-neon-test-extensions-image:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
@@ -1225,7 +1175,7 @@ jobs:
|
||||
-f deployPgSniRouter=false \
|
||||
-f deployProxy=false \
|
||||
-f deployStorage=true \
|
||||
-f deployStorageBroker=false \
|
||||
-f deployStorageBroker=true \
|
||||
-f deployStorageController=true \
|
||||
-f branch=main \
|
||||
-f dockerTag=${{needs.meta.outputs.build-tag}} \
|
||||
@@ -1233,7 +1183,7 @@ jobs:
|
||||
|
||||
gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \
|
||||
-f deployStorage=true \
|
||||
-f deployStorageBroker=false \
|
||||
-f deployStorageBroker=true \
|
||||
-f deployStorageController=true \
|
||||
-f branch=main \
|
||||
-f dockerTag=${{needs.meta.outputs.build-tag}}
|
||||
@@ -1281,11 +1231,11 @@ jobs:
|
||||
payload: |
|
||||
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
|
||||
text: |
|
||||
🔴 <!subteam^S06CJ87UMNY|@oncall-storage>: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
|
||||
🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
|
||||
|
||||
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
|
||||
promote-compatibility-data:
|
||||
needs: [ meta, deploy ]
|
||||
needs: [ deploy ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
@@ -1295,6 +1245,37 @@ jobs:
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Fetch GITHUB_RUN_ID and COMMIT_SHA for the last merged release PR
|
||||
id: fetch-last-release-pr-info
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
branch_name_and_pr_number=$(gh pr list \
|
||||
--repo "${GITHUB_REPOSITORY}" \
|
||||
--base release \
|
||||
--state merged \
|
||||
--limit 10 \
|
||||
--json mergeCommit,headRefName,number \
|
||||
--jq ".[] | select(.mergeCommit.oid==\"${GITHUB_SHA}\") | { branch_name: .headRefName, pr_number: .number }")
|
||||
branch_name=$(echo "${branch_name_and_pr_number}" | jq -r '.branch_name')
|
||||
pr_number=$(echo "${branch_name_and_pr_number}" | jq -r '.pr_number')
|
||||
|
||||
run_id=$(gh run list \
|
||||
--repo "${GITHUB_REPOSITORY}" \
|
||||
--workflow build_and_test.yml \
|
||||
--branch "${branch_name}" \
|
||||
--json databaseId \
|
||||
--limit 1 \
|
||||
--jq '.[].databaseId')
|
||||
|
||||
last_commit_sha=$(gh pr view "${pr_number}" \
|
||||
--repo "${GITHUB_REPOSITORY}" \
|
||||
--json commits \
|
||||
--jq '.commits[-1].oid')
|
||||
|
||||
echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT}
|
||||
echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
@@ -1305,8 +1286,8 @@ jobs:
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
AWS_REGION: eu-central-1
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
RUN_ID: ${{ needs.meta.outputs.release-pr-run-id }}
|
||||
COMMIT_SHA: ${{ steps.fetch-last-release-pr-info.outputs.commit-sha }}
|
||||
RUN_ID: ${{ steps.fetch-last-release-pr-info.outputs.run-id }}
|
||||
run: |
|
||||
old_prefix="artifacts/${COMMIT_SHA}/${RUN_ID}"
|
||||
new_prefix="artifacts/latest"
|
||||
@@ -1395,5 +1376,5 @@ jobs:
|
||||
|| needs.files-changed.result == 'skipped'
|
||||
|| (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.test-images.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| needs.test-images.result == 'skipped'
|
||||
|| (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|
||||
7
.github/workflows/cargo-deny.yml
vendored
7
.github/workflows/cargo-deny.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
schedule:
|
||||
- cron: '0 10 * * *'
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
jobs:
|
||||
cargo-deny:
|
||||
@@ -50,9 +50,8 @@ jobs:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }}
|
||||
channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
|
||||
text: |
|
||||
Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
Fixing the problem should be fairly straight forward from the logs. If not, <#${{ vars.SLACK_RUST_CHANNEL_ID }}> is there to help.
|
||||
Pinging <!subteam^S0838JPSH32|@oncall-devprod>.
|
||||
Pinging @oncall-devprod.
|
||||
|
||||
36
.github/workflows/fast-forward.yml
vendored
36
.github/workflows/fast-forward.yml
vendored
@@ -1,36 +0,0 @@
|
||||
name: Fast forward merge
|
||||
on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
branches:
|
||||
- release
|
||||
- release-proxy
|
||||
- release-compute
|
||||
|
||||
jobs:
|
||||
fast-forward:
|
||||
if: ${{ github.event.label.name == 'fast-forward' }}
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Remove fast-forward label to PR
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
gh pr edit ${{ github.event.pull_request.number }} --repo "${GITHUB_REPOSITORY}" --remove-label "fast-forward"
|
||||
|
||||
- name: Fast forwarding
|
||||
uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979
|
||||
# See https://docs.github.com/en/graphql/reference/enums#mergestatestatus
|
||||
if: ${{ github.event.pull_request.mergeable_state == 'clean' }}
|
||||
with:
|
||||
merge: true
|
||||
comment: on-error
|
||||
github_token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
- name: Comment if mergeable_state is not clean
|
||||
if: ${{ github.event.pull_request.mergeable_state != 'clean' }}
|
||||
run: |
|
||||
gh pr comment ${{ github.event.pull_request.number }} \
|
||||
--repo "${GITHUB_REPOSITORY}" \
|
||||
--body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\`."
|
||||
89
.github/workflows/large_oltp_benchmark.yml
vendored
89
.github/workflows/large_oltp_benchmark.yml
vendored
@@ -2,8 +2,8 @@ name: large oltp benchmark
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
#push:
|
||||
# branches: [ bodobolero/synthetic_oltp_workload ]
|
||||
push:
|
||||
branches: [ bodobolero/synthetic_oltp_workload ]
|
||||
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
@@ -12,7 +12,7 @@ on:
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC
|
||||
- cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
@@ -22,7 +22,7 @@ defaults:
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: large-oltp-bench-workflow
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
oltp:
|
||||
@@ -31,9 +31,9 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- target: new_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
- target: reuse_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -46,6 +46,7 @@ jobs:
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
|
||||
PLATFORM: ${{ matrix.target }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
@@ -56,10 +57,8 @@ jobs:
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time
|
||||
# (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions
|
||||
# in one run vacuum didn't finish within 12 hours
|
||||
timeout-minutes: 2880
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
timeout-minutes: 480
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -90,45 +89,29 @@ jobs:
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${{ matrix.target }}" in
|
||||
new_branch)
|
||||
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
||||
;;
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "${{ matrix.target }}" in
|
||||
new_branch)
|
||||
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
||||
;;
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Delete rows from prior runs in reuse branch
|
||||
if: ${{ matrix.target == 'reuse_branch' }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
||||
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
|
||||
PSQL: /tmp/neon/pg_install/v16/bin/psql
|
||||
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
|
||||
run: |
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs"
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
|
||||
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
@@ -136,21 +119,6 @@ jobs:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Benchmark database maintenance
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Delete Neon Branch for large tenant
|
||||
if: ${{ always() && matrix.target == 'new_branch' }}
|
||||
uses: ./.github/actions/neon-branch-delete
|
||||
@@ -159,13 +127,6 @@ jobs:
|
||||
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Configure AWS credentials # again because prior steps could have exceeded 5 hours
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
|
||||
24
.github/workflows/lint-release-pr.yml
vendored
24
.github/workflows/lint-release-pr.yml
vendored
@@ -1,24 +0,0 @@
|
||||
name: Lint Release PR
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- release
|
||||
- release-proxy
|
||||
- release-compute
|
||||
|
||||
jobs:
|
||||
lint-release-pr:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout PR branch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch full history for git operations
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
|
||||
- name: Run lint script
|
||||
env:
|
||||
RELEASE_BRANCH: ${{ github.base_ref }}
|
||||
run: |
|
||||
./.github/scripts/lint-release-pr.sh
|
||||
12
.github/workflows/periodic_pagebench.yml
vendored
12
.github/workflows/periodic_pagebench.yml
vendored
@@ -3,12 +3,12 @@ name: Periodic pagebench performance test on dedicated EC2 machine in eu-central
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 */3 * * *' # Runs every 3 hours
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 18 * * *' # Runs at 6 PM UTC every day
|
||||
workflow_dispatch: # Allows manual triggering of the workflow
|
||||
inputs:
|
||||
commit_hash:
|
||||
|
||||
51
.github/workflows/pre-merge-checks.yml
vendored
51
.github/workflows/pre-merge-checks.yml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
- .github/workflows/build-build-tools-image.yml
|
||||
- .github/workflows/pre-merge-checks.yml
|
||||
merge_group:
|
||||
branches:
|
||||
- main
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -17,17 +19,15 @@ defaults:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
meta:
|
||||
get-changed-files:
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
python-changed: ${{ steps.python-src.outputs.any_changed }}
|
||||
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
|
||||
branch: ${{ steps.group-metadata.outputs.branch }}
|
||||
pr-number: ${{ steps.group-metadata.outputs.pr-number }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
id: python-src
|
||||
with:
|
||||
files: |
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
poetry.lock
|
||||
pyproject.toml
|
||||
|
||||
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
id: rust-src
|
||||
with:
|
||||
files: |
|
||||
@@ -58,20 +58,12 @@ jobs:
|
||||
echo "${PYTHON_CHANGED_FILES}"
|
||||
echo "${RUST_CHANGED_FILES}"
|
||||
|
||||
- name: Merge group metadata
|
||||
if: ${{ github.event_name == 'merge_group' }}
|
||||
id: group-metadata
|
||||
env:
|
||||
MERGE_QUEUE_REF: ${{ github.event.merge_group.head_ref }}
|
||||
run: |
|
||||
echo $MERGE_QUEUE_REF | jq -Rr 'capture("refs/heads/gh-readonly-queue/(?<branch>.*)/pr-(?<pr_number>[0-9]+)-[0-9a-f]{40}") | ["branch=" + .branch, "pr-number=" + .pr_number] | .[]' | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
build-build-tools-image:
|
||||
if: |
|
||||
false
|
||||
|| needs.meta.outputs.python-changed == 'true'
|
||||
|| needs.meta.outputs.rust-changed == 'true'
|
||||
needs: [ meta ]
|
||||
|| needs.get-changed-files.outputs.python-changed == 'true'
|
||||
|| needs.get-changed-files.outputs.rust-changed == 'true'
|
||||
needs: [ get-changed-files ]
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
with:
|
||||
# Build only one combination to save time
|
||||
@@ -80,8 +72,8 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
check-codestyle-python:
|
||||
if: needs.meta.outputs.python-changed == 'true'
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
if: needs.get-changed-files.outputs.python-changed == 'true'
|
||||
needs: [ get-changed-files, build-build-tools-image ]
|
||||
uses: ./.github/workflows/_check-codestyle-python.yml
|
||||
with:
|
||||
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
|
||||
@@ -89,8 +81,8 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
check-codestyle-rust:
|
||||
if: needs.meta.outputs.rust-changed == 'true'
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
if: needs.get-changed-files.outputs.rust-changed == 'true'
|
||||
needs: [ get-changed-files, build-build-tools-image ]
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
|
||||
@@ -109,7 +101,7 @@ jobs:
|
||||
statuses: write # for `github.repos.createCommitStatus(...)`
|
||||
contents: write
|
||||
needs:
|
||||
- meta
|
||||
- get-changed-files
|
||||
- check-codestyle-python
|
||||
- check-codestyle-rust
|
||||
runs-on: ubuntu-22.04
|
||||
@@ -137,20 +129,7 @@ jobs:
|
||||
run: exit 1
|
||||
if: |
|
||||
false
|
||||
|| (github.event_name == 'merge_group' && needs.meta.outputs.branch != 'main')
|
||||
|| (needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.python-changed == 'true')
|
||||
|| (needs.check-codestyle-rust.result == 'skipped' && needs.meta.outputs.rust-changed == 'true')
|
||||
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|
||||
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
|
||||
|| contains(needs.*.result, 'failure')
|
||||
|| contains(needs.*.result, 'cancelled')
|
||||
|
||||
- name: Add fast-forward label to PR to trigger fast-forward merge
|
||||
if: >-
|
||||
${{
|
||||
always()
|
||||
&& github.event_name == 'merge_group'
|
||||
&& contains(fromJson('["release", "release-proxy", "release-compute"]'), github.base_ref)
|
||||
}}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: >-
|
||||
gh pr edit ${{ needs.meta.outputs.pr-number }} --repo "${GITHUB_REPOSITORY}" --add-label "fast-forward"
|
||||
|
||||
6
.github/workflows/release.yml
vendored
6
.github/workflows/release.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Storage'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
release-branch: 'release'
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Proxy'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
release-branch: 'release-proxy'
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
@@ -64,6 +64,6 @@ jobs:
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Compute'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
release-branch: 'release-compute'
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Autoscaling
|
||||
/libs/vm_monitor/ @neondatabase/autoscaling
|
||||
|
||||
# DevProd & PerfCorr
|
||||
/.github/ @neondatabase/developer-productivity @neondatabase/performance-correctness
|
||||
# DevProd
|
||||
/.github/ @neondatabase/developer-productivity
|
||||
|
||||
# Compute
|
||||
/pgxn/ @neondatabase/compute
|
||||
|
||||
429
Cargo.lock
generated
429
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -106,13 +106,13 @@ hostname = "0.4"
|
||||
http = {version = "1.1.0", features = ["std"]}
|
||||
http-types = { version = "2", default-features = false }
|
||||
http-body-util = "0.1.2"
|
||||
humantime = "2.2"
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1.1"
|
||||
hyper0 = { package = "hyper", version = "0.14" }
|
||||
hyper = "1.4"
|
||||
hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = { version = "2", features = ["serde"] }
|
||||
indexmap = "2"
|
||||
indoc = "2"
|
||||
ipnet = "2.10.0"
|
||||
itertools = "0.10"
|
||||
@@ -126,9 +126,7 @@ measured = { version = "0.0.22", features=["lasso"] }
|
||||
measured-process = { version = "0.0.22" }
|
||||
memoffset = "0.9"
|
||||
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
|
||||
# Do not update to >= 7.0.0, at least. The update will have a significant impact
|
||||
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
|
||||
notify = "6.0.0"
|
||||
notify = "8.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
@@ -141,7 +139,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
|
||||
procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.13"
|
||||
@@ -157,7 +155,6 @@ rpds = "0.13"
|
||||
rustc-hash = "1.1.0"
|
||||
rustls = { version = "0.23.16", default-features = false }
|
||||
rustls-pemfile = "2"
|
||||
rustls-pki-types = "1.11"
|
||||
scopeguard = "1.1"
|
||||
sysinfo = "0.29.2"
|
||||
sd-notify = "0.4.1"
|
||||
@@ -221,7 +218,7 @@ zerocopy = { version = "0.7", features = ["derive"] }
|
||||
json-structural-diff = { version = "0.2.0" }
|
||||
|
||||
## TODO replace this with tracing
|
||||
env_logger = "0.11"
|
||||
env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
|
||||
@@ -1484,7 +1484,7 @@ WORKDIR /ext-src
|
||||
COPY compute/patches/pg_duckdb_v031.patch .
|
||||
COPY compute/patches/duckdb_v120.patch .
|
||||
# pg_duckdb build requires source dir to be a git repo to get submodules
|
||||
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
|
||||
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
|
||||
# - extension management function duckdb.install_extension()
|
||||
# - access to duckdb.extensions table and its sequence
|
||||
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
|
||||
@@ -1499,8 +1499,8 @@ ARG PG_VERSION
|
||||
COPY --from=pg_duckdb-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pg_duckdb-src
|
||||
RUN make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control
|
||||
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_repack"
|
||||
@@ -1735,8 +1735,6 @@ RUN set -e \
|
||||
libevent-dev \
|
||||
libtool \
|
||||
pkg-config \
|
||||
libcurl4-openssl-dev \
|
||||
libssl-dev \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
|
||||
@@ -1745,7 +1743,7 @@ RUN set -e \
|
||||
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
|
||||
&& cd pgbouncer \
|
||||
&& ./autogen.sh \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer --without-openssl \
|
||||
&& make -j $(nproc) dist_man_MANS= \
|
||||
&& make install dist_man_MANS=
|
||||
|
||||
@@ -1760,15 +1758,15 @@ ARG TARGETARCH
|
||||
# test_runner/regress/test_compute_metrics.py
|
||||
# See comment on the top of the file regading `echo`, `-e` and `\n`
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\
|
||||
postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\
|
||||
pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
|
||||
sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
|
||||
else\
|
||||
postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\
|
||||
postgres_exporter_sha256='131a376d25778ff9701a4c81f703f179e0b58db5c2c496e66fa43f8179484786';\
|
||||
pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
|
||||
sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
|
||||
fi\
|
||||
&& curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\
|
||||
&& curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.16.0/postgres_exporter-0.16.0.linux-${TARGETARCH}.tar.gz\
|
||||
| tar xzf - --strip-components=1 -C.\
|
||||
&& curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
|
||||
| tar xzf - --strip-components=1 -C.\
|
||||
@@ -1935,7 +1933,6 @@ RUN apt update && \
|
||||
locales \
|
||||
procps \
|
||||
ca-certificates \
|
||||
rsyslog \
|
||||
$VERSION_INSTALLS && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
@@ -1981,13 +1978,6 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neo
|
||||
# Make the libraries we built available
|
||||
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
|
||||
|
||||
# rsyslog config permissions
|
||||
# directory for rsyslogd pid file
|
||||
RUN mkdir /var/run/rsyslogd && \
|
||||
chown -R postgres:postgres /var/run/rsyslogd && \
|
||||
chown -R postgres:postgres /etc/rsyslog.d/
|
||||
|
||||
|
||||
ENV LANG=en_US.utf8
|
||||
USER postgres
|
||||
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
import 'sql_exporter/lfc_approximate_working_set_size.libsonnet',
|
||||
import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet',
|
||||
import 'sql_exporter/lfc_cache_size_limit.libsonnet',
|
||||
import 'sql_exporter/lfc_chunk_size.libsonnet',
|
||||
import 'sql_exporter/lfc_hits.libsonnet',
|
||||
import 'sql_exporter/lfc_misses.libsonnet',
|
||||
import 'sql_exporter/lfc_used.libsonnet',
|
||||
|
||||
@@ -1,5 +1 @@
|
||||
SELECT sum(pg_database_size(datname)) AS total
|
||||
FROM pg_database
|
||||
-- Ignore invalid databases, as we will likely have problems with
|
||||
-- getting their size from the Pageserver.
|
||||
WHERE datconnlimit != -2;
|
||||
SELECT sum(pg_database_size(datname)) AS total FROM pg_database;
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
metric_name: 'lfc_chunk_size',
|
||||
type: 'gauge',
|
||||
help: 'LFC chunk size, measured in 8KiB pages',
|
||||
key_labels: null,
|
||||
values: [
|
||||
'lfc_chunk_size_pages',
|
||||
],
|
||||
query: importstr 'sql_exporter/lfc_chunk_size.sql',
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
SELECT lfc_value AS lfc_chunk_size_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_chunk_size_pages';
|
||||
@@ -1,20 +1,10 @@
|
||||
-- We export stats for 10 non-system databases. Without this limit it is too
|
||||
-- easy to abuse the system by creating lots of databases.
|
||||
|
||||
SELECT pg_database_size(datname) AS db_size,
|
||||
deadlocks,
|
||||
tup_inserted AS inserted,
|
||||
tup_updated AS updated,
|
||||
tup_deleted AS deleted,
|
||||
datname
|
||||
SELECT pg_database_size(datname) AS db_size, deadlocks, tup_inserted AS inserted,
|
||||
tup_updated AS updated, tup_deleted AS deleted, datname
|
||||
FROM pg_stat_database
|
||||
WHERE datname IN (
|
||||
SELECT datname FROM pg_database
|
||||
-- Ignore invalid databases, as we will likely have problems with
|
||||
-- getting their size from the Pageserver.
|
||||
WHERE datconnlimit != -2
|
||||
AND datname <> 'postgres'
|
||||
AND NOT datistemplate
|
||||
ORDER BY oid
|
||||
LIMIT 10
|
||||
WHERE datname <> 'postgres' AND NOT datistemplate ORDER BY oid LIMIT 10
|
||||
);
|
||||
|
||||
@@ -39,10 +39,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'
|
||||
shutdownHook: |
|
||||
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
||||
files:
|
||||
@@ -58,7 +54,7 @@ files:
|
||||
# regardless of hostname (ALL)
|
||||
#
|
||||
# Also allow it to shut down the VM. The fast_import job does that when it's finished.
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
@@ -73,12 +69,6 @@ files:
|
||||
}
|
||||
memory {}
|
||||
}
|
||||
# Create dummy rsyslog config, because it refuses to start without at least one action configured.
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
# Build cgroup-tools
|
||||
#
|
||||
@@ -142,12 +132,6 @@ merge: |
|
||||
RUN set -e \
|
||||
&& chmod 0644 /etc/cgconfig.conf
|
||||
|
||||
|
||||
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /etc/compute_rsyslog.conf
|
||||
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
|
||||
|
||||
|
||||
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
|
||||
|
||||
@@ -39,10 +39,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'
|
||||
shutdownHook: |
|
||||
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
||||
files:
|
||||
@@ -58,7 +54,7 @@ files:
|
||||
# regardless of hostname (ALL)
|
||||
#
|
||||
# Also allow it to shut down the VM. The fast_import job does that when it's finished.
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
@@ -73,12 +69,6 @@ files:
|
||||
}
|
||||
memory {}
|
||||
}
|
||||
# Create dummy rsyslog config, because it refuses to start without at least one action configured.
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
# Build cgroup-tools
|
||||
#
|
||||
@@ -138,11 +128,6 @@ merge: |
|
||||
RUN set -e \
|
||||
&& chmod 0644 /etc/cgconfig.conf
|
||||
|
||||
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /etc/compute_rsyslog.conf
|
||||
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
|
||||
|
||||
|
||||
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
|
||||
|
||||
@@ -26,7 +26,6 @@ fail.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
indexmap.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
@@ -35,19 +34,16 @@ num_cpus.workspace = true
|
||||
once_cell.workspace = true
|
||||
opentelemetry.workspace = true
|
||||
opentelemetry_sdk.workspace = true
|
||||
p256 = { version = "0.13", features = ["pem"] }
|
||||
postgres.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
ring = "0.17"
|
||||
serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
spki = { version = "0.7.3", features = ["std"] }
|
||||
tar.workspace = true
|
||||
tower.workspace = true
|
||||
tower-http.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
@@ -61,7 +57,6 @@ thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
x509-cert = { version = "0.2.5" }
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
|
||||
@@ -12,9 +12,7 @@ use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use compute_api::privilege::Privilege;
|
||||
use compute_api::responses::{ComputeCtlConfig, ComputeMetrics, ComputeStatus};
|
||||
use compute_api::spec::{
|
||||
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
|
||||
};
|
||||
use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent};
|
||||
use futures::StreamExt;
|
||||
use futures::future::join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
@@ -37,11 +35,9 @@ use crate::logger::startup_context_from_env;
|
||||
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use crate::monitor::launch_monitor;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::rsyslog::{configure_audit_rsyslog, launch_pgaudit_gc};
|
||||
use crate::spec::*;
|
||||
use crate::swap::resize_swap;
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
use crate::tls::watch_cert_for_changes;
|
||||
use crate::{config, extension_server, local_proxy};
|
||||
|
||||
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
|
||||
@@ -113,7 +109,6 @@ pub struct ComputeNode {
|
||||
|
||||
// key: ext_archive_name, value: started download time, download_completed?
|
||||
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
// store some metrics about download size that might impact startup time
|
||||
@@ -137,6 +132,8 @@ pub struct ComputeState {
|
||||
/// passed by the control plane with a /configure HTTP request.
|
||||
pub pspec: Option<ParsedSpec>,
|
||||
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
|
||||
/// If the spec is passed by a /configure request, 'startup_span' is the
|
||||
/// /configure request's tracing span. The main thread enters it when it
|
||||
/// processes the compute startup, so that the compute startup is considered
|
||||
@@ -160,6 +157,7 @@ impl ComputeState {
|
||||
last_active: None,
|
||||
error: None,
|
||||
pspec: None,
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
startup_span: None,
|
||||
metrics: ComputeMetrics::default(),
|
||||
}
|
||||
@@ -296,6 +294,79 @@ struct StartVmMonitorResult {
|
||||
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
|
||||
}
|
||||
|
||||
pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String {
|
||||
let roles = spec
|
||||
.cluster
|
||||
.roles
|
||||
.iter()
|
||||
.map(|r| escape_literal(&r.name))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let dbs = spec
|
||||
.cluster
|
||||
.databases
|
||||
.iter()
|
||||
.map(|db| escape_literal(&db.name))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let roles_decl = if roles.is_empty() {
|
||||
String::from("roles text[] := NULL;")
|
||||
} else {
|
||||
format!(
|
||||
r#"
|
||||
roles text[] := ARRAY(SELECT rolname
|
||||
FROM pg_catalog.pg_roles
|
||||
WHERE rolname IN ({}));"#,
|
||||
roles.join(", ")
|
||||
)
|
||||
};
|
||||
|
||||
let database_decl = if dbs.is_empty() {
|
||||
String::from("dbs text[] := NULL;")
|
||||
} else {
|
||||
format!(
|
||||
r#"
|
||||
dbs text[] := ARRAY(SELECT datname
|
||||
FROM pg_catalog.pg_database
|
||||
WHERE datname IN ({}));"#,
|
||||
dbs.join(", ")
|
||||
)
|
||||
};
|
||||
|
||||
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases
|
||||
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
|
||||
let query = format!(
|
||||
r#"
|
||||
DO $$
|
||||
DECLARE
|
||||
r text;
|
||||
{}
|
||||
{}
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
|
||||
THEN
|
||||
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
|
||||
IF array_length(roles, 1) IS NOT NULL THEN
|
||||
EXECUTE format('GRANT neon_superuser TO %s',
|
||||
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
|
||||
FOREACH r IN ARRAY roles LOOP
|
||||
EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r));
|
||||
END LOOP;
|
||||
END IF;
|
||||
IF array_length(dbs, 1) IS NOT NULL THEN
|
||||
EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser',
|
||||
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', '));
|
||||
END IF;
|
||||
END IF;
|
||||
END
|
||||
$$;"#,
|
||||
roles_decl, database_decl,
|
||||
);
|
||||
|
||||
query
|
||||
}
|
||||
|
||||
impl ComputeNode {
|
||||
pub fn new(
|
||||
params: ComputeNodeParams,
|
||||
@@ -313,6 +384,7 @@ impl ComputeNode {
|
||||
let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
new_state.pspec = Some(pspec);
|
||||
}
|
||||
new_state.compute_ctl_config = compute_ctl_config;
|
||||
|
||||
Ok(ComputeNode {
|
||||
params,
|
||||
@@ -321,7 +393,6 @@ impl ComputeNode {
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
ext_download_progress: RwLock::new(HashMap::new()),
|
||||
compute_ctl_config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -344,7 +415,7 @@ impl ComputeNode {
|
||||
// requests while configuration is still in progress.
|
||||
crate::http::server::Server::External {
|
||||
port: this.params.external_http_port,
|
||||
config: this.compute_ctl_config.clone(),
|
||||
jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(),
|
||||
compute_id: this.params.compute_id.clone(),
|
||||
}
|
||||
.launch(&this);
|
||||
@@ -523,16 +594,6 @@ impl ComputeNode {
|
||||
// Collect all the tasks that must finish here
|
||||
let mut pre_tasks = tokio::task::JoinSet::new();
|
||||
|
||||
// Make sure TLS certificates are properly loaded and in the right place.
|
||||
if self.compute_ctl_config.tls.is_some() {
|
||||
let this = self.clone();
|
||||
pre_tasks.spawn(async move {
|
||||
this.watch_cert_for_changes().await;
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
});
|
||||
}
|
||||
|
||||
// If there are any remote extensions in shared_preload_libraries, start downloading them
|
||||
if pspec.spec.remote_extensions.is_some() {
|
||||
let (this, spec) = (self.clone(), pspec.spec.clone());
|
||||
@@ -588,13 +649,11 @@ impl ComputeNode {
|
||||
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
|
||||
info!("tuning pgbouncer");
|
||||
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let tls_config = self.compute_ctl_config.tls.clone();
|
||||
|
||||
// Spawn a background task to do the tuning,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let _handle = tokio::spawn(async move {
|
||||
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
|
||||
let res = tune_pgbouncer(pgbouncer_settings).await;
|
||||
if let Err(err) = res {
|
||||
error!("error while tuning pgbouncer: {err:?}");
|
||||
// Continue with the startup anyway
|
||||
@@ -617,21 +676,6 @@ impl ComputeNode {
|
||||
});
|
||||
}
|
||||
|
||||
// Configure and start rsyslog if necessary
|
||||
if let ComputeAudit::Hipaa = pspec.spec.audit_log_level {
|
||||
let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string());
|
||||
if remote_endpoint.is_empty() {
|
||||
anyhow::bail!("AUDIT_LOGGING_ENDPOINT is empty");
|
||||
}
|
||||
|
||||
let log_directory_path = Path::new(&self.params.pgdata).join("log");
|
||||
let log_directory_path = log_directory_path.to_string_lossy().to_string();
|
||||
configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;
|
||||
|
||||
// Launch a background task to clean up the audit logs
|
||||
launch_pgaudit_gc(log_directory_path);
|
||||
}
|
||||
|
||||
// Launch remaining service threads
|
||||
let _monitor_handle = launch_monitor(self);
|
||||
let _configurator_handle = launch_configurator(self);
|
||||
@@ -654,9 +698,9 @@ impl ComputeNode {
|
||||
if pspec.spec.mode == ComputeMode::Primary {
|
||||
self.configure_as_primary(&compute_state)?;
|
||||
|
||||
let conf = self.get_tokio_conn_conf(None);
|
||||
tokio::task::spawn(async {
|
||||
let res = get_installed_extensions(conf).await;
|
||||
let conf = self.get_conn_conf(None);
|
||||
tokio::task::spawn_blocking(|| {
|
||||
let res = get_installed_extensions(conf);
|
||||
match res {
|
||||
Ok(extensions) => {
|
||||
info!(
|
||||
@@ -1114,10 +1158,9 @@ impl ComputeNode {
|
||||
// Remove/create an empty pgdata directory and put configuration there.
|
||||
self.create_pgdata()?;
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&pgdata_path.join("postgresql.conf"),
|
||||
&pspec.spec,
|
||||
self.params.internal_http_port,
|
||||
&self.compute_ctl_config.tls,
|
||||
)?;
|
||||
|
||||
// Syncing safekeepers is only safe with primary nodes: if a primary
|
||||
@@ -1499,13 +1542,11 @@ impl ComputeNode {
|
||||
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
|
||||
info!("tuning pgbouncer");
|
||||
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let tls_config = self.compute_ctl_config.tls.clone();
|
||||
|
||||
// Spawn a background task to do the tuning,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
tokio::spawn(async move {
|
||||
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
|
||||
let res = tune_pgbouncer(pgbouncer_settings).await;
|
||||
if let Err(err) = res {
|
||||
error!("error while tuning pgbouncer: {err:?}");
|
||||
}
|
||||
@@ -1517,8 +1558,7 @@ impl ComputeNode {
|
||||
|
||||
// Spawn a background task to do the configuration,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let mut local_proxy = local_proxy.clone();
|
||||
local_proxy.tls = self.compute_ctl_config.tls.clone();
|
||||
let local_proxy = local_proxy.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = local_proxy::configure(&local_proxy) {
|
||||
error!("error while configuring local_proxy: {err:?}");
|
||||
@@ -1528,12 +1568,8 @@ impl ComputeNode {
|
||||
|
||||
// Write new config
|
||||
let pgdata_path = Path::new(&self.params.pgdata);
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&spec,
|
||||
self.params.internal_http_port,
|
||||
&self.compute_ctl_config.tls,
|
||||
)?;
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?;
|
||||
|
||||
if !spec.skip_pg_catalog_updates {
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
@@ -1604,56 +1640,6 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn watch_cert_for_changes(self: Arc<Self>) {
|
||||
// update status on cert renewal
|
||||
if let Some(tls_config) = &self.compute_ctl_config.tls {
|
||||
let tls_config = tls_config.clone();
|
||||
|
||||
// wait until the cert exists.
|
||||
let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
'cert_update: loop {
|
||||
// let postgres/pgbouncer/local_proxy know the new cert/key exists.
|
||||
// we need to wait until it's configurable first.
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
'status_update: loop {
|
||||
match state.status {
|
||||
// let's update the state to config pending
|
||||
ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
|
||||
state.set_status(
|
||||
ComputeStatus::ConfigurationPending,
|
||||
&self.state_changed,
|
||||
);
|
||||
break 'status_update;
|
||||
}
|
||||
|
||||
// exit loop
|
||||
ComputeStatus::Failed
|
||||
| ComputeStatus::TerminationPending
|
||||
| ComputeStatus::Terminated => break 'cert_update,
|
||||
|
||||
// wait
|
||||
ComputeStatus::Init
|
||||
| ComputeStatus::Configuration
|
||||
| ComputeStatus::Empty => {
|
||||
state = self.state_changed.wait(state).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
drop(state);
|
||||
|
||||
// wait for a new certificate update
|
||||
if handle.block_on(cert_watch.changed()).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
|
||||
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use anyhow::Result;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io;
|
||||
@@ -6,13 +5,10 @@ use std::io::Write;
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
|
||||
use anyhow::Result;
|
||||
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
use crate::pg_helpers::{
|
||||
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
|
||||
};
|
||||
use crate::tls::{self, SERVER_CRT, SERVER_KEY};
|
||||
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value};
|
||||
|
||||
/// Check that `line` is inside a text file and put it there if it is not.
|
||||
/// Create file if it doesn't exist.
|
||||
@@ -40,12 +36,10 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
|
||||
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
pub fn write_postgres_conf(
|
||||
pgdata_path: &Path,
|
||||
path: &Path,
|
||||
spec: &ComputeSpec,
|
||||
extension_server_port: u16,
|
||||
tls_config: &Option<TlsConfig>,
|
||||
) -> Result<()> {
|
||||
let path = pgdata_path.join("postgresql.conf");
|
||||
// File::create() destroys the file content if it exists.
|
||||
let mut file = File::create(path)?;
|
||||
|
||||
@@ -90,20 +84,6 @@ pub fn write_postgres_conf(
|
||||
)?;
|
||||
}
|
||||
|
||||
// tls
|
||||
if let Some(tls_config) = tls_config {
|
||||
writeln!(file, "ssl = on")?;
|
||||
|
||||
// postgres requires the keyfile to be in a secure file,
|
||||
// currently too complicated to ensure that at the VM level,
|
||||
// so we just copy them to another file instead. :shrug:
|
||||
tls::update_key_path_blocking(pgdata_path, tls_config);
|
||||
|
||||
// these are the default, but good to be explicit.
|
||||
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
|
||||
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
|
||||
}
|
||||
|
||||
// Locales
|
||||
if cfg!(target_os = "macos") {
|
||||
writeln!(file, "lc_messages='C'")?;
|
||||
@@ -158,55 +138,6 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "# Managed by compute_ctl: end")?;
|
||||
}
|
||||
|
||||
// If audit logging is enabled, configure pgaudit.
|
||||
//
|
||||
// Note, that this is called after the settings from spec are written.
|
||||
// This way we always override the settings from the spec
|
||||
// and don't allow the user or the control plane admin to change them.
|
||||
if let ComputeAudit::Hipaa = spec.audit_log_level {
|
||||
writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
|
||||
// This log level is very verbose
|
||||
// but this is necessary for HIPAA compliance.
|
||||
// Exclude 'misc' category, because it doesn't contain anythig relevant.
|
||||
writeln!(file, "pgaudit.log='all, -misc'")?;
|
||||
writeln!(file, "pgaudit.log_parameter=on")?;
|
||||
// Disable logging of catalog queries
|
||||
// The catalog doesn't contain sensitive data, so we don't need to audit it.
|
||||
writeln!(file, "pgaudit.log_catalog=off")?;
|
||||
// Set log rotation to 5 minutes
|
||||
// TODO: tune this after performance testing
|
||||
writeln!(file, "pgaudit.log_rotation_age=5")?;
|
||||
|
||||
// Add audit shared_preload_libraries, if they are not present.
|
||||
//
|
||||
// The caller who sets the flag is responsible for ensuring that the necessary
|
||||
// shared_preload_libraries are present in the compute image,
|
||||
// otherwise the compute start will fail.
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
let mut extra_shared_preload_libraries = String::new();
|
||||
if !libs.contains("pgaudit") {
|
||||
extra_shared_preload_libraries.push_str(",pgaudit");
|
||||
}
|
||||
if !libs.contains("pgauditlogtofile") {
|
||||
extra_shared_preload_libraries.push_str(",pgauditlogtofile");
|
||||
}
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='{}{}'",
|
||||
libs, extra_shared_preload_libraries
|
||||
)?;
|
||||
} else {
|
||||
// Typically, this should be unreacheable,
|
||||
// because we always set at least some shared_preload_libraries in the spec
|
||||
// but let's handle it explicitly anyway.
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='neon,pgaudit,pgauditlogtofile'"
|
||||
)?;
|
||||
}
|
||||
writeln!(file, "# Managed by compute_ctl audit settings: end")?;
|
||||
}
|
||||
|
||||
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
# Load imfile module to read log files
|
||||
module(load="imfile")
|
||||
|
||||
# Input configuration for log files in the specified directory
|
||||
# Replace {log_directory} with the directory containing the log files
|
||||
input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
|
||||
# the directory to store rsyslog state files
|
||||
global(workDirectory="/var/log/rsyslog")
|
||||
|
||||
# Forward logs to remote syslog server
|
||||
*.* @@{remote_endpoint}
|
||||
@@ -202,24 +202,8 @@ pub async fn download_extension(
|
||||
// move contents of the libdir / sharedir in unzipped archive to the correct local paths
|
||||
for paths in [sharedir_paths, libdir_paths] {
|
||||
let (zip_dir, real_dir) = paths;
|
||||
|
||||
let dir = match std::fs::read_dir(&zip_dir) {
|
||||
Ok(dir) => dir,
|
||||
Err(e) => match e.kind() {
|
||||
// In the event of a SQL-only extension, there would be nothing
|
||||
// to move from the lib/ directory, so note that in the log and
|
||||
// move on.
|
||||
std::io::ErrorKind::NotFound => {
|
||||
info!("nothing to move from {}", zip_dir);
|
||||
continue;
|
||||
}
|
||||
_ => return Err(anyhow::anyhow!(e)),
|
||||
},
|
||||
};
|
||||
|
||||
info!("mv {zip_dir:?}/* {real_dir:?}");
|
||||
|
||||
for file in dir {
|
||||
for file in std::fs::read_dir(zip_dir)? {
|
||||
let old_file = file?.path();
|
||||
let new_file =
|
||||
Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?);
|
||||
@@ -269,31 +253,27 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
|
||||
}
|
||||
}
|
||||
|
||||
// Do request to extension storage proxy, e.g.,
|
||||
// Do request to extension storage proxy, i.e.
|
||||
// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
|
||||
// using HTTP GET and return the response body as bytes.
|
||||
// using HHTP GET
|
||||
// and return the response body as bytes
|
||||
//
|
||||
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
|
||||
let uri = format!("{}/{}", ext_remote_storage, ext_path);
|
||||
let filename = Path::new(ext_path)
|
||||
.file_name()
|
||||
.unwrap_or_else(|| std::ffi::OsStr::new("unknown"))
|
||||
.to_str()
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
info!("Downloading extension file '{}' from uri {}", filename, uri);
|
||||
info!("Download extension {} from uri {}", ext_path, uri);
|
||||
|
||||
match do_extension_server_request(&uri).await {
|
||||
Ok(resp) => {
|
||||
info!("Successfully downloaded remote extension data {}", ext_path);
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&StatusCode::OK.to_string(), &filename])
|
||||
.with_label_values(&[&StatusCode::OK.to_string()])
|
||||
.inc();
|
||||
Ok(resp)
|
||||
}
|
||||
Err((msg, status)) => {
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&status, &filename])
|
||||
.with_label_values(&[&status])
|
||||
.inc();
|
||||
bail!(msg);
|
||||
}
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
pub(in crate::http) mod authorize;
|
||||
pub(in crate::http) mod request_id;
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
use axum::{extract::Request, middleware::Next, response::Response};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::http::headers::X_REQUEST_ID;
|
||||
|
||||
/// This middleware function allows compute_ctl to generate its own request ID
|
||||
/// if one isn't supplied. The control plane will always send one as a UUID. The
|
||||
/// neon Postgres extension on the other hand does not send one.
|
||||
pub async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
|
||||
let headers = request.headers_mut();
|
||||
if !headers.contains_key(X_REQUEST_ID) {
|
||||
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
|
||||
}
|
||||
|
||||
next.run(request).await
|
||||
}
|
||||
@@ -5,19 +5,20 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::Router;
|
||||
use axum::middleware::{self};
|
||||
use axum::response::IntoResponse;
|
||||
use axum::extract::Request;
|
||||
use axum::middleware::{self, Next};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::routing::{get, post};
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use http::StatusCode;
|
||||
use jsonwebtoken::jwk::JwkSet;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::{
|
||||
auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer,
|
||||
};
|
||||
use tracing::{Span, error, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::middleware::request_id::maybe_add_request_id_header;
|
||||
use super::{
|
||||
headers::X_REQUEST_ID,
|
||||
middleware::authorize::Authorize,
|
||||
@@ -41,7 +42,7 @@ pub enum Server {
|
||||
},
|
||||
External {
|
||||
port: u16,
|
||||
config: ComputeCtlConfig,
|
||||
jwks: JwkSet,
|
||||
compute_id: String,
|
||||
},
|
||||
}
|
||||
@@ -79,7 +80,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
router
|
||||
}
|
||||
Server::External {
|
||||
config, compute_id, ..
|
||||
jwks, compute_id, ..
|
||||
} => {
|
||||
let unauthenticated_router =
|
||||
Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
|
||||
@@ -95,7 +96,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
.route("/terminate", post(terminate::terminate))
|
||||
.layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
|
||||
compute_id.clone(),
|
||||
config.jwks.clone(),
|
||||
jwks.clone(),
|
||||
)));
|
||||
|
||||
router
|
||||
@@ -218,3 +219,15 @@ impl Server {
|
||||
tokio::spawn(self.serve(state));
|
||||
}
|
||||
}
|
||||
|
||||
/// This middleware function allows compute_ctl to generate its own request ID
|
||||
/// if one isn't supplied. The control plane will always send one as a UUID. The
|
||||
/// neon Postgres extension on the other hand does not send one.
|
||||
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
|
||||
let headers = request.headers_mut();
|
||||
if headers.get(X_REQUEST_ID).is_none() {
|
||||
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
|
||||
}
|
||||
|
||||
next.run(request).await
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use tokio_postgres::{Client, Config, NoTls};
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
/// and to make database listing query here more explicit.
|
||||
///
|
||||
/// Limit the number of databases to 500 to avoid excessive load.
|
||||
async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
// `pg_database.datconnlimit = -2` means that the database is in the
|
||||
// invalid state
|
||||
let databases = client
|
||||
@@ -20,8 +20,7 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
AND datconnlimit <> - 2
|
||||
LIMIT 500",
|
||||
&[],
|
||||
)
|
||||
.await?
|
||||
)?
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let db: String = row.get("datname");
|
||||
@@ -37,36 +36,20 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
/// Same extension can be installed in multiple databases with different versions,
|
||||
/// so we report a separate metric (number of databases where it is installed)
|
||||
/// for each extension version.
|
||||
pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
|
||||
pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<InstalledExtensions> {
|
||||
conf.application_name("compute_ctl:get_installed_extensions");
|
||||
let databases: Vec<String> = {
|
||||
let (mut client, connection) = conf.connect(NoTls).await?;
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
list_dbs(&mut client).await?
|
||||
};
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
let databases: Vec<String> = list_dbs(&mut client)?;
|
||||
|
||||
let mut extensions_map: HashMap<(String, String, String), InstalledExtension> = HashMap::new();
|
||||
for db in databases.iter() {
|
||||
conf.dbname(db);
|
||||
|
||||
let (client, connection) = conf.connect(NoTls).await?;
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
let extensions: Vec<(String, String, i32)> = client
|
||||
let mut db_client = conf.connect(NoTls)?;
|
||||
let extensions: Vec<(String, String, i32)> = db_client
|
||||
.query(
|
||||
"SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension",
|
||||
&[],
|
||||
)
|
||||
.await?
|
||||
)?
|
||||
.iter()
|
||||
.map(|row| {
|
||||
(
|
||||
|
||||
@@ -21,9 +21,7 @@ mod migration;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod rsyslog;
|
||||
pub mod spec;
|
||||
mod spec_apply;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
pub mod tls;
|
||||
|
||||
@@ -24,8 +24,7 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result
|
||||
.with_writer(std::io::stderr);
|
||||
|
||||
// Initialize OpenTelemetry
|
||||
let otlp_layer =
|
||||
tracing_utils::init_tracing("compute_ctl", tracing_utils::ExportConfig::default()).await;
|
||||
let otlp_layer = tracing_utils::init_tracing("compute_ctl").await;
|
||||
|
||||
// Put it all together
|
||||
tracing_subscriber::registry()
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use metrics::core::{AtomicF64, Collector, GenericGauge};
|
||||
use metrics::core::Collector;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{
|
||||
IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec,
|
||||
};
|
||||
use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
@@ -56,16 +54,9 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(||
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_remote_ext_requests_total",
|
||||
"Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
|
||||
&["http_status", "filename"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Size of audit log directory in bytes
|
||||
pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
|
||||
register_gauge!(
|
||||
"compute_audit_log_dir_size",
|
||||
"Size of audit log directory in bytes",
|
||||
// Do not use any labels like extension name yet.
|
||||
// We can add them later if needed.
|
||||
&["http_status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
@@ -75,6 +66,5 @@ pub fn collect() -> Vec<MetricFamily> {
|
||||
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
|
||||
metrics
|
||||
}
|
||||
|
||||
@@ -10,10 +10,8 @@ use std::str::FromStr;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
||||
use futures::StreamExt;
|
||||
use indexmap::IndexMap;
|
||||
use ini::Ini;
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
use postgres::config::Config;
|
||||
@@ -188,40 +186,15 @@ impl DatabaseExt for Database {
|
||||
/// Postgres SQL queries and DATABASE_URL.
|
||||
pub trait Escaping {
|
||||
fn pg_quote(&self) -> String;
|
||||
fn pg_quote_dollar(&self) -> (String, String);
|
||||
}
|
||||
|
||||
impl Escaping for PgIdent {
|
||||
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
|
||||
/// always quotes provided string with `""` and escapes every `"`.
|
||||
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
|
||||
/// N.B. it's not useful for escaping identifiers that are used inside WHERE
|
||||
/// clause, use `escape_literal()` instead.
|
||||
fn pg_quote(&self) -> String {
|
||||
format!("\"{}\"", self.replace('"', "\"\""))
|
||||
}
|
||||
|
||||
/// This helper is intended to be used for dollar-escaping strings for usage
|
||||
/// inside PL/pgSQL procedures. In addition to dollar-escaping the string,
|
||||
/// it also returns a tag that is intended to be used inside the outer
|
||||
/// PL/pgSQL procedure. If you do not need an outer tag, just discard it.
|
||||
/// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,
|
||||
/// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>
|
||||
fn pg_quote_dollar(&self) -> (String, String) {
|
||||
let mut tag: String = "x".to_string();
|
||||
let mut outer_tag = "xx".to_string();
|
||||
|
||||
// Find the first suitable tag that is not present in the string.
|
||||
// Postgres' max role/DB name length is 63 bytes, so even in the
|
||||
// worst case it won't take long.
|
||||
while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) {
|
||||
tag += "x";
|
||||
outer_tag = tag.clone() + "x";
|
||||
}
|
||||
|
||||
let escaped = format!("${tag}${self}${tag}$");
|
||||
|
||||
(escaped, outer_tag)
|
||||
let result = format!("\"{}\"", self.replace('"', "\"\""));
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -253,13 +226,10 @@ pub async fn get_existing_dbs_async(
|
||||
// invalid state. See:
|
||||
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
|
||||
let rowstream = client
|
||||
// We use a subquery instead of a fancy `datdba::regrole::text AS owner`,
|
||||
// because the latter automatically wraps the result in double quotes,
|
||||
// if the role name contains special characters.
|
||||
.query_raw::<str, &String, &[String; 0]>(
|
||||
"SELECT
|
||||
datname AS name,
|
||||
(SELECT rolname FROM pg_roles WHERE oid = datdba) AS owner,
|
||||
datdba::regrole::text AS owner,
|
||||
NOT datallowconn AS restrict_conn,
|
||||
datconnlimit = - 2 AS invalid
|
||||
FROM
|
||||
@@ -408,7 +378,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
|
||||
|
||||
/// Update pgbouncer.ini with provided options
|
||||
fn update_pgbouncer_ini(
|
||||
pgbouncer_config: IndexMap<String, String>,
|
||||
pgbouncer_config: HashMap<String, String>,
|
||||
pgbouncer_ini_path: &str,
|
||||
) -> Result<()> {
|
||||
let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
|
||||
@@ -429,10 +399,7 @@ fn update_pgbouncer_ini(
|
||||
/// Tune pgbouncer.
|
||||
/// 1. Apply new config using pgbouncer admin console
|
||||
/// 2. Add new values to pgbouncer.ini to preserve them after restart
|
||||
pub async fn tune_pgbouncer(
|
||||
mut pgbouncer_config: IndexMap<String, String>,
|
||||
tls_config: Option<TlsConfig>,
|
||||
) -> Result<()> {
|
||||
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
|
||||
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
|
||||
// for VMs use pgbouncer specific way to connect to
|
||||
// pgbouncer admin console without password
|
||||
@@ -478,21 +445,19 @@ pub async fn tune_pgbouncer(
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(tls_config) = tls_config {
|
||||
// pgbouncer starts in a half-ok state if it cannot find these files.
|
||||
// It will default to client_tls_sslmode=deny, which causes proxy to error.
|
||||
// There is a small window at startup where these files don't yet exist in the VM.
|
||||
// Best to wait until it exists.
|
||||
loop {
|
||||
if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await
|
||||
}
|
||||
// Apply new config
|
||||
for (option_name, value) in pgbouncer_config.iter() {
|
||||
let query = format!("SET {}={}", option_name, value);
|
||||
// keep this log line for debugging purposes
|
||||
info!("Applying pgbouncer setting change: {}", query);
|
||||
|
||||
pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
|
||||
pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
|
||||
pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
|
||||
if let Err(err) = client.simple_query(&query).await {
|
||||
// Don't fail on error, just print it into log
|
||||
error!(
|
||||
"Failed to apply pgbouncer setting change: {}, {}",
|
||||
query, err
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
// save values to pgbouncer.ini
|
||||
@@ -508,13 +473,6 @@ pub async fn tune_pgbouncer(
|
||||
};
|
||||
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
|
||||
|
||||
info!("Applying pgbouncer setting change");
|
||||
|
||||
if let Err(err) = client.simple_query("RELOAD").await {
|
||||
// Don't fail on error, just print it into log
|
||||
error!("Failed to apply pgbouncer setting change, {err}",);
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
use std::{fs::OpenOptions, io::Write};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use tracing::{error, info, instrument, warn};
|
||||
|
||||
fn get_rsyslog_pid() -> Option<String> {
|
||||
let output = Command::new("pgrep")
|
||||
.arg("rsyslogd")
|
||||
.output()
|
||||
.expect("Failed to execute pgrep");
|
||||
|
||||
if !output.stdout.is_empty() {
|
||||
let pid = std::str::from_utf8(&output.stdout)
|
||||
.expect("Invalid UTF-8 in process output")
|
||||
.trim()
|
||||
.to_string();
|
||||
Some(pid)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Restart rsyslogd to apply the new configuration.
|
||||
// This is necessary, because there is no other way to reload the rsyslog configuration.
|
||||
//
|
||||
// Rsyslogd shouldn't lose any messages, because of the restart,
|
||||
// because it tracks the last read position in the log files
|
||||
// and will continue reading from that position.
|
||||
// TODO: test it properly
|
||||
//
|
||||
fn restart_rsyslog() -> Result<()> {
|
||||
let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?;
|
||||
info!("rsyslogd is running with pid: {}, restart it", old_pid);
|
||||
|
||||
// kill it to restart
|
||||
let _ = Command::new("pkill")
|
||||
.arg("rsyslogd")
|
||||
.output()
|
||||
.context("Failed to stop rsyslogd")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn configure_audit_rsyslog(
|
||||
log_directory: String,
|
||||
tag: &str,
|
||||
remote_endpoint: &str,
|
||||
) -> Result<()> {
|
||||
let config_content: String = format!(
|
||||
include_str!("config_template/compute_audit_rsyslog_template.conf"),
|
||||
log_directory = log_directory,
|
||||
tag = tag,
|
||||
remote_endpoint = remote_endpoint
|
||||
);
|
||||
|
||||
info!("rsyslog config_content: {}", config_content);
|
||||
|
||||
let rsyslog_conf_path = "/etc/rsyslog.d/compute_audit_rsyslog.conf";
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(rsyslog_conf_path)?;
|
||||
|
||||
file.write_all(config_content.as_bytes())?;
|
||||
|
||||
info!(
|
||||
"rsyslog configuration file {} added successfully. Starting rsyslogd",
|
||||
rsyslog_conf_path
|
||||
);
|
||||
|
||||
// start the service, using the configuration
|
||||
restart_rsyslog()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {
|
||||
info!("running pgaudit GC main loop");
|
||||
loop {
|
||||
// Check log_directory for old pgaudit logs and delete them.
|
||||
// New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age
|
||||
// Find files that were not modified in the last 15 minutes and delete them.
|
||||
// This should be enough time for rsyslog to process the logs and for us to catch the alerts.
|
||||
//
|
||||
// In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age.
|
||||
//
|
||||
// TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog
|
||||
// imfile-state files, but for now just do a simple GC to avoid filling up the disk.
|
||||
let _ = Command::new("find")
|
||||
.arg(&log_directory)
|
||||
.arg("-name")
|
||||
.arg("audit*.log")
|
||||
.arg("-mmin")
|
||||
.arg("+15")
|
||||
.arg("-delete")
|
||||
.output()?;
|
||||
|
||||
// also collect the metric for the size of the log directory
|
||||
async fn get_log_files_size(path: &Path) -> Result<u64> {
|
||||
let mut total_size = 0;
|
||||
|
||||
for entry in fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
let entry_path = entry.path();
|
||||
|
||||
if entry_path.is_file() && entry_path.to_string_lossy().ends_with("log") {
|
||||
total_size += entry.metadata()?.len();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(total_size)
|
||||
}
|
||||
|
||||
let log_directory_size = get_log_files_size(Path::new(&log_directory))
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!("Failed to get log directory size: {}", e);
|
||||
0
|
||||
});
|
||||
crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64);
|
||||
tokio::time::sleep(Duration::from_secs(60)).await;
|
||||
}
|
||||
}
|
||||
|
||||
// launch pgaudit GC thread to clean up the old pgaudit logs stored in the log_directory
|
||||
pub fn launch_pgaudit_gc(log_directory: String) {
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = pgaudit_gc_main_loop(log_directory).await {
|
||||
error!("pgaudit GC main loop failed: {}", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -6,22 +6,21 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
use tokio_postgres::error::SqlState;
|
||||
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
|
||||
|
||||
use crate::compute::{ComputeNode, ComputeState};
|
||||
use crate::compute::{ComputeNode, ComputeState, construct_superuser_query};
|
||||
use crate::pg_helpers::{
|
||||
DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
|
||||
DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async,
|
||||
get_existing_roles_async,
|
||||
};
|
||||
use crate::spec_apply::ApplySpecPhase::{
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
|
||||
CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
|
||||
DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
|
||||
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
|
||||
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
|
||||
RunInEachDatabase,
|
||||
};
|
||||
@@ -188,7 +187,7 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
for phase in [
|
||||
CreateNeonSuperuser,
|
||||
CreateSuperUser,
|
||||
DropInvalidDatabases,
|
||||
RenameRoles,
|
||||
CreateAndAlterRoles,
|
||||
@@ -278,19 +277,6 @@ impl ComputeNode {
|
||||
phases.push(FinalizeDropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
// Keep DisablePostgresDBPgAudit phase at the end,
|
||||
// so that all config operations are audit logged.
|
||||
match spec.audit_log_level
|
||||
{
|
||||
ComputeAudit::Hipaa => {
|
||||
phases.push(CreatePgauditExtension);
|
||||
phases.push(CreatePgauditlogtofileExtension);
|
||||
phases.push(DisablePostgresDBPgAudit);
|
||||
}
|
||||
ComputeAudit::Log => { /* not implemented yet */ }
|
||||
ComputeAudit::Disabled => {}
|
||||
}
|
||||
|
||||
for phase in phases {
|
||||
debug!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
@@ -469,7 +455,7 @@ pub enum PerDatabasePhase {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ApplySpecPhase {
|
||||
CreateNeonSuperuser,
|
||||
CreateSuperUser,
|
||||
DropInvalidDatabases,
|
||||
RenameRoles,
|
||||
CreateAndAlterRoles,
|
||||
@@ -477,9 +463,6 @@ pub enum ApplySpecPhase {
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
|
||||
CreatePgauditExtension,
|
||||
CreatePgauditlogtofileExtension,
|
||||
DisablePostgresDBPgAudit,
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
@@ -596,10 +579,14 @@ async fn get_operations<'a>(
|
||||
apply_spec_phase: &'a ApplySpecPhase,
|
||||
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
|
||||
match apply_spec_phase {
|
||||
ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
|
||||
query: include_str!("sql/create_neon_superuser.sql").to_string(),
|
||||
comment: None,
|
||||
}))),
|
||||
ApplySpecPhase::CreateSuperUser => {
|
||||
let query = construct_superuser_query(spec);
|
||||
|
||||
Ok(Box::new(once(Operation {
|
||||
query,
|
||||
comment: None,
|
||||
})))
|
||||
}
|
||||
ApplySpecPhase::DropInvalidDatabases => {
|
||||
let mut ctx = ctx.write().await;
|
||||
let databases = &mut ctx.dbs;
|
||||
@@ -733,15 +720,14 @@ async fn get_operations<'a>(
|
||||
// We do not check whether the DB exists or not,
|
||||
// Postgres will take care of it for us
|
||||
"delete_db" => {
|
||||
let (db_name, outer_tag) = op.name.pg_quote_dollar();
|
||||
// In Postgres we can't drop a database if it is a template.
|
||||
// So we need to unset the template flag first, but it could
|
||||
// be a retry, so we could've already dropped the database.
|
||||
// Check that database exists first to make it idempotent.
|
||||
let unset_template_query: String = format!(
|
||||
include_str!("sql/unset_template_for_drop_dbs.sql"),
|
||||
datname = db_name,
|
||||
outer_tag = outer_tag,
|
||||
datname_str = escape_literal(&op.name),
|
||||
datname = &op.name.pg_quote()
|
||||
);
|
||||
|
||||
// Use FORCE to drop database even if there are active connections.
|
||||
@@ -848,8 +834,6 @@ async fn get_operations<'a>(
|
||||
comment: None,
|
||||
},
|
||||
Operation {
|
||||
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
|
||||
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
|
||||
query: format!(
|
||||
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
|
||||
db.name.pg_quote()
|
||||
@@ -909,11 +893,9 @@ async fn get_operations<'a>(
|
||||
PerDatabasePhase::DropLogicalSubscriptions => {
|
||||
match &db {
|
||||
DB::UserDB(db) => {
|
||||
let (db_name, outer_tag) = db.name.pg_quote_dollar();
|
||||
let drop_subscription_query: String = format!(
|
||||
include_str!("sql/drop_subscriptions.sql"),
|
||||
datname_str = db_name,
|
||||
outer_tag = outer_tag,
|
||||
datname_str = escape_literal(&db.name),
|
||||
);
|
||||
|
||||
let operations = vec![Operation {
|
||||
@@ -952,7 +934,6 @@ async fn get_operations<'a>(
|
||||
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
|
||||
DB::UserDB(db) => db.owner.pg_quote(),
|
||||
};
|
||||
let (escaped_role, outer_tag) = op.name.pg_quote_dollar();
|
||||
|
||||
Some(vec![
|
||||
// This will reassign all dependent objects to the db owner
|
||||
@@ -967,9 +948,7 @@ async fn get_operations<'a>(
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
|
||||
// N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
|
||||
role_name = escaped_role,
|
||||
outer_tag = outer_tag,
|
||||
role_name = quoted,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
@@ -994,14 +973,12 @@ async fn get_operations<'a>(
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
let (db_owner, outer_tag) = db.owner.pg_quote_dollar();
|
||||
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/set_public_schema_owner.sql"),
|
||||
db_owner = db_owner,
|
||||
outer_tag = outer_tag,
|
||||
db_owner = db.owner.pg_quote()
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
@@ -1121,25 +1098,6 @@ async fn get_operations<'a>(
|
||||
}
|
||||
Ok(Box::new(empty()))
|
||||
}
|
||||
ApplySpecPhase::CreatePgauditExtension => Ok(Box::new(once(Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS pgaudit"),
|
||||
comment: Some(String::from("create pgaudit extensions")),
|
||||
}))),
|
||||
ApplySpecPhase::CreatePgauditlogtofileExtension => Ok(Box::new(once(Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS pgauditlogtofile"),
|
||||
comment: Some(String::from("create pgauditlogtofile extensions")),
|
||||
}))),
|
||||
// Disable pgaudit logging for postgres database.
|
||||
// Postgres is neon system database used by monitors
|
||||
// and compute_ctl tuning functions and thus generates a lot of noise.
|
||||
// We do not consider data stored in this database as sensitive.
|
||||
ApplySpecPhase::DisablePostgresDBPgAudit => {
|
||||
let query = "ALTER DATABASE postgres SET pgaudit.log to 'none'";
|
||||
Ok(Box::new(once(Operation {
|
||||
query: query.to_string(),
|
||||
comment: Some(query.to_string()),
|
||||
})))
|
||||
}
|
||||
ApplySpecPhase::HandleNeonExtension => {
|
||||
let operations = vec![
|
||||
Operation {
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
|
||||
THEN
|
||||
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
@@ -1,4 +1,4 @@
|
||||
DO ${outer_tag}$
|
||||
DO $$
|
||||
DECLARE
|
||||
subname TEXT;
|
||||
BEGIN
|
||||
@@ -9,4 +9,4 @@ BEGIN
|
||||
EXECUTE format('DROP SUBSCRIPTION %I;', subname);
|
||||
END LOOP;
|
||||
END;
|
||||
${outer_tag}$;
|
||||
$$;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
DO ${outer_tag}$
|
||||
SET SESSION ROLE neon_superuser;
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
schema TEXT;
|
||||
grantor TEXT;
|
||||
revoke_query TEXT;
|
||||
BEGIN
|
||||
FOR schema IN
|
||||
@@ -14,25 +15,14 @@ BEGIN
|
||||
-- ii) it's easy to add more schemas to the list if needed.
|
||||
WHERE schema_name IN ('public')
|
||||
LOOP
|
||||
FOR grantor IN EXECUTE
|
||||
format(
|
||||
'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee = %s',
|
||||
-- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
|
||||
quote_literal({role_name})
|
||||
)
|
||||
LOOP
|
||||
EXECUTE format('SET LOCAL ROLE %I', grantor);
|
||||
revoke_query := format(
|
||||
'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
|
||||
schema
|
||||
);
|
||||
|
||||
revoke_query := format(
|
||||
'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY %I',
|
||||
schema,
|
||||
-- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
|
||||
{role_name},
|
||||
grantor
|
||||
);
|
||||
|
||||
EXECUTE revoke_query;
|
||||
END LOOP;
|
||||
EXECUTE revoke_query;
|
||||
END LOOP;
|
||||
END;
|
||||
${outer_tag}$;
|
||||
$$;
|
||||
|
||||
RESET ROLE;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
DO ${outer_tag}$
|
||||
DO
|
||||
$$
|
||||
DECLARE
|
||||
schema_owner TEXT;
|
||||
BEGIN
|
||||
@@ -15,8 +16,8 @@ DO ${outer_tag}$
|
||||
|
||||
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'
|
||||
THEN
|
||||
EXECUTE format('ALTER SCHEMA public OWNER TO %I', {db_owner});
|
||||
ALTER SCHEMA public OWNER TO {db_owner};
|
||||
END IF;
|
||||
END IF;
|
||||
END
|
||||
${outer_tag}$;
|
||||
$$;
|
||||
@@ -1,12 +1,12 @@
|
||||
DO ${outer_tag}$
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_database
|
||||
WHERE datname = {datname}
|
||||
WHERE datname = {datname_str}
|
||||
)
|
||||
THEN
|
||||
EXECUTE format('ALTER DATABASE %I is_template false', {datname});
|
||||
ALTER DATABASE {datname} is_template false;
|
||||
END IF;
|
||||
END
|
||||
${outer_tag}$;
|
||||
$$;
|
||||
@@ -1,118 +0,0 @@
|
||||
use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use ring::digest;
|
||||
use spki::ObjectIdentifier;
|
||||
use spki::der::{Decode, PemReader};
|
||||
use x509_cert::Certificate;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct CertDigest(digest::Digest);
|
||||
|
||||
pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
|
||||
let mut digest = compute_digest(&cert_path).await;
|
||||
let (tx, rx) = tokio::sync::watch::channel(digest);
|
||||
tokio::spawn(async move {
|
||||
while !tx.is_closed() {
|
||||
let new_digest = compute_digest(&cert_path).await;
|
||||
if digest.0.as_ref() != new_digest.0.as_ref() {
|
||||
digest = new_digest;
|
||||
_ = tx.send(digest);
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(60)).await
|
||||
}
|
||||
});
|
||||
rx
|
||||
}
|
||||
|
||||
async fn compute_digest(cert_path: &str) -> CertDigest {
|
||||
loop {
|
||||
match try_compute_digest(cert_path).await {
|
||||
Ok(d) => break d,
|
||||
Err(e) => {
|
||||
tracing::error!("could not read cert file {e:?}");
|
||||
tokio::time::sleep(Duration::from_secs(1)).await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
|
||||
let data = tokio::fs::read(cert_path).await?;
|
||||
// sha256 is extremely collision resistent. can safely assume the digest to be unique
|
||||
Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
|
||||
}
|
||||
|
||||
pub const SERVER_CRT: &str = "server.crt";
|
||||
pub const SERVER_KEY: &str = "server.key";
|
||||
|
||||
pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
|
||||
loop {
|
||||
match try_update_key_path_blocking(pg_data, tls_config) {
|
||||
Ok(()) => break,
|
||||
Err(e) => {
|
||||
tracing::error!("could not create key file {e:?}");
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Postgres requires the keypath be "secure". This means
|
||||
// 1. Owned by the postgres user.
|
||||
// 2. Have permission 600.
|
||||
fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
|
||||
let key = std::fs::read_to_string(&tls_config.key_path)?;
|
||||
let crt = std::fs::read_to_string(&tls_config.cert_path)?;
|
||||
|
||||
// to mitigate a race condition during renewal.
|
||||
verify_key_cert(&key, &crt)?;
|
||||
|
||||
let mut key_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o600)
|
||||
.open(pg_data.join(SERVER_KEY))?;
|
||||
|
||||
let mut crt_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o600)
|
||||
.open(pg_data.join(SERVER_CRT))?;
|
||||
|
||||
key_file.write_all(key.as_bytes())?;
|
||||
crt_file.write_all(crt.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
|
||||
|
||||
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
|
||||
.context("decode cert")?;
|
||||
|
||||
match cert.signature_algorithm.oid {
|
||||
ECDSA_WITH_SHA256 => {
|
||||
let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
|
||||
|
||||
let a = key.public_key().to_sec1_bytes();
|
||||
let b = cert
|
||||
.tbs_certificate
|
||||
.subject_public_key_info
|
||||
.subject_public_key
|
||||
.raw_bytes();
|
||||
|
||||
if *a != *b {
|
||||
bail!("private key file does not match certificate")
|
||||
}
|
||||
}
|
||||
_ => bail!("unknown TLS key type"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -61,24 +61,6 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
|
||||
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ident_pg_quote_dollar() {
|
||||
let test_cases = vec![
|
||||
("name", ("$x$name$x$", "xx")),
|
||||
("name$", ("$x$name$$x$", "xx")),
|
||||
("name$$", ("$x$name$$$x$", "xx")),
|
||||
("name$$$", ("$x$name$$$$x$", "xx")),
|
||||
("name$$$$", ("$x$name$$$$$x$", "xx")),
|
||||
("name$x$", ("$xx$name$x$$xx$", "xxx")),
|
||||
];
|
||||
|
||||
for (input, expected) in test_cases {
|
||||
let (escaped, tag) = PgIdent::from(input).pg_quote_dollar();
|
||||
assert_eq!(escaped, expected.0);
|
||||
assert_eq!(tag, expected.1);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generic_options_search() {
|
||||
let generic_options: GenericOptions = Some(vec![
|
||||
|
||||
@@ -36,9 +36,7 @@ use pageserver_api::config::{
|
||||
use pageserver_api::controller_api::{
|
||||
NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
ShardParameters, TenantConfigRequest, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
|
||||
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::parse_host_port;
|
||||
@@ -965,7 +963,6 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
id: pageserver_id,
|
||||
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
|
||||
listen_http_addr: format!("127.0.0.1:{http_port}"),
|
||||
listen_https_addr: None,
|
||||
pg_auth_type: AuthType::Trust,
|
||||
http_auth_type: AuthType::Trust,
|
||||
other: Default::default(),
|
||||
@@ -979,8 +976,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
neon_distrib_dir: None,
|
||||
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
|
||||
storage_controller: None,
|
||||
control_plane_hooks_api: None,
|
||||
generate_local_ssl_certs: false,
|
||||
control_plane_compute_hook_api: None,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1131,16 +1127,12 @@ async fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> any
|
||||
let tenant_id = get_tenant_id(args.tenant_id, env)?;
|
||||
let tenant_conf: HashMap<_, _> =
|
||||
args.config.iter().flat_map(|c| c.split_once(':')).collect();
|
||||
let config = PageServerNode::parse_config(tenant_conf)?;
|
||||
|
||||
let req = TenantConfigRequest { tenant_id, config };
|
||||
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.set_tenant_config(&req)
|
||||
pageserver
|
||||
.tenant_config(tenant_id, tenant_conf)
|
||||
.await
|
||||
.with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
|
||||
println!("tenant {tenant_id} successfully configured via storcon");
|
||||
println!("tenant {tenant_id} successfully configured on the pageserver");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -48,8 +48,7 @@ use anyhow::{Context, Result, anyhow, bail};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::{
|
||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
||||
RemoteExtSpec, Role,
|
||||
Cluster, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, RemoteExtSpec, Role,
|
||||
};
|
||||
use nix::sys::signal::{Signal, kill};
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
@@ -669,7 +668,6 @@ impl Endpoint {
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: self.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
audit_log_level: ComputeAudit::Disabled,
|
||||
};
|
||||
|
||||
// this strange code is needed to support respec() in tests
|
||||
|
||||
@@ -72,19 +72,15 @@ pub struct LocalEnv {
|
||||
// be propagated into each pageserver's configuration.
|
||||
pub control_plane_api: Url,
|
||||
|
||||
// Control plane upcall APIs for storage controller. If set, this will be propagated into the
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
|
||||
/// Flag to generate SSL certificates for components that need it.
|
||||
/// Also generates root CA certificate that is used to sign all other certificates.
|
||||
pub generate_local_ssl_certs: bool,
|
||||
}
|
||||
|
||||
/// On-disk state stored in `.neon/config`.
|
||||
@@ -104,13 +100,8 @@ pub struct OnDiskConfig {
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
// Note: skip serializing because in compat tests old storage controller fails
|
||||
// to load new config file. May be removed after this field is in release branch.
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
pub generate_local_ssl_certs: bool,
|
||||
}
|
||||
|
||||
fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
|
||||
@@ -137,8 +128,7 @@ pub struct NeonLocalInitConf {
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub generate_local_ssl_certs: bool,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
@@ -149,7 +139,7 @@ pub struct NeonBroker {
|
||||
pub listen_addr: SocketAddr,
|
||||
}
|
||||
|
||||
/// A part of storage controller's config the neon_local knows about.
|
||||
/// Broker config for cluster internal communication.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct NeonStorageControllerConf {
|
||||
@@ -175,12 +165,6 @@ pub struct NeonStorageControllerConf {
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub long_reconcile_threshold: Option<Duration>,
|
||||
|
||||
pub use_https_pageserver_api: bool,
|
||||
|
||||
pub timelines_onto_safekeepers: bool,
|
||||
|
||||
pub use_https_safekeeper_api: bool,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -204,9 +188,6 @@ impl Default for NeonStorageControllerConf {
|
||||
max_secondary_lag_bytes: None,
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
use_https_pageserver_api: false,
|
||||
timelines_onto_safekeepers: false,
|
||||
use_https_safekeeper_api: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -236,7 +217,6 @@ pub struct PageServerConf {
|
||||
pub id: NodeId,
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub listen_https_addr: Option<String>,
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
pub no_sync: bool,
|
||||
@@ -248,7 +228,6 @@ impl Default for PageServerConf {
|
||||
id: NodeId(0),
|
||||
listen_pg_addr: String::new(),
|
||||
listen_http_addr: String::new(),
|
||||
listen_https_addr: None,
|
||||
pg_auth_type: AuthType::Trust,
|
||||
http_auth_type: AuthType::Trust,
|
||||
no_sync: false,
|
||||
@@ -264,7 +243,6 @@ pub struct NeonLocalInitPageserverConf {
|
||||
pub id: NodeId,
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub listen_https_addr: Option<String>,
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
@@ -279,7 +257,6 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
|
||||
id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
listen_https_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
no_sync,
|
||||
@@ -289,7 +266,6 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
|
||||
id: *id,
|
||||
listen_pg_addr: listen_pg_addr.clone(),
|
||||
listen_http_addr: listen_http_addr.clone(),
|
||||
listen_https_addr: listen_https_addr.clone(),
|
||||
pg_auth_type: *pg_auth_type,
|
||||
http_auth_type: *http_auth_type,
|
||||
no_sync: *no_sync,
|
||||
@@ -304,7 +280,6 @@ pub struct SafekeeperConf {
|
||||
pub pg_port: u16,
|
||||
pub pg_tenant_only_port: Option<u16>,
|
||||
pub http_port: u16,
|
||||
pub https_port: Option<u16>,
|
||||
pub sync: bool,
|
||||
pub remote_storage: Option<String>,
|
||||
pub backup_threads: Option<u32>,
|
||||
@@ -319,7 +294,6 @@ impl Default for SafekeeperConf {
|
||||
pg_port: 0,
|
||||
pg_tenant_only_port: None,
|
||||
http_port: 0,
|
||||
https_port: None,
|
||||
sync: true,
|
||||
remote_storage: None,
|
||||
backup_threads: None,
|
||||
@@ -436,41 +410,6 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ssl_ca_cert_path(&self) -> Option<PathBuf> {
|
||||
if self.generate_local_ssl_certs {
|
||||
Some(self.base_data_dir.join("rootCA.crt"))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ssl_ca_key_path(&self) -> Option<PathBuf> {
|
||||
if self.generate_local_ssl_certs {
|
||||
Some(self.base_data_dir.join("rootCA.key"))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_ssl_ca_cert(&self) -> anyhow::Result<()> {
|
||||
let cert_path = self.ssl_ca_cert_path().unwrap();
|
||||
let key_path = self.ssl_ca_key_path().unwrap();
|
||||
if !fs::exists(cert_path.as_path())? {
|
||||
generate_ssl_ca_cert(cert_path.as_path(), key_path.as_path())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_ssl_cert(&self, cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
|
||||
self.generate_ssl_ca_cert()?;
|
||||
generate_ssl_cert(
|
||||
cert_path,
|
||||
key_path,
|
||||
self.ssl_ca_cert_path().unwrap().as_path(),
|
||||
self.ssl_ca_key_path().unwrap().as_path(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Inspect the base data directory and extract the instance id and instance directory path
|
||||
/// for all storage controller instances
|
||||
pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
|
||||
@@ -578,10 +517,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api: _,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
} = on_disk_config;
|
||||
LocalEnv {
|
||||
base_data_dir: repopath.to_owned(),
|
||||
@@ -594,9 +531,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -632,7 +568,6 @@ impl LocalEnv {
|
||||
struct PageserverConfigTomlSubset {
|
||||
listen_pg_addr: String,
|
||||
listen_http_addr: String,
|
||||
listen_https_addr: Option<String>,
|
||||
pg_auth_type: AuthType,
|
||||
http_auth_type: AuthType,
|
||||
#[serde(default)]
|
||||
@@ -657,7 +592,6 @@ impl LocalEnv {
|
||||
let PageserverConfigTomlSubset {
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
listen_https_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
no_sync,
|
||||
@@ -675,7 +609,6 @@ impl LocalEnv {
|
||||
},
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
listen_https_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
no_sync,
|
||||
@@ -701,10 +634,8 @@ impl LocalEnv {
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: Some(self.control_plane_api.clone()),
|
||||
control_plane_hooks_api: self.control_plane_hooks_api.clone(),
|
||||
control_plane_compute_hook_api: None,
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
generate_local_ssl_certs: self.generate_local_ssl_certs,
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -786,8 +717,7 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
generate_local_ssl_certs,
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
@@ -834,24 +764,16 @@ impl LocalEnv {
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
generate_local_ssl_certs,
|
||||
};
|
||||
|
||||
if generate_local_ssl_certs {
|
||||
env.generate_ssl_ca_cert()?;
|
||||
}
|
||||
|
||||
// create endpoints dir
|
||||
fs::create_dir_all(env.endpoints_path())?;
|
||||
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
SafekeeperNode::from_env(&env, safekeeper)
|
||||
.initialize()
|
||||
.context("safekeeper init failed")?;
|
||||
}
|
||||
|
||||
// initialize pageserver state
|
||||
@@ -929,80 +851,3 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
|
||||
// openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=Neon Local CA" -days 36500 \
|
||||
// -out rootCA.crt -keyout rootCA.key
|
||||
let keygen_output = Command::new("openssl")
|
||||
.args([
|
||||
"req", "-x509", "-newkey", "rsa:2048", "-nodes", "-days", "36500",
|
||||
])
|
||||
.args(["-subj", "/CN=Neon Local CA"])
|
||||
.args(["-out", cert_path.to_str().unwrap()])
|
||||
.args(["-keyout", key_path.to_str().unwrap()])
|
||||
.output()
|
||||
.context("failed to generate CA certificate")?;
|
||||
if !keygen_output.status.success() {
|
||||
bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn generate_ssl_cert(
|
||||
cert_path: &Path,
|
||||
key_path: &Path,
|
||||
ca_cert_path: &Path,
|
||||
ca_key_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
// Generate Certificate Signing Request (CSR).
|
||||
let mut csr_path = cert_path.to_path_buf();
|
||||
csr_path.set_extension(".csr");
|
||||
|
||||
// openssl req -new -nodes -newkey rsa:2048 -keyout server.key -out server.csr \
|
||||
// -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
|
||||
let keygen_output = Command::new("openssl")
|
||||
.args(["req", "-new", "-nodes"])
|
||||
.args(["-newkey", "rsa:2048"])
|
||||
.args(["-subj", "/CN=localhost"])
|
||||
.args(["-addext", "subjectAltName=DNS:localhost,IP:127.0.0.1"])
|
||||
.args(["-keyout", key_path.to_str().unwrap()])
|
||||
.args(["-out", csr_path.to_str().unwrap()])
|
||||
.output()
|
||||
.context("failed to generate CSR")?;
|
||||
if !keygen_output.status.success() {
|
||||
bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
// Sign CSR with CA key.
|
||||
//
|
||||
// openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial \
|
||||
// -out server.crt -days 36500 -copy_extensions copyall
|
||||
let keygen_output = Command::new("openssl")
|
||||
.args(["x509", "-req"])
|
||||
.args(["-in", csr_path.to_str().unwrap()])
|
||||
.args(["-CA", ca_cert_path.to_str().unwrap()])
|
||||
.args(["-CAkey", ca_key_path.to_str().unwrap()])
|
||||
.arg("-CAcreateserial")
|
||||
.args(["-out", cert_path.to_str().unwrap()])
|
||||
.args(["-days", "36500"])
|
||||
.args(["-copy_extensions", "copyall"])
|
||||
.output()
|
||||
.context("failed to sign CSR")?;
|
||||
if !keygen_output.status.success() {
|
||||
bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
// Remove CSR file as it's not needed anymore.
|
||||
fs::remove_file(csr_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{PgConnectionConfig, parse_host_port};
|
||||
use reqwest::Certificate;
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
@@ -50,29 +49,12 @@ impl PageServerNode {
|
||||
let (host, port) =
|
||||
parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
|
||||
let port = port.unwrap_or(5432);
|
||||
|
||||
let ssl_ca_cert = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist");
|
||||
Certificate::from_pem(&buf).expect("CA certificate should be valid")
|
||||
});
|
||||
|
||||
let endpoint = if env.storage_controller.use_https_pageserver_api {
|
||||
format!(
|
||||
"https://{}",
|
||||
conf.listen_https_addr.as_ref().expect(
|
||||
"listen https address should be specified if use_https_pageserver_api is on"
|
||||
)
|
||||
)
|
||||
} else {
|
||||
format!("http://{}", conf.listen_http_addr)
|
||||
};
|
||||
|
||||
Self {
|
||||
pg_connection_config: PgConnectionConfig::new_host_port(host, port),
|
||||
conf: conf.clone(),
|
||||
env: env.clone(),
|
||||
http_client: mgmt_api::Client::new(
|
||||
endpoint,
|
||||
format!("http://{}", conf.listen_http_addr),
|
||||
{
|
||||
match conf.http_auth_type {
|
||||
AuthType::Trust => None,
|
||||
@@ -83,9 +65,7 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
.as_deref(),
|
||||
ssl_ca_cert,
|
||||
)
|
||||
.expect("Client constructs with no errors"),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,13 +220,6 @@ impl PageServerNode {
|
||||
.context("write identity toml")?;
|
||||
drop(identity_toml);
|
||||
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
datadir.join("server.crt").as_path(),
|
||||
datadir.join("server.key").as_path(),
|
||||
)?;
|
||||
}
|
||||
|
||||
// TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
|
||||
|
||||
// Write metadata file, used by pageserver on startup to register itself with
|
||||
@@ -257,15 +230,6 @@ impl PageServerNode {
|
||||
parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
|
||||
let http_port = http_port.unwrap_or(9898);
|
||||
|
||||
let https_port = match self.conf.listen_https_addr.as_ref() {
|
||||
Some(https_addr) => {
|
||||
let (_https_host, https_port) =
|
||||
parse_host_port(https_addr).expect("Unable to parse listen_https_addr");
|
||||
Some(https_port.unwrap_or(9899))
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
// Intentionally hand-craft JSON: this acts as an implicit format compat test
|
||||
// in case the pageserver-side structure is edited, and reflects the real life
|
||||
// situation: the metadata is written by some other script.
|
||||
@@ -276,7 +240,6 @@ impl PageServerNode {
|
||||
postgres_port: self.pg_connection_config.port(),
|
||||
http_host: "localhost".to_string(),
|
||||
http_port,
|
||||
https_port,
|
||||
other: HashMap::from([(
|
||||
"availability_zone_id".to_string(),
|
||||
serde_json::json!(az_id),
|
||||
|
||||
@@ -111,18 +111,6 @@ impl SafekeeperNode {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// Initializes a safekeeper node by creating all necessary files,
|
||||
/// e.g. SSL certificates.
|
||||
pub fn initialize(&self) -> anyhow::Result<()> {
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
&self.datadir_path().join("server.crt"),
|
||||
&self.datadir_path().join("server.key"),
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn start(
|
||||
&self,
|
||||
extra_opts: &[String],
|
||||
@@ -208,16 +196,6 @@ impl SafekeeperNode {
|
||||
]);
|
||||
}
|
||||
|
||||
if let Some(https_port) = self.conf.https_port {
|
||||
args.extend([
|
||||
"--listen-https".to_owned(),
|
||||
format!("{}:{}", self.listen_addr, https_port),
|
||||
]);
|
||||
}
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
|
||||
args.extend_from_slice(extra_opts);
|
||||
|
||||
background_process::start_process(
|
||||
|
||||
@@ -12,10 +12,13 @@ use hyper0::Uri;
|
||||
use nix::unistd::Pid;
|
||||
use pageserver_api::controller_api::{
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||
TenantCreateResponse, TenantLocateResponse,
|
||||
TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
};
|
||||
use pageserver_api::models::{TenantConfigRequest, TimelineCreateRequest, TimelineInfo};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::models::{
|
||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
@@ -534,18 +537,6 @@ impl StorageController {
|
||||
args.push("--start-as-candidate".to_string());
|
||||
}
|
||||
|
||||
if self.config.use_https_pageserver_api {
|
||||
args.push("--use-https-pageserver-api".to_string());
|
||||
}
|
||||
|
||||
if self.config.use_https_safekeeper_api {
|
||||
args.push("--use-https-safekeeper-api".to_string());
|
||||
}
|
||||
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
|
||||
if let Some(private_key) = &self.private_key {
|
||||
let claims = Claims::new(None, Scope::PageServerApi);
|
||||
let jwt_token =
|
||||
@@ -562,8 +553,10 @@ impl StorageController {
|
||||
args.push(format!("--public-key=\"{public_key}\""));
|
||||
}
|
||||
|
||||
if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {
|
||||
args.push(format!("--control-plane-url={control_plane_hooks_api}"));
|
||||
if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
|
||||
args.push(format!(
|
||||
"--compute-hook-url={control_plane_compute_hook_api}"
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
|
||||
@@ -586,10 +579,6 @@ impl StorageController {
|
||||
self.env.base_data_dir.display()
|
||||
));
|
||||
|
||||
if self.config.timelines_onto_safekeepers {
|
||||
args.push("--timelines-onto-safekeepers".to_string());
|
||||
}
|
||||
|
||||
background_process::start_process(
|
||||
COMMAND,
|
||||
&instance_dir,
|
||||
@@ -836,6 +825,41 @@ impl StorageController {
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn tenant_migrate(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
node_id: NodeId,
|
||||
) -> anyhow::Result<TenantShardMigrateResponse> {
|
||||
self.dispatch(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
|
||||
Some(TenantShardMigrateRequest {
|
||||
node_id,
|
||||
migration_config: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self), fields(%tenant_id, %new_shard_count))]
|
||||
pub async fn tenant_split(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
new_shard_count: u8,
|
||||
new_stripe_size: Option<ShardStripeSize>,
|
||||
) -> anyhow::Result<TenantShardSplitResponse> {
|
||||
self.dispatch(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_id}/shard_split"),
|
||||
Some(TenantShardSplitRequest {
|
||||
new_shard_count,
|
||||
new_stripe_size,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(node_id=%req.node_id))]
|
||||
pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {
|
||||
self.dispatch::<_, ()>(Method::POST, "control/v1/node".to_string(), Some(req))
|
||||
@@ -880,9 +904,4 @@ impl StorageController {
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn set_tenant_config(&self, req: &TenantConfigRequest) -> anyhow::Result<()> {
|
||||
self.dispatch(Method::PUT, "v1/tenant/config".to_string(), Some(req))
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,20 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use futures::StreamExt;
|
||||
use pageserver_api::controller_api::{
|
||||
AvailabilityZone, MigrationConfig, NodeAvailabilityWrapper, NodeConfigureRequest,
|
||||
NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse,
|
||||
PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
|
||||
ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
|
||||
SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
AvailabilityZone, NodeAvailabilityWrapper, NodeConfigureRequest, NodeDescribeResponse,
|
||||
NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, PlacementPolicy,
|
||||
SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
|
||||
ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest,
|
||||
TenantDescribeResponse, TenantPolicyRequest, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
|
||||
TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters,
|
||||
TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
|
||||
TenantShardSplitResponse,
|
||||
};
|
||||
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
|
||||
@@ -113,15 +112,6 @@ enum Command {
|
||||
tenant_shard_id: TenantShardId,
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
#[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
|
||||
prewarm: bool,
|
||||
#[arg(long, default_value_t = false, action = clap::ArgAction::Set)]
|
||||
override_scheduler: bool,
|
||||
},
|
||||
/// Watch the location of a tenant shard evolve, e.g. while expecting it to migrate
|
||||
TenantShardWatch {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
},
|
||||
/// Migrate the secondary location for a tenant shard to a specific pageserver.
|
||||
TenantShardMigrateSecondary {
|
||||
@@ -158,6 +148,12 @@ enum Command {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// For a tenant which hasn't been onboarded to the storage controller yet, add it in secondary
|
||||
/// mode so that it can warm up content on a pageserver.
|
||||
TenantWarmup {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
TenantSetPreferredAz {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
@@ -273,10 +269,6 @@ struct Cli {
|
||||
/// a token with both scopes to use with this tool.
|
||||
jwt: Option<String>,
|
||||
|
||||
#[arg(long)]
|
||||
/// Trusted root CA certificate to use in https APIs.
|
||||
ssl_ca_file: Option<PathBuf>,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
@@ -387,17 +379,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone());
|
||||
|
||||
let ssl_ca_cert = match &cli.ssl_ca_file {
|
||||
Some(ssl_ca_file) => {
|
||||
let buf = tokio::fs::read(ssl_ca_file).await?;
|
||||
Some(reqwest::Certificate::from_pem(&buf)?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut trimmed = cli.api.to_string();
|
||||
trimmed.pop();
|
||||
let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref(), ssl_ca_cert)?;
|
||||
let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref());
|
||||
|
||||
match cli.command {
|
||||
Command::NodeRegister {
|
||||
@@ -635,43 +619,19 @@ async fn main() -> anyhow::Result<()> {
|
||||
Command::TenantShardMigrate {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
prewarm,
|
||||
override_scheduler,
|
||||
} => {
|
||||
let migration_config = MigrationConfig {
|
||||
prewarm,
|
||||
override_scheduler,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let req = TenantShardMigrateRequest {
|
||||
node_id: node,
|
||||
origin_node_id: None,
|
||||
migration_config,
|
||||
migration_config: None,
|
||||
};
|
||||
|
||||
match storcon_client
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
|
||||
Some(req),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(mgmt_api::Error::ApiError(StatusCode::PRECONDITION_FAILED, msg)) => {
|
||||
anyhow::bail!(
|
||||
"Migration to {node} rejected, may require `--force` ({}) ",
|
||||
msg
|
||||
);
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
Ok(_) => {}
|
||||
}
|
||||
|
||||
watch_tenant_shard(storcon_client, tenant_shard_id, Some(node)).await?;
|
||||
}
|
||||
Command::TenantShardWatch { tenant_shard_id } => {
|
||||
watch_tenant_shard(storcon_client, tenant_shard_id, None).await?;
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardMigrateSecondary {
|
||||
tenant_shard_id,
|
||||
@@ -679,8 +639,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest {
|
||||
node_id: node,
|
||||
origin_node_id: None,
|
||||
migration_config: MigrationConfig::default(),
|
||||
migration_config: None,
|
||||
};
|
||||
|
||||
storcon_client
|
||||
@@ -865,6 +824,94 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantWarmup { tenant_id } => {
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{tenant_id}"),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
match describe_response {
|
||||
Ok(describe) => {
|
||||
if matches!(describe.policy, PlacementPolicy::Secondary) {
|
||||
// Fine: it's already known to controller in secondary mode: calling
|
||||
// again to put it into secondary mode won't cause problems.
|
||||
} else {
|
||||
anyhow::bail!("Tenant already present with policy {:?}", describe.policy);
|
||||
}
|
||||
}
|
||||
Err(mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _)) => {
|
||||
// Fine: this tenant isn't know to the storage controller yet.
|
||||
}
|
||||
Err(e) => {
|
||||
// Unexpected API error
|
||||
return Err(e.into());
|
||||
}
|
||||
}
|
||||
|
||||
vps_client
|
||||
.location_config(
|
||||
TenantShardId::unsharded(tenant_id),
|
||||
pageserver_api::models::LocationConfig {
|
||||
mode: pageserver_api::models::LocationConfigMode::Secondary,
|
||||
generation: None,
|
||||
secondary_conf: Some(LocationConfigSecondary { warm: true }),
|
||||
shard_number: 0,
|
||||
shard_count: 0,
|
||||
shard_stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE.0,
|
||||
tenant_conf: TenantConfig::default(),
|
||||
},
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{tenant_id}"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let secondary_ps_id = describe_response
|
||||
.shards
|
||||
.first()
|
||||
.unwrap()
|
||||
.node_secondary
|
||||
.first()
|
||||
.unwrap();
|
||||
|
||||
println!("Tenant {tenant_id} warming up on pageserver {secondary_ps_id}");
|
||||
loop {
|
||||
let (status, progress) = vps_client
|
||||
.tenant_secondary_download(
|
||||
TenantShardId::unsharded(tenant_id),
|
||||
Some(Duration::from_secs(10)),
|
||||
)
|
||||
.await?;
|
||||
println!(
|
||||
"Progress: {}/{} layers, {}/{} bytes",
|
||||
progress.layers_downloaded,
|
||||
progress.layers_total,
|
||||
progress.bytes_downloaded,
|
||||
progress.bytes_total
|
||||
);
|
||||
match status {
|
||||
StatusCode::OK => {
|
||||
println!("Download complete");
|
||||
break;
|
||||
}
|
||||
StatusCode::ACCEPTED => {
|
||||
// Loop
|
||||
}
|
||||
_ => {
|
||||
anyhow::bail!("Unexpected download status: {status}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Command::TenantDrop { tenant_id, unclean } => {
|
||||
if !unclean {
|
||||
anyhow::bail!(
|
||||
@@ -1058,8 +1105,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
|
||||
Some(TenantShardMigrateRequest {
|
||||
node_id: mv.to,
|
||||
origin_node_id: Some(mv.from),
|
||||
migration_config: MigrationConfig::default(),
|
||||
migration_config: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
@@ -1238,68 +1284,3 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
static WATCH_INTERVAL: Duration = Duration::from_secs(5);
|
||||
|
||||
async fn watch_tenant_shard(
|
||||
storcon_client: Client,
|
||||
tenant_shard_id: TenantShardId,
|
||||
until_migrated_to: Option<NodeId>,
|
||||
) -> anyhow::Result<()> {
|
||||
if let Some(until_migrated_to) = until_migrated_to {
|
||||
println!(
|
||||
"Waiting for tenant shard {} to be migrated to node {}",
|
||||
tenant_shard_id, until_migrated_to
|
||||
);
|
||||
}
|
||||
|
||||
loop {
|
||||
let desc = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{}", tenant_shard_id.tenant_id),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Output the current state of the tenant shard
|
||||
let shard = desc
|
||||
.shards
|
||||
.iter()
|
||||
.find(|s| s.tenant_shard_id == tenant_shard_id)
|
||||
.ok_or(anyhow::anyhow!("Tenant shard not found"))?;
|
||||
let summary = format!(
|
||||
"attached: {} secondary: {} {}",
|
||||
shard
|
||||
.node_attached
|
||||
.map(|n| format!("{}", n))
|
||||
.unwrap_or("none".to_string()),
|
||||
shard
|
||||
.node_secondary
|
||||
.iter()
|
||||
.map(|n| n.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(","),
|
||||
if shard.is_reconciling {
|
||||
"(reconciler active)"
|
||||
} else {
|
||||
"(reconciler idle)"
|
||||
}
|
||||
);
|
||||
println!("{}", summary);
|
||||
|
||||
// Maybe drop out if we finished migration
|
||||
if let Some(until_migrated_to) = until_migrated_to {
|
||||
if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling {
|
||||
println!(
|
||||
"Tenant shard {} is now on node {}",
|
||||
tenant_shard_id, until_migrated_to
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tokio::time::sleep(WATCH_INTERVAL).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -27,10 +27,6 @@ yanked = "warn"
|
||||
id = "RUSTSEC-2023-0071"
|
||||
reason = "the marvin attack only affects private key decryption, not public key signature verification"
|
||||
|
||||
[[advisories.ignore]]
|
||||
id = "RUSTSEC-2024-0436"
|
||||
reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact."
|
||||
|
||||
# This section is considered when running `cargo deny check licenses`
|
||||
# More documentation for the licenses section can be found here:
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
||||
|
||||
72
docs/explain-extension.md
Normal file
72
docs/explain-extension.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Neon changes to Postgres EXPLAIN command
|
||||
|
||||
## Why do we need to include more information in EXPLAIN?
|
||||
|
||||
Neon contains two components: prefetch and LFC (local file cache) which may have critical impact on query performance.
|
||||
Both are trying to solve the problem with relatively large round-trip between compute and page server (much larger than average access time for modern SSDs).
|
||||
This is why Neon can provide comparable performance only if all data set is present at local node.
|
||||
Certainly the fastest case of accessing data in Postgres is when it is present in Postgres cache (shared buffers).
|
||||
Unfortunately size of shared buffer can not be changed on the flight: it requires Postgres restart. It is not acceptable for autoscaling.
|
||||
This is why we have relatively small shared buffers and dynamically resized local file cache (LFC).
|
||||
It is intended that LFC fits in memory, although it can improve performance even if data is read from local disk.
|
||||
See https://neondb.slack.com/archives/C03QLRH7PPD/p1718714926044699
|
||||
To minimize in-memory footprint of LFC and to improve sequential scan, LFC uses chunks which size is larger than size of Postgres page (right now chunk size is 1Mb).
|
||||
|
||||
|
||||
LFC as any other cache is useless after cool restart. Also some data sets can not fit in local disk.
|
||||
This is where another approach can help: prefetching. If we are able to predict which pages will be needed soon,
|
||||
compute can send prefetch requests to page server before this page is actually requested by executor.
|
||||
Prefetch is also used by Postgres (using `fadvise`), but only for vacuum and bitmap scan. Neon provides prefetch for more execution plan nodes:
|
||||
sequential scan, index scan (prefetch of referenced heap pages), index-only scan (prefetch B-Tree leaves).
|
||||
|
||||
As far as work of prefetch and LFC may have critical impact on query performance, we need to provide this information to the users.
|
||||
The most convenient and natural way is to include it in EXPLAIN. Two new keyword are added by Neon to EXPLAIN options: `prefetch` and `filecache`.
|
||||
|
||||
## prefetch
|
||||
|
||||
The following information is available about prefetch:
|
||||
* `hits` - number of pages which are received from page server before actually requested by executor. Prefetch distance is controlled by `effective_io_concurrency` GUC. The larger it is, the more chances that page server will be able to complete request before it is needed. But it should not be larger than `neon.prefetch_buffer_size`.
|
||||
* `misses` - number of accessed pages which were not prefetched. Prefetch is not implemented for all plan nodes. And even for those nodes for which it is implemented (i.e. sequential scan) some mispredictions are possible. Please notice that `hits + misses != accessed pages`. If prefetch request for the page was issued but not yet completed before the page is requested, then such access is not considered as prefetch hit or miss.
|
||||
* `expired` - page can be updated by backend since the moment of sending prefetch request to page server. Or result of prefetch just not used because executor doesn't need this page (for example because of presence of `LIMIT` clause in the query). In both cases such requests are considered as expired.
|
||||
* `duplicates` - multiple prefetch requests for the same page. For some nodes predicting next pages is trivial, i.e. for sequential scan. But in case of index scan we need to prefetch referenced heap pages. And definitely index entries can have multiple references to the same heap page. Such non-unique prefetch requests are considered as duplicates.
|
||||
|
||||
|
||||
## filecache
|
||||
|
||||
The following information is available about file cache (LFC):
|
||||
* `hits` - number of accessed pages found in LFC.
|
||||
* `misses` - number of accessed pages not found in LFC.
|
||||
|
||||
# LFC statistic
|
||||
|
||||
While `filecache` option of EXPLAIN command provides information about LFC usage in the particular query, there is also available global statistic about LFC usage.
|
||||
It is provided by `neon` extension.
|
||||
|
||||
## `neon_lfc_stats` view
|
||||
|
||||
This view provides information as key-value pairs (so that new information can be added without changing neon extension interface).
|
||||
The following keys are provided:
|
||||
* `file_cache_hits` - total number of LFC hits (for all backends and queries since server startup).
|
||||
* `file_cache_misses` - total number of LFC misses.
|
||||
* `file_cache_used` - number chunks used in LFC
|
||||
* `file_cache_writes` - number of pages written to LFC
|
||||
* `file_cache_size` - current cache size in chunks (can not be larger than `neon.file_cache_size_limit`)
|
||||
* `file_cache_used_pages` - number is used pages. As far as not all pages of the chunk can be filled with data it can be smaller than `file_cache_used*128` (128 is number of 8kB pages in 1MB chunk)
|
||||
* `file_cache_evicted_pages` - number of pages evicted from LFC because working set doesn't fit in LFC.
|
||||
* `file_cache_limit` - current limit of LFC size (in chunks)
|
||||
|
||||
## `local_cache` view
|
||||
This view is similar with `pg_buffercache` view and contains the following columns:
|
||||
```
|
||||
(pageoffs int8,
|
||||
relfilenode oid,
|
||||
reltablespace oid,
|
||||
reldatabase oid,
|
||||
relforknumber int2,
|
||||
relblocknumber int8,
|
||||
accesscount int4)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,201 +0,0 @@
|
||||
# Sparse Keyspace for Relation Directories
|
||||
|
||||
## Summary
|
||||
|
||||
This is an RFC describing a new storage strategy for storing relation directories.
|
||||
|
||||
## Motivation
|
||||
|
||||
Postgres maintains a directory structure for databases and relations. In Neon, we store these information
|
||||
by serializing the directory data in a single key (see `pgdatadir_mapping.rs`).
|
||||
|
||||
```rust
|
||||
// DbDir:
|
||||
// 00 00000000 00000000 00000000 00 00000000
|
||||
|
||||
// RelDir:
|
||||
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
|
||||
```
|
||||
|
||||
We have a dedicated structure on the ingestion path to serialize the relation directory into this single key.
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub(crate) struct RelDirectory {
|
||||
// Set of relations that exist. (relfilenode, forknum)
|
||||
//
|
||||
// TODO: Store it as a btree or radix tree or something else that spans multiple
|
||||
// key-value pairs, if you have a lot of relations
|
||||
pub(crate) rels: HashSet<(Oid, u8)>,
|
||||
}
|
||||
```
|
||||
|
||||
The current codebase has the following three access patterns for the relation directory.
|
||||
|
||||
1. Check if a relation exists.
|
||||
2. List all relations.
|
||||
3. Create/drop a relation.
|
||||
|
||||
For (1), we currently have to get the reldir key, deserialize it, and check whether the relation exists in the
|
||||
hash set. For (2), we get the reldir key and the hash set. For (3), we need first to get
|
||||
and deserialize the key, add the new relation record to the hash set, and then serialize it and write it back.
|
||||
|
||||
If we have 100k relations in a database, we would have a 100k-large hash set. Then, every
|
||||
relation created and dropped would have deserialized and serialized this 100k-large hash set. This makes the
|
||||
relation create/drop process to be quadratic. When we check if a relation exists in the ingestion path,
|
||||
we would have to deserialize this super big 100k-large key before checking if a single relation exists.
|
||||
|
||||
In this RFC, we will propose a new way to store the reldir data in the sparse keyspace and propose how
|
||||
to seamlessly migrate users to use the new keyspace.
|
||||
|
||||
The PoC patch is implemented in [PR10316](https://github.com/neondatabase/neon/pull/10316).
|
||||
|
||||
## Key Mapping
|
||||
|
||||
We will use the recently introduced sparse keyspace to store actual data. Sparse keyspace was proposed in
|
||||
[038-aux-file-v2.md](038-aux-file-v2.md). The original reldir has one single value of `HashSet<(Oid, u8)>`
|
||||
for each of the databases (identified as `spcnode, dbnode`). We encode the `Oid` (`relnode, forknum`),
|
||||
into the key.
|
||||
|
||||
```plain
|
||||
(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> deleted
|
||||
(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> exists
|
||||
```
|
||||
|
||||
Assume all reldir data are stored in this new keyspace; the 3 reldir operations we mentioned before can be
|
||||
implemented as follows.
|
||||
|
||||
1. Check if a relation exists: check if the key maps to "exists".
|
||||
2. List all relations: scan the sprase keyspace over the `rel_dir_key_prefix`. Extract relnode and forknum from the key.
|
||||
3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation. The delete tombstone will
|
||||
be removed during image layer generation upon compaction.
|
||||
|
||||
Note that "exists" and "deleted" will be encoded as a single byte as two variants of an enum.
|
||||
The mapping is implemented as `rel_tag_sparse_key` in the PoC patch.
|
||||
|
||||
## Changes to Sparse Keyspace
|
||||
|
||||
Previously, we only used sparse keyspaces for the aux files, which did not carry over when branching. The reldir
|
||||
information needs to be preserved from the parent branch to the child branch. Therefore, the read path needs
|
||||
to be updated accordingly to accommodate such "inherited sparse keys". This is done in
|
||||
[PR#10313](https://github.com/neondatabase/neon/pull/10313).
|
||||
|
||||
## Coexistence of the Old and New Keyspaces
|
||||
|
||||
Migrating to the new keyspace will be done gradually: when we flip a config item to enable the new reldir keyspace, the
|
||||
ingestion path will start to write to the new keyspace and the old reldir data will be kept in the old one. The read
|
||||
path needs to combine the data from both keyspaces.
|
||||
|
||||
Theoretically, we could do a rewrite at the startup time that scans all relation directories and copies that data into the
|
||||
new keyspace. However, this could take a long time, especially if we have thousands of tenants doing the migration
|
||||
process simultaneously after the pageserver restarts. Therefore, we propose the coexistence strategy so that the
|
||||
migration can happen seamlessly and imposes no potential downtime for the user.
|
||||
|
||||
With the coexistence assumption, the 3 reldir operations will be implemented as follows:
|
||||
|
||||
1. Check if a relation exists
|
||||
- Check the new keyspace if the key maps to any value. If it maps to "exists" or "deleted", directly
|
||||
return it to the user.
|
||||
- Otherwise, deserialize the old reldir key and get the result.
|
||||
2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix` and deserialize the old reldir key.
|
||||
Combine them to obtain the final result.
|
||||
3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation into the new keyspace.
|
||||
- We assume no overwrite of relations will happen (i.e., the user won't create a relation at the same Oid). This will be implemented as a runtime check.
|
||||
- For relation creation, we add `sparse_reldir_tableX -> exists` to the keyspace.
|
||||
- For relation drop, we first check if the relation is recorded in the old keyspace. If yes, we deserialize the old reldir key,
|
||||
remove the relation, and then write it back. Otherwise, we put `sparse_reldir_tableX -> deleted` to the keyspace.
|
||||
- The delete tombstone will be removed during image layer generation upon compaction.
|
||||
|
||||
This process ensures that the transition will not introduce any downtime and all new updates are written to the new keyspace. The total
|
||||
amount of data in the storage would be `O(relations_modifications)` and we can guarantee `O(current_relations)` after compaction.
|
||||
There could be some relations that exist in the old reldir key for a long time. Refer to the "Full Migration" section on how to deal
|
||||
with them. Plus, for relation modifications, it will have `O(old_relations)` complexity until we do the full migration, which gives
|
||||
us `O(1)` complexity after fully opt-in the sparse keyspace.
|
||||
|
||||
The process also implies that a relation will only exists either in the old reldir key or in the new sparse keyspace. It is not possible
|
||||
to have a table to be recorded in the old reldir key while later having a delete tombstone for it in the sparse keyspace at any LSN.
|
||||
|
||||
We will introduce a config item and an index_part record to record the current status of the migration process.
|
||||
|
||||
- Config item `enable_reldir_v2`: controls whether the ingestion path writes the reldir info into the new keyspace.
|
||||
- `index_part.json` field `reldir_v2_status`: whether the timeline has written any key into the new reldir keyspace.
|
||||
|
||||
If `enable_reldir_v2` is set to `true` and the timeline ingests the first key into the new reldir keyspace, it will update
|
||||
`index_part.json` to set `reldir_v2_status` to `Status::Migrating`. Even if `enable_reldir_v2` gets flipped back to
|
||||
`false` (i.e., when the pageserver restarts and such config isn't persisted), the read/write path will still
|
||||
read/write to the new keyspace to avoid data inconsistency. This also indicates that the migration is one-way only:
|
||||
once v2 is enabled, the user cannot go back to v1.
|
||||
|
||||
## Next Steps
|
||||
|
||||
### Full Migration
|
||||
|
||||
This won't be implemented in the project's first phase but might be implemented in the future. Having both v1 and
|
||||
v2 existing in the system would force us to keep the code to deserialize the old reldir key forever. To entirely deprecate this
|
||||
code path, we must ensure the timeline has no old reldir data.
|
||||
|
||||
We can trigger a special image layer generation process at the gc-horizon. The generated image layers will cover several keyspaces:
|
||||
the old reldir key in each of the databases, and the new reldir sparse keyspace. It will remove the old reldir key while
|
||||
copying them into the corresponding keys in the sparse keyspace in the resulting image. This special process happens in
|
||||
the background during compaction. For example, assume this special process is triggered at LSN 0/180. The `create_image_layers`
|
||||
process discovers the following keys at this LSN.
|
||||
|
||||
```plain
|
||||
db1/reldir_key -> (table 1, table 2, table 3)
|
||||
...db1 rel keys
|
||||
db2/reldir_key -> (table 4, table 5, table 6)
|
||||
...db2 rel keys
|
||||
sparse_reldir_db2_table7 -> exists
|
||||
sparse_reldir_db1_table8 -> deleted
|
||||
```
|
||||
|
||||
It will generate the following keys:
|
||||
|
||||
```plain
|
||||
db1/reldir_key -> () # we have to keep the key because it is part of `collect_keyspace`.
|
||||
...db1 rel keys
|
||||
db2/reldir_key -> ()
|
||||
...db2 rel keys
|
||||
|
||||
-- start image layer for the sparse keyspace at sparse_reldir_prefix at LSN 0/180
|
||||
sparse_reldir_db1_table1 -> exists
|
||||
sparse_reldir_db1_table2 -> exists
|
||||
sparse_reldir_db1_table3 -> exists
|
||||
sparse_reldir_db2_table4 -> exists
|
||||
sparse_reldir_db2_table5 -> exists
|
||||
sparse_reldir_db2_table6 -> exists
|
||||
sparse_reldir_db2_table7 -> exists
|
||||
-- end image layer for the sparse keyspace at sparse_reldir_prefix+1
|
||||
|
||||
# The `sparse_reldir_db1_table8` key gets dropped as part of the image layer generation code for the sparse keyspace.
|
||||
# Note that the read path will stop reading if a key is not found in the image layer covering the key range so there
|
||||
# are no correctness issue.
|
||||
```
|
||||
|
||||
We must verify that no pending modifications to the old reldir exists in the delta/image layers above the gc-horizon before
|
||||
we start this process (We can do a vectored read to get the full key history of the old reldir key and ensure there are no more images
|
||||
above the gc-horizon). Otherwise, it will violate the property that "a relation will only exists either in the old reldir key or
|
||||
in the new sparse keyspace". After we run this migration process, we can mark `reldir_v2_status` in the `index_part.json` to
|
||||
`Status::Migrated`, and the read path won't need to read from the old reldir anymore. Once the status is set to `Migrated`, we
|
||||
don't need to add the key into `collect_keyspace` and therefore all of them will be removed from all future image layers.
|
||||
|
||||
The migration process can be proactively triggered across all attached/detached tenants to help us fully remove the old reldir code.
|
||||
|
||||
### Consolidate Relation Size Keys
|
||||
|
||||
We have relsize at the end of all relation nodes.
|
||||
|
||||
```plain
|
||||
// RelSize:
|
||||
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
|
||||
```
|
||||
|
||||
This means that computing logical size requires us to do several single-key gets across the keyspace,
|
||||
potentially requiring downloading many layer files. We could consolidate them into a single
|
||||
keyspace, improving logical size calculation performance.
|
||||
|
||||
### Migrate DBDir Keys
|
||||
|
||||
We assume the number of databases created by the users will be small, and therefore, the current way
|
||||
of storing the database directory would be acceptable. In the future, we could also migrate DBDir keys into
|
||||
the sparse keyspace to support large amount of databases.
|
||||
@@ -1,7 +1,3 @@
|
||||
# Neon RFCs
|
||||
|
||||
## Overview
|
||||
|
||||
This directory contains Request for Comments documents, or RFCs, for
|
||||
features or concepts that have been proposed. Alternative names:
|
||||
technical design doc, ERD, one-pager
|
||||
@@ -63,10 +59,37 @@ RFC lifecycle:
|
||||
|
||||
### RFC template
|
||||
|
||||
Use template with `YYYY-MM-DD-copy-me.md` as a starting point. Timestamp prefix helps to avoid awkward 'id' collisions.
|
||||
|
||||
```sh
|
||||
cp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +"%Y-%m-%d")-<name>.md
|
||||
```
|
||||
|
||||
Note, a lot of the sections are marked as ‘if relevant’. They are included into the template as a reminder and to help inspiration.
|
||||
|
||||
```
|
||||
# Name
|
||||
Created on ..
|
||||
Implemented on ..
|
||||
|
||||
## Summary
|
||||
|
||||
## Motivation
|
||||
|
||||
## Non Goals (if relevant)
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
### Reliability, failure modes and corner cases (if relevant)
|
||||
|
||||
### Interaction/Sequence diagram (if relevant)
|
||||
|
||||
### Scalability (if relevant)
|
||||
|
||||
### Security implications (if relevant)
|
||||
|
||||
### Unresolved questions (if relevant)
|
||||
|
||||
## Alternative implementation (if relevant)
|
||||
|
||||
## Pros/cons of proposed approaches (if relevant)
|
||||
|
||||
## Definition of Done (if relevant)
|
||||
|
||||
```
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# Name
|
||||
|
||||
Created on YYYY-MM-DD
|
||||
Implemented on _TBD_
|
||||
|
||||
## Summary
|
||||
|
||||
## Motivation
|
||||
|
||||
## Non Goals (if relevant)
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
### Reliability, failure modes and corner cases (if relevant)
|
||||
|
||||
### Interaction/Sequence diagram (if relevant)
|
||||
|
||||
### Scalability (if relevant)
|
||||
|
||||
### Security implications (if relevant)
|
||||
|
||||
### Unresolved questions (if relevant)
|
||||
|
||||
## Alternative implementation (if relevant)
|
||||
|
||||
## Pros/cons of proposed approaches (if relevant)
|
||||
|
||||
## Definition of Done (if relevant)
|
||||
@@ -101,25 +101,15 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co
|
||||
postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
|
||||
location changes.
|
||||
|
||||
The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.
|
||||
The hook is configured using the storage controller's `--compute-hook-url` CLI option. If the hook requires
|
||||
JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
|
||||
|
||||
Currently, there is two hooks, each computed by appending the name to the provided control plane URL prefix:
|
||||
|
||||
- `notify-attach`, called whenever attachment for pageservers changes
|
||||
- `notify-safekeepers`, called whenever attachment for safekeepers changes
|
||||
|
||||
If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.
|
||||
The hooks will be invoked with a `PUT` request.
|
||||
|
||||
In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,
|
||||
In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
|
||||
the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
|
||||
the compute hook.
|
||||
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.
|
||||
|
||||
### `notify-attach` body
|
||||
|
||||
The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
|
||||
the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
struct ComputeHookNotifyRequestShard {
|
||||
@@ -138,15 +128,15 @@ When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
- set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
shards identified by `NodeId` must be converted to the address+port of the node.
|
||||
- if stripe_size is not None, set `neon.shard_stripe_size` to this value
|
||||
- if stripe_size is not None, set `neon.stripe_size` to this value
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
|
||||
Example body:
|
||||
### Example notification body
|
||||
|
||||
```
|
||||
{
|
||||
@@ -158,34 +148,3 @@ Example body:
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
### `notify-safekeepers` body
|
||||
|
||||
The `notify-safekeepers` request body forllows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
pub struct SafekeeperInfo {
|
||||
pub id: NodeId,
|
||||
pub hostname: String,
|
||||
}
|
||||
|
||||
pub struct SafekeepersNotifyRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub generation: u32,
|
||||
pub safekeepers: Vec<SafekeeperInfo>,
|
||||
}
|
||||
```
|
||||
|
||||
When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The
|
||||
safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.
|
||||
The hostname is provided for debugging purposes, so we reserve changes to how we pass it.
|
||||
- set `neon.safekeepers_generation` to the provided `generation` value.
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
@@ -7,7 +7,6 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
chrono.workspace = true
|
||||
indexmap.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -139,7 +139,6 @@ pub struct ComputeCtlConfig {
|
||||
/// Set of JSON web keys that the compute can use to authenticate
|
||||
/// communication from the control plane.
|
||||
pub jwks: JwkSet,
|
||||
pub tls: Option<TlsConfig>,
|
||||
}
|
||||
|
||||
impl Default for ComputeCtlConfig {
|
||||
@@ -148,17 +147,10 @@ impl Default for ComputeCtlConfig {
|
||||
jwks: JwkSet {
|
||||
keys: Vec::default(),
|
||||
},
|
||||
tls: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct TlsConfig {
|
||||
pub key_path: String,
|
||||
pub cert_path: String,
|
||||
}
|
||||
|
||||
/// Response of the `/computes/{compute_id}/spec` control-plane API.
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ControlPlaneSpecResponse {
|
||||
|
||||
@@ -5,15 +5,12 @@
|
||||
//! and connect it to the storage nodes.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use regex::Regex;
|
||||
use remote_storage::RemotePath;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::responses::TlsConfig;
|
||||
|
||||
/// String type alias representing Postgres identifier and
|
||||
/// intended to be used for DB / role names.
|
||||
pub type PgIdent = String;
|
||||
@@ -128,7 +125,7 @@ pub struct ComputeSpec {
|
||||
// information about available remote extensions
|
||||
pub remote_extensions: Option<RemoteExtSpec>,
|
||||
|
||||
pub pgbouncer_settings: Option<IndexMap<String, String>>,
|
||||
pub pgbouncer_settings: Option<HashMap<String, String>>,
|
||||
|
||||
// Stripe size for pageserver sharding, in pages
|
||||
#[serde(default)]
|
||||
@@ -158,16 +155,6 @@ pub struct ComputeSpec {
|
||||
/// over the same replication content from publisher.
|
||||
#[serde(default)] // Default false
|
||||
pub drop_subscriptions_before_start: bool,
|
||||
|
||||
/// Log level for audit logging:
|
||||
///
|
||||
/// Disabled - no audit logging. This is the default.
|
||||
/// log - log masked statements to the postgres log using pgaudit extension
|
||||
/// hipaa - log unmasked statements to the file using pgaudit and pgauditlogtofile extension
|
||||
///
|
||||
/// Extensions should be present in shared_preload_libraries
|
||||
#[serde(default)]
|
||||
pub audit_log_level: ComputeAudit,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
@@ -275,17 +262,6 @@ pub enum ComputeMode {
|
||||
Replica,
|
||||
}
|
||||
|
||||
/// Log level for audit logging
|
||||
/// Disabled, log, hipaa
|
||||
/// Default is Disabled
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
|
||||
pub enum ComputeAudit {
|
||||
#[default]
|
||||
Disabled,
|
||||
Log,
|
||||
Hipaa,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct Cluster {
|
||||
pub cluster_id: Option<String>,
|
||||
@@ -360,9 +336,6 @@ pub struct LocalProxySpec {
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub jwks: Option<Vec<JwksSettings>>,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tls: Option<TlsConfig>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
|
||||
@@ -7,9 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
camino.workspace = true
|
||||
fail.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0.workspace = true
|
||||
itertools.workspace = true
|
||||
jemalloc_pprof.workspace = true
|
||||
@@ -17,14 +15,12 @@ once_cell.workspace = true
|
||||
pprof.workspace = true
|
||||
regex.workspace = true
|
||||
routerify.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_path_to_error.workspace = true
|
||||
thiserror.workspace = true
|
||||
tracing.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-rustls.workspace = true
|
||||
tokio-util.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
@@ -399,10 +399,12 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A
|
||||
// Return the report in the requested format.
|
||||
match format {
|
||||
Format::Pprof => {
|
||||
let body = report
|
||||
let mut body = Vec::new();
|
||||
report
|
||||
.pprof()
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?
|
||||
.encode_to_vec();
|
||||
.write_to_vec(&mut body)
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
|
||||
Response::builder()
|
||||
.status(200)
|
||||
|
||||
@@ -3,11 +3,9 @@ pub mod error;
|
||||
pub mod failpoints;
|
||||
pub mod json;
|
||||
pub mod request;
|
||||
pub mod server;
|
||||
pub mod tls_certs;
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
/// Current fast way to apply simple http routing in various Neon binaries.
|
||||
/// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed.
|
||||
pub use routerify::{RequestServiceBuilder, RouterBuilder, RouterService, ext::RequestExt};
|
||||
pub use routerify::{RouterBuilder, RouterService, ext::RequestExt};
|
||||
|
||||
@@ -1,155 +0,0 @@
|
||||
use std::{error::Error, sync::Arc};
|
||||
|
||||
use futures::StreamExt;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use hyper0::Body;
|
||||
use hyper0::server::conn::Http;
|
||||
use routerify::{RequestService, RequestServiceBuilder};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::error::ApiError;
|
||||
|
||||
/// A simple HTTP server over hyper library.
|
||||
/// You may want to use it instead of [`hyper0::server::Server`] because:
|
||||
/// 1. hyper0's Server was removed from hyper v1.
|
||||
/// It's recommended to replace hyepr0's Server with a manual loop, which is done here.
|
||||
/// 2. hyper0's Server doesn't support TLS out of the box, and there is no way
|
||||
/// to support it efficiently with the Accept trait that hyper0's Server uses.
|
||||
/// That's one of the reasons why it was removed from v1.
|
||||
/// <https://github.com/hyperium/hyper/blob/115339d3df50f20c8717680aa35f48858e9a6205/docs/ROADMAP.md#higher-level-client-and-server-problems>
|
||||
pub struct Server {
|
||||
request_service: Arc<RequestServiceBuilder<Body, ApiError>>,
|
||||
listener: tokio::net::TcpListener,
|
||||
tls_acceptor: Option<TlsAcceptor>,
|
||||
}
|
||||
|
||||
impl Server {
|
||||
pub fn new(
|
||||
request_service: Arc<RequestServiceBuilder<Body, ApiError>>,
|
||||
listener: std::net::TcpListener,
|
||||
tls_acceptor: Option<TlsAcceptor>,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Note: caller of from_std is responsible for setting nonblocking mode.
|
||||
listener.set_nonblocking(true)?;
|
||||
let listener = tokio::net::TcpListener::from_std(listener)?;
|
||||
|
||||
Ok(Self {
|
||||
request_service,
|
||||
listener,
|
||||
tls_acceptor,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn serve(self, cancel: CancellationToken) -> anyhow::Result<()> {
|
||||
fn suppress_io_error(err: &std::io::Error) -> bool {
|
||||
use std::io::ErrorKind::*;
|
||||
matches!(err.kind(), ConnectionReset | ConnectionAborted | BrokenPipe)
|
||||
}
|
||||
fn suppress_hyper_error(err: &hyper0::Error) -> bool {
|
||||
if err.is_incomplete_message() || err.is_closed() || err.is_timeout() {
|
||||
return true;
|
||||
}
|
||||
if let Some(inner) = err.source() {
|
||||
if let Some(io) = inner.downcast_ref::<std::io::Error>() {
|
||||
return suppress_io_error(io);
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
let mut connections = FuturesUnordered::new();
|
||||
loop {
|
||||
tokio::select! {
|
||||
stream = self.listener.accept() => {
|
||||
let (tcp_stream, remote_addr) = match stream {
|
||||
Ok(stream) => stream,
|
||||
Err(err) => {
|
||||
if !suppress_io_error(&err) {
|
||||
info!("Failed to accept TCP connection: {err:#}");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let service = self.request_service.build(remote_addr);
|
||||
let tls_acceptor = self.tls_acceptor.clone();
|
||||
let cancel = cancel.clone();
|
||||
|
||||
connections.push(tokio::spawn(
|
||||
async move {
|
||||
match tls_acceptor {
|
||||
Some(tls_acceptor) => {
|
||||
// Handle HTTPS connection.
|
||||
let tls_stream = tokio::select! {
|
||||
tls_stream = tls_acceptor.accept(tcp_stream) => tls_stream,
|
||||
_ = cancel.cancelled() => return,
|
||||
};
|
||||
let tls_stream = match tls_stream {
|
||||
Ok(tls_stream) => tls_stream,
|
||||
Err(err) => {
|
||||
if !suppress_io_error(&err) {
|
||||
info!("Failed to accept TLS connection: {err:#}");
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(err) = Self::serve_connection(tls_stream, service, cancel).await {
|
||||
if !suppress_hyper_error(&err) {
|
||||
info!("Failed to serve HTTPS connection: {err:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// Handle HTTP connection.
|
||||
if let Err(err) = Self::serve_connection(tcp_stream, service, cancel).await {
|
||||
if !suppress_hyper_error(&err) {
|
||||
info!("Failed to serve HTTP connection: {err:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}));
|
||||
}
|
||||
Some(conn) = connections.next() => {
|
||||
if let Err(err) = conn {
|
||||
error!("Connection panicked: {err:#}");
|
||||
}
|
||||
}
|
||||
_ = cancel.cancelled() => {
|
||||
// Wait for graceful shutdown of all connections.
|
||||
while let Some(conn) = connections.next().await {
|
||||
if let Err(err) = conn {
|
||||
error!("Connection panicked: {err:#}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serves HTTP connection with graceful shutdown.
|
||||
async fn serve_connection<I>(
|
||||
io: I,
|
||||
service: RequestService<Body, ApiError>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), hyper0::Error>
|
||||
where
|
||||
I: AsyncRead + AsyncWrite + Unpin + Send + 'static,
|
||||
{
|
||||
let mut conn = Http::new().serve_connection(io, service).with_upgrades();
|
||||
|
||||
tokio::select! {
|
||||
res = &mut conn => res,
|
||||
_ = cancel.cancelled() => {
|
||||
Pin::new(&mut conn).graceful_shutdown();
|
||||
// Note: connection should still be awaited for graceful shutdown to complete.
|
||||
conn.await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
use camino::Utf8Path;
|
||||
use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
|
||||
pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
Ok(rustls_pemfile::certs(&mut reader).collect::<Result<Vec<_>, _>>()?)
|
||||
}
|
||||
|
||||
pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
let key = rustls_pemfile::private_key(&mut reader)?;
|
||||
|
||||
key.ok_or(anyhow::anyhow!(
|
||||
"no private key found in {}",
|
||||
filename.as_str(),
|
||||
))
|
||||
}
|
||||
@@ -35,7 +35,6 @@ pub struct NodeMetadata {
|
||||
pub postgres_port: u16,
|
||||
pub http_host: String,
|
||||
pub http_port: u16,
|
||||
pub https_port: Option<u16>,
|
||||
|
||||
// Deployment tools may write fields to the metadata file beyond what we
|
||||
// use in this type: this type intentionally only names fields that require.
|
||||
@@ -58,9 +57,6 @@ pub struct ConfigToml {
|
||||
// types mapped 1:1 into the runtime PageServerConfig type
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub listen_https_addr: Option<String>,
|
||||
pub ssl_key_file: Utf8PathBuf,
|
||||
pub ssl_cert_file: Utf8PathBuf,
|
||||
pub availability_zone: Option<String>,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub wait_lsn_timeout: Duration,
|
||||
@@ -272,16 +268,15 @@ pub struct TenantConfigToml {
|
||||
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
||||
pub compaction_upper_limit: usize,
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
|
||||
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction.
|
||||
pub compaction_l0_first: bool,
|
||||
/// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
|
||||
/// has an effect if `compaction_l0_first` is true. Defaults to true.
|
||||
/// has an effect if `compaction_l0_first` is `true`.
|
||||
pub compaction_l0_semaphore: bool,
|
||||
/// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
|
||||
/// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
|
||||
/// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
|
||||
/// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
|
||||
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
|
||||
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
|
||||
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
|
||||
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
|
||||
/// to avoid deadlock. 0 to disable. Disabled by default.
|
||||
@@ -289,8 +284,6 @@ pub struct TenantConfigToml {
|
||||
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
|
||||
/// layer. This is a temporary backpressure mechanism which should be removed once
|
||||
/// l0_flush_{delay,stall}_threshold is fully enabled.
|
||||
///
|
||||
/// TODO: this is no longer enabled, remove it when the config option is no longer set.
|
||||
pub l0_flush_wait_upload: bool,
|
||||
// Determines how much history is retained, to allow
|
||||
// branching and read replicas at an older point in time.
|
||||
@@ -428,9 +421,6 @@ pub mod defaults {
|
||||
|
||||
pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
|
||||
utils::postgres_client::PostgresClientProtocol::Vanilla;
|
||||
|
||||
pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
|
||||
pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
|
||||
}
|
||||
|
||||
impl Default for ConfigToml {
|
||||
@@ -440,9 +430,6 @@ impl Default for ConfigToml {
|
||||
Self {
|
||||
listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
|
||||
listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
|
||||
listen_https_addr: (None),
|
||||
ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
|
||||
ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
|
||||
availability_zone: (None),
|
||||
wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
|
||||
.expect("cannot parse default wait lsn timeout")),
|
||||
@@ -570,15 +557,13 @@ pub mod tenant_conf_defaults {
|
||||
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
|
||||
// with this config, we can get a maximum peak compaction usage of 9 GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
|
||||
// Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
|
||||
// read amp.
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
|
||||
pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
|
||||
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
|
||||
crate::models::CompactionAlgorithm::Legacy;
|
||||
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
|
||||
@@ -589,8 +574,9 @@ pub mod tenant_conf_defaults {
|
||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// layer creation will end immediately. Set to 0 to disable.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
|
||||
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
|
||||
// want to enable this feature.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||
|
||||
@@ -16,30 +16,6 @@ fn test_node_metadata_v1_backward_compatibilty() {
|
||||
postgres_port: 23,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: None,
|
||||
other: HashMap::new(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_metadata_v2_backward_compatibilty() {
|
||||
let v2 = serde_json::to_vec(&serde_json::json!({
|
||||
"host": "localhost",
|
||||
"port": 23,
|
||||
"http_host": "localhost",
|
||||
"http_port": 42,
|
||||
"https_port": 123,
|
||||
}));
|
||||
|
||||
assert_eq!(
|
||||
serde_json::from_slice::<NodeMetadata>(&v2.unwrap()).unwrap(),
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: Some(123),
|
||||
other: HashMap::new(),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -182,66 +182,20 @@ pub struct TenantDescribeResponseShard {
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantShardMigrateRequest {
|
||||
pub node_id: NodeId,
|
||||
|
||||
/// Optionally, callers may specify the node they are migrating _from_, and the server will
|
||||
/// reject the request if the shard is no longer attached there: this enables writing safer
|
||||
/// clients that don't risk fighting with some other movement of the shard.
|
||||
#[serde(default)]
|
||||
pub origin_node_id: Option<NodeId>,
|
||||
|
||||
#[serde(default)]
|
||||
pub migration_config: MigrationConfig,
|
||||
pub migration_config: Option<MigrationConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct MigrationConfig {
|
||||
/// If true, the migration will be executed even if it is to a location with a sub-optimal scheduling
|
||||
/// score: this is usually not what you want, and if you use this then you'll also need to set the
|
||||
/// tenant's scheduling policy to Essential or Pause to avoid the optimiser reverting your migration.
|
||||
///
|
||||
/// Default: false
|
||||
#[serde(default)]
|
||||
pub override_scheduler: bool,
|
||||
|
||||
/// If true, the migration will be done gracefully by creating a secondary location first and
|
||||
/// waiting for it to warm up before cutting over. If false, if there is no existing secondary
|
||||
/// location at the destination, the tenant will be migrated immediately. If the tenant's data
|
||||
/// can't be downloaded within [`Self::secondary_warmup_timeout`], then the migration will go
|
||||
/// ahead but run with a cold cache that can severely reduce performance until it warms up.
|
||||
///
|
||||
/// When doing a graceful migration, the migration API returns as soon as it is started.
|
||||
///
|
||||
/// Default: true
|
||||
#[serde(default = "default_prewarm")]
|
||||
pub prewarm: bool,
|
||||
|
||||
/// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait
|
||||
/// overall for secondary warmup before cutting over
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub secondary_warmup_timeout: Option<Duration>,
|
||||
/// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait
|
||||
/// within each secondary download poll call to pageserver.
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub secondary_download_request_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
fn default_prewarm() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
impl Default for MigrationConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
override_scheduler: false,
|
||||
prewarm: default_prewarm(),
|
||||
secondary_warmup_timeout: None,
|
||||
secondary_download_request_timeout: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
#[serde(into = "NodeAvailabilityWrapper")]
|
||||
pub enum NodeAvailability {
|
||||
@@ -489,7 +443,6 @@ pub struct SafekeeperDescribeResponse {
|
||||
pub host: String,
|
||||
pub port: i32,
|
||||
pub http_port: i32,
|
||||
pub https_port: Option<i32>,
|
||||
pub availability_zone_id: String,
|
||||
pub scheduling_policy: SkSchedulingPolicy,
|
||||
}
|
||||
@@ -534,43 +487,4 @@ mod test {
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
/// Check that a minimal migrate request with no config results in the expected default settings
|
||||
#[test]
|
||||
fn test_migrate_request_decode_defaults() {
|
||||
let json = r#"{
|
||||
"node_id": 123
|
||||
}"#;
|
||||
|
||||
let request: TenantShardMigrateRequest = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(request.node_id, NodeId(123));
|
||||
assert_eq!(request.origin_node_id, None);
|
||||
assert!(!request.migration_config.override_scheduler);
|
||||
assert!(request.migration_config.prewarm);
|
||||
assert_eq!(request.migration_config.secondary_warmup_timeout, None);
|
||||
assert_eq!(
|
||||
request.migration_config.secondary_download_request_timeout,
|
||||
None
|
||||
);
|
||||
}
|
||||
|
||||
/// Check that a partially specified migration config results in the expected default settings
|
||||
#[test]
|
||||
fn test_migration_config_decode_defaults() {
|
||||
// Specify just one field of the config
|
||||
let json = r#"{
|
||||
}"#;
|
||||
|
||||
let config: MigrationConfig = serde_json::from_str(json).unwrap();
|
||||
|
||||
// Check each field's expected default value
|
||||
assert!(!config.override_scheduler);
|
||||
assert!(config.prewarm);
|
||||
assert_eq!(config.secondary_warmup_timeout, None);
|
||||
assert_eq!(config.secondary_download_request_timeout, None);
|
||||
assert_eq!(config.secondary_warmup_timeout, None);
|
||||
|
||||
// Consistency check that the Default impl agrees with our serde defaults
|
||||
assert_eq!(MigrationConfig::default(), config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,39 +176,6 @@ impl LsnLease {
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls the detach ancestor behavior.
|
||||
/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
|
||||
/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub enum DetachBehavior {
|
||||
#[default]
|
||||
NoAncestorAndReparent,
|
||||
MultiLevelAndNoReparent,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for DetachBehavior {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
"v1" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
_ => Err("cannot parse detach behavior"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for DetachBehavior {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"),
|
||||
DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
|
||||
///
|
||||
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
|
||||
@@ -307,31 +274,6 @@ pub struct TimelineCreateRequest {
|
||||
pub mode: TimelineCreateRequestMode,
|
||||
}
|
||||
|
||||
/// Storage controller specific extensions to [`TimelineInfo`].
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateResponseStorcon {
|
||||
#[serde(flatten)]
|
||||
pub timeline_info: TimelineInfo,
|
||||
|
||||
pub safekeepers: Option<SafekeepersInfo>,
|
||||
}
|
||||
|
||||
/// Safekeepers as returned in timeline creation request to storcon or pushed to
|
||||
/// cplane in the post migration hook.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeepersInfo {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub generation: u32,
|
||||
pub safekeepers: Vec<SafekeeperInfo>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeeperInfo {
|
||||
pub id: NodeId,
|
||||
pub hostname: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum TimelineCreateRequestMode {
|
||||
@@ -1204,15 +1146,6 @@ pub struct TimelineArchivalConfigRequest {
|
||||
pub state: TimelineArchivalState,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub struct TimelinePatchIndexPartRequest {
|
||||
pub rel_size_migration: Option<RelSizeMigration>,
|
||||
pub gc_compaction_last_completed_lsn: Option<Lsn>,
|
||||
pub applied_gc_cutoff_lsn: Option<Lsn>,
|
||||
#[serde(default)]
|
||||
pub force_index_update: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelinesInfoAndOffloaded {
|
||||
pub timelines: Vec<TimelineInfo>,
|
||||
@@ -1258,10 +1191,9 @@ pub struct TimelineInfo {
|
||||
pub last_record_lsn: Lsn,
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
|
||||
/// Legacy field, retained for one version to enable old storage controller to
|
||||
/// decode (it was a mandatory field).
|
||||
#[serde(default, rename = "latest_gc_cutoff_lsn")]
|
||||
pub _unused: Lsn,
|
||||
/// Legacy field for compat with control plane. Synonym of `min_readable_lsn`.
|
||||
/// TODO: remove once control plane no longer reads it.
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
|
||||
/// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
|
||||
/// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
|
||||
@@ -1510,14 +1442,8 @@ pub struct TenantScanRemoteStorageResponse {
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TenantSorting {
|
||||
/// Total size of layers on local disk for all timelines in a shard.
|
||||
ResidentSize,
|
||||
/// The logical size of the largest timeline within a _tenant_ (not shard). Only tracked on
|
||||
/// shard 0, contains the sum across all shards.
|
||||
MaxLogicalSize,
|
||||
/// The logical size of the largest timeline within a _tenant_ (not shard), divided by number of
|
||||
/// shards. Only tracked on shard 0, and estimates the per-shard logical size.
|
||||
MaxLogicalSizePerShard,
|
||||
}
|
||||
|
||||
impl Default for TenantSorting {
|
||||
@@ -1547,20 +1473,14 @@ pub struct TopTenantShardsRequest {
|
||||
pub struct TopTenantShardItem {
|
||||
pub id: TenantShardId,
|
||||
|
||||
/// Total size of layers on local disk for all timelines in this shard.
|
||||
/// Total size of layers on local disk for all timelines in this tenant
|
||||
pub resident_size: u64,
|
||||
|
||||
/// Total size of layers in remote storage for all timelines in this shard.
|
||||
/// Total size of layers in remote storage for all timelines in this tenant
|
||||
pub physical_size: u64,
|
||||
|
||||
/// The largest logical size of a timeline within this _tenant_ (not shard). This is only
|
||||
/// tracked on shard 0, and contains the sum of the logical size across all shards.
|
||||
/// The largest logical size of a timeline within this tenant
|
||||
pub max_logical_size: u64,
|
||||
|
||||
/// The largest logical size of a timeline within this _tenant_ (not shard) divided by number of
|
||||
/// shards. This is only tracked on shard 0, and is only an estimate as we divide it evenly by
|
||||
/// shard count, rounded up.
|
||||
pub max_logical_size_per_shard: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Default)]
|
||||
|
||||
@@ -112,16 +112,6 @@ impl ShardIdentity {
|
||||
}
|
||||
}
|
||||
|
||||
/// An unsharded identity with the given stripe size (if non-zero). This is typically used to
|
||||
/// carry over a stripe size for an unsharded tenant from persistent storage.
|
||||
pub fn unsharded_with_stripe_size(stripe_size: ShardStripeSize) -> Self {
|
||||
let mut shard_identity = Self::unsharded();
|
||||
if stripe_size.0 > 0 {
|
||||
shard_identity.stripe_size = stripe_size;
|
||||
}
|
||||
shard_identity
|
||||
}
|
||||
|
||||
/// A broken instance of this type is only used for `TenantState::Broken` tenants,
|
||||
/// which are constructed in code paths that don't have access to proper configuration.
|
||||
///
|
||||
|
||||
@@ -396,14 +396,6 @@ pub mod waldecoder {
|
||||
self.lsn + self.inputbuf.remaining() as u64
|
||||
}
|
||||
|
||||
/// Returns the LSN up to which the WAL decoder has processed.
|
||||
///
|
||||
/// If [`Self::poll_decode`] returned a record, then this will return
|
||||
/// the end LSN of said record.
|
||||
pub fn lsn(&self) -> Lsn {
|
||||
self.lsn
|
||||
}
|
||||
|
||||
pub fn feed_bytes(&mut self, buf: &[u8]) {
|
||||
self.inputbuf.extend_from_slice(buf);
|
||||
}
|
||||
|
||||
@@ -135,8 +135,8 @@ impl Type {
|
||||
pub enum Kind {
|
||||
/// A simple type like `VARCHAR` or `INTEGER`.
|
||||
Simple,
|
||||
/// An enumerated type.
|
||||
Enum,
|
||||
/// An enumerated type along with its variants.
|
||||
Enum(Vec<String>),
|
||||
/// A pseudo-type.
|
||||
Pseudo,
|
||||
/// An array type along with the type of its elements.
|
||||
@@ -146,9 +146,9 @@ pub enum Kind {
|
||||
/// A multirange type along with the type of its elements.
|
||||
Multirange(Type),
|
||||
/// A domain type along with its underlying type.
|
||||
Domain(Oid),
|
||||
/// A composite type.
|
||||
Composite(Oid),
|
||||
Domain(Type),
|
||||
/// A composite type along with information about its fields.
|
||||
Composite(Vec<Field>),
|
||||
}
|
||||
|
||||
/// Information about a field of a composite type.
|
||||
|
||||
@@ -19,10 +19,10 @@ use crate::config::{Host, SslMode};
|
||||
use crate::connection::{Request, RequestMessages};
|
||||
use crate::query::RowStream;
|
||||
use crate::simple_query::SimpleQueryStream;
|
||||
use crate::types::{Oid, Type};
|
||||
use crate::types::{Oid, ToSql, Type};
|
||||
use crate::{
|
||||
CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Statement, Transaction,
|
||||
TransactionBuilder, query, simple_query,
|
||||
CancelToken, Error, ReadyForQueryStatus, Row, SimpleQueryMessage, Statement, Transaction,
|
||||
TransactionBuilder, query, simple_query, slice_iter,
|
||||
};
|
||||
|
||||
pub struct Responses {
|
||||
@@ -54,18 +54,26 @@ impl Responses {
|
||||
/// A cache of type info and prepared statements for fetching type info
|
||||
/// (corresponding to the queries in the [crate::prepare] module).
|
||||
#[derive(Default)]
|
||||
pub(crate) struct CachedTypeInfo {
|
||||
struct CachedTypeInfo {
|
||||
/// A statement for basic information for a type from its
|
||||
/// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
|
||||
/// fallback).
|
||||
pub(crate) typeinfo: Option<Statement>,
|
||||
typeinfo: Option<Statement>,
|
||||
/// A statement for getting information for a composite type from its OID.
|
||||
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY).
|
||||
typeinfo_composite: Option<Statement>,
|
||||
/// A statement for getting information for a composite type from its OID.
|
||||
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY) (or
|
||||
/// its fallback).
|
||||
typeinfo_enum: Option<Statement>,
|
||||
|
||||
/// Cache of types already looked up.
|
||||
pub(crate) types: HashMap<Oid, Type>,
|
||||
types: HashMap<Oid, Type>,
|
||||
}
|
||||
|
||||
pub struct InnerClient {
|
||||
sender: mpsc::UnboundedSender<Request>,
|
||||
cached_typeinfo: Mutex<CachedTypeInfo>,
|
||||
|
||||
/// A buffer to use when writing out postgres commands.
|
||||
buffer: Mutex<BytesMut>,
|
||||
@@ -83,6 +91,38 @@ impl InnerClient {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn typeinfo(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn typeinfo_composite(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo_composite.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo_composite(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo_composite = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn typeinfo_enum(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo_enum.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo_enum(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo_enum = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn type_(&self, oid: Oid) -> Option<Type> {
|
||||
self.cached_typeinfo.lock().types.get(&oid).cloned()
|
||||
}
|
||||
|
||||
pub fn set_type(&self, oid: Oid, type_: &Type) {
|
||||
self.cached_typeinfo.lock().types.insert(oid, type_.clone());
|
||||
}
|
||||
|
||||
/// Call the given function with a buffer to be used when writing out
|
||||
/// postgres commands.
|
||||
pub fn with_buf<F, R>(&self, f: F) -> R
|
||||
@@ -102,6 +142,7 @@ pub struct SocketConfig {
|
||||
pub host: Host,
|
||||
pub port: u16,
|
||||
pub connect_timeout: Option<Duration>,
|
||||
// pub keepalive: Option<KeepaliveConfig>,
|
||||
}
|
||||
|
||||
/// An asynchronous PostgreSQL client.
|
||||
@@ -110,7 +151,6 @@ pub struct SocketConfig {
|
||||
/// through this client object.
|
||||
pub struct Client {
|
||||
inner: Arc<InnerClient>,
|
||||
cached_typeinfo: CachedTypeInfo,
|
||||
|
||||
socket_config: SocketConfig,
|
||||
ssl_mode: SslMode,
|
||||
@@ -129,9 +169,9 @@ impl Client {
|
||||
Client {
|
||||
inner: Arc::new(InnerClient {
|
||||
sender,
|
||||
cached_typeinfo: Default::default(),
|
||||
buffer: Default::default(),
|
||||
}),
|
||||
cached_typeinfo: Default::default(),
|
||||
|
||||
socket_config,
|
||||
ssl_mode,
|
||||
@@ -149,6 +189,55 @@ impl Client {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
/// Executes a statement, returning a vector of the resulting rows.
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
pub async fn query(
|
||||
&self,
|
||||
statement: Statement,
|
||||
params: &[&(dyn ToSql + Sync)],
|
||||
) -> Result<Vec<Row>, Error> {
|
||||
self.query_raw(statement, slice_iter(params))
|
||||
.await?
|
||||
.try_collect()
|
||||
.await
|
||||
}
|
||||
|
||||
/// The maximally flexible version of [`query`].
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
///
|
||||
/// [`query`]: #method.query
|
||||
pub async fn query_raw<'a, I>(
|
||||
&self,
|
||||
statement: Statement,
|
||||
params: I,
|
||||
) -> Result<RowStream, Error>
|
||||
where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
query::query(&self.inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Pass text directly to the Postgres backend to allow it to sort out typing itself and
|
||||
/// to save a roundtrip
|
||||
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
@@ -195,14 +284,6 @@ impl Client {
|
||||
simple_query::batch_execute(self.inner(), query).await
|
||||
}
|
||||
|
||||
pub async fn discard_all(&mut self) -> Result<ReadyForQueryStatus, Error> {
|
||||
// clear the prepared statements that are about to be nuked from the postgres session
|
||||
|
||||
self.cached_typeinfo.typeinfo = None;
|
||||
|
||||
self.batch_execute("discard all").await
|
||||
}
|
||||
|
||||
/// Begins a new database transaction.
|
||||
///
|
||||
/// The transaction will roll back by default - use the `commit` method to commit it.
|
||||
@@ -266,8 +347,8 @@ impl Client {
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
pub(crate) async fn get_type_inner(&mut self, oid: Oid) -> Result<Type, Error> {
|
||||
crate::prepare::get_type(&self.inner, &mut self.cached_typeinfo, oid).await
|
||||
pub async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
crate::prepare::get_type(&self.inner, oid).await
|
||||
}
|
||||
|
||||
/// Determines if the connection to the server has already closed.
|
||||
|
||||
@@ -22,7 +22,7 @@ pub trait GenericClient: private::Sealed {
|
||||
I::IntoIter: ExactSizeIterator + Sync + Send;
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error>;
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error>;
|
||||
}
|
||||
|
||||
impl private::Sealed for Client {}
|
||||
@@ -38,8 +38,8 @@ impl GenericClient for Client {
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
|
||||
self.get_type_inner(oid).await
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
crate::prepare::get_type(self.inner(), oid).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ impl GenericClient for Transaction<'_> {
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
|
||||
self.client_mut().get_type(oid).await
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
self.client().get_type(oid).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,10 @@ use log::debug;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
|
||||
use crate::client::{CachedTypeInfo, InnerClient};
|
||||
use crate::client::InnerClient;
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::types::{Kind, Oid, Type};
|
||||
use crate::types::{Field, Kind, Oid, Type};
|
||||
use crate::{Column, Error, Statement, query, slice_iter};
|
||||
|
||||
pub(crate) const TYPEINFO_QUERY: &str = "\
|
||||
@@ -23,7 +23,23 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
|
||||
WHERE t.oid = $1
|
||||
";
|
||||
|
||||
async fn prepare_typecheck(
|
||||
const TYPEINFO_ENUM_QUERY: &str = "\
|
||||
SELECT enumlabel
|
||||
FROM pg_catalog.pg_enum
|
||||
WHERE enumtypid = $1
|
||||
ORDER BY enumsortorder
|
||||
";
|
||||
|
||||
pub(crate) const TYPEINFO_COMPOSITE_QUERY: &str = "\
|
||||
SELECT attname, atttypid
|
||||
FROM pg_catalog.pg_attribute
|
||||
WHERE attrelid = $1
|
||||
AND NOT attisdropped
|
||||
AND attnum > 0
|
||||
ORDER BY attnum
|
||||
";
|
||||
|
||||
pub async fn prepare(
|
||||
client: &Arc<InnerClient>,
|
||||
name: &'static str,
|
||||
query: &str,
|
||||
@@ -51,7 +67,7 @@ async fn prepare_typecheck(
|
||||
let mut parameters = vec![];
|
||||
let mut it = parameter_description.parameters();
|
||||
while let Some(oid) = it.next().map_err(Error::parse)? {
|
||||
let type_ = Type::from_oid(oid).ok_or_else(Error::unexpected_message)?;
|
||||
let type_ = get_type(client, oid).await?;
|
||||
parameters.push(type_);
|
||||
}
|
||||
|
||||
@@ -59,7 +75,7 @@ async fn prepare_typecheck(
|
||||
if let Some(row_description) = row_description {
|
||||
let mut it = row_description.fields();
|
||||
while let Some(field) = it.next().map_err(Error::parse)? {
|
||||
let type_ = Type::from_oid(field.type_oid()).ok_or_else(Error::unexpected_message)?;
|
||||
let type_ = get_type(client, field.type_oid()).await?;
|
||||
let column = Column::new(field.name().to_string(), type_, field);
|
||||
columns.push(column);
|
||||
}
|
||||
@@ -68,6 +84,15 @@ async fn prepare_typecheck(
|
||||
Ok(Statement::new(client, name, parameters, columns))
|
||||
}
|
||||
|
||||
fn prepare_rec<'a>(
|
||||
client: &'a Arc<InnerClient>,
|
||||
name: &'static str,
|
||||
query: &'a str,
|
||||
types: &'a [Type],
|
||||
) -> Pin<Box<dyn Future<Output = Result<Statement, Error>> + 'a + Send>> {
|
||||
Box::pin(prepare(client, name, query, types))
|
||||
}
|
||||
|
||||
fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
|
||||
if types.is_empty() {
|
||||
debug!("preparing query {}: {}", name, query);
|
||||
@@ -83,20 +108,16 @@ fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Resu
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_type(
|
||||
client: &Arc<InnerClient>,
|
||||
typecache: &mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Result<Type, Error> {
|
||||
pub async fn get_type(client: &Arc<InnerClient>, oid: Oid) -> Result<Type, Error> {
|
||||
if let Some(type_) = Type::from_oid(oid) {
|
||||
return Ok(type_);
|
||||
}
|
||||
|
||||
if let Some(type_) = typecache.types.get(&oid) {
|
||||
return Ok(type_.clone());
|
||||
};
|
||||
if let Some(type_) = client.type_(oid) {
|
||||
return Ok(type_);
|
||||
}
|
||||
|
||||
let stmt = typeinfo_statement(client, typecache).await?;
|
||||
let stmt = typeinfo_statement(client).await?;
|
||||
|
||||
let rows = query::query(client, stmt, slice_iter(&[&oid])).await?;
|
||||
pin_mut!(rows);
|
||||
@@ -115,48 +136,100 @@ pub async fn get_type(
|
||||
let relid: Oid = row.try_get(6)?;
|
||||
|
||||
let kind = if type_ == b'e' as i8 {
|
||||
Kind::Enum
|
||||
let variants = get_enum_variants(client, oid).await?;
|
||||
Kind::Enum(variants)
|
||||
} else if type_ == b'p' as i8 {
|
||||
Kind::Pseudo
|
||||
} else if basetype != 0 {
|
||||
Kind::Domain(basetype)
|
||||
let type_ = get_type_rec(client, basetype).await?;
|
||||
Kind::Domain(type_)
|
||||
} else if elem_oid != 0 {
|
||||
let type_ = get_type_rec(client, typecache, elem_oid).await?;
|
||||
let type_ = get_type_rec(client, elem_oid).await?;
|
||||
Kind::Array(type_)
|
||||
} else if relid != 0 {
|
||||
Kind::Composite(relid)
|
||||
let fields = get_composite_fields(client, relid).await?;
|
||||
Kind::Composite(fields)
|
||||
} else if let Some(rngsubtype) = rngsubtype {
|
||||
let type_ = get_type_rec(client, typecache, rngsubtype).await?;
|
||||
let type_ = get_type_rec(client, rngsubtype).await?;
|
||||
Kind::Range(type_)
|
||||
} else {
|
||||
Kind::Simple
|
||||
};
|
||||
|
||||
let type_ = Type::new(name, oid, kind, schema);
|
||||
typecache.types.insert(oid, type_.clone());
|
||||
client.set_type(oid, &type_);
|
||||
|
||||
Ok(type_)
|
||||
}
|
||||
|
||||
fn get_type_rec<'a>(
|
||||
client: &'a Arc<InnerClient>,
|
||||
typecache: &'a mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Pin<Box<dyn Future<Output = Result<Type, Error>> + Send + 'a>> {
|
||||
Box::pin(get_type(client, typecache, oid))
|
||||
Box::pin(get_type(client, oid))
|
||||
}
|
||||
|
||||
async fn typeinfo_statement(
|
||||
client: &Arc<InnerClient>,
|
||||
typecache: &mut CachedTypeInfo,
|
||||
) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = &typecache.typeinfo {
|
||||
return Ok(stmt.clone());
|
||||
async fn typeinfo_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo() {
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let typeinfo = "neon_proxy_typeinfo";
|
||||
let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
|
||||
|
||||
typecache.typeinfo = Some(stmt.clone());
|
||||
client.set_typeinfo(&stmt);
|
||||
Ok(stmt)
|
||||
}
|
||||
|
||||
async fn get_enum_variants(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<String>, Error> {
|
||||
let stmt = typeinfo_enum_statement(client).await?;
|
||||
|
||||
query::query(client, stmt, slice_iter(&[&oid]))
|
||||
.await?
|
||||
.and_then(|row| async move { row.try_get(0) })
|
||||
.try_collect()
|
||||
.await
|
||||
}
|
||||
|
||||
async fn typeinfo_enum_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo_enum() {
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let typeinfo = "neon_proxy_typeinfo_enum";
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_ENUM_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo_enum(&stmt);
|
||||
Ok(stmt)
|
||||
}
|
||||
|
||||
async fn get_composite_fields(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<Field>, Error> {
|
||||
let stmt = typeinfo_composite_statement(client).await?;
|
||||
|
||||
let rows = query::query(client, stmt, slice_iter(&[&oid]))
|
||||
.await?
|
||||
.try_collect::<Vec<_>>()
|
||||
.await?;
|
||||
|
||||
let mut fields = vec![];
|
||||
for row in rows {
|
||||
let name = row.try_get(0)?;
|
||||
let oid = row.try_get(1)?;
|
||||
let type_ = get_type_rec(client, oid).await?;
|
||||
fields.push(Field::new(name, type_));
|
||||
}
|
||||
|
||||
Ok(fields)
|
||||
}
|
||||
|
||||
async fn typeinfo_composite_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo_composite() {
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let typeinfo = "neon_proxy_typeinfo_composite";
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo_composite(&stmt);
|
||||
Ok(stmt)
|
||||
}
|
||||
|
||||
@@ -72,9 +72,4 @@ impl<'a> Transaction<'a> {
|
||||
pub fn client(&self) -> &Client {
|
||||
self.client
|
||||
}
|
||||
|
||||
/// Returns a reference to the underlying `Client`.
|
||||
pub fn client_mut(&mut self) -> &mut Client {
|
||||
self.client
|
||||
}
|
||||
}
|
||||
|
||||
@@ -131,14 +131,6 @@ impl Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(members: MemberSet) -> Self {
|
||||
Configuration {
|
||||
generation: INITIAL_GENERATION,
|
||||
members,
|
||||
new_members: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is `sk_id` member of the configuration?
|
||||
pub fn contains(&self, sk_id: NodeId) -> bool {
|
||||
self.members.contains(sk_id) || self.new_members.as_ref().is_some_and(|m| m.contains(sk_id))
|
||||
|
||||
@@ -18,7 +18,7 @@ pub struct SafekeeperStatus {
|
||||
pub id: NodeId,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
@@ -221,11 +221,6 @@ pub struct TimelineMembershipSwitchResponse {
|
||||
pub current_conf: Configuration,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct TimelineDeleteResult {
|
||||
pub dir_existed: bool,
|
||||
}
|
||||
|
||||
fn lsn_invalid() -> Lsn {
|
||||
Lsn::INVALID
|
||||
}
|
||||
@@ -288,7 +283,7 @@ pub struct SafekeeperUtilization {
|
||||
}
|
||||
|
||||
/// pull_timeline request body.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct PullTimelineRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
//! .with_writer(std::io::stderr);
|
||||
//!
|
||||
//! // Initialize OpenTelemetry. Exports tracing spans as OpenTelemetry traces
|
||||
//! let otlp_layer = tracing_utils::init_tracing("my_application", tracing_utils::ExportConfig::default()).await;
|
||||
//! let otlp_layer = tracing_utils::init_tracing("my_application").await;
|
||||
//!
|
||||
//! // Put it all together
|
||||
//! tracing_subscriber::registry()
|
||||
@@ -38,12 +38,8 @@ pub mod http;
|
||||
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::TracerProvider;
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
pub use opentelemetry_otlp::{ExportConfig, Protocol};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing::{Dispatch, Subscriber};
|
||||
use tracing::Subscriber;
|
||||
use tracing_subscriber::Layer;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::registry::LookupSpan;
|
||||
|
||||
/// Set up OpenTelemetry exporter, using configuration from environment variables.
|
||||
@@ -73,28 +69,19 @@ use tracing_subscriber::registry::LookupSpan;
|
||||
///
|
||||
/// This doesn't block, but is marked as 'async' to hint that this must be called in
|
||||
/// asynchronous execution context.
|
||||
pub async fn init_tracing<S>(
|
||||
service_name: &str,
|
||||
export_config: ExportConfig,
|
||||
) -> Option<impl Layer<S>>
|
||||
pub async fn init_tracing<S>(service_name: &str) -> Option<impl Layer<S>>
|
||||
where
|
||||
S: Subscriber + for<'span> LookupSpan<'span>,
|
||||
{
|
||||
if std::env::var("OTEL_SDK_DISABLED") == Ok("true".to_string()) {
|
||||
return None;
|
||||
};
|
||||
Some(init_tracing_internal(
|
||||
service_name.to_string(),
|
||||
export_config,
|
||||
))
|
||||
Some(init_tracing_internal(service_name.to_string()))
|
||||
}
|
||||
|
||||
/// Like `init_tracing`, but creates a separate tokio Runtime for the tracing
|
||||
/// tasks.
|
||||
pub fn init_tracing_without_runtime<S>(
|
||||
service_name: &str,
|
||||
export_config: ExportConfig,
|
||||
) -> Option<impl Layer<S>>
|
||||
pub fn init_tracing_without_runtime<S>(service_name: &str) -> Option<impl Layer<S>>
|
||||
where
|
||||
S: Subscriber + for<'span> LookupSpan<'span>,
|
||||
{
|
||||
@@ -125,22 +112,16 @@ where
|
||||
));
|
||||
let _guard = runtime.enter();
|
||||
|
||||
Some(init_tracing_internal(
|
||||
service_name.to_string(),
|
||||
export_config,
|
||||
))
|
||||
Some(init_tracing_internal(service_name.to_string()))
|
||||
}
|
||||
|
||||
fn init_tracing_internal<S>(service_name: String, export_config: ExportConfig) -> impl Layer<S>
|
||||
fn init_tracing_internal<S>(service_name: String) -> impl Layer<S>
|
||||
where
|
||||
S: Subscriber + for<'span> LookupSpan<'span>,
|
||||
{
|
||||
// Sets up exporter from the provided [`ExportConfig`] parameter.
|
||||
// If the endpoint is not specified, it is loaded from the
|
||||
// OTEL_EXPORTER_OTLP_ENDPOINT environment variable.
|
||||
// Sets up exporter from the OTEL_EXPORTER_* environment variables.
|
||||
let exporter = opentelemetry_otlp::SpanExporter::builder()
|
||||
.with_http()
|
||||
.with_export_config(export_config)
|
||||
.build()
|
||||
.expect("could not initialize opentelemetry exporter");
|
||||
|
||||
@@ -170,51 +151,3 @@ where
|
||||
pub fn shutdown_tracing() {
|
||||
opentelemetry::global::shutdown_tracer_provider();
|
||||
}
|
||||
|
||||
pub enum OtelEnablement {
|
||||
Disabled,
|
||||
Enabled {
|
||||
service_name: String,
|
||||
export_config: ExportConfig,
|
||||
runtime: &'static tokio::runtime::Runtime,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct OtelGuard {
|
||||
pub dispatch: Dispatch,
|
||||
}
|
||||
|
||||
impl Drop for OtelGuard {
|
||||
fn drop(&mut self) {
|
||||
shutdown_tracing();
|
||||
}
|
||||
}
|
||||
|
||||
/// Initializes OTEL infrastructure for performance tracing according to the provided configuration
|
||||
///
|
||||
/// Performance tracing is handled by a different [`tracing::Subscriber`]. This functions returns
|
||||
/// an [`OtelGuard`] containing a [`tracing::Dispatch`] associated with a newly created subscriber.
|
||||
/// Applications should use this dispatch for their performance traces.
|
||||
///
|
||||
/// The lifetime of the guard should match taht of the application. On drop, it tears down the
|
||||
/// OTEL infra.
|
||||
pub fn init_performance_tracing(otel_enablement: OtelEnablement) -> Option<OtelGuard> {
|
||||
let otel_subscriber = match otel_enablement {
|
||||
OtelEnablement::Disabled => None,
|
||||
OtelEnablement::Enabled {
|
||||
service_name,
|
||||
export_config,
|
||||
runtime,
|
||||
} => {
|
||||
let otel_layer = runtime
|
||||
.block_on(init_tracing(&service_name, export_config))
|
||||
.with_filter(LevelFilter::INFO);
|
||||
let otel_subscriber = tracing_subscriber::registry().with(otel_layer);
|
||||
let otel_dispatch = Dispatch::new(otel_subscriber);
|
||||
|
||||
Some(otel_dispatch)
|
||||
}
|
||||
};
|
||||
|
||||
otel_subscriber.map(|dispatch| OtelGuard { dispatch })
|
||||
}
|
||||
|
||||
@@ -42,7 +42,6 @@ toml_edit = { workspace = true, features = ["serde"] }
|
||||
tracing.workspace = true
|
||||
tracing-error.workspace = true
|
||||
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
|
||||
tracing-utils.workspace = true
|
||||
rand.workspace = true
|
||||
scopeguard.workspace = true
|
||||
strum.workspace = true
|
||||
|
||||
@@ -49,13 +49,7 @@ pub fn bench_log_slow(c: &mut Criterion) {
|
||||
// performance too. Use a simple noop future that yields once, to avoid any scheduler fast
|
||||
// paths for a ready future.
|
||||
if enabled {
|
||||
b.iter(|| {
|
||||
runtime.block_on(log_slow(
|
||||
"ready",
|
||||
THRESHOLD,
|
||||
std::pin::pin!(tokio::task::yield_now()),
|
||||
))
|
||||
});
|
||||
b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now())));
|
||||
} else {
|
||||
b.iter(|| runtime.block_on(tokio::task::yield_now()));
|
||||
}
|
||||
|
||||
@@ -165,7 +165,6 @@ pub fn init(
|
||||
};
|
||||
log_layer.with_filter(rust_log_env_filter())
|
||||
});
|
||||
|
||||
let r = r.with(
|
||||
TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
|
||||
);
|
||||
@@ -331,90 +330,37 @@ impl std::fmt::Debug for SecretString {
|
||||
///
|
||||
/// TODO: consider upgrading this to a warning, but currently it fires too often.
|
||||
#[inline]
|
||||
pub async fn log_slow<F, O>(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O
|
||||
where
|
||||
F: Future<Output = O>,
|
||||
{
|
||||
monitor_slow_future(
|
||||
threshold,
|
||||
threshold, // period = threshold
|
||||
f,
|
||||
|MonitorSlowFutureCallback {
|
||||
ready,
|
||||
is_slow,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback: _,
|
||||
}| {
|
||||
if !is_slow {
|
||||
return;
|
||||
}
|
||||
if ready {
|
||||
info!(
|
||||
"slow {name} completed after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"slow {name} still running after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
pub async fn log_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
|
||||
// TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
|
||||
// won't fit on the stack.
|
||||
let mut f = Box::pin(f);
|
||||
|
||||
/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every
|
||||
/// `period` afterwards, and also unconditionally when the future completes.
|
||||
#[inline]
|
||||
pub async fn monitor_slow_future<F, O>(
|
||||
threshold: Duration,
|
||||
period: Duration,
|
||||
mut fut: std::pin::Pin<&mut F>,
|
||||
mut cb: impl FnMut(MonitorSlowFutureCallback),
|
||||
) -> O
|
||||
where
|
||||
F: Future<Output = O>,
|
||||
{
|
||||
let started = Instant::now();
|
||||
let mut attempt = 1;
|
||||
let mut last_cb = started;
|
||||
|
||||
loop {
|
||||
// NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
|
||||
// case where the timeout doesn't fire.
|
||||
let deadline = started + threshold + (attempt - 1) * period;
|
||||
// TODO: still call the callback if the future panics? Copy how we do it for the page_service flush_in_progress counter.
|
||||
let res = tokio::time::timeout_at(deadline, &mut fut).await;
|
||||
let now = Instant::now();
|
||||
let elapsed_total = now - started;
|
||||
cb(MonitorSlowFutureCallback {
|
||||
ready: res.is_ok(),
|
||||
is_slow: elapsed_total >= threshold,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback: now - last_cb,
|
||||
});
|
||||
last_cb = now;
|
||||
if let Ok(output) = res {
|
||||
let deadline = started + attempt * threshold;
|
||||
if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
|
||||
// NB: we check if we exceeded the threshold even if the timeout never fired, because
|
||||
// scheduling or execution delays may cause the future to succeed even if it exceeds the
|
||||
// timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
|
||||
// false negatives.
|
||||
let elapsed = started.elapsed();
|
||||
if elapsed >= threshold {
|
||||
info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
let elapsed = started.elapsed().as_secs_f64();
|
||||
info!("slow {name} still running after {elapsed:.3}s",);
|
||||
|
||||
attempt += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// See [`monitor_slow_future`].
|
||||
pub struct MonitorSlowFutureCallback {
|
||||
/// Whether the future completed. If true, there will be no more callbacks.
|
||||
pub ready: bool,
|
||||
/// Whether the future is taking `>=` the specififed threshold duration to complete.
|
||||
/// Monotonic: if true in one callback invocation, true in all subsequent onces.
|
||||
pub is_slow: bool,
|
||||
/// The time elapsed since the [`monitor_slow_future`] was first polled.
|
||||
pub elapsed_total: Duration,
|
||||
/// The time elapsed since the last callback invocation.
|
||||
/// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled.
|
||||
pub elapsed_since_last_callback: Duration,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use metrics::IntCounterVec;
|
||||
|
||||
@@ -48,7 +48,6 @@ pprof.workspace = true
|
||||
rand.workspace = true
|
||||
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
|
||||
regex.workspace = true
|
||||
rustls.workspace = true
|
||||
scopeguard.workspace = true
|
||||
send-future.workspace = true
|
||||
serde.workspace = true
|
||||
@@ -63,12 +62,10 @@ tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util"
|
||||
tokio-epoll-uring.workspace = true
|
||||
tokio-io-timeout.workspace = true
|
||||
tokio-postgres.workspace = true
|
||||
tokio-rustls.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tokio-util.workspace = true
|
||||
toml_edit = { workspace = true, features = [ "serde" ] }
|
||||
tracing.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
url.workspace = true
|
||||
walkdir.workspace = true
|
||||
metrics.workspace = true
|
||||
@@ -101,7 +98,6 @@ criterion.workspace = true
|
||||
hex-literal.workspace = true
|
||||
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
|
||||
indoc.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "bench_layer_map"
|
||||
@@ -119,10 +115,6 @@ harness = false
|
||||
name = "upload_queue"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "bench_metrics"
|
||||
harness = false
|
||||
|
||||
[[bin]]
|
||||
name = "test_helper_slow_client_reads"
|
||||
required-features = [ "testing" ]
|
||||
|
||||
@@ -1,366 +0,0 @@
|
||||
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
//
|
||||
// Demonstrates that repeat label values lookup is a multicore scalability bottleneck
|
||||
// that is worth avoiding.
|
||||
//
|
||||
criterion_group!(
|
||||
label_values,
|
||||
label_values::bench_naive_usage,
|
||||
label_values::bench_cache_label_values_lookup
|
||||
);
|
||||
mod label_values {
|
||||
use super::*;
|
||||
|
||||
pub fn bench_naive_usage(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("label_values__naive_usage");
|
||||
|
||||
for ntimelines in [1, 4, 8] {
|
||||
g.bench_with_input(
|
||||
BenchmarkId::new("ntimelines", ntimelines),
|
||||
&ntimelines,
|
||||
|b, ntimelines| {
|
||||
b.iter_custom(|iters| {
|
||||
let barrier = std::sync::Barrier::new(*ntimelines + 1);
|
||||
|
||||
let timelines = (0..*ntimelines)
|
||||
.map(|_| {
|
||||
(
|
||||
TenantId::generate().to_string(),
|
||||
"0000".to_string(),
|
||||
TimelineId::generate().to_string(),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let metric_vec = metrics::UIntGaugeVec::new(
|
||||
metrics::opts!("testmetric", "testhelp"),
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
for (tenant_id, shard_id, timeline_id) in &timelines {
|
||||
s.spawn(|| {
|
||||
barrier.wait();
|
||||
for _ in 0..iters {
|
||||
metric_vec
|
||||
.with_label_values(&[tenant_id, shard_id, timeline_id])
|
||||
.inc();
|
||||
}
|
||||
barrier.wait();
|
||||
});
|
||||
}
|
||||
barrier.wait();
|
||||
let start = std::time::Instant::now();
|
||||
barrier.wait();
|
||||
start.elapsed()
|
||||
})
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
g.finish();
|
||||
}
|
||||
|
||||
pub fn bench_cache_label_values_lookup(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("label_values__cache_label_values_lookup");
|
||||
|
||||
for ntimelines in [1, 4, 8] {
|
||||
g.bench_with_input(
|
||||
BenchmarkId::new("ntimelines", ntimelines),
|
||||
&ntimelines,
|
||||
|b, ntimelines| {
|
||||
b.iter_custom(|iters| {
|
||||
let barrier = std::sync::Barrier::new(*ntimelines + 1);
|
||||
|
||||
let timelines = (0..*ntimelines)
|
||||
.map(|_| {
|
||||
(
|
||||
TenantId::generate().to_string(),
|
||||
"0000".to_string(),
|
||||
TimelineId::generate().to_string(),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let metric_vec = metrics::UIntGaugeVec::new(
|
||||
metrics::opts!("testmetric", "testhelp"),
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
for (tenant_id, shard_id, timeline_id) in &timelines {
|
||||
s.spawn(|| {
|
||||
let metric = metric_vec.with_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
barrier.wait();
|
||||
for _ in 0..iters {
|
||||
metric.inc();
|
||||
}
|
||||
barrier.wait();
|
||||
});
|
||||
}
|
||||
barrier.wait();
|
||||
let start = std::time::Instant::now();
|
||||
barrier.wait();
|
||||
start.elapsed()
|
||||
})
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
g.finish();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Demonstrates that even a single metric can be a scalability bottleneck
|
||||
// if multiple threads in it concurrently but there's nothing we can do
|
||||
// about it without changing the metrics framework to use e.g. sharded counte atomics.
|
||||
//
|
||||
criterion_group!(
|
||||
single_metric_multicore_scalability,
|
||||
single_metric_multicore_scalability::bench,
|
||||
);
|
||||
mod single_metric_multicore_scalability {
|
||||
use super::*;
|
||||
|
||||
pub fn bench(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("single_metric_multicore_scalability");
|
||||
|
||||
for nthreads in [1, 4, 8] {
|
||||
g.bench_with_input(
|
||||
BenchmarkId::new("nthreads", nthreads),
|
||||
&nthreads,
|
||||
|b, nthreads| {
|
||||
b.iter_custom(|iters| {
|
||||
let barrier = std::sync::Barrier::new(*nthreads + 1);
|
||||
|
||||
let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
for _ in 0..*nthreads {
|
||||
s.spawn(|| {
|
||||
barrier.wait();
|
||||
for _ in 0..iters {
|
||||
metric.inc();
|
||||
}
|
||||
barrier.wait();
|
||||
});
|
||||
}
|
||||
barrier.wait();
|
||||
let start = std::time::Instant::now();
|
||||
barrier.wait();
|
||||
start.elapsed()
|
||||
})
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
g.finish();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Demonstrates that even if we cache label value, the propagation of such a cached metric value
|
||||
// by Clone'ing it is a scalability bottleneck.
|
||||
// The reason is that it's an Arc internally and thus there's contention on the reference count atomics.
|
||||
//
|
||||
// We can avoid that by having long-lived references per thread (= indirection).
|
||||
//
|
||||
criterion_group!(
|
||||
propagation_of_cached_label_value,
|
||||
propagation_of_cached_label_value::bench_naive,
|
||||
propagation_of_cached_label_value::bench_long_lived_reference_per_thread,
|
||||
);
|
||||
mod propagation_of_cached_label_value {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub fn bench_naive(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("propagation_of_cached_label_value__naive");
|
||||
|
||||
for nthreads in [1, 4, 8] {
|
||||
g.bench_with_input(
|
||||
BenchmarkId::new("nthreads", nthreads),
|
||||
&nthreads,
|
||||
|b, nthreads| {
|
||||
b.iter_custom(|iters| {
|
||||
let barrier = std::sync::Barrier::new(*nthreads + 1);
|
||||
|
||||
let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
for _ in 0..*nthreads {
|
||||
s.spawn(|| {
|
||||
barrier.wait();
|
||||
for _ in 0..iters {
|
||||
// propagating the metric means we'd clone it into the child RequestContext
|
||||
let propagated = metric.clone();
|
||||
// simulate some work
|
||||
criterion::black_box(propagated);
|
||||
}
|
||||
barrier.wait();
|
||||
});
|
||||
}
|
||||
barrier.wait();
|
||||
let start = std::time::Instant::now();
|
||||
barrier.wait();
|
||||
start.elapsed()
|
||||
})
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
g.finish();
|
||||
}
|
||||
|
||||
pub fn bench_long_lived_reference_per_thread(c: &mut Criterion) {
|
||||
let mut g =
|
||||
c.benchmark_group("propagation_of_cached_label_value__long_lived_reference_per_thread");
|
||||
|
||||
for nthreads in [1, 4, 8] {
|
||||
g.bench_with_input(
|
||||
BenchmarkId::new("nthreads", nthreads),
|
||||
&nthreads,
|
||||
|b, nthreads| {
|
||||
b.iter_custom(|iters| {
|
||||
let barrier = std::sync::Barrier::new(*nthreads + 1);
|
||||
|
||||
let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
for _ in 0..*nthreads {
|
||||
s.spawn(|| {
|
||||
// This is the technique.
|
||||
let this_threads_metric_reference = Arc::new(metric.clone());
|
||||
|
||||
barrier.wait();
|
||||
for _ in 0..iters {
|
||||
// propagating the metric means we'd clone it into the child RequestContext
|
||||
let propagated = Arc::clone(&this_threads_metric_reference);
|
||||
// simulate some work (include the pointer chase!)
|
||||
criterion::black_box(&*propagated);
|
||||
}
|
||||
barrier.wait();
|
||||
});
|
||||
}
|
||||
barrier.wait();
|
||||
let start = std::time::Instant::now();
|
||||
barrier.wait();
|
||||
start.elapsed()
|
||||
})
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(
|
||||
label_values,
|
||||
single_metric_multicore_scalability,
|
||||
propagation_of_cached_label_value
|
||||
);
|
||||
|
||||
/*
|
||||
RUST_BACKTRACE=full cargo bench --bench bench_metrics -- --discard-baseline --noplot
|
||||
|
||||
Results on an im4gn.2xlarge instance
|
||||
|
||||
label_values__naive_usage/ntimelines/1 time: [178.71 ns 178.74 ns 178.76 ns]
|
||||
label_values__naive_usage/ntimelines/4 time: [532.94 ns 539.59 ns 546.31 ns]
|
||||
label_values__naive_usage/ntimelines/8 time: [1.1082 µs 1.1109 µs 1.1135 µs]
|
||||
label_values__cache_label_values_lookup/ntimelines/1 time: [6.4116 ns 6.4119 ns 6.4123 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/4 time: [6.3482 ns 6.3819 ns 6.4079 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/8 time: [6.4213 ns 6.5279 ns 6.6293 ns]
|
||||
single_metric_multicore_scalability/nthreads/1 time: [6.0102 ns 6.0104 ns 6.0106 ns]
|
||||
single_metric_multicore_scalability/nthreads/4 time: [38.127 ns 38.275 ns 38.416 ns]
|
||||
single_metric_multicore_scalability/nthreads/8 time: [73.698 ns 74.882 ns 75.864 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/1 time: [14.424 ns 14.425 ns 14.426 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/4 time: [100.71 ns 102.53 ns 104.35 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/8 time: [211.50 ns 214.44 ns 216.87 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.135 ns 14.147 ns 14.160 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.243 ns 14.255 ns 14.268 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [14.470 ns 14.682 ns 14.895 ns]
|
||||
|
||||
Results on an i3en.3xlarge instance
|
||||
|
||||
label_values__naive_usage/ntimelines/1 time: [117.32 ns 117.53 ns 117.74 ns]
|
||||
label_values__naive_usage/ntimelines/4 time: [736.58 ns 741.12 ns 745.61 ns]
|
||||
label_values__naive_usage/ntimelines/8 time: [1.4513 µs 1.4596 µs 1.4665 µs]
|
||||
label_values__cache_label_values_lookup/ntimelines/1 time: [8.0964 ns 8.0979 ns 8.0995 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/4 time: [8.1620 ns 8.2912 ns 8.4491 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/8 time: [14.148 ns 14.237 ns 14.324 ns]
|
||||
single_metric_multicore_scalability/nthreads/1 time: [8.0993 ns 8.1013 ns 8.1046 ns]
|
||||
single_metric_multicore_scalability/nthreads/4 time: [80.039 ns 80.672 ns 81.297 ns]
|
||||
single_metric_multicore_scalability/nthreads/8 time: [153.58 ns 154.23 ns 154.90 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/1 time: [13.924 ns 13.926 ns 13.928 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/4 time: [143.66 ns 145.27 ns 146.59 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/8 time: [296.51 ns 297.90 ns 299.30 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.013 ns 14.149 ns 14.308 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.311 ns 14.625 ns 14.984 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [25.981 ns 26.227 ns 26.476 ns]
|
||||
|
||||
Results on an Standard L16s v3 (16 vcpus, 128 GiB memory) Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
|
||||
|
||||
label_values__naive_usage/ntimelines/1 time: [101.63 ns 101.84 ns 102.06 ns]
|
||||
label_values__naive_usage/ntimelines/4 time: [417.55 ns 424.73 ns 432.63 ns]
|
||||
label_values__naive_usage/ntimelines/8 time: [874.91 ns 889.51 ns 904.25 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/1 time: [5.7724 ns 5.7760 ns 5.7804 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/4 time: [7.8878 ns 7.9401 ns 8.0034 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/8 time: [7.2621 ns 7.6354 ns 8.0337 ns]
|
||||
single_metric_multicore_scalability/nthreads/1 time: [5.7710 ns 5.7744 ns 5.7785 ns]
|
||||
single_metric_multicore_scalability/nthreads/4 time: [66.629 ns 66.994 ns 67.336 ns]
|
||||
single_metric_multicore_scalability/nthreads/8 time: [130.85 ns 131.98 ns 132.91 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/1 time: [11.540 ns 11.546 ns 11.553 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/4 time: [131.22 ns 131.90 ns 132.56 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/8 time: [260.99 ns 262.75 ns 264.26 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [11.544 ns 11.550 ns 11.557 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [11.568 ns 11.642 ns 11.763 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [13.416 ns 14.121 ns 14.886 ns
|
||||
|
||||
Results on an M4 MAX MacBook Pro Total Number of Cores: 14 (10 performance and 4 efficiency)
|
||||
|
||||
label_values__naive_usage/ntimelines/1 time: [52.711 ns 53.026 ns 53.381 ns]
|
||||
label_values__naive_usage/ntimelines/4 time: [323.99 ns 330.40 ns 337.53 ns]
|
||||
label_values__naive_usage/ntimelines/8 time: [1.1615 µs 1.1998 µs 1.2399 µs]
|
||||
label_values__cache_label_values_lookup/ntimelines/1 time: [1.6635 ns 1.6715 ns 1.6809 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/4 time: [1.7786 ns 1.7876 ns 1.8028 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/8 time: [1.8195 ns 1.8371 ns 1.8665 ns]
|
||||
single_metric_multicore_scalability/nthreads/1 time: [1.7764 ns 1.7909 ns 1.8079 ns]
|
||||
single_metric_multicore_scalability/nthreads/4 time: [33.875 ns 34.868 ns 35.923 ns]
|
||||
single_metric_multicore_scalability/nthreads/8 time: [226.85 ns 235.30 ns 244.18 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/1 time: [3.4337 ns 3.4491 ns 3.4660 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/4 time: [69.486 ns 71.937 ns 74.472 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/8 time: [434.87 ns 456.47 ns 477.84 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [3.3767 ns 3.3974 ns 3.4220 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [3.6105 ns 4.2355 ns 5.1463 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [4.0889 ns 4.9714 ns 6.0779 ns]
|
||||
|
||||
Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor
|
||||
|
||||
label_values__naive_usage/ntimelines/1 time: [64.510 ns 64.559 ns 64.610 ns]
|
||||
label_values__naive_usage/ntimelines/4 time: [309.71 ns 326.09 ns 342.32 ns]
|
||||
label_values__naive_usage/ntimelines/8 time: [776.92 ns 819.35 ns 856.93 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/1 time: [1.2855 ns 1.2943 ns 1.3021 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/4 time: [1.3865 ns 1.4139 ns 1.4441 ns]
|
||||
label_values__cache_label_values_lookup/ntimelines/8 time: [1.5311 ns 1.5669 ns 1.6046 ns]
|
||||
single_metric_multicore_scalability/nthreads/1 time: [1.1927 ns 1.1981 ns 1.2049 ns]
|
||||
single_metric_multicore_scalability/nthreads/4 time: [24.346 ns 25.439 ns 26.634 ns]
|
||||
single_metric_multicore_scalability/nthreads/8 time: [58.666 ns 60.137 ns 61.486 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/1 time: [2.7067 ns 2.7238 ns 2.7402 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/4 time: [62.723 ns 66.214 ns 69.787 ns]
|
||||
propagation_of_cached_label_value__naive/nthreads/8 time: [164.24 ns 170.10 ns 175.68 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [2.2915 ns 2.2960 ns 2.3012 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [2.5726 ns 2.6158 ns 2.6624 ns]
|
||||
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [2.7068 ns 2.8243 ns 2.9824 ns]
|
||||
|
||||
*/
|
||||
@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
|
||||
use pageserver_api::models::*;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
pub use reqwest::Body as ReqwestBody;
|
||||
use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
|
||||
use reqwest::{IntoUrl, Method, StatusCode};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -38,9 +38,6 @@ pub enum Error {
|
||||
|
||||
#[error("Cancelled")]
|
||||
Cancelled,
|
||||
|
||||
#[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
|
||||
CreateClient(reqwest::Error),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -72,17 +69,8 @@ pub enum ForceAwaitLogicalSize {
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(
|
||||
mgmt_api_endpoint: String,
|
||||
jwt: Option<&str>,
|
||||
ssl_ca_cert: Option<Certificate>,
|
||||
) -> Result<Self> {
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client.build().map_err(Error::CreateClient)?;
|
||||
Ok(Self::from_client(http_client, mgmt_api_endpoint, jwt))
|
||||
pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
|
||||
Self::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt)
|
||||
}
|
||||
|
||||
pub fn from_client(
|
||||
@@ -113,10 +101,12 @@ impl Client {
|
||||
debug_assert!(path.starts_with('/'));
|
||||
let uri = format!("{}{}", self.mgmt_api_endpoint, path);
|
||||
|
||||
let mut req = self.client.request(Method::GET, uri);
|
||||
if let Some(value) = &self.authorization_header {
|
||||
req = req.header(reqwest::header::AUTHORIZATION, value);
|
||||
}
|
||||
let req = self.client.request(Method::GET, uri);
|
||||
let req = if let Some(value) = &self.authorization_header {
|
||||
req.header(reqwest::header::AUTHORIZATION, value)
|
||||
} else {
|
||||
req
|
||||
};
|
||||
req.send().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
@@ -458,21 +448,13 @@ impl Client {
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
behavior: Option<DetachBehavior>,
|
||||
) -> Result<AncestorDetached> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor",
|
||||
self.mgmt_api_endpoint
|
||||
);
|
||||
let mut uri = Url::parse(&uri)
|
||||
.map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")))?;
|
||||
|
||||
if let Some(behavior) = behavior {
|
||||
uri.query_pairs_mut()
|
||||
.append_pair("detach_behavior", &behavior.to_string());
|
||||
}
|
||||
|
||||
self.request(Method::PUT, uri, ())
|
||||
self.request(Method::PUT, &uri, ())
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
|
||||
@@ -12,7 +12,7 @@ pub(crate) fn setup_logging() {
|
||||
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
logging::Output::Stdout,
|
||||
)
|
||||
.expect("Failed to init test logging");
|
||||
.expect("Failed to init test logging")
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@ hdrhistogram.workspace = true
|
||||
humantime.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
rand.workspace = true
|
||||
reqwest.workspace=true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tracing.workspace = true
|
||||
|
||||
@@ -36,8 +36,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
));
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -77,8 +77,7 @@ async fn main_impl(
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
));
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -125,8 +125,7 @@ async fn main_impl(
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
));
|
||||
|
||||
if let Some(engine_str) = &args.set_io_engine {
|
||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||
|
||||
@@ -83,8 +83,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
));
|
||||
|
||||
if let Some(engine_str) = &args.set_io_engine {
|
||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||
|
||||
@@ -40,8 +40,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
));
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -331,15 +331,7 @@ where
|
||||
.append(&header, data)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
|
||||
} else if *filepath == "postgresql.auto.conf" {
|
||||
let ancestor_lsn = format!("neon.ancestor_lsn='{}'", self.timeline.get_ancestor_lsn());
|
||||
let data = ancestor_lsn.as_bytes();
|
||||
let header = new_tar_header(filepath, data.len() as u64)?;
|
||||
self.ar
|
||||
.append(&header, data)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
|
||||
} else {
|
||||
} else {
|
||||
let header = new_tar_header(filepath, 0)?;
|
||||
self.ar
|
||||
.append(&header, io::empty())
|
||||
|
||||
@@ -25,8 +25,8 @@ use pageserver::task_mgr::{
|
||||
};
|
||||
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
|
||||
use pageserver::{
|
||||
CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, HttpsEndpointListener, http,
|
||||
page_cache, page_service, task_mgr, virtual_file,
|
||||
CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service,
|
||||
task_mgr, virtual_file,
|
||||
};
|
||||
use postgres_backend::AuthType;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
@@ -110,7 +110,6 @@ fn main() -> anyhow::Result<()> {
|
||||
} else {
|
||||
TracingErrorLayerEnablement::Disabled
|
||||
};
|
||||
|
||||
logging::init(
|
||||
conf.log_format,
|
||||
tracing_error_layer_enablement,
|
||||
@@ -344,15 +343,8 @@ fn start_pageserver(
|
||||
info!("Starting pageserver http handler on {http_addr}");
|
||||
let http_listener = tcp_listener::bind(http_addr)?;
|
||||
|
||||
let https_listener = match conf.listen_https_addr.as_ref() {
|
||||
Some(https_addr) => {
|
||||
info!("Starting pageserver https handler on {https_addr}");
|
||||
Some(tcp_listener::bind(https_addr)?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let pg_addr = &conf.listen_pg_addr;
|
||||
|
||||
info!("Starting pageserver pg protocol handler on {pg_addr}");
|
||||
let pageserver_listener = tcp_listener::bind(pg_addr)?;
|
||||
|
||||
@@ -583,8 +575,9 @@ fn start_pageserver(
|
||||
|
||||
// Start up the service to handle HTTP mgmt API request. We created the
|
||||
// listener earlier already.
|
||||
let (http_endpoint_listener, https_endpoint_listener) = {
|
||||
let http_endpoint_listener = {
|
||||
let _rt_guard = MGMT_REQUEST_RUNTIME.enter(); // for hyper
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let router_state = Arc::new(
|
||||
http::routes::State::new(
|
||||
@@ -599,51 +592,22 @@ fn start_pageserver(
|
||||
)
|
||||
.context("Failed to initialize router state")?,
|
||||
);
|
||||
|
||||
let router = http::make_router(router_state, launch_ts, http_auth.clone())?
|
||||
.build()
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let service = http_utils::RouterService::new(router).unwrap();
|
||||
let server = hyper0::Server::from_tcp(http_listener)?
|
||||
.serve(service)
|
||||
.with_graceful_shutdown({
|
||||
let cancel = cancel.clone();
|
||||
async move { cancel.clone().cancelled().await }
|
||||
});
|
||||
|
||||
let service =
|
||||
Arc::new(http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow!(err))?);
|
||||
|
||||
let http_task = {
|
||||
let server =
|
||||
http_utils::server::Server::new(Arc::clone(&service), http_listener, None)?;
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
|
||||
"http endpoint listener",
|
||||
server.serve(cancel.clone()),
|
||||
));
|
||||
HttpEndpointListener(CancellableTask { task, cancel })
|
||||
};
|
||||
|
||||
let https_task = match https_listener {
|
||||
Some(https_listener) => {
|
||||
let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
|
||||
let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
|
||||
|
||||
let server_config = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(certs, key)?;
|
||||
|
||||
let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));
|
||||
|
||||
let server =
|
||||
http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?;
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
|
||||
"https endpoint listener",
|
||||
server.serve(cancel.clone()),
|
||||
));
|
||||
Some(HttpsEndpointListener(CancellableTask { task, cancel }))
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
(http_task, https_task)
|
||||
let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
|
||||
"http endpoint listener",
|
||||
server,
|
||||
));
|
||||
HttpEndpointListener(CancellableTask { task, cancel })
|
||||
};
|
||||
|
||||
let consumption_metrics_tasks = {
|
||||
@@ -719,7 +683,6 @@ fn start_pageserver(
|
||||
shutdown_pageserver.cancel();
|
||||
pageserver::shutdown_pageserver(
|
||||
http_endpoint_listener,
|
||||
https_endpoint_listener,
|
||||
page_service,
|
||||
consumption_metrics_tasks,
|
||||
disk_usage_eviction_task,
|
||||
|
||||
@@ -53,11 +53,6 @@ pub struct PageServerConf {
|
||||
pub listen_pg_addr: String,
|
||||
/// Example (default): 127.0.0.1:9898
|
||||
pub listen_http_addr: String,
|
||||
/// Example: 127.0.0.1:9899
|
||||
pub listen_https_addr: Option<String>,
|
||||
|
||||
pub ssl_key_file: Utf8PathBuf,
|
||||
pub ssl_cert_file: Utf8PathBuf,
|
||||
|
||||
/// Current availability zone. Used for traffic metrics.
|
||||
pub availability_zone: Option<String>,
|
||||
@@ -322,9 +317,6 @@ impl PageServerConf {
|
||||
let pageserver_api::config::ConfigToml {
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
listen_https_addr,
|
||||
ssl_key_file,
|
||||
ssl_cert_file,
|
||||
availability_zone,
|
||||
wait_lsn_timeout,
|
||||
wal_redo_timeout,
|
||||
@@ -383,9 +375,6 @@ impl PageServerConf {
|
||||
// ------------------------------------------------------------
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
listen_https_addr,
|
||||
ssl_key_file,
|
||||
ssl_cert_file,
|
||||
availability_zone,
|
||||
wait_lsn_timeout,
|
||||
wal_redo_timeout,
|
||||
@@ -467,8 +456,8 @@ impl PageServerConf {
|
||||
no_sync: no_sync.unwrap_or(false),
|
||||
enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
|
||||
validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
|
||||
load_previous_heatmap: load_previous_heatmap.unwrap_or(true),
|
||||
generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true),
|
||||
load_previous_heatmap: load_previous_heatmap.unwrap_or(false),
|
||||
generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(false),
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
@@ -502,9 +491,7 @@ impl PageServerConf {
|
||||
#[cfg(test)]
|
||||
pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf {
|
||||
let test_output_dir = std::env::var("TEST_OUTPUT").unwrap_or("../tmp_check".into());
|
||||
|
||||
let test_id = uuid::Uuid::new_v4();
|
||||
Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}_{test_id}"))
|
||||
Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}"))
|
||||
}
|
||||
|
||||
pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user