Compare commits

..

4 Commits

Author SHA1 Message Date
Alex Chi Z
1060520830 assume primary is the default
Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-03-17 16:36:01 -04:00
Alex Chi Z
4077eeecaa resolve comments
Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-03-13 16:52:20 -04:00
Alex Chi Z
b86c4fabb4 embed everything into application_name, finish storage-side code
Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-03-11 17:28:47 -04:00
Alex Chi Z
4d99b0df58 feat(compute_ctl): pass compute type to pageserver
Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-03-11 16:29:24 -04:00
146 changed files with 1324 additions and 3318 deletions

View File

@@ -0,0 +1,21 @@
## Release 202Y-MM-DD
**NB: this PR must be merged only by 'Create a merge commit'!**
### Checklist when preparing for release
- [ ] Read or refresh [the release flow guide](https://www.notion.so/neondatabase/Release-general-flow-61f2e39fd45d4d14a70c7749604bd70b)
- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers?
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them).
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time)
<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->

View File

@@ -1,16 +1,14 @@
import itertools
import json
import os
import sys
source_tag = os.getenv("SOURCE_TAG")
target_tag = os.getenv("TARGET_TAG")
branch = os.getenv("BRANCH")
dev_acr = os.getenv("DEV_ACR")
prod_acr = os.getenv("PROD_ACR")
dev_aws = os.getenv("DEV_AWS")
prod_aws = os.getenv("PROD_AWS")
aws_region = os.getenv("AWS_REGION")
build_tag = os.environ["BUILD_TAG"]
branch = os.environ["BRANCH"]
dev_acr = os.environ["DEV_ACR"]
prod_acr = os.environ["PROD_ACR"]
dev_aws = os.environ["DEV_AWS"]
prod_aws = os.environ["PROD_AWS"]
aws_region = os.environ["AWS_REGION"]
components = {
"neon": ["neon"],
@@ -41,23 +39,24 @@ registries = {
outputs: dict[str, dict[str, list[str]]] = {}
target_tags = [target_tag, "latest"] if branch == "main" else [target_tag]
target_stages = (
["dev", "prod"] if branch in ["release", "release-proxy", "release-compute"] else ["dev"]
)
target_tags = [build_tag, "latest"] if branch == "main" else [build_tag]
target_stages = ["dev", "prod"] if branch.startswith("release") else ["dev"]
for component_name, component_images in components.items():
for stage in target_stages:
outputs[f"{component_name}-{stage}"] = {
f"docker.io/neondatabase/{component_image}:{source_tag}": [
f"{registry}/{component_image}:{tag}"
for registry, tag in itertools.product(registries[stage], target_tags)
if not (registry == "docker.io/neondatabase" and tag == source_tag)
outputs[f"{component_name}-{stage}"] = dict(
[
(
f"docker.io/neondatabase/{component_image}:{build_tag}",
[
f"{combo[0]}/{component_image}:{combo[1]}"
for combo in itertools.product(registries[stage], target_tags)
],
)
for component_image in component_images
]
for component_image in component_images
}
)
with open(os.getenv("GITHUB_OUTPUT", "/dev/null"), "a") as f:
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
for key, value in outputs.items():
f.write(f"{key}={json.dumps(value)}\n")
print(f"Image map for {key}:\n{json.dumps(value, indent=2)}\n\n", file=sys.stderr)

View File

@@ -1,110 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
DOCS_URL="https://docs.neon.build/overview/repositories/neon.html"
message() {
if [[ -n "${GITHUB_PR_NUMBER:-}" ]]; then
gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --edit-last --body "$1" \
|| gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --body "$1"
fi
echo "$1"
}
report_error() {
message "$1
For more details, see the documentation: ${DOCS_URL}"
exit 1
}
case "$RELEASE_BRANCH" in
"release") COMPONENT="Storage" ;;
"release-proxy") COMPONENT="Proxy" ;;
"release-compute") COMPONENT="Compute" ;;
*)
report_error "Unknown release branch: ${RELEASE_BRANCH}"
;;
esac
# Identify main and release branches
MAIN_BRANCH="origin/main"
REMOTE_RELEASE_BRANCH="origin/${RELEASE_BRANCH}"
# Find merge base
MERGE_BASE=$(git merge-base "${MAIN_BRANCH}" "${REMOTE_RELEASE_BRANCH}")
echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}"
# Get the HEAD commit (last commit in PR, expected to be the merge commit)
LAST_COMMIT=$(git rev-parse HEAD)
MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}")
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$"
if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then
report_error "Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD'
Expected component: ${COMPONENT}
Found: '${MERGE_COMMIT_MESSAGE}'"
fi
echo "✅ Merge commit message is correctly formatted: '${MERGE_COMMIT_MESSAGE}'"
LAST_COMMIT_PARENTS=$(git cat-file -p "${LAST_COMMIT}" | jq -sR '[capture("parent (?<parent>[0-9a-f]{40})"; "g") | .parent]')
if [[ "$(echo "${LAST_COMMIT_PARENTS}" | jq 'length')" -ne 2 ]]; then
report_error "Last commit must be a merge commit with exactly two parents"
fi
EXPECTED_RELEASE_HEAD=$(git rev-parse "${REMOTE_RELEASE_BRANCH}")
if echo "${LAST_COMMIT_PARENTS}" | jq -e --arg rel "${EXPECTED_RELEASE_HEAD}" 'index($rel) != null' > /dev/null; then
LINEAR_HEAD=$(echo "${LAST_COMMIT_PARENTS}" | jq -r '[.[] | select(. != $rel)][0]' --arg rel "${EXPECTED_RELEASE_HEAD}")
else
report_error "Last commit must merge the release branch (${RELEASE_BRANCH})"
fi
echo "✅ Last commit correctly merges the previous commit and the release branch"
echo "Top commit of linear history: ${LINEAR_HEAD}"
MERGE_COMMIT_TREE=$(git rev-parse "${LAST_COMMIT}^{tree}")
LINEAR_HEAD_TREE=$(git rev-parse "${LINEAR_HEAD}^{tree}")
if [[ "${MERGE_COMMIT_TREE}" != "${LINEAR_HEAD_TREE}" ]]; then
report_error "Tree of merge commit (${MERGE_COMMIT_TREE}) does not match tree of linear history head (${LINEAR_HEAD_TREE})
This indicates that the merge of ${RELEASE_BRANCH} into this branch was not performed using the merge strategy 'ours'"
fi
echo "✅ Merge commit tree matches the linear history head"
EXPECTED_PREVIOUS_COMMIT="${LINEAR_HEAD}"
# Now traverse down the history, ensuring each commit has exactly one parent
CURRENT_COMMIT="${EXPECTED_PREVIOUS_COMMIT}"
while [[ "${CURRENT_COMMIT}" != "${MERGE_BASE}" && "${CURRENT_COMMIT}" != "${EXPECTED_RELEASE_HEAD}" ]]; do
CURRENT_COMMIT_PARENTS=$(git cat-file -p "${CURRENT_COMMIT}" | jq -sR '[capture("parent (?<parent>[0-9a-f]{40})"; "g") | .parent]')
if [[ "$(echo "${CURRENT_COMMIT_PARENTS}" | jq 'length')" -ne 1 ]]; then
report_error "Commit ${CURRENT_COMMIT} must have exactly one parent"
fi
NEXT_COMMIT=$(echo "${CURRENT_COMMIT_PARENTS}" | jq -r '.[0]')
if [[ "${NEXT_COMMIT}" == "${MERGE_BASE}" ]]; then
echo "✅ Reached merge base (${MERGE_BASE})"
PR_BASE="${MERGE_BASE}"
elif [[ "${NEXT_COMMIT}" == "${EXPECTED_RELEASE_HEAD}" ]]; then
echo "✅ Reached release branch (${EXPECTED_RELEASE_HEAD})"
PR_BASE="${EXPECTED_RELEASE_HEAD}"
elif [[ -z "${NEXT_COMMIT}" ]]; then
report_error "Unexpected end of commit history before reaching merge base"
fi
# Move to the next commit in the chain
CURRENT_COMMIT="${NEXT_COMMIT}"
done
echo "✅ All commits are properly ordered and linear"
echo "✅ Release PR structure is valid"
echo
message "Commits that are part of this release:
$(git log --oneline "${PR_BASE}..${LINEAR_HEAD}")"

View File

@@ -17,12 +17,6 @@
({};
.[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))
# Ensure that each component exists, or fail
| (["storage", "compute", "proxy"] - (keys)) as $missing
| if ($missing | length) > 0 then
"Error: Found no release for \($missing | join(", "))!\n" | halt_error(1)
else . end
# Convert the resulting object into an array of formatted strings
| to_entries
| map("\(.key)=\(.value.full)")

View File

@@ -7,8 +7,8 @@ on:
description: 'Component name'
required: true
type: string
source-branch:
description: 'Source branch'
release-branch:
description: 'Release branch'
required: true
type: string
secrets:
@@ -30,25 +30,17 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.source-branch }}
fetch-depth: 0
ref: main
- name: Set variables
id: vars
env:
COMPONENT_NAME: ${{ inputs.component-name }}
RELEASE_BRANCH: >-
${{
false
|| inputs.component-name == 'Storage' && 'release'
|| inputs.component-name == 'Proxy' && 'release-proxy'
|| inputs.component-name == 'Compute' && 'release-compute'
}}
RELEASE_BRANCH: ${{ inputs.release-branch }}
run: |
today=$(date +'%Y-%m-%d')
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}
echo "release-branch=${RELEASE_BRANCH}" | tee -a ${GITHUB_OUTPUT}
- name: Configure git
run: |
@@ -57,36 +49,31 @@ jobs:
- name: Create RC branch
env:
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
git switch -c "${RC_BRANCH}"
git checkout -b "${RC_BRANCH}"
# Manually create a merge commit on the current branch, keeping the
# tree and setting the parents to the current HEAD and the HEAD of the
# release branch. This commit is what we'll fast-forward the release
# branch to when merging the release branch.
# For details on why, look at
# https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs
current_tree=$(git rev-parse 'HEAD^{tree}')
release_head=$(git rev-parse "origin/${RELEASE_BRANCH}")
current_head=$(git rev-parse HEAD)
merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}")
# Fast-forward the current branch to the newly created merge_commit
git merge --ff-only ${merge_commit}
# create an empty commit to distinguish workflow runs
# from other possible releases from the same commit
git commit --allow-empty -m "${TITLE}"
git push origin "${RC_BRANCH}"
- name: Create a PR into ${{ steps.vars.outputs.release-branch }}
- name: Create a PR into ${{ inputs.release-branch }}
env:
GH_TOKEN: ${{ secrets.ci-access-token }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
RELEASE_BRANCH: ${{ inputs.release-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
cat << EOF > body.md
## ${TITLE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
--body "" \
--body-file "body.md" \
--head "${RC_BRANCH}" \
--base "${RELEASE_BRANCH}"

View File

@@ -21,16 +21,9 @@ on:
run-kind:
description: "The kind of run we're currently in. Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`, `proxy-rc-pr`, `pr`, or `workflow-dispatch`"
value: ${{ jobs.tags.outputs.run-kind }}
release-pr-run-id:
description: "Only available if `run-kind in [storage-release, proxy-release, compute-release]`. Contains the run ID of the `Build and Test` workflow, assuming one with the current commit can be found."
value: ${{ jobs.tags.outputs.release-pr-run-id }}
permissions: {}
defaults:
run:
shell: bash -euo pipefail {0}
jobs:
tags:
runs-on: ubuntu-22.04
@@ -40,7 +33,6 @@ jobs:
proxy: ${{ steps.previous-releases.outputs.proxy }}
storage: ${{ steps.previous-releases.outputs.storage }}
run-kind: ${{ steps.run-kind.outputs.run-kind }}
release-pr-run-id: ${{ steps.release-pr-run-id.outputs.release-pr-run-id }}
permissions:
contents: read
steps:
@@ -91,11 +83,7 @@ jobs:
echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
;;
pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)
BUILD_AND_TEST_RUN_ID=$(gh api --paginate \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${CURRENT_SHA}&branch=${CURRENT_BRANCH}" \
| jq '[.workflow_runs[] | select(.name == "Build and Test")][0].id // ("Error: No matching workflow run found." | halt_error(1))')
BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
;;
workflow-dispatch)
@@ -117,13 +105,3 @@ jobs:
"/repos/${GITHUB_REPOSITORY}/releases" \
| jq -f .github/scripts/previous-releases.jq -r \
| tee -a "${GITHUB_OUTPUT}"
- name: Get the release PR run ID
id: release-pr-run-id
if: ${{ contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Falied to find Build and Test run from RC PR!" | halt_error(1))')
echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT

View File

@@ -476,7 +476,7 @@ jobs:
(
!github.event.pull_request.draft
|| contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
|| needs.meta.outputs.run-kind == 'push-main'
|| contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind)
) && !failure() && !cancelled()
}}
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
@@ -487,7 +487,7 @@ jobs:
neon-image-arch:
needs: [ check-permissions, build-build-tools-image, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
strategy:
matrix:
arch: [ x64, arm64 ]
@@ -537,7 +537,7 @@ jobs:
neon-image:
needs: [ neon-image-arch, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
runs-on: ubuntu-22.04
permissions:
id-token: write # aws-actions/configure-aws-credentials
@@ -559,7 +559,7 @@ jobs:
compute-node-image-arch:
needs: [ check-permissions, build-build-tools-image, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
@@ -651,7 +651,7 @@ jobs:
compute-node-image:
needs: [ compute-node-image-arch, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
@@ -694,7 +694,7 @@ jobs:
vm-compute-node-image-arch:
needs: [ check-permissions, meta, compute-node-image ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
strategy:
fail-fast: false
@@ -747,7 +747,7 @@ jobs:
vm-compute-node-image:
needs: [ vm-compute-node-image-arch, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
runs-on: ubuntu-22.04
strategy:
matrix:
@@ -773,12 +773,7 @@ jobs:
test-images:
needs: [ check-permissions, meta, neon-image, compute-node-image ]
# Depends on jobs that can get skipped
if: >-
${{
!failure()
&& !cancelled()
&& contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
}}
if: "!failure() && !cancelled()"
strategy:
fail-fast: false
matrix:
@@ -805,7 +800,7 @@ jobs:
# Ensure that we don't have bad versions.
- name: Verify image versions
shell: bash # ensure no set -e for better error messages
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
run: |
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
@@ -826,19 +821,19 @@ jobs:
env:
TAG: >-
${{
needs.meta.outputs.run-kind == 'compute-rc-pr'
contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
&& needs.meta.outputs.previous-storage-release
|| needs.meta.outputs.build-tag
}}
COMPUTE_TAG: >-
${{
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
&& needs.meta.outputs.previous-compute-release
|| needs.meta.outputs.build-tag
}}
TEST_EXTENSIONS_TAG: >-
${{
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
&& 'latest'
|| needs.meta.outputs.build-tag
}}
@@ -890,13 +885,7 @@ jobs:
id: generate
run: python3 .github/scripts/generate_image_maps.py
env:
SOURCE_TAG: >-
${{
contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind)
&& needs.meta.outputs.release-pr-run-id
|| needs.meta.outputs.build-tag
}}
TARGET_TAG: ${{ needs.meta.outputs.build-tag }}
BUILD_TAG: "${{ needs.meta.outputs.build-tag }}"
BRANCH: "${{ github.ref_name }}"
DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
@@ -906,7 +895,7 @@ jobs:
push-neon-image-dev:
needs: [ meta, generate-image-maps, neon-image ]
if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
uses: ./.github/workflows/_push-to-container-registry.yml
permissions:
id-token: write # Required for aws/azure login
@@ -924,7 +913,7 @@ jobs:
push-compute-image-dev:
needs: [ meta, generate-image-maps, vm-compute-node-image ]
if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
uses: ./.github/workflows/_push-to-container-registry.yml
permissions:
id-token: write # Required for aws/azure login
@@ -978,55 +967,16 @@ jobs:
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
secrets: inherit
push-neon-test-extensions-image-ghcr:
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
# This is a bit of a special case so we're not using a generated image map.
add-latest-tag-to-neon-extensions-test-image:
if: github.ref_name == 'main'
needs: [ meta, compute-node-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: |
{
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
],
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
]
}
secrets: inherit
add-latest-tag-to-neon-test-extensions-image:
if: ${{ needs.meta.outputs.run-kind == 'push-main' }}
needs: [ meta, compute-node-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: |
{
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
"docker.io/neondatabase/neon-test-extensions-v16:latest",
"ghcr.io/neondatabase/neon-test-extensions-v16:latest"
],
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
"docker.io/neondatabase/neon-test-extensions-v17:latest",
"ghcr.io/neondatabase/neon-test-extensions-v17:latest"
]
}
secrets: inherit
add-release-tag-to-neon-test-extensions-image:
if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}
needs: [ meta, compute-node-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: |
{
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}",
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
],
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}",
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
]
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
}
secrets: inherit
@@ -1285,7 +1235,7 @@ jobs:
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
promote-compatibility-data:
needs: [ meta, deploy ]
needs: [ deploy ]
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
@@ -1295,6 +1245,37 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Fetch GITHUB_RUN_ID and COMMIT_SHA for the last merged release PR
id: fetch-last-release-pr-info
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
branch_name_and_pr_number=$(gh pr list \
--repo "${GITHUB_REPOSITORY}" \
--base release \
--state merged \
--limit 10 \
--json mergeCommit,headRefName,number \
--jq ".[] | select(.mergeCommit.oid==\"${GITHUB_SHA}\") | { branch_name: .headRefName, pr_number: .number }")
branch_name=$(echo "${branch_name_and_pr_number}" | jq -r '.branch_name')
pr_number=$(echo "${branch_name_and_pr_number}" | jq -r '.pr_number')
run_id=$(gh run list \
--repo "${GITHUB_REPOSITORY}" \
--workflow build_and_test.yml \
--branch "${branch_name}" \
--json databaseId \
--limit 1 \
--jq '.[].databaseId')
last_commit_sha=$(gh pr view "${pr_number}" \
--repo "${GITHUB_REPOSITORY}" \
--json commits \
--jq '.commits[-1].oid')
echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT}
echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT}
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
@@ -1305,8 +1286,8 @@ jobs:
env:
BUCKET: neon-github-public-dev
AWS_REGION: eu-central-1
COMMIT_SHA: ${{ github.sha }}
RUN_ID: ${{ needs.meta.outputs.release-pr-run-id }}
COMMIT_SHA: ${{ steps.fetch-last-release-pr-info.outputs.commit-sha }}
RUN_ID: ${{ steps.fetch-last-release-pr-info.outputs.run-id }}
run: |
old_prefix="artifacts/${COMMIT_SHA}/${RUN_ID}"
new_prefix="artifacts/latest"
@@ -1395,5 +1376,5 @@ jobs:
|| needs.files-changed.result == 'skipped'
|| (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|| (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
|| (needs.test-images.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|| needs.test-images.result == 'skipped'
|| (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))

View File

@@ -1,36 +0,0 @@
name: Fast forward merge
on:
pull_request:
types: [labeled]
branches:
- release
- release-proxy
- release-compute
jobs:
fast-forward:
if: ${{ github.event.label.name == 'fast-forward' }}
runs-on: ubuntu-22.04
steps:
- name: Remove fast-forward label to PR
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
gh pr edit ${{ github.event.pull_request.number }} --repo "${GITHUB_REPOSITORY}" --remove-label "fast-forward"
- name: Fast forwarding
uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979
# See https://docs.github.com/en/graphql/reference/enums#mergestatestatus
if: ${{ github.event.pull_request.mergeable_state == 'clean' }}
with:
merge: true
comment: on-error
github_token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Comment if mergeable_state is not clean
if: ${{ github.event.pull_request.mergeable_state != 'clean' }}
run: |
gh pr comment ${{ github.event.pull_request.number }} \
--repo "${GITHUB_REPOSITORY}" \
--body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\`."

View File

@@ -2,8 +2,8 @@ name: large oltp benchmark
on:
# uncomment to run on push for debugging your PR
#push:
# branches: [ bodobolero/synthetic_oltp_workload ]
push:
branches: [ bodobolero/synthetic_oltp_workload ]
schedule:
# * is a special character in YAML so you have to quote this string
@@ -12,7 +12,7 @@ on:
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC
- cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
workflow_dispatch: # adds ability to run this manually
defaults:
@@ -22,7 +22,7 @@ defaults:
concurrency:
# Allow only one workflow globally because we need dedicated resources which only exist once
group: large-oltp-bench-workflow
cancel-in-progress: false
cancel-in-progress: true
jobs:
oltp:
@@ -31,9 +31,9 @@ jobs:
matrix:
include:
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
permissions:
contents: write
@@ -46,6 +46,7 @@ jobs:
PG_VERSION: 16 # pre-determined by pre-determined project
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
PLATFORM: ${{ matrix.target }}
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -56,10 +57,8 @@ jobs:
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time
# (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions
# in one run vacuum didn't finish within 12 hours
timeout-minutes: 2880
# Increase timeout to 8h, default timeout is 6h
timeout-minutes: 480
steps:
- uses: actions/checkout@v4
@@ -90,45 +89,29 @@ jobs:
- name: Set up Connection String
id: set-up-connstr
run: |
case "${{ matrix.target }}" in
new_branch)
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
;;
reuse_branch)
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
;;
*)
echo >&2 "Unknown target=${{ matrix.target }}"
exit 1
;;
esac
case "${{ matrix.target }}" in
new_branch)
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
;;
reuse_branch)
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
;;
*)
echo >&2 "Unknown target=${{ matrix.target }}"
exit 1
;;
esac
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
- name: Delete rows from prior runs in reuse branch
if: ${{ matrix.target == 'reuse_branch' }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
PSQL: /tmp/neon/pg_install/v16/bin/psql
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
run: |
echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
- name: Benchmark pgbench with custom-scripts
- name: Benchmark pgbench with custom-scripts
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
@@ -136,21 +119,6 @@ jobs:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark database maintenance
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Branch for large tenant
if: ${{ always() && matrix.target == 'new_branch' }}
uses: ./.github/actions/neon-branch-delete
@@ -159,13 +127,6 @@ jobs:
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Configure AWS credentials # again because prior steps could have exceeded 5 hours
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}

View File

@@ -1,24 +0,0 @@
name: Lint Release PR
on:
pull_request:
branches:
- release
- release-proxy
- release-compute
jobs:
lint-release-pr:
runs-on: ubuntu-22.04
steps:
- name: Checkout PR branch
uses: actions/checkout@v4
with:
fetch-depth: 0 # Fetch full history for git operations
ref: ${{ github.event.pull_request.head.ref }}
- name: Run lint script
env:
RELEASE_BRANCH: ${{ github.base_ref }}
run: |
./.github/scripts/lint-release-pr.sh

View File

@@ -8,6 +8,8 @@ on:
- .github/workflows/build-build-tools-image.yml
- .github/workflows/pre-merge-checks.yml
merge_group:
branches:
- main
defaults:
run:
@@ -17,17 +19,15 @@ defaults:
permissions: {}
jobs:
meta:
get-changed-files:
runs-on: ubuntu-22.04
outputs:
python-changed: ${{ steps.python-src.outputs.any_changed }}
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
branch: ${{ steps.group-metadata.outputs.branch }}
pr-number: ${{ steps.group-metadata.outputs.pr-number }}
steps:
- uses: actions/checkout@v4
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: python-src
with:
files: |
@@ -38,7 +38,7 @@ jobs:
poetry.lock
pyproject.toml
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: rust-src
with:
files: |
@@ -58,20 +58,12 @@ jobs:
echo "${PYTHON_CHANGED_FILES}"
echo "${RUST_CHANGED_FILES}"
- name: Merge group metadata
if: ${{ github.event_name == 'merge_group' }}
id: group-metadata
env:
MERGE_QUEUE_REF: ${{ github.event.merge_group.head_ref }}
run: |
echo $MERGE_QUEUE_REF | jq -Rr 'capture("refs/heads/gh-readonly-queue/(?<branch>.*)/pr-(?<pr_number>[0-9]+)-[0-9a-f]{40}") | ["branch=" + .branch, "pr-number=" + .pr_number] | .[]' | tee -a "${GITHUB_OUTPUT}"
build-build-tools-image:
if: |
false
|| needs.meta.outputs.python-changed == 'true'
|| needs.meta.outputs.rust-changed == 'true'
needs: [ meta ]
|| needs.get-changed-files.outputs.python-changed == 'true'
|| needs.get-changed-files.outputs.rust-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
# Build only one combination to save time
@@ -80,8 +72,8 @@ jobs:
secrets: inherit
check-codestyle-python:
if: needs.meta.outputs.python-changed == 'true'
needs: [ meta, build-build-tools-image ]
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-python.yml
with:
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
@@ -89,8 +81,8 @@ jobs:
secrets: inherit
check-codestyle-rust:
if: needs.meta.outputs.rust-changed == 'true'
needs: [ meta, build-build-tools-image ]
if: needs.get-changed-files.outputs.rust-changed == 'true'
needs: [ get-changed-files, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-rust.yml
with:
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
@@ -109,7 +101,7 @@ jobs:
statuses: write # for `github.repos.createCommitStatus(...)`
contents: write
needs:
- meta
- get-changed-files
- check-codestyle-python
- check-codestyle-rust
runs-on: ubuntu-22.04
@@ -137,20 +129,7 @@ jobs:
run: exit 1
if: |
false
|| (github.event_name == 'merge_group' && needs.meta.outputs.branch != 'main')
|| (needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.meta.outputs.rust-changed == 'true')
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
|| contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
- name: Add fast-forward label to PR to trigger fast-forward merge
if: >-
${{
always()
&& github.event_name == 'merge_group'
&& contains(fromJson('["release", "release-proxy", "release-compute"]'), github.base_ref)
}}
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: >-
gh pr edit ${{ needs.meta.outputs.pr-number }} --repo "${GITHUB_REPOSITORY}" --add-label "fast-forward"

View File

@@ -38,7 +38,7 @@ jobs:
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Storage'
source-branch: ${{ github.ref_name }}
release-branch: 'release'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
@@ -51,7 +51,7 @@ jobs:
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Proxy'
source-branch: ${{ github.ref_name }}
release-branch: 'release-proxy'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
@@ -64,6 +64,6 @@ jobs:
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Compute'
source-branch: ${{ github.ref_name }}
release-branch: 'release-compute'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}

307
Cargo.lock generated
View File

@@ -191,7 +191,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
"synstructure",
]
@@ -203,7 +203,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -272,7 +272,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -283,7 +283,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1021,7 +1021,7 @@ dependencies = [
"regex",
"rustc-hash 2.1.1",
"shlex",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1248,7 +1248,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1309,7 +1309,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"indexmap 2.0.1",
"jsonwebtoken",
"regex",
"remote_storage",
@@ -1340,7 +1339,6 @@ dependencies = [
"flate2",
"futures",
"http 1.1.0",
"indexmap 2.0.1",
"jsonwebtoken",
"metrics",
"nix 0.27.1",
@@ -1349,20 +1347,17 @@ dependencies = [
"once_cell",
"opentelemetry",
"opentelemetry_sdk",
"p256 0.13.2",
"postgres",
"postgres_initdb",
"regex",
"remote_storage",
"reqwest",
"ring",
"rlimit",
"rust-ini",
"serde",
"serde_json",
"serde_with",
"signal-hook",
"spki 0.7.3",
"tar",
"thiserror 1.0.69",
"tokio",
@@ -1382,7 +1377,6 @@ dependencies = [
"vm_monitor",
"walkdir",
"workspace_hack",
"x509-cert",
"zstd",
]
@@ -1709,7 +1703,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1733,7 +1727,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim 0.10.0",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1744,7 +1738,7 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a"
dependencies = [
"darling_core",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1807,8 +1801,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c"
dependencies = [
"const-oid",
"der_derive",
"flagset",
"pem-rfc7468",
"zeroize",
]
@@ -1827,17 +1819,6 @@ dependencies = [
"rusticata-macros",
]
[[package]]
name = "der_derive"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "deranged"
version = "0.3.11"
@@ -1907,7 +1888,7 @@ dependencies = [
"dsl_auto_type",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1927,7 +1908,7 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25"
dependencies = [
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1956,7 +1937,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -1979,7 +1960,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -2124,7 +2105,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -2134,19 +2115,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.7"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697"
checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
dependencies = [
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
]
[[package]]
name = "env_logger"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"jiff",
"log",
]
@@ -2167,7 +2157,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -2301,12 +2291,6 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "flagset"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec"
[[package]]
name = "flate2"
version = "1.0.26"
@@ -2433,7 +2417,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -2546,7 +2530,7 @@ checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -2863,7 +2847,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"fail",
"futures",
"hyper 0.14.30",
@@ -2874,7 +2857,6 @@ dependencies = [
"pprof",
"regex",
"routerify",
"rustls-pemfile 2.1.1",
"serde",
"serde_json",
"serde_path_to_error",
@@ -2904,9 +2886,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "humantime"
version = "2.2.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "humantime-serde"
@@ -3166,7 +3148,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -3259,7 +3241,7 @@ dependencies = [
"crossbeam-channel",
"crossbeam-utils",
"dashmap 6.1.0",
"env_logger",
"env_logger 0.11.2",
"indexmap 2.0.1",
"itoa",
"log",
@@ -3272,11 +3254,11 @@ dependencies = [
[[package]]
name = "inotify"
version = "0.9.6"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
dependencies = [
"bitflags 1.3.2",
"bitflags 2.8.0",
"inotify-sys",
"libc",
]
@@ -3382,30 +3364,6 @@ dependencies = [
"tracing",
]
[[package]]
name = "jiff"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d699bc6dfc879fb1bf9bdff0d4c56f0884fc6f0d0eb0fba397a6d00cd9a6b85e"
dependencies = [
"jiff-static",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
]
[[package]]
name = "jiff-static"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d16e75759ee0aa64c57a56acbf43916987b20c77373cb7e808979e02b93c9f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "jobserver"
version = "0.1.32"
@@ -3577,9 +3535,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.26"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "lru"
@@ -3660,7 +3618,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -3774,18 +3732,6 @@ dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.48.0",
]
[[package]]
name = "mio"
version = "1.0.3"
@@ -3793,6 +3739,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
@@ -3870,23 +3817,29 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
[[package]]
name = "notify"
version = "6.1.1"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943"
dependencies = [
"bitflags 2.8.0",
"crossbeam-channel",
"filetime",
"fsevent-sys",
"inotify",
"kqueue",
"libc",
"log",
"mio 0.8.11",
"mio",
"notify-types",
"walkdir",
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]
name = "notify-types"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d"
[[package]]
name = "ntapi"
version = "0.4.1"
@@ -4329,6 +4282,8 @@ dependencies = [
"reqwest",
"rpds",
"rustls 0.23.18",
"rustls-pemfile 2.1.1",
"rustls-pki-types",
"scopeguard",
"send-future",
"serde",
@@ -4353,7 +4308,6 @@ dependencies = [
"tokio-util",
"toml_edit",
"tracing",
"tracing-utils",
"url",
"utils",
"uuid",
@@ -4531,7 +4485,7 @@ dependencies = [
"parquet",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -4633,7 +4587,7 @@ checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -4729,15 +4683,6 @@ version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
"portable-atomic",
]
[[package]]
name = "postgres"
version = "0.19.7"
@@ -4845,7 +4790,7 @@ dependencies = [
"bytes",
"crc32c",
"criterion",
"env_logger",
"env_logger 0.10.2",
"log",
"memoffset 0.9.0",
"once_cell",
@@ -4944,7 +4889,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
dependencies = [
"proc-macro2",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -4958,9 +4903,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.94"
version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
dependencies = [
"unicode-ident",
]
@@ -5035,7 +4980,7 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
dependencies = [
"bytes",
"heck",
"itertools 0.12.1",
"itertools 0.10.5",
"log",
"multimap",
"once_cell",
@@ -5044,7 +4989,7 @@ dependencies = [
"prost 0.12.6",
"prost-types 0.12.6",
"regex",
"syn 2.0.100",
"syn 2.0.90",
"tempfile",
]
@@ -5065,7 +5010,7 @@ dependencies = [
"prost 0.13.3",
"prost-types 0.13.3",
"regex",
"syn 2.0.100",
"syn 2.0.90",
"tempfile",
]
@@ -5076,10 +5021,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
dependencies = [
"anyhow",
"itertools 0.12.1",
"itertools 0.10.5",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -5092,7 +5037,7 @@ dependencies = [
"itertools 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -5139,7 +5084,7 @@ dependencies = [
"consumption_metrics",
"ecdsa 0.16.9",
"ed25519-dalek",
"env_logger",
"env_logger 0.10.2",
"fallible-iterator",
"flate2",
"framed-websockets",
@@ -5276,9 +5221,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.39"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
@@ -5807,7 +5752,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
"syn 2.0.100",
"syn 2.0.90",
"unicode-ident",
]
@@ -6022,7 +5967,7 @@ dependencies = [
"crc32c",
"criterion",
"desim",
"env_logger",
"env_logger 0.10.2",
"fail",
"futures",
"hex",
@@ -6044,7 +5989,6 @@ dependencies = [
"regex",
"remote_storage",
"reqwest",
"rustls 0.23.18",
"safekeeper_api",
"safekeeper_client",
"scopeguard",
@@ -6061,7 +6005,6 @@ dependencies = [
"tokio",
"tokio-io-timeout",
"tokio-postgres",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-tar",
"tokio-util",
@@ -6355,7 +6298,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -6437,7 +6380,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -6452,9 +6395,9 @@ dependencies = [
[[package]]
name = "sha1"
version = "0.10.6"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3"
dependencies = [
"cfg-if",
"cpufeatures",
@@ -6839,7 +6782,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -6890,9 +6833,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.100"
version = "2.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
dependencies = [
"proc-macro2",
"quote",
@@ -6922,7 +6865,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -6973,6 +6916,15 @@ dependencies = [
"serde_json",
]
[[package]]
name = "termcolor"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
dependencies = [
"winapi-util",
]
[[package]]
name = "test-context"
version = "0.3.0"
@@ -6991,7 +6943,7 @@ checksum = "78ea17a2dc368aeca6f554343ced1b1e31f76d63683fa8016e5844bd7a5144a1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7020,7 +6972,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7031,7 +6983,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7162,27 +7114,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tls_codec"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b"
dependencies = [
"tls_codec_derive",
"zeroize",
]
[[package]]
name = "tls_codec_derive"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "tokio"
version = "1.43.0"
@@ -7192,7 +7123,7 @@ dependencies = [
"backtrace",
"bytes",
"libc",
"mio 1.0.3",
"mio",
"parking_lot 0.12.1",
"pin-project-lite",
"signal-hook-registry",
@@ -7235,7 +7166,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7468,7 +7399,7 @@ dependencies = [
"prost-build 0.13.3",
"prost-types 0.13.3",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7583,7 +7514,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -7914,7 +7845,6 @@ dependencies = [
"tracing",
"tracing-error",
"tracing-subscriber",
"tracing-utils",
"walkdir",
]
@@ -7978,7 +7908,7 @@ dependencies = [
"anyhow",
"camino-tempfile",
"clap",
"env_logger",
"env_logger 0.10.2",
"log",
"postgres",
"postgres_ffi",
@@ -8083,7 +8013,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
"wasm-bindgen-shared",
]
@@ -8117,7 +8047,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -8424,7 +8354,6 @@ name = "workspace_hack"
version = "0.1.0"
dependencies = [
"ahash",
"anstream",
"anyhow",
"base64 0.13.1",
"base64 0.21.7",
@@ -8435,17 +8364,12 @@ dependencies = [
"chrono",
"clap",
"clap_builder",
"const-oid",
"crypto-bigint 0.5.5",
"der 0.7.8",
"deranged",
"digest",
"displaydoc",
"ecdsa 0.16.9",
"either",
"elliptic-curve 0.13.8",
"env_filter",
"env_logger",
"fail",
"form_urlencoded",
"futures-channel",
@@ -8463,6 +8387,7 @@ dependencies = [
"hyper-util",
"indexmap 1.9.3",
"indexmap 2.0.1",
"itertools 0.10.5",
"itertools 0.12.1",
"lazy_static",
"libc",
@@ -8478,7 +8403,6 @@ dependencies = [
"num-rational",
"num-traits",
"once_cell",
"p256 0.13.2",
"parquet",
"prettyplease",
"proc-macro2",
@@ -8491,7 +8415,6 @@ dependencies = [
"reqwest",
"rustls 0.23.18",
"scopeguard",
"sec1 0.7.3",
"serde",
"serde_json",
"sha2",
@@ -8500,7 +8423,7 @@ dependencies = [
"spki 0.7.3",
"stable_deref_trait",
"subtle",
"syn 2.0.100",
"syn 2.0.90",
"sync_wrapper 0.1.2",
"tikv-jemalloc-ctl",
"tikv-jemalloc-sys",
@@ -8537,18 +8460,6 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "x509-cert"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94"
dependencies = [
"const-oid",
"der 0.7.8",
"spki 0.7.3",
"tls_codec",
]
[[package]]
name = "x509-certificate"
version = "0.23.1"
@@ -8629,7 +8540,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
"synstructure",
]
@@ -8651,7 +8562,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -8671,15 +8582,15 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.8.1"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
dependencies = [
"serde",
"zeroize_derive",
@@ -8693,7 +8604,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]
@@ -8715,7 +8626,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 2.0.90",
]
[[package]]

View File

@@ -106,13 +106,13 @@ hostname = "0.4"
http = {version = "1.1.0", features = ["std"]}
http-types = { version = "2", default-features = false }
http-body-util = "0.1.2"
humantime = "2.2"
humantime = "2.1"
humantime-serde = "1.1.1"
hyper0 = { package = "hyper", version = "0.14" }
hyper = "1.4"
hyper-util = "0.1"
tokio-tungstenite = "0.21.0"
indexmap = { version = "2", features = ["serde"] }
indexmap = "2"
indoc = "2"
ipnet = "2.10.0"
itertools = "0.10"
@@ -126,9 +126,7 @@ measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
# Do not update to >= 7.0.0, at least. The update will have a significant impact
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
notify = "6.0.0"
notify = "8.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
@@ -221,7 +219,7 @@ zerocopy = { version = "0.7", features = ["derive"] }
json-structural-diff = { version = "0.2.0" }
## TODO replace this with tracing
env_logger = "0.11"
env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed

View File

@@ -1735,8 +1735,6 @@ RUN set -e \
libevent-dev \
libtool \
pkg-config \
libcurl4-openssl-dev \
libssl-dev \
&& apt clean && rm -rf /var/lib/apt/lists/*
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
@@ -1745,7 +1743,7 @@ RUN set -e \
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
&& cd pgbouncer \
&& ./autogen.sh \
&& ./configure --prefix=/usr/local/pgbouncer \
&& ./configure --prefix=/usr/local/pgbouncer --without-openssl \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=

View File

@@ -145,7 +145,7 @@ merge: |
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
RUN chmod 0666 /etc/compute_rsyslog.conf
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
RUN chmod 0666 /var/log/
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/

View File

@@ -140,7 +140,7 @@ merge: |
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
RUN chmod 0666 /etc/compute_rsyslog.conf
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
RUN chmod 0666 /var/log/
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/

View File

@@ -26,7 +26,6 @@ fail.workspace = true
flate2.workspace = true
futures.workspace = true
http.workspace = true
indexmap.workspace = true
jsonwebtoken.workspace = true
metrics.workspace = true
nix.workspace = true
@@ -35,19 +34,16 @@ num_cpus.workspace = true
once_cell.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
p256 = { version = "0.13", features = ["pem"] }
postgres.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["json"] }
ring = "0.17"
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
spki = { version = "0.7.3", features = ["std"] }
tar.workspace = true
tower.workspace = true
tower-http.workspace = true
reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
@@ -61,7 +57,6 @@ thiserror.workspace = true
url.workspace = true
uuid.workspace = true
walkdir.workspace = true
x509-cert = { version = "0.2.5" }
postgres_initdb.workspace = true
compute_api.workspace = true

View File

@@ -37,11 +37,10 @@ use crate::logger::startup_context_from_env;
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
use crate::monitor::launch_monitor;
use crate::pg_helpers::*;
use crate::rsyslog::{configure_audit_rsyslog, launch_pgaudit_gc};
use crate::rsyslog::configure_audit_rsyslog;
use crate::spec::*;
use crate::swap::resize_swap;
use crate::sync_sk::{check_if_synced, ping_safekeeper};
use crate::tls::watch_cert_for_changes;
use crate::{config, extension_server, local_proxy};
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
@@ -113,7 +112,6 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub compute_ctl_config: ComputeCtlConfig,
}
// store some metrics about download size that might impact startup time
@@ -137,6 +135,8 @@ pub struct ComputeState {
/// passed by the control plane with a /configure HTTP request.
pub pspec: Option<ParsedSpec>,
pub compute_ctl_config: ComputeCtlConfig,
/// If the spec is passed by a /configure request, 'startup_span' is the
/// /configure request's tracing span. The main thread enters it when it
/// processes the compute startup, so that the compute startup is considered
@@ -160,6 +160,7 @@ impl ComputeState {
last_active: None,
error: None,
pspec: None,
compute_ctl_config: ComputeCtlConfig::default(),
startup_span: None,
metrics: ComputeMetrics::default(),
}
@@ -313,6 +314,7 @@ impl ComputeNode {
let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
new_state.pspec = Some(pspec);
}
new_state.compute_ctl_config = compute_ctl_config;
Ok(ComputeNode {
params,
@@ -321,7 +323,6 @@ impl ComputeNode {
state: Mutex::new(new_state),
state_changed: Condvar::new(),
ext_download_progress: RwLock::new(HashMap::new()),
compute_ctl_config,
})
}
@@ -344,7 +345,7 @@ impl ComputeNode {
// requests while configuration is still in progress.
crate::http::server::Server::External {
port: this.params.external_http_port,
config: this.compute_ctl_config.clone(),
jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(),
compute_id: this.params.compute_id.clone(),
}
.launch(&this);
@@ -523,16 +524,6 @@ impl ComputeNode {
// Collect all the tasks that must finish here
let mut pre_tasks = tokio::task::JoinSet::new();
// Make sure TLS certificates are properly loaded and in the right place.
if self.compute_ctl_config.tls.is_some() {
let this = self.clone();
pre_tasks.spawn(async move {
this.watch_cert_for_changes().await;
Ok::<(), anyhow::Error>(())
});
}
// If there are any remote extensions in shared_preload_libraries, start downloading them
if pspec.spec.remote_extensions.is_some() {
let (this, spec) = (self.clone(), pspec.spec.clone());
@@ -588,13 +579,11 @@ impl ComputeNode {
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let _handle = tokio::spawn(async move {
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
let res = tune_pgbouncer(pgbouncer_settings).await;
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
// Continue with the startup anyway
@@ -625,11 +614,13 @@ impl ComputeNode {
}
let log_directory_path = Path::new(&self.params.pgdata).join("log");
let log_directory_path = log_directory_path.to_string_lossy().to_string();
configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;
// Launch a background task to clean up the audit logs
launch_pgaudit_gc(log_directory_path);
// TODO: make this more robust
// now rsyslog starts once and there is no monitoring or restart if it fails
configure_audit_rsyslog(
log_directory_path.to_str().unwrap(),
"hipaa",
&remote_endpoint,
)?;
}
// Launch remaining service threads
@@ -654,9 +645,9 @@ impl ComputeNode {
if pspec.spec.mode == ComputeMode::Primary {
self.configure_as_primary(&compute_state)?;
let conf = self.get_tokio_conn_conf(None);
tokio::task::spawn(async {
let res = get_installed_extensions(conf).await;
let conf = self.get_conn_conf(None);
tokio::task::spawn_blocking(|| {
let res = get_installed_extensions(conf);
match res {
Ok(extensions) => {
info!(
@@ -864,6 +855,12 @@ impl ComputeNode {
info!("Storage auth token not set");
}
if let Some(spec) = &compute_state.pspec {
config.application_name(&format!("compute_ctl-{}", spec.spec.mode.to_type_str()));
} else {
config.application_name("compute_ctl");
}
// Connect to pageserver
let mut client = config.connect(NoTls)?;
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
@@ -1114,10 +1111,9 @@ impl ComputeNode {
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(
pgdata_path,
&pgdata_path.join("postgresql.conf"),
&pspec.spec,
self.params.internal_http_port,
&self.compute_ctl_config.tls,
)?;
// Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1499,13 +1495,11 @@ impl ComputeNode {
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
tokio::spawn(async move {
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
let res = tune_pgbouncer(pgbouncer_settings).await;
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}
@@ -1517,8 +1511,7 @@ impl ComputeNode {
// Spawn a background task to do the configuration,
// so that we don't block the main thread that starts Postgres.
let mut local_proxy = local_proxy.clone();
local_proxy.tls = self.compute_ctl_config.tls.clone();
let local_proxy = local_proxy.clone();
tokio::spawn(async move {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
@@ -1528,12 +1521,8 @@ impl ComputeNode {
// Write new config
let pgdata_path = Path::new(&self.params.pgdata);
config::write_postgres_conf(
pgdata_path,
&spec,
self.params.internal_http_port,
&self.compute_ctl_config.tls,
)?;
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?;
if !spec.skip_pg_catalog_updates {
let max_concurrent_connections = spec.reconfigure_concurrency;
@@ -1604,56 +1593,6 @@ impl ComputeNode {
Ok(())
}
pub async fn watch_cert_for_changes(self: Arc<Self>) {
// update status on cert renewal
if let Some(tls_config) = &self.compute_ctl_config.tls {
let tls_config = tls_config.clone();
// wait until the cert exists.
let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
tokio::task::spawn_blocking(move || {
let handle = tokio::runtime::Handle::current();
'cert_update: loop {
// let postgres/pgbouncer/local_proxy know the new cert/key exists.
// we need to wait until it's configurable first.
let mut state = self.state.lock().unwrap();
'status_update: loop {
match state.status {
// let's update the state to config pending
ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
state.set_status(
ComputeStatus::ConfigurationPending,
&self.state_changed,
);
break 'status_update;
}
// exit loop
ComputeStatus::Failed
| ComputeStatus::TerminationPending
| ComputeStatus::Terminated => break 'cert_update,
// wait
ComputeStatus::Init
| ComputeStatus::Configuration
| ComputeStatus::Empty => {
state = self.state_changed.wait(state).unwrap();
}
}
}
drop(state);
// wait for a new certificate update
if handle.block_on(cert_watch.changed()).is_err() {
break;
}
}
});
}
}
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
let mut state = self.state.lock().unwrap();

View File

@@ -6,13 +6,11 @@ use std::io::Write;
use std::io::prelude::*;
use std::path::Path;
use compute_api::responses::TlsConfig;
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
use crate::pg_helpers::{
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
};
use crate::tls::{self, SERVER_CRT, SERVER_KEY};
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -40,12 +38,10 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(
pgdata_path: &Path,
path: &Path,
spec: &ComputeSpec,
extension_server_port: u16,
tls_config: &Option<TlsConfig>,
) -> Result<()> {
let path = pgdata_path.join("postgresql.conf");
// File::create() destroys the file content if it exists.
let mut file = File::create(path)?;
@@ -90,20 +86,6 @@ pub fn write_postgres_conf(
)?;
}
// tls
if let Some(tls_config) = tls_config {
writeln!(file, "ssl = on")?;
// postgres requires the keyfile to be in a secure file,
// currently too complicated to ensure that at the VM level,
// so we just copy them to another file instead. :shrug:
tls::update_key_path_blocking(pgdata_path, tls_config);
// these are the default, but good to be explicit.
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
}
// Locales
if cfg!(target_os = "macos") {
writeln!(file, "lc_messages='C'")?;
@@ -117,6 +99,7 @@ pub fn write_postgres_conf(
writeln!(file, "lc_numeric='C.UTF-8'")?;
}
writeln!(file, "neon.endpoint_type={}", spec.mode.to_type_str())?;
match spec.mode {
ComputeMode::Primary => {}
ComputeMode::Static(lsn) => {
@@ -167,8 +150,7 @@ pub fn write_postgres_conf(
writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
// This log level is very verbose
// but this is necessary for HIPAA compliance.
// Exclude 'misc' category, because it doesn't contain anythig relevant.
writeln!(file, "pgaudit.log='all, -misc'")?;
writeln!(file, "pgaudit.log='all'")?;
writeln!(file, "pgaudit.log_parameter=on")?;
// Disable logging of catalog queries
// The catalog doesn't contain sensitive data, so we don't need to audit it.

View File

@@ -4,8 +4,7 @@ module(load="imfile")
# Input configuration for log files in the specified directory
# Replace {log_directory} with the directory containing the log files
input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
# the directory to store rsyslog state files
global(workDirectory="/var/log/rsyslog")
global(workDirectory="/var/log")
# Forward logs to remote syslog server
*.* @@{remote_endpoint}

View File

@@ -1,2 +1 @@
pub(in crate::http) mod authorize;
pub(in crate::http) mod request_id;

View File

@@ -1,16 +0,0 @@
use axum::{extract::Request, middleware::Next, response::Response};
use uuid::Uuid;
use crate::http::headers::X_REQUEST_ID;
/// This middleware function allows compute_ctl to generate its own request ID
/// if one isn't supplied. The control plane will always send one as a UUID. The
/// neon Postgres extension on the other hand does not send one.
pub async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
let headers = request.headers_mut();
if !headers.contains_key(X_REQUEST_ID) {
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
}
next.run(request).await
}

View File

@@ -5,19 +5,20 @@ use std::time::Duration;
use anyhow::Result;
use axum::Router;
use axum::middleware::{self};
use axum::response::IntoResponse;
use axum::extract::Request;
use axum::middleware::{self, Next};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use compute_api::responses::ComputeCtlConfig;
use http::StatusCode;
use jsonwebtoken::jwk::JwkSet;
use tokio::net::TcpListener;
use tower::ServiceBuilder;
use tower_http::{
auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer,
};
use tracing::{Span, error, info};
use uuid::Uuid;
use super::middleware::request_id::maybe_add_request_id_header;
use super::{
headers::X_REQUEST_ID,
middleware::authorize::Authorize,
@@ -41,7 +42,7 @@ pub enum Server {
},
External {
port: u16,
config: ComputeCtlConfig,
jwks: JwkSet,
compute_id: String,
},
}
@@ -79,7 +80,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
router
}
Server::External {
config, compute_id, ..
jwks, compute_id, ..
} => {
let unauthenticated_router =
Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
@@ -95,7 +96,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
.route("/terminate", post(terminate::terminate))
.layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
compute_id.clone(),
config.jwks.clone(),
jwks.clone(),
)));
router
@@ -218,3 +219,15 @@ impl Server {
tokio::spawn(self.serve(state));
}
}
/// This middleware function allows compute_ctl to generate its own request ID
/// if one isn't supplied. The control plane will always send one as a UUID. The
/// neon Postgres extension on the other hand does not send one.
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
let headers = request.headers_mut();
if headers.get(X_REQUEST_ID).is_none() {
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
}
next.run(request).await
}

View File

@@ -2,7 +2,7 @@ use std::collections::HashMap;
use anyhow::Result;
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use tokio_postgres::{Client, Config, NoTls};
use postgres::{Client, NoTls};
use crate::metrics::INSTALLED_EXTENSIONS;
@@ -10,7 +10,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
/// and to make database listing query here more explicit.
///
/// Limit the number of databases to 500 to avoid excessive load.
async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state
let databases = client
@@ -20,8 +20,7 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
AND datconnlimit <> - 2
LIMIT 500",
&[],
)
.await?
)?
.iter()
.map(|row| {
let db: String = row.get("datname");
@@ -37,36 +36,20 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
/// Same extension can be installed in multiple databases with different versions,
/// so we report a separate metric (number of databases where it is installed)
/// for each extension version.
pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<InstalledExtensions> {
conf.application_name("compute_ctl:get_installed_extensions");
let databases: Vec<String> = {
let (mut client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
list_dbs(&mut client).await?
};
let mut client = conf.connect(NoTls)?;
let databases: Vec<String> = list_dbs(&mut client)?;
let mut extensions_map: HashMap<(String, String, String), InstalledExtension> = HashMap::new();
for db in databases.iter() {
conf.dbname(db);
let (client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
let extensions: Vec<(String, String, i32)> = client
let mut db_client = conf.connect(NoTls)?;
let extensions: Vec<(String, String, i32)> = db_client
.query(
"SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension",
&[],
)
.await?
)?
.iter()
.map(|row| {
(

View File

@@ -26,4 +26,3 @@ pub mod spec;
mod spec_apply;
pub mod swap;
pub mod sync_sk;
pub mod tls;

View File

@@ -24,8 +24,7 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result
.with_writer(std::io::stderr);
// Initialize OpenTelemetry
let otlp_layer =
tracing_utils::init_tracing("compute_ctl", tracing_utils::ExportConfig::default()).await;
let otlp_layer = tracing_utils::init_tracing("compute_ctl").await;
// Put it all together
tracing_subscriber::registry()

View File

@@ -1,8 +1,6 @@
use metrics::core::{AtomicF64, Collector, GenericGauge};
use metrics::core::Collector;
use metrics::proto::MetricFamily;
use metrics::{
IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec,
};
use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
use once_cell::sync::Lazy;
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
@@ -61,20 +59,10 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(||
.expect("failed to define a metric")
});
// Size of audit log directory in bytes
pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
register_gauge!(
"compute_audit_log_dir_size",
"Size of audit log directory in bytes",
)
.expect("failed to define a metric")
});
pub fn collect() -> Vec<MetricFamily> {
let mut metrics = INSTALLED_EXTENSIONS.collect();
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
metrics.extend(DB_MIGRATION_FAILED.collect());
metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
metrics
}

View File

@@ -10,10 +10,8 @@ use std::str::FromStr;
use std::time::{Duration, Instant};
use anyhow::{Result, bail};
use compute_api::responses::TlsConfig;
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
use futures::StreamExt;
use indexmap::IndexMap;
use ini::Ini;
use notify::{RecursiveMode, Watcher};
use postgres::config::Config;
@@ -208,8 +206,8 @@ impl Escaping for PgIdent {
/// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,
/// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>
fn pg_quote_dollar(&self) -> (String, String) {
let mut tag: String = "x".to_string();
let mut outer_tag = "xx".to_string();
let mut tag: String = "".to_string();
let mut outer_tag = "x".to_string();
// Find the first suitable tag that is not present in the string.
// Postgres' max role/DB name length is 63 bytes, so even in the
@@ -408,7 +406,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
/// Update pgbouncer.ini with provided options
fn update_pgbouncer_ini(
pgbouncer_config: IndexMap<String, String>,
pgbouncer_config: HashMap<String, String>,
pgbouncer_ini_path: &str,
) -> Result<()> {
let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
@@ -429,10 +427,7 @@ fn update_pgbouncer_ini(
/// Tune pgbouncer.
/// 1. Apply new config using pgbouncer admin console
/// 2. Add new values to pgbouncer.ini to preserve them after restart
pub async fn tune_pgbouncer(
mut pgbouncer_config: IndexMap<String, String>,
tls_config: Option<TlsConfig>,
) -> Result<()> {
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
// for VMs use pgbouncer specific way to connect to
// pgbouncer admin console without password
@@ -478,21 +473,19 @@ pub async fn tune_pgbouncer(
}
};
if let Some(tls_config) = tls_config {
// pgbouncer starts in a half-ok state if it cannot find these files.
// It will default to client_tls_sslmode=deny, which causes proxy to error.
// There is a small window at startup where these files don't yet exist in the VM.
// Best to wait until it exists.
loop {
if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
break;
}
tokio::time::sleep(Duration::from_millis(500)).await
}
// Apply new config
for (option_name, value) in pgbouncer_config.iter() {
let query = format!("SET {}={}", option_name, value);
// keep this log line for debugging purposes
info!("Applying pgbouncer setting change: {}", query);
pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
if let Err(err) = client.simple_query(&query).await {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
);
};
}
// save values to pgbouncer.ini
@@ -508,13 +501,6 @@ pub async fn tune_pgbouncer(
};
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
info!("Applying pgbouncer setting change");
if let Err(err) = client.simple_query("RELOAD").await {
// Don't fail on error, just print it into log
error!("Failed to apply pgbouncer setting change, {err}",);
};
Ok(())
}

View File

@@ -1,11 +1,8 @@
use std::fs;
use std::path::Path;
use std::process::Command;
use std::time::Duration;
use std::{fs::OpenOptions, io::Write};
use anyhow::{Context, Result};
use tracing::{error, info, instrument, warn};
use tracing::info;
fn get_rsyslog_pid() -> Option<String> {
let output = Command::new("pgrep")
@@ -46,7 +43,7 @@ fn restart_rsyslog() -> Result<()> {
}
pub fn configure_audit_rsyslog(
log_directory: String,
log_directory: &str,
tag: &str,
remote_endpoint: &str,
) -> Result<()> {
@@ -78,61 +75,3 @@ pub fn configure_audit_rsyslog(
Ok(())
}
#[instrument(skip_all)]
async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {
info!("running pgaudit GC main loop");
loop {
// Check log_directory for old pgaudit logs and delete them.
// New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age
// Find files that were not modified in the last 15 minutes and delete them.
// This should be enough time for rsyslog to process the logs and for us to catch the alerts.
//
// In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age.
//
// TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog
// imfile-state files, but for now just do a simple GC to avoid filling up the disk.
let _ = Command::new("find")
.arg(&log_directory)
.arg("-name")
.arg("audit*.log")
.arg("-mmin")
.arg("+15")
.arg("-delete")
.output()?;
// also collect the metric for the size of the log directory
async fn get_log_files_size(path: &Path) -> Result<u64> {
let mut total_size = 0;
for entry in fs::read_dir(path)? {
let entry = entry?;
let entry_path = entry.path();
if entry_path.is_file() && entry_path.to_string_lossy().ends_with("log") {
total_size += entry.metadata()?.len();
}
}
Ok(total_size)
}
let log_directory_size = get_log_files_size(Path::new(&log_directory))
.await
.unwrap_or_else(|e| {
warn!("Failed to get log directory size: {}", e);
0
});
crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64);
tokio::time::sleep(Duration::from_secs(60)).await;
}
}
// launch pgaudit GC thread to clean up the old pgaudit logs stored in the log_directory
pub fn launch_pgaudit_gc(log_directory: String) {
tokio::spawn(async move {
if let Err(e) = pgaudit_gc_main_loop(log_directory).await {
error!("pgaudit GC main loop failed: {}", e);
}
});
}

View File

@@ -1,118 +0,0 @@
use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
use anyhow::{Context, Result, bail};
use compute_api::responses::TlsConfig;
use ring::digest;
use spki::ObjectIdentifier;
use spki::der::{Decode, PemReader};
use x509_cert::Certificate;
#[derive(Clone, Copy)]
pub struct CertDigest(digest::Digest);
pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
let mut digest = compute_digest(&cert_path).await;
let (tx, rx) = tokio::sync::watch::channel(digest);
tokio::spawn(async move {
while !tx.is_closed() {
let new_digest = compute_digest(&cert_path).await;
if digest.0.as_ref() != new_digest.0.as_ref() {
digest = new_digest;
_ = tx.send(digest);
}
tokio::time::sleep(Duration::from_secs(60)).await
}
});
rx
}
async fn compute_digest(cert_path: &str) -> CertDigest {
loop {
match try_compute_digest(cert_path).await {
Ok(d) => break d,
Err(e) => {
tracing::error!("could not read cert file {e:?}");
tokio::time::sleep(Duration::from_secs(1)).await
}
}
}
}
async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
let data = tokio::fs::read(cert_path).await?;
// sha256 is extremely collision resistent. can safely assume the digest to be unique
Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
}
pub const SERVER_CRT: &str = "server.crt";
pub const SERVER_KEY: &str = "server.key";
pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
loop {
match try_update_key_path_blocking(pg_data, tls_config) {
Ok(()) => break,
Err(e) => {
tracing::error!("could not create key file {e:?}");
std::thread::sleep(Duration::from_secs(1))
}
}
}
}
// Postgres requires the keypath be "secure". This means
// 1. Owned by the postgres user.
// 2. Have permission 600.
fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
let key = std::fs::read_to_string(&tls_config.key_path)?;
let crt = std::fs::read_to_string(&tls_config.cert_path)?;
// to mitigate a race condition during renewal.
verify_key_cert(&key, &crt)?;
let mut key_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.mode(0o600)
.open(pg_data.join(SERVER_KEY))?;
let mut crt_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.mode(0o600)
.open(pg_data.join(SERVER_CRT))?;
key_file.write_all(key.as_bytes())?;
crt_file.write_all(crt.as_bytes())?;
Ok(())
}
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
.context("decode cert")?;
match cert.signature_algorithm.oid {
ECDSA_WITH_SHA256 => {
let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
let a = key.public_key().to_sec1_bytes();
let b = cert
.tbs_certificate
.subject_public_key_info
.subject_public_key
.raw_bytes();
if *a != *b {
bail!("private key file does not match certificate")
}
}
_ => bail!("unknown TLS key type"),
}
Ok(())
}

View File

@@ -64,8 +64,7 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
#[test]
fn ident_pg_quote_dollar() {
let test_cases = vec![
("name", ("$x$name$x$", "xx")),
("name$", ("$x$name$$x$", "xx")),
("name", ("$$name$$", "x")),
("name$$", ("$x$name$$$x$", "xx")),
("name$$$", ("$x$name$$$$x$", "xx")),
("name$$$$", ("$x$name$$$$$x$", "xx")),

View File

@@ -36,9 +36,7 @@ use pageserver_api::config::{
use pageserver_api::controller_api::{
NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,
};
use pageserver_api::models::{
ShardParameters, TenantConfigRequest, TimelineCreateRequest, TimelineInfo,
};
use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
@@ -979,7 +977,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
neon_distrib_dir: None,
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
storage_controller: None,
control_plane_hooks_api: None,
control_plane_compute_hook_api: None,
generate_local_ssl_certs: false,
}
};
@@ -1131,16 +1129,12 @@ async fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> any
let tenant_id = get_tenant_id(args.tenant_id, env)?;
let tenant_conf: HashMap<_, _> =
args.config.iter().flat_map(|c| c.split_once(':')).collect();
let config = PageServerNode::parse_config(tenant_conf)?;
let req = TenantConfigRequest { tenant_id, config };
let storage_controller = StorageController::from_env(env);
storage_controller
.set_tenant_config(&req)
pageserver
.tenant_config(tenant_id, tenant_conf)
.await
.with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
println!("tenant {tenant_id} successfully configured via storcon");
println!("tenant {tenant_id} successfully configured on the pageserver");
}
}
Ok(())

View File

@@ -72,9 +72,9 @@ pub struct LocalEnv {
// be propagated into each pageserver's configuration.
pub control_plane_api: Url,
// Control plane upcall APIs for storage controller. If set, this will be propagated into the
// Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration.
pub control_plane_hooks_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Url>,
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
@@ -104,7 +104,6 @@ pub struct OnDiskConfig {
pub pageservers: Vec<PageServerConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Url>,
pub control_plane_hooks_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Url>,
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
// Note: skip serializing because in compat tests old storage controller fails
@@ -137,7 +136,7 @@ pub struct NeonLocalInitConf {
pub pageservers: Vec<NeonLocalInitPageserverConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Url>,
pub control_plane_hooks_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Option<Url>>,
pub generate_local_ssl_certs: bool,
}
@@ -149,7 +148,7 @@ pub struct NeonBroker {
pub listen_addr: SocketAddr,
}
/// A part of storage controller's config the neon_local knows about.
/// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonStorageControllerConf {
@@ -176,11 +175,10 @@ pub struct NeonStorageControllerConf {
#[serde(with = "humantime_serde")]
pub long_reconcile_threshold: Option<Duration>,
#[serde(default)]
pub use_https_pageserver_api: bool,
pub timelines_onto_safekeepers: bool,
pub use_https_safekeeper_api: bool,
}
impl NeonStorageControllerConf {
@@ -206,7 +204,6 @@ impl Default for NeonStorageControllerConf {
long_reconcile_threshold: None,
use_https_pageserver_api: false,
timelines_onto_safekeepers: false,
use_https_safekeeper_api: false,
}
}
}
@@ -304,7 +301,6 @@ pub struct SafekeeperConf {
pub pg_port: u16,
pub pg_tenant_only_port: Option<u16>,
pub http_port: u16,
pub https_port: Option<u16>,
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
@@ -319,7 +315,6 @@ impl Default for SafekeeperConf {
pg_port: 0,
pg_tenant_only_port: None,
http_port: 0,
https_port: None,
sync: true,
remote_storage: None,
backup_threads: None,
@@ -578,8 +573,7 @@ impl LocalEnv {
pageservers,
safekeepers,
control_plane_api,
control_plane_hooks_api,
control_plane_compute_hook_api: _,
control_plane_compute_hook_api,
branch_name_mappings,
generate_local_ssl_certs,
} = on_disk_config;
@@ -594,7 +588,7 @@ impl LocalEnv {
pageservers,
safekeepers,
control_plane_api: control_plane_api.unwrap(),
control_plane_hooks_api,
control_plane_compute_hook_api,
branch_name_mappings,
generate_local_ssl_certs,
}
@@ -701,8 +695,7 @@ impl LocalEnv {
pageservers: vec![], // it's skip_serializing anyway
safekeepers: self.safekeepers.clone(),
control_plane_api: Some(self.control_plane_api.clone()),
control_plane_hooks_api: self.control_plane_hooks_api.clone(),
control_plane_compute_hook_api: None,
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
branch_name_mappings: self.branch_name_mappings.clone(),
generate_local_ssl_certs: self.generate_local_ssl_certs,
},
@@ -786,8 +779,8 @@ impl LocalEnv {
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
generate_local_ssl_certs,
control_plane_hooks_api,
} = conf;
// Find postgres binaries.
@@ -834,7 +827,7 @@ impl LocalEnv {
pageservers: pageservers.iter().map(Into::into).collect(),
safekeepers,
control_plane_api: control_plane_api.unwrap(),
control_plane_hooks_api,
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
branch_name_mappings: Default::default(),
generate_local_ssl_certs,
};
@@ -849,9 +842,6 @@ impl LocalEnv {
// create safekeeper dirs
for safekeeper in &env.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
SafekeeperNode::from_env(&env, safekeeper)
.initialize()
.context("safekeeper init failed")?;
}
// initialize pageserver state

View File

@@ -111,18 +111,6 @@ impl SafekeeperNode {
.expect("non-Unicode path")
}
/// Initializes a safekeeper node by creating all necessary files,
/// e.g. SSL certificates.
pub fn initialize(&self) -> anyhow::Result<()> {
if self.env.generate_local_ssl_certs {
self.env.generate_ssl_cert(
&self.datadir_path().join("server.crt"),
&self.datadir_path().join("server.key"),
)?;
}
Ok(())
}
pub async fn start(
&self,
extra_opts: &[String],
@@ -208,16 +196,6 @@ impl SafekeeperNode {
]);
}
if let Some(https_port) = self.conf.https_port {
args.extend([
"--listen-https".to_owned(),
format!("{}:{}", self.listen_addr, https_port),
]);
}
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
}
args.extend_from_slice(extra_opts);
background_process::start_process(

View File

@@ -14,7 +14,7 @@ use pageserver_api::controller_api::{
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
TenantCreateResponse, TenantLocateResponse,
};
use pageserver_api::models::{TenantConfigRequest, TimelineCreateRequest, TimelineInfo};
use pageserver_api::models::{TimelineCreateRequest, TimelineInfo};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
@@ -538,10 +538,6 @@ impl StorageController {
args.push("--use-https-pageserver-api".to_string());
}
if self.config.use_https_safekeeper_api {
args.push("--use-https-safekeeper-api".to_string());
}
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
}
@@ -562,8 +558,10 @@ impl StorageController {
args.push(format!("--public-key=\"{public_key}\""));
}
if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {
args.push(format!("--control-plane-url={control_plane_hooks_api}"));
if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
args.push(format!(
"--compute-hook-url={control_plane_compute_hook_api}"
));
}
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
@@ -880,9 +878,4 @@ impl StorageController {
)
.await
}
pub async fn set_tenant_config(&self, req: &TenantConfigRequest) -> anyhow::Result<()> {
self.dispatch(Method::PUT, "v1/tenant/config".to_string(), Some(req))
.await
}
}

View File

@@ -14,8 +14,8 @@ use pageserver_api::controller_api::{
TenantShardMigrateRequest, TenantShardMigrateResponse,
};
use pageserver_api::models::{
EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters,
TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
TenantShardSplitResponse,
};
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
@@ -158,6 +158,12 @@ enum Command {
#[arg(long)]
tenant_id: TenantId,
},
/// For a tenant which hasn't been onboarded to the storage controller yet, add it in secondary
/// mode so that it can warm up content on a pageserver.
TenantWarmup {
#[arg(long)]
tenant_id: TenantId,
},
TenantSetPreferredAz {
#[arg(long)]
tenant_id: TenantId,
@@ -865,6 +871,94 @@ async fn main() -> anyhow::Result<()> {
)
.await?;
}
Command::TenantWarmup { tenant_id } => {
let describe_response = storcon_client
.dispatch::<(), TenantDescribeResponse>(
Method::GET,
format!("control/v1/tenant/{tenant_id}"),
None,
)
.await;
match describe_response {
Ok(describe) => {
if matches!(describe.policy, PlacementPolicy::Secondary) {
// Fine: it's already known to controller in secondary mode: calling
// again to put it into secondary mode won't cause problems.
} else {
anyhow::bail!("Tenant already present with policy {:?}", describe.policy);
}
}
Err(mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _)) => {
// Fine: this tenant isn't know to the storage controller yet.
}
Err(e) => {
// Unexpected API error
return Err(e.into());
}
}
vps_client
.location_config(
TenantShardId::unsharded(tenant_id),
pageserver_api::models::LocationConfig {
mode: pageserver_api::models::LocationConfigMode::Secondary,
generation: None,
secondary_conf: Some(LocationConfigSecondary { warm: true }),
shard_number: 0,
shard_count: 0,
shard_stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE.0,
tenant_conf: TenantConfig::default(),
},
None,
true,
)
.await?;
let describe_response = storcon_client
.dispatch::<(), TenantDescribeResponse>(
Method::GET,
format!("control/v1/tenant/{tenant_id}"),
None,
)
.await?;
let secondary_ps_id = describe_response
.shards
.first()
.unwrap()
.node_secondary
.first()
.unwrap();
println!("Tenant {tenant_id} warming up on pageserver {secondary_ps_id}");
loop {
let (status, progress) = vps_client
.tenant_secondary_download(
TenantShardId::unsharded(tenant_id),
Some(Duration::from_secs(10)),
)
.await?;
println!(
"Progress: {}/{} layers, {}/{} bytes",
progress.layers_downloaded,
progress.layers_total,
progress.bytes_downloaded,
progress.bytes_total
);
match status {
StatusCode::OK => {
println!("Download complete");
break;
}
StatusCode::ACCEPTED => {
// Loop
}
_ => {
anyhow::bail!("Unexpected download status: {status}");
}
}
}
}
Command::TenantDrop { tenant_id, unclean } => {
if !unclean {
anyhow::bail!(

View File

@@ -31,6 +31,10 @@ reason = "the marvin attack only affects private key decryption, not public key
id = "RUSTSEC-2024-0436"
reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact."
[[advisories.ignore]]
id = "RUSTSEC-2025-0014"
reason = "The humantime is widely used and is not easy to replace right now. It is unmaintained, but it has no known vulnerabilities to care about. #11179"
# This section is considered when running `cargo deny check licenses`
# More documentation for the licenses section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html

View File

@@ -1,7 +1,3 @@
# Neon RFCs
## Overview
This directory contains Request for Comments documents, or RFCs, for
features or concepts that have been proposed. Alternative names:
technical design doc, ERD, one-pager
@@ -63,10 +59,37 @@ RFC lifecycle:
### RFC template
Use template with `YYYY-MM-DD-copy-me.md` as a starting point. Timestamp prefix helps to avoid awkward 'id' collisions.
```sh
cp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +"%Y-%m-%d")-<name>.md
```
Note, a lot of the sections are marked as if relevant. They are included into the template as a reminder and to help inspiration.
```
# Name
Created on ..
Implemented on ..
## Summary
## Motivation
## Non Goals (if relevant)
## Impacted components (e.g. pageserver, safekeeper, console, etc)
## Proposed implementation
### Reliability, failure modes and corner cases (if relevant)
### Interaction/Sequence diagram (if relevant)
### Scalability (if relevant)
### Security implications (if relevant)
### Unresolved questions (if relevant)
## Alternative implementation (if relevant)
## Pros/cons of proposed approaches (if relevant)
## Definition of Done (if relevant)
```

View File

@@ -1,30 +0,0 @@
# Name
Created on YYYY-MM-DD
Implemented on _TBD_
## Summary
## Motivation
## Non Goals (if relevant)
## Impacted components (e.g. pageserver, safekeeper, console, etc)
## Proposed implementation
### Reliability, failure modes and corner cases (if relevant)
### Interaction/Sequence diagram (if relevant)
### Scalability (if relevant)
### Security implications (if relevant)
### Unresolved questions (if relevant)
## Alternative implementation (if relevant)
## Pros/cons of proposed approaches (if relevant)
## Definition of Done (if relevant)

View File

@@ -101,25 +101,15 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co
postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
location changes.
The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.
The hook is configured using the storage controller's `--compute-hook-url` CLI option. If the hook requires
JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
Currently, there is two hooks, each computed by appending the name to the provided control plane URL prefix:
- `notify-attach`, called whenever attachment for pageservers changes
- `notify-safekeepers`, called whenever attachment for safekeepers changes
If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.
The hooks will be invoked with a `PUT` request.
In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,
In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
the compute hook.
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.
### `notify-attach` body
The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
```
struct ComputeHookNotifyRequestShard {
@@ -138,15 +128,15 @@ When a notification is received:
1. Modify postgres configuration for this tenant:
- set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
- set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
shards identified by `NodeId` must be converted to the address+port of the node.
- if stripe_size is not None, set `neon.shard_stripe_size` to this value
- if stripe_size is not None, set `neon.stripe_size` to this value
2. Send SIGHUP to postgres to reload configuration
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
will retry the notification until it succeeds..
Example body:
### Example notification body
```
{
@@ -158,34 +148,3 @@ Example body:
],
}
```
### `notify-safekeepers` body
The `notify-safekeepers` request body forllows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.
```
pub struct SafekeeperInfo {
pub id: NodeId,
pub hostname: String,
}
pub struct SafekeepersNotifyRequest {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub generation: u32,
pub safekeepers: Vec<SafekeeperInfo>,
}
```
When a notification is received:
1. Modify postgres configuration for this tenant:
- set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The
safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.
The hostname is provided for debugging purposes, so we reserve changes to how we pass it.
- set `neon.safekeepers_generation` to the provided `generation` value.
2. Send SIGHUP to postgres to reload configuration
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
will retry the notification until it succeeds..

View File

@@ -7,7 +7,6 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
chrono.workspace = true
indexmap.workspace = true
jsonwebtoken.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -139,7 +139,6 @@ pub struct ComputeCtlConfig {
/// Set of JSON web keys that the compute can use to authenticate
/// communication from the control plane.
pub jwks: JwkSet,
pub tls: Option<TlsConfig>,
}
impl Default for ComputeCtlConfig {
@@ -148,17 +147,10 @@ impl Default for ComputeCtlConfig {
jwks: JwkSet {
keys: Vec::default(),
},
tls: None,
}
}
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TlsConfig {
pub key_path: String,
pub cert_path: String,
}
/// Response of the `/computes/{compute_id}/spec` control-plane API.
#[derive(Deserialize, Debug)]
pub struct ControlPlaneSpecResponse {

View File

@@ -5,15 +5,12 @@
//! and connect it to the storage nodes.
use std::collections::HashMap;
use indexmap::IndexMap;
use regex::Regex;
use remote_storage::RemotePath;
use serde::{Deserialize, Serialize};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use crate::responses::TlsConfig;
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
pub type PgIdent = String;
@@ -128,7 +125,7 @@ pub struct ComputeSpec {
// information about available remote extensions
pub remote_extensions: Option<RemoteExtSpec>,
pub pgbouncer_settings: Option<IndexMap<String, String>>,
pub pgbouncer_settings: Option<HashMap<String, String>>,
// Stripe size for pageserver sharding, in pages
#[serde(default)]
@@ -275,6 +272,18 @@ pub enum ComputeMode {
Replica,
}
impl ComputeMode {
/// Convert the compute mode to a string that can be used to identify the type of compute,
/// which means that if it's a static compute, the LSN will not be included.
pub fn to_type_str(&self) -> &'static str {
match self {
ComputeMode::Primary => "primary",
ComputeMode::Static(_) => "static",
ComputeMode::Replica => "replica",
}
}
}
/// Log level for audit logging
/// Disabled, log, hipaa
/// Default is Disabled
@@ -360,9 +369,6 @@ pub struct LocalProxySpec {
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub jwks: Option<Vec<JwksSettings>>,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub tls: Option<TlsConfig>,
}
#[derive(Clone, Debug, Deserialize, Serialize)]

View File

@@ -7,7 +7,6 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
bytes.workspace = true
camino.workspace = true
fail.workspace = true
futures.workspace = true
hyper0.workspace = true
@@ -17,7 +16,6 @@ once_cell.workspace = true
pprof.workspace = true
regex.workspace = true
routerify.workspace = true
rustls-pemfile.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_path_to_error.workspace = true

View File

@@ -4,7 +4,6 @@ pub mod failpoints;
pub mod json;
pub mod request;
pub mod server;
pub mod tls_certs;
extern crate hyper0 as hyper;

View File

@@ -1,21 +0,0 @@
use camino::Utf8Path;
use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer};
pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
let file = std::fs::File::open(filename)?;
let mut reader = std::io::BufReader::new(file);
Ok(rustls_pemfile::certs(&mut reader).collect::<Result<Vec<_>, _>>()?)
}
pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
let file = std::fs::File::open(filename)?;
let mut reader = std::io::BufReader::new(file);
let key = rustls_pemfile::private_key(&mut reader)?;
key.ok_or(anyhow::anyhow!(
"no private key found in {}",
filename.as_str(),
))
}

View File

@@ -272,16 +272,15 @@ pub struct TenantConfigToml {
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
pub compaction_upper_limit: usize,
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
/// If true, compact down L0 across all tenant timelines before doing regular compaction.
pub compaction_l0_first: bool,
/// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
/// has an effect if `compaction_l0_first` is true. Defaults to true.
/// has an effect if `compaction_l0_first` is `true`.
pub compaction_l0_semaphore: bool,
/// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
/// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
/// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
/// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
pub l0_flush_delay_threshold: Option<usize>,
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
/// to avoid deadlock. 0 to disable. Disabled by default.
@@ -289,8 +288,6 @@ pub struct TenantConfigToml {
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
/// layer. This is a temporary backpressure mechanism which should be removed once
/// l0_flush_{delay,stall}_threshold is fully enabled.
///
/// TODO: this is no longer enabled, remove it when the config option is no longer set.
pub l0_flush_wait_upload: bool,
// Determines how much history is retained, to allow
// branching and read replicas at an older point in time.
@@ -570,15 +567,13 @@ pub mod tenant_conf_defaults {
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
// with this config, we can get a maximum peak compaction usage of 9 GB.
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
// Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
// read amp.
pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
crate::models::CompactionAlgorithm::Legacy;
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
@@ -589,8 +584,9 @@ pub mod tenant_conf_defaults {
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
// layer creation will end immediately. Set to 0 to disable.
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
// want to enable this feature.
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";

View File

@@ -176,39 +176,6 @@ impl LsnLease {
}
}
/// Controls the detach ancestor behavior.
/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
#[derive(Debug, Clone, Copy, Default)]
pub enum DetachBehavior {
#[default]
NoAncestorAndReparent,
MultiLevelAndNoReparent,
}
impl std::str::FromStr for DetachBehavior {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
"multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
"v1" => Ok(DetachBehavior::NoAncestorAndReparent),
"v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
_ => Err("cannot parse detach behavior"),
}
}
}
impl std::fmt::Display for DetachBehavior {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"),
DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"),
}
}
}
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
///
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
@@ -1258,10 +1225,9 @@ pub struct TimelineInfo {
pub last_record_lsn: Lsn,
pub prev_record_lsn: Option<Lsn>,
/// Legacy field, retained for one version to enable old storage controller to
/// decode (it was a mandatory field).
#[serde(default, rename = "latest_gc_cutoff_lsn")]
pub _unused: Lsn,
/// Legacy field for compat with control plane. Synonym of `min_readable_lsn`.
/// TODO: remove once control plane no longer reads it.
pub latest_gc_cutoff_lsn: Lsn,
/// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
/// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,

View File

@@ -112,16 +112,6 @@ impl ShardIdentity {
}
}
/// An unsharded identity with the given stripe size (if non-zero). This is typically used to
/// carry over a stripe size for an unsharded tenant from persistent storage.
pub fn unsharded_with_stripe_size(stripe_size: ShardStripeSize) -> Self {
let mut shard_identity = Self::unsharded();
if stripe_size.0 > 0 {
shard_identity.stripe_size = stripe_size;
}
shard_identity
}
/// A broken instance of this type is only used for `TenantState::Broken` tenants,
/// which are constructed in code paths that don't have access to proper configuration.
///

View File

@@ -135,8 +135,8 @@ impl Type {
pub enum Kind {
/// A simple type like `VARCHAR` or `INTEGER`.
Simple,
/// An enumerated type.
Enum,
/// An enumerated type along with its variants.
Enum(Vec<String>),
/// A pseudo-type.
Pseudo,
/// An array type along with the type of its elements.
@@ -146,9 +146,9 @@ pub enum Kind {
/// A multirange type along with the type of its elements.
Multirange(Type),
/// A domain type along with its underlying type.
Domain(Oid),
/// A composite type.
Composite(Oid),
Domain(Type),
/// A composite type along with information about its fields.
Composite(Vec<Field>),
}
/// Information about a field of a composite type.

View File

@@ -19,10 +19,10 @@ use crate::config::{Host, SslMode};
use crate::connection::{Request, RequestMessages};
use crate::query::RowStream;
use crate::simple_query::SimpleQueryStream;
use crate::types::{Oid, Type};
use crate::types::{Oid, ToSql, Type};
use crate::{
CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Statement, Transaction,
TransactionBuilder, query, simple_query,
CancelToken, Error, ReadyForQueryStatus, Row, SimpleQueryMessage, Statement, Transaction,
TransactionBuilder, query, simple_query, slice_iter,
};
pub struct Responses {
@@ -54,18 +54,26 @@ impl Responses {
/// A cache of type info and prepared statements for fetching type info
/// (corresponding to the queries in the [crate::prepare] module).
#[derive(Default)]
pub(crate) struct CachedTypeInfo {
struct CachedTypeInfo {
/// A statement for basic information for a type from its
/// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
/// fallback).
pub(crate) typeinfo: Option<Statement>,
typeinfo: Option<Statement>,
/// A statement for getting information for a composite type from its OID.
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY).
typeinfo_composite: Option<Statement>,
/// A statement for getting information for a composite type from its OID.
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY) (or
/// its fallback).
typeinfo_enum: Option<Statement>,
/// Cache of types already looked up.
pub(crate) types: HashMap<Oid, Type>,
types: HashMap<Oid, Type>,
}
pub struct InnerClient {
sender: mpsc::UnboundedSender<Request>,
cached_typeinfo: Mutex<CachedTypeInfo>,
/// A buffer to use when writing out postgres commands.
buffer: Mutex<BytesMut>,
@@ -83,6 +91,38 @@ impl InnerClient {
})
}
pub fn typeinfo(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo.clone()
}
pub fn set_typeinfo(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo = Some(statement.clone());
}
pub fn typeinfo_composite(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo_composite.clone()
}
pub fn set_typeinfo_composite(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo_composite = Some(statement.clone());
}
pub fn typeinfo_enum(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo_enum.clone()
}
pub fn set_typeinfo_enum(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo_enum = Some(statement.clone());
}
pub fn type_(&self, oid: Oid) -> Option<Type> {
self.cached_typeinfo.lock().types.get(&oid).cloned()
}
pub fn set_type(&self, oid: Oid, type_: &Type) {
self.cached_typeinfo.lock().types.insert(oid, type_.clone());
}
/// Call the given function with a buffer to be used when writing out
/// postgres commands.
pub fn with_buf<F, R>(&self, f: F) -> R
@@ -102,6 +142,7 @@ pub struct SocketConfig {
pub host: Host,
pub port: u16,
pub connect_timeout: Option<Duration>,
// pub keepalive: Option<KeepaliveConfig>,
}
/// An asynchronous PostgreSQL client.
@@ -110,7 +151,6 @@ pub struct SocketConfig {
/// through this client object.
pub struct Client {
inner: Arc<InnerClient>,
cached_typeinfo: CachedTypeInfo,
socket_config: SocketConfig,
ssl_mode: SslMode,
@@ -129,9 +169,9 @@ impl Client {
Client {
inner: Arc::new(InnerClient {
sender,
cached_typeinfo: Default::default(),
buffer: Default::default(),
}),
cached_typeinfo: Default::default(),
socket_config,
ssl_mode,
@@ -149,6 +189,55 @@ impl Client {
&self.inner
}
/// Executes a statement, returning a vector of the resulting rows.
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
pub async fn query(
&self,
statement: Statement,
params: &[&(dyn ToSql + Sync)],
) -> Result<Vec<Row>, Error> {
self.query_raw(statement, slice_iter(params))
.await?
.try_collect()
.await
}
/// The maximally flexible version of [`query`].
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
///
/// [`query`]: #method.query
pub async fn query_raw<'a, I>(
&self,
statement: Statement,
params: I,
) -> Result<RowStream, Error>
where
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
query::query(&self.inner, statement, params).await
}
/// Pass text directly to the Postgres backend to allow it to sort out typing itself and
/// to save a roundtrip
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
@@ -195,10 +284,14 @@ impl Client {
simple_query::batch_execute(self.inner(), query).await
}
pub async fn discard_all(&mut self) -> Result<ReadyForQueryStatus, Error> {
pub async fn discard_all(&self) -> Result<ReadyForQueryStatus, Error> {
// clear the prepared statements that are about to be nuked from the postgres session
self.cached_typeinfo.typeinfo = None;
{
let mut typeinfo = self.inner.cached_typeinfo.lock();
typeinfo.typeinfo = None;
typeinfo.typeinfo_composite = None;
typeinfo.typeinfo_enum = None;
}
self.batch_execute("discard all").await
}
@@ -266,8 +359,8 @@ impl Client {
}
/// Query for type information
pub(crate) async fn get_type_inner(&mut self, oid: Oid) -> Result<Type, Error> {
crate::prepare::get_type(&self.inner, &mut self.cached_typeinfo, oid).await
pub async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
crate::prepare::get_type(&self.inner, oid).await
}
/// Determines if the connection to the server has already closed.

View File

@@ -22,7 +22,7 @@ pub trait GenericClient: private::Sealed {
I::IntoIter: ExactSizeIterator + Sync + Send;
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error>;
async fn get_type(&self, oid: Oid) -> Result<Type, Error>;
}
impl private::Sealed for Client {}
@@ -38,8 +38,8 @@ impl GenericClient for Client {
}
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
self.get_type_inner(oid).await
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
crate::prepare::get_type(self.inner(), oid).await
}
}
@@ -56,7 +56,7 @@ impl GenericClient for Transaction<'_> {
}
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
self.client_mut().get_type(oid).await
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
self.client().get_type(oid).await
}
}

View File

@@ -9,10 +9,10 @@ use log::debug;
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use crate::client::{CachedTypeInfo, InnerClient};
use crate::client::InnerClient;
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::types::{Kind, Oid, Type};
use crate::types::{Field, Kind, Oid, Type};
use crate::{Column, Error, Statement, query, slice_iter};
pub(crate) const TYPEINFO_QUERY: &str = "\
@@ -23,7 +23,23 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
WHERE t.oid = $1
";
async fn prepare_typecheck(
const TYPEINFO_ENUM_QUERY: &str = "\
SELECT enumlabel
FROM pg_catalog.pg_enum
WHERE enumtypid = $1
ORDER BY enumsortorder
";
pub(crate) const TYPEINFO_COMPOSITE_QUERY: &str = "\
SELECT attname, atttypid
FROM pg_catalog.pg_attribute
WHERE attrelid = $1
AND NOT attisdropped
AND attnum > 0
ORDER BY attnum
";
pub async fn prepare(
client: &Arc<InnerClient>,
name: &'static str,
query: &str,
@@ -51,7 +67,7 @@ async fn prepare_typecheck(
let mut parameters = vec![];
let mut it = parameter_description.parameters();
while let Some(oid) = it.next().map_err(Error::parse)? {
let type_ = Type::from_oid(oid).ok_or_else(Error::unexpected_message)?;
let type_ = get_type(client, oid).await?;
parameters.push(type_);
}
@@ -59,7 +75,7 @@ async fn prepare_typecheck(
if let Some(row_description) = row_description {
let mut it = row_description.fields();
while let Some(field) = it.next().map_err(Error::parse)? {
let type_ = Type::from_oid(field.type_oid()).ok_or_else(Error::unexpected_message)?;
let type_ = get_type(client, field.type_oid()).await?;
let column = Column::new(field.name().to_string(), type_, field);
columns.push(column);
}
@@ -68,6 +84,15 @@ async fn prepare_typecheck(
Ok(Statement::new(client, name, parameters, columns))
}
fn prepare_rec<'a>(
client: &'a Arc<InnerClient>,
name: &'static str,
query: &'a str,
types: &'a [Type],
) -> Pin<Box<dyn Future<Output = Result<Statement, Error>> + 'a + Send>> {
Box::pin(prepare(client, name, query, types))
}
fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
if types.is_empty() {
debug!("preparing query {}: {}", name, query);
@@ -83,20 +108,16 @@ fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Resu
})
}
pub async fn get_type(
client: &Arc<InnerClient>,
typecache: &mut CachedTypeInfo,
oid: Oid,
) -> Result<Type, Error> {
pub async fn get_type(client: &Arc<InnerClient>, oid: Oid) -> Result<Type, Error> {
if let Some(type_) = Type::from_oid(oid) {
return Ok(type_);
}
if let Some(type_) = typecache.types.get(&oid) {
return Ok(type_.clone());
};
if let Some(type_) = client.type_(oid) {
return Ok(type_);
}
let stmt = typeinfo_statement(client, typecache).await?;
let stmt = typeinfo_statement(client).await?;
let rows = query::query(client, stmt, slice_iter(&[&oid])).await?;
pin_mut!(rows);
@@ -115,48 +136,100 @@ pub async fn get_type(
let relid: Oid = row.try_get(6)?;
let kind = if type_ == b'e' as i8 {
Kind::Enum
let variants = get_enum_variants(client, oid).await?;
Kind::Enum(variants)
} else if type_ == b'p' as i8 {
Kind::Pseudo
} else if basetype != 0 {
Kind::Domain(basetype)
let type_ = get_type_rec(client, basetype).await?;
Kind::Domain(type_)
} else if elem_oid != 0 {
let type_ = get_type_rec(client, typecache, elem_oid).await?;
let type_ = get_type_rec(client, elem_oid).await?;
Kind::Array(type_)
} else if relid != 0 {
Kind::Composite(relid)
let fields = get_composite_fields(client, relid).await?;
Kind::Composite(fields)
} else if let Some(rngsubtype) = rngsubtype {
let type_ = get_type_rec(client, typecache, rngsubtype).await?;
let type_ = get_type_rec(client, rngsubtype).await?;
Kind::Range(type_)
} else {
Kind::Simple
};
let type_ = Type::new(name, oid, kind, schema);
typecache.types.insert(oid, type_.clone());
client.set_type(oid, &type_);
Ok(type_)
}
fn get_type_rec<'a>(
client: &'a Arc<InnerClient>,
typecache: &'a mut CachedTypeInfo,
oid: Oid,
) -> Pin<Box<dyn Future<Output = Result<Type, Error>> + Send + 'a>> {
Box::pin(get_type(client, typecache, oid))
Box::pin(get_type(client, oid))
}
async fn typeinfo_statement(
client: &Arc<InnerClient>,
typecache: &mut CachedTypeInfo,
) -> Result<Statement, Error> {
if let Some(stmt) = &typecache.typeinfo {
return Ok(stmt.clone());
async fn typeinfo_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
if let Some(stmt) = client.typeinfo() {
return Ok(stmt);
}
let typeinfo = "neon_proxy_typeinfo";
let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
let stmt = prepare_rec(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
typecache.typeinfo = Some(stmt.clone());
client.set_typeinfo(&stmt);
Ok(stmt)
}
async fn get_enum_variants(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<String>, Error> {
let stmt = typeinfo_enum_statement(client).await?;
query::query(client, stmt, slice_iter(&[&oid]))
.await?
.and_then(|row| async move { row.try_get(0) })
.try_collect()
.await
}
async fn typeinfo_enum_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
if let Some(stmt) = client.typeinfo_enum() {
return Ok(stmt);
}
let typeinfo = "neon_proxy_typeinfo_enum";
let stmt = prepare_rec(client, typeinfo, TYPEINFO_ENUM_QUERY, &[]).await?;
client.set_typeinfo_enum(&stmt);
Ok(stmt)
}
async fn get_composite_fields(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<Field>, Error> {
let stmt = typeinfo_composite_statement(client).await?;
let rows = query::query(client, stmt, slice_iter(&[&oid]))
.await?
.try_collect::<Vec<_>>()
.await?;
let mut fields = vec![];
for row in rows {
let name = row.try_get(0)?;
let oid = row.try_get(1)?;
let type_ = get_type_rec(client, oid).await?;
fields.push(Field::new(name, type_));
}
Ok(fields)
}
async fn typeinfo_composite_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
if let Some(stmt) = client.typeinfo_composite() {
return Ok(stmt);
}
let typeinfo = "neon_proxy_typeinfo_composite";
let stmt = prepare_rec(client, typeinfo, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
client.set_typeinfo_composite(&stmt);
Ok(stmt)
}

View File

@@ -72,9 +72,4 @@ impl<'a> Transaction<'a> {
pub fn client(&self) -> &Client {
self.client
}
/// Returns a reference to the underlying `Client`.
pub fn client_mut(&mut self) -> &mut Client {
self.client
}
}

View File

@@ -221,11 +221,6 @@ pub struct TimelineMembershipSwitchResponse {
pub current_conf: Configuration,
}
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct TimelineDeleteResult {
pub dir_existed: bool,
}
fn lsn_invalid() -> Lsn {
Lsn::INVALID
}

View File

@@ -21,7 +21,7 @@
//! .with_writer(std::io::stderr);
//!
//! // Initialize OpenTelemetry. Exports tracing spans as OpenTelemetry traces
//! let otlp_layer = tracing_utils::init_tracing("my_application", tracing_utils::ExportConfig::default()).await;
//! let otlp_layer = tracing_utils::init_tracing("my_application").await;
//!
//! // Put it all together
//! tracing_subscriber::registry()
@@ -38,12 +38,8 @@ pub mod http;
use opentelemetry::KeyValue;
use opentelemetry::trace::TracerProvider;
use opentelemetry_otlp::WithExportConfig;
pub use opentelemetry_otlp::{ExportConfig, Protocol};
use tracing::level_filters::LevelFilter;
use tracing::{Dispatch, Subscriber};
use tracing::Subscriber;
use tracing_subscriber::Layer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::registry::LookupSpan;
/// Set up OpenTelemetry exporter, using configuration from environment variables.
@@ -73,28 +69,19 @@ use tracing_subscriber::registry::LookupSpan;
///
/// This doesn't block, but is marked as 'async' to hint that this must be called in
/// asynchronous execution context.
pub async fn init_tracing<S>(
service_name: &str,
export_config: ExportConfig,
) -> Option<impl Layer<S>>
pub async fn init_tracing<S>(service_name: &str) -> Option<impl Layer<S>>
where
S: Subscriber + for<'span> LookupSpan<'span>,
{
if std::env::var("OTEL_SDK_DISABLED") == Ok("true".to_string()) {
return None;
};
Some(init_tracing_internal(
service_name.to_string(),
export_config,
))
Some(init_tracing_internal(service_name.to_string()))
}
/// Like `init_tracing`, but creates a separate tokio Runtime for the tracing
/// tasks.
pub fn init_tracing_without_runtime<S>(
service_name: &str,
export_config: ExportConfig,
) -> Option<impl Layer<S>>
pub fn init_tracing_without_runtime<S>(service_name: &str) -> Option<impl Layer<S>>
where
S: Subscriber + for<'span> LookupSpan<'span>,
{
@@ -125,22 +112,16 @@ where
));
let _guard = runtime.enter();
Some(init_tracing_internal(
service_name.to_string(),
export_config,
))
Some(init_tracing_internal(service_name.to_string()))
}
fn init_tracing_internal<S>(service_name: String, export_config: ExportConfig) -> impl Layer<S>
fn init_tracing_internal<S>(service_name: String) -> impl Layer<S>
where
S: Subscriber + for<'span> LookupSpan<'span>,
{
// Sets up exporter from the provided [`ExportConfig`] parameter.
// If the endpoint is not specified, it is loaded from the
// OTEL_EXPORTER_OTLP_ENDPOINT environment variable.
// Sets up exporter from the OTEL_EXPORTER_* environment variables.
let exporter = opentelemetry_otlp::SpanExporter::builder()
.with_http()
.with_export_config(export_config)
.build()
.expect("could not initialize opentelemetry exporter");
@@ -170,51 +151,3 @@ where
pub fn shutdown_tracing() {
opentelemetry::global::shutdown_tracer_provider();
}
pub enum OtelEnablement {
Disabled,
Enabled {
service_name: String,
export_config: ExportConfig,
runtime: &'static tokio::runtime::Runtime,
},
}
pub struct OtelGuard {
pub dispatch: Dispatch,
}
impl Drop for OtelGuard {
fn drop(&mut self) {
shutdown_tracing();
}
}
/// Initializes OTEL infrastructure for performance tracing according to the provided configuration
///
/// Performance tracing is handled by a different [`tracing::Subscriber`]. This functions returns
/// an [`OtelGuard`] containing a [`tracing::Dispatch`] associated with a newly created subscriber.
/// Applications should use this dispatch for their performance traces.
///
/// The lifetime of the guard should match taht of the application. On drop, it tears down the
/// OTEL infra.
pub fn init_performance_tracing(otel_enablement: OtelEnablement) -> Option<OtelGuard> {
let otel_subscriber = match otel_enablement {
OtelEnablement::Disabled => None,
OtelEnablement::Enabled {
service_name,
export_config,
runtime,
} => {
let otel_layer = runtime
.block_on(init_tracing(&service_name, export_config))
.with_filter(LevelFilter::INFO);
let otel_subscriber = tracing_subscriber::registry().with(otel_layer);
let otel_dispatch = Dispatch::new(otel_subscriber);
Some(otel_dispatch)
}
};
otel_subscriber.map(|dispatch| OtelGuard { dispatch })
}

View File

@@ -42,7 +42,6 @@ toml_edit = { workspace = true, features = ["serde"] }
tracing.workspace = true
tracing-error.workspace = true
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
tracing-utils.workspace = true
rand.workspace = true
scopeguard.workspace = true
strum.workspace = true

View File

@@ -49,13 +49,7 @@ pub fn bench_log_slow(c: &mut Criterion) {
// performance too. Use a simple noop future that yields once, to avoid any scheduler fast
// paths for a ready future.
if enabled {
b.iter(|| {
runtime.block_on(log_slow(
"ready",
THRESHOLD,
std::pin::pin!(tokio::task::yield_now()),
))
});
b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now())));
} else {
b.iter(|| runtime.block_on(tokio::task::yield_now()));
}

View File

@@ -165,7 +165,6 @@ pub fn init(
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(
TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
);
@@ -331,90 +330,37 @@ impl std::fmt::Debug for SecretString {
///
/// TODO: consider upgrading this to a warning, but currently it fires too often.
#[inline]
pub async fn log_slow<F, O>(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O
where
F: Future<Output = O>,
{
monitor_slow_future(
threshold,
threshold, // period = threshold
f,
|MonitorSlowFutureCallback {
ready,
is_slow,
elapsed_total,
elapsed_since_last_callback: _,
}| {
if !is_slow {
return;
}
if ready {
info!(
"slow {name} completed after {:.3}s",
elapsed_total.as_secs_f64()
);
} else {
info!(
"slow {name} still running after {:.3}s",
elapsed_total.as_secs_f64()
);
}
},
)
.await
}
pub async fn log_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
// TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
// won't fit on the stack.
let mut f = Box::pin(f);
/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every
/// `period` afterwards, and also unconditionally when the future completes.
#[inline]
pub async fn monitor_slow_future<F, O>(
threshold: Duration,
period: Duration,
mut fut: std::pin::Pin<&mut F>,
mut cb: impl FnMut(MonitorSlowFutureCallback),
) -> O
where
F: Future<Output = O>,
{
let started = Instant::now();
let mut attempt = 1;
let mut last_cb = started;
loop {
// NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
// case where the timeout doesn't fire.
let deadline = started + threshold + (attempt - 1) * period;
// TODO: still call the callback if the future panics? Copy how we do it for the page_service flush_in_progress counter.
let res = tokio::time::timeout_at(deadline, &mut fut).await;
let now = Instant::now();
let elapsed_total = now - started;
cb(MonitorSlowFutureCallback {
ready: res.is_ok(),
is_slow: elapsed_total >= threshold,
elapsed_total,
elapsed_since_last_callback: now - last_cb,
});
last_cb = now;
if let Ok(output) = res {
let deadline = started + attempt * threshold;
if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
// NB: we check if we exceeded the threshold even if the timeout never fired, because
// scheduling or execution delays may cause the future to succeed even if it exceeds the
// timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
// false negatives.
let elapsed = started.elapsed();
if elapsed >= threshold {
info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
}
return output;
}
let elapsed = started.elapsed().as_secs_f64();
info!("slow {name} still running after {elapsed:.3}s",);
attempt += 1;
}
}
/// See [`monitor_slow_future`].
pub struct MonitorSlowFutureCallback {
/// Whether the future completed. If true, there will be no more callbacks.
pub ready: bool,
/// Whether the future is taking `>=` the specififed threshold duration to complete.
/// Monotonic: if true in one callback invocation, true in all subsequent onces.
pub is_slow: bool,
/// The time elapsed since the [`monitor_slow_future`] was first polled.
pub elapsed_total: Duration,
/// The time elapsed since the last callback invocation.
/// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled.
pub elapsed_since_last_callback: Duration,
}
#[cfg(test)]
mod tests {
use metrics::IntCounterVec;

View File

@@ -48,6 +48,8 @@ pprof.workspace = true
rand.workspace = true
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
regex.workspace = true
rustls-pemfile.workspace = true
rustls-pki-types.workspace = true
rustls.workspace = true
scopeguard.workspace = true
send-future.workspace = true
@@ -68,7 +70,6 @@ tokio-stream.workspace = true
tokio-util.workspace = true
toml_edit = { workspace = true, features = [ "serde" ] }
tracing.workspace = true
tracing-utils.workspace = true
url.workspace = true
walkdir.workspace = true
metrics.workspace = true

View File

@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
use pageserver_api::models::*;
use pageserver_api::shard::TenantShardId;
pub use reqwest::Body as ReqwestBody;
use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
use reqwest::{Certificate, IntoUrl, Method, StatusCode};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
@@ -458,21 +458,13 @@ impl Client {
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
behavior: Option<DetachBehavior>,
) -> Result<AncestorDetached> {
let uri = format!(
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor",
self.mgmt_api_endpoint
);
let mut uri = Url::parse(&uri)
.map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")))?;
if let Some(behavior) = behavior {
uri.query_pairs_mut()
.append_pair("detach_behavior", &behavior.to_string());
}
self.request(Method::PUT, uri, ())
self.request(Method::PUT, &uri, ())
.await?
.json()
.await

View File

@@ -12,7 +12,7 @@ pub(crate) fn setup_logging() {
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
logging::Output::Stdout,
)
.expect("Failed to init test logging");
.expect("Failed to init test logging")
});
}

View File

@@ -331,15 +331,7 @@ where
.append(&header, data)
.await
.map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
} else if *filepath == "postgresql.auto.conf" {
let ancestor_lsn = format!("neon.ancestor_lsn='{}'", self.timeline.get_ancestor_lsn());
let data = ancestor_lsn.as_bytes();
let header = new_tar_header(filepath, data.len() as u64)?;
self.ar
.append(&header, data)
.await
.map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
} else {
} else {
let header = new_tar_header(filepath, 0)?;
self.ar
.append(&header, io::empty())

View File

@@ -30,6 +30,7 @@ use pageserver::{
};
use postgres_backend::AuthType;
use remote_storage::GenericRemoteStorage;
use rustls_pki_types::{CertificateDer, PrivateKeyDer};
use tokio::signal::unix::SignalKind;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
@@ -110,7 +111,6 @@ fn main() -> anyhow::Result<()> {
} else {
TracingErrorLayerEnablement::Disabled
};
logging::init(
conf.log_format,
tracing_error_layer_enablement,
@@ -621,8 +621,8 @@ fn start_pageserver(
let https_task = match https_listener {
Some(https_listener) => {
let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
let certs = load_certs(&conf.ssl_cert_file)?;
let key = load_private_key(&conf.ssl_key_file)?;
let server_config = rustls::ServerConfig::builder()
.with_no_client_auth()
@@ -734,6 +734,25 @@ fn start_pageserver(
})
}
fn load_certs(filename: &Utf8Path) -> std::io::Result<Vec<CertificateDer<'static>>> {
let file = std::fs::File::open(filename)?;
let mut reader = std::io::BufReader::new(file);
rustls_pemfile::certs(&mut reader).collect()
}
fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
let file = std::fs::File::open(filename)?;
let mut reader = std::io::BufReader::new(file);
let key = rustls_pemfile::private_key(&mut reader)?;
key.ok_or(anyhow::anyhow!(
"no private key found in {}",
filename.as_str(),
))
}
async fn create_remote_storage_client(
conf: &'static PageServerConf,
) -> anyhow::Result<GenericRemoteStorage> {

View File

@@ -1079,6 +1079,7 @@ components:
- last_record_lsn
- disk_consistent_lsn
- state
- latest_gc_cutoff_lsn
properties:
timeline_id:
type: string
@@ -1122,6 +1123,9 @@ components:
min_readable_lsn:
type: string
format: hex
latest_gc_cutoff_lsn:
type: string
format: hex
applied_gc_cutoff_lsn:
type: string
format: hex

View File

@@ -28,9 +28,9 @@ use hyper::{Body, Request, Response, StatusCode, Uri, header};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::models::{
DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest,
ListAuxFilesRequest, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
LsnLeaseRequest, OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest,
LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest,
OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,
TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,
TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,
@@ -460,7 +460,10 @@ async fn build_timeline_info_common(
initdb_lsn,
last_record_lsn,
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
_unused: Default::default(), // Unused, for legacy decode only
// Externally, expose the lowest LSN that can be used to create a branch as the "GC cutoff", although internally
// we distinguish between the "planned" GC cutoff (PITR point) and the "latest" GC cutoff (where we
// actually trimmed data to), which can pass each other when PITR is changed.
latest_gc_cutoff_lsn: min_readable_lsn,
min_readable_lsn,
applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(),
current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
@@ -2391,7 +2394,6 @@ async fn timeline_checkpoint_handler(
let state = get_state(&request);
let mut flags = EnumSet::empty();
flags |= CompactFlags::NoYield; // run compaction to completion
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
flags |= CompactFlags::ForceL0Compaction;
}
@@ -2506,9 +2508,6 @@ async fn timeline_detach_ancestor_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let behavior: Option<DetachBehavior> = parse_query_param(&request, "detach_behavior")?;
let behavior = behavior.unwrap_or_default();
let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
@@ -2558,7 +2557,7 @@ async fn timeline_detach_ancestor_handler(
let ctx = &ctx.with_scope_timeline(&timeline);
let progress = timeline
.prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx)
.prepare_to_detach_from_ancestor(&tenant, options, ctx)
.await?;
// uncomment to allow early as possible Tenant::drop
@@ -2573,7 +2572,6 @@ async fn timeline_detach_ancestor_handler(
tenant_shard_id,
timeline_id,
prepared,
behavior,
attempt,
ctx,
)

View File

@@ -465,40 +465,12 @@ pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wait_lsn_seconds",
"Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.",
"Time spent waiting for WAL to arrive",
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric")
});
pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
"pageserver_wait_lsn_started_count",
"Number of wait_lsn operations started.",
"pageserver_wait_lsn_finished_count",
"Number of wait_lsn operations finished.",
&["tenant_id", "shard_id", "timeline_id"],
)
.expect("failed to define a metric")
});
pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_wait_lsn_in_progress_micros",
"Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.",
&["tenant_id", "shard_id", "timeline_id"],
)
.expect("failed to define a metric")
});
pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_wait_lsn_in_progress_micros_global",
"Time spent waiting for WAL to arrive, globally. Updated periodically while waiting."
)
.expect("failed to define a metric")
});
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
register_gauge_vec!(
"pageserver_flush_wait_upload_seconds",
@@ -2858,6 +2830,7 @@ impl StorageTimeMetrics {
}
}
#[derive(Debug)]
pub(crate) struct TimelineMetrics {
tenant_id: String,
shard_id: String,
@@ -2890,8 +2863,6 @@ pub(crate) struct TimelineMetrics {
pub valid_lsn_lease_count_gauge: UIntGauge,
pub wal_records_received: IntCounter,
pub storage_io_size: StorageIoSizeMetrics,
pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,
pub wait_lsn_start_finish_counterpair: IntCounterPair,
shutdown: std::sync::atomic::AtomicBool,
}
@@ -3029,17 +3000,6 @@ impl TimelineMetrics {
let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {
global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),
per_tenant: WAIT_LSN_IN_PROGRESS_MICROS
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap(),
};
let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
TimelineMetrics {
tenant_id,
shard_id,
@@ -3072,8 +3032,6 @@ impl TimelineMetrics {
storage_io_size,
valid_lsn_lease_count_gauge,
wal_records_received,
wait_lsn_in_progress_micros,
wait_lsn_start_finish_counterpair,
shutdown: std::sync::atomic::AtomicBool::default(),
}
}
@@ -3266,15 +3224,6 @@ impl TimelineMetrics {
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
}
let _ =
WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
{
let mut res = [Ok(()), Ok(())];
WAIT_LSN_START_FINISH_COUNTERPAIR
.remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);
}
let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
SmgrQueryType::GetPageAtLsn.into(),
tenant_id,
@@ -3887,29 +3836,27 @@ pub mod tokio_epoll_uring {
});
}
pub(crate) struct GlobalAndPerTenantIntCounter {
global: IntCounter,
per_tenant: IntCounter,
}
impl GlobalAndPerTenantIntCounter {
#[inline(always)]
pub(crate) fn inc(&self) {
self.inc_by(1)
}
#[inline(always)]
pub(crate) fn inc_by(&self, n: u64) {
self.global.inc_by(n);
self.per_tenant.inc_by(n);
}
}
pub(crate) mod tenant_throttling {
use metrics::register_int_counter_vec;
use metrics::{IntCounter, register_int_counter_vec};
use once_cell::sync::Lazy;
use utils::shard::TenantShardId;
use super::GlobalAndPerTenantIntCounter;
pub(crate) struct GlobalAndPerTenantIntCounter {
global: IntCounter,
per_tenant: IntCounter,
}
impl GlobalAndPerTenantIntCounter {
#[inline(always)]
pub(crate) fn inc(&self) {
self.inc_by(1)
}
#[inline(always)]
pub(crate) fn inc_by(&self, n: u64) {
self.global.inc_by(n);
self.per_tenant.inc_by(n);
}
}
pub(crate) struct Metrics<const KIND: usize> {
pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
@@ -4155,7 +4102,6 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
&CIRCUIT_BREAKERS_BROKEN,
&CIRCUIT_BREAKERS_UNBROKEN,
&PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
&WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,
]
.into_iter()
.for_each(|c| {

View File

@@ -1106,19 +1106,12 @@ impl PageServerHandler {
};
// Dispatch the batch to the appropriate request handler.
let log_slow_name = batch.as_static_str();
let (mut handler_results, span) = {
// TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
// won't fit on the stack.
let mut boxpinned =
Box::pin(self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx));
log_slow(
log_slow_name,
LOG_SLOW_GETPAGE_THRESHOLD,
boxpinned.as_mut(),
)
.await?
};
let (mut handler_results, span) = log_slow(
batch.as_static_str(),
LOG_SLOW_GETPAGE_THRESHOLD,
self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx),
)
.await?;
// We purposefully don't count flush time into the smgr operation timer.
//

View File

@@ -5754,7 +5754,7 @@ pub(crate) mod harness {
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
logging::Output::Stdout,
)
.expect("Failed to init test logging");
.expect("Failed to init test logging")
});
}
@@ -6559,11 +6559,7 @@ mod tests {
tline.freeze_and_flush().await?;
tline
.compact(
&CancellationToken::new(),
CompactFlags::NoYield.into(),
&ctx,
)
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
.await?;
let mut writer = tline.writer().await;
@@ -6580,11 +6576,7 @@ mod tests {
tline.freeze_and_flush().await?;
tline
.compact(
&CancellationToken::new(),
CompactFlags::NoYield.into(),
&ctx,
)
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
.await?;
let mut writer = tline.writer().await;
@@ -6601,11 +6593,7 @@ mod tests {
tline.freeze_and_flush().await?;
tline
.compact(
&CancellationToken::new(),
CompactFlags::NoYield.into(),
&ctx,
)
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
.await?;
let mut writer = tline.writer().await;
@@ -6622,11 +6610,7 @@ mod tests {
tline.freeze_and_flush().await?;
tline
.compact(
&CancellationToken::new(),
CompactFlags::NoYield.into(),
&ctx,
)
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
.await?;
assert_eq!(
@@ -6709,9 +6693,7 @@ mod tests {
timeline.freeze_and_flush().await?;
if compact {
// this requires timeline to be &Arc<Timeline>
timeline
.compact(&cancel, CompactFlags::NoYield.into(), ctx)
.await?;
timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
}
// this doesn't really need to use the timeline_id target, but it is closer to what it
@@ -7038,7 +7020,6 @@ mod tests {
child_timeline.freeze_and_flush().await?;
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::NoYield);
child_timeline
.compact(&CancellationToken::new(), flags, &ctx)
.await?;
@@ -7417,9 +7398,7 @@ mod tests {
// Perform a cycle of flush, compact, and GC
tline.freeze_and_flush().await?;
tline
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
.await?;
tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
tenant
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
.await?;
@@ -7748,7 +7727,6 @@ mod tests {
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceImageLayerCreation);
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::NoYield);
flags
} else {
EnumSet::empty()
@@ -7799,9 +7777,7 @@ mod tests {
let before_num_l0_delta_files =
tline.layers.read().await.layer_map()?.level0_deltas().len();
tline
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
.await?;
tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();
@@ -7917,6 +7893,7 @@ mod tests {
Ok((res, reconstruct_state.get_delta_layers_visited() as usize))
}
#[allow(clippy::needless_range_loop)]
for blknum in 0..NUM_KEYS {
lsn = Lsn(lsn.0 + 0x10);
test_key.field6 = (blknum * STEP) as u32;
@@ -7966,7 +7943,6 @@ mod tests {
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceImageLayerCreation);
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::NoYield);
flags
},
&ctx,
@@ -8429,7 +8405,6 @@ mod tests {
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceImageLayerCreation);
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::NoYield);
flags
},
&ctx,
@@ -8497,7 +8472,6 @@ mod tests {
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceImageLayerCreation);
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::NoYield);
flags
},
&ctx,

View File

@@ -219,11 +219,7 @@ impl LocationConf {
};
let shard = if conf.shard_count == 0 {
// NB: carry over the persisted stripe size instead of using the default. This doesn't
// matter for most practical purposes, since unsharded tenants don't use the stripe
// size, but can cause inconsistencies between storcon and Pageserver and cause manual
// splits without `new_stripe_size` to use an unintended stripe size.
ShardIdentity::unsharded_with_stripe_size(ShardStripeSize(conf.shard_stripe_size))
ShardIdentity::unsharded()
} else {
ShardIdentity::new(
ShardNumber(conf.shard_number),

View File

@@ -9,7 +9,7 @@ use camino::Utf8PathBuf;
use num_traits::Num;
use pageserver_api::shard::TenantShardId;
use tokio_epoll_uring::{BoundedBuf, Slice};
use tracing::{error, info_span};
use tracing::error;
use utils::id::TimelineId;
use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
@@ -76,7 +76,6 @@ impl EphemeralFile {
|| IoBufferMut::with_capacity(TAIL_SZ),
gate.enter()?,
ctx,
info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
),
_gate_guard: gate.enter()?,
})

View File

@@ -300,8 +300,9 @@ impl TimelineMetadata {
/// Returns true if anything was changed
pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) {
// Detaching from ancestor now doesn't always detach directly to the direct ancestor, but we
// ensure the LSN is the same. So we don't check the timeline ID.
if let Some(ancestor) = self.body.ancestor_timeline {
assert_eq!(ancestor, branchpoint.0);
}
if self.body.ancestor_lsn != Lsn(0) {
assert_eq!(self.body.ancestor_lsn, branchpoint.1);
}

View File

@@ -14,7 +14,7 @@ use futures::StreamExt;
use itertools::Itertools;
use once_cell::sync::Lazy;
use pageserver_api::key::Key;
use pageserver_api::models::{DetachBehavior, LocationConfigMode};
use pageserver_api::models::LocationConfigMode;
use pageserver_api::shard::{
ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
};
@@ -1914,7 +1914,6 @@ impl TenantManager {
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
prepared: PreparedTimelineDetach,
behavior: DetachBehavior,
mut attempt: detach_ancestor::Attempt,
ctx: &RequestContext,
) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {
@@ -1958,14 +1957,7 @@ impl TenantManager {
.map_err(Error::NotFound)?;
let resp = timeline
.detach_from_ancestor_and_reparent(
&tenant,
prepared,
attempt.ancestor_timeline_id,
attempt.ancestor_lsn,
behavior,
ctx,
)
.detach_from_ancestor_and_reparent(&tenant, prepared, ctx)
.await?;
let mut slot_guard = slot_guard;

View File

@@ -229,7 +229,6 @@ async fn download_object(
|| IoBufferMut::with_capacity(super::BUFFER_SIZE),
gate.enter().map_err(|_| DownloadError::Cancelled)?,
ctx,
tracing::info_span!(parent: None, "download_object_buffered_writer", %dst_path),
);
// TODO: use vectored write (writev) once supported by tokio-epoll-uring.

View File

@@ -45,9 +45,8 @@ use pageserver_api::key::{
use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};
use pageserver_api::models::{
CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
DetachBehavior, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration,
TimelineState,
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, TimelineState,
};
use pageserver_api::reltag::{BlockNumber, RelTag};
use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};
@@ -68,7 +67,6 @@ use tracing::*;
use utils::generation::Generation;
use utils::guard_arc_swap::GuardArcSwap;
use utils::id::TimelineId;
use utils::logging::{MonitorSlowFutureCallback, monitor_slow_future};
use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
use utils::postgres_client::PostgresClientProtocol;
use utils::rate_limit::RateLimit;
@@ -441,8 +439,6 @@ pub struct Timeline {
heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,
pub(crate) rel_size_v2_status: ArcSwapOption<RelSizeMigration>,
wait_lsn_log_slow: tokio::sync::Semaphore,
}
pub(crate) enum PreviousHeatmap {
@@ -1483,67 +1479,17 @@ impl Timeline {
WaitLsnTimeout::Default => self.conf.wait_lsn_timeout,
};
let timer = crate::metrics::WAIT_LSN_TIME.start_timer();
let start_finish_counterpair_guard = self.metrics.wait_lsn_start_finish_counterpair.guard();
let _timer = crate::metrics::WAIT_LSN_TIME.start_timer();
let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout);
let wait_for_timeout = std::pin::pin!(wait_for_timeout);
// Use threshold of 1 because even 1 second of wait for ingest is very much abnormal.
let log_slow_threshold = Duration::from_secs(1);
// Use period of 10 to avoid flooding logs during an outage that affects all timelines.
let log_slow_period = Duration::from_secs(10);
let mut logging_permit = None;
let wait_for_timeout = monitor_slow_future(
log_slow_threshold,
log_slow_period,
wait_for_timeout,
|MonitorSlowFutureCallback {
ready,
is_slow,
elapsed_total,
elapsed_since_last_callback,
}| {
self.metrics
.wait_lsn_in_progress_micros
.inc_by(u64::try_from(elapsed_since_last_callback.as_micros()).unwrap());
if !is_slow {
return;
}
// It's slow, see if we should log it.
// (We limit the logging to one per invocation per timeline to avoid excessive
// logging during an extended broker / networking outage that affects all timelines.)
if logging_permit.is_none() {
logging_permit = self.wait_lsn_log_slow.try_acquire().ok();
}
if logging_permit.is_none() {
return;
}
// We log it.
if ready {
info!(
"slow wait_lsn completed after {:.3}s",
elapsed_total.as_secs_f64()
);
} else {
info!(
"slow wait_lsn still running for {:.3}s",
elapsed_total.as_secs_f64()
);
}
},
);
let res = wait_for_timeout.await;
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
drop(logging_permit);
drop(start_finish_counterpair_guard);
drop(timer);
match res {
match self.last_record_lsn.wait_for_timeout(lsn, timeout).await {
Ok(()) => Ok(()),
Err(e) => {
use utils::seqwait::SeqWaitError::*;
match e {
Shutdown => Err(WaitLsnError::Shutdown),
Timeout => {
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
drop(_timer);
let walreceiver_status = self.walreceiver_status();
Err(WaitLsnError::Timeout(format!(
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
@@ -2477,9 +2423,8 @@ impl Timeline {
}
fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
// By default, delay L0 flushes at 3x the compaction threshold. The compaction threshold
// defaults to 10, and L0 compaction is generally able to keep L0 counts below 30.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;
// Disable L0 flushes by default. This and compaction needs further tuning.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
// If compaction is disabled, don't delay.
if self.get_compaction_period() == Duration::ZERO {
@@ -2507,9 +2452,8 @@ impl Timeline {
}
fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
// Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't
// responsive, and it can e.g. block on other compaction via the compaction semaphore or
// sibling timelines. We need more confidence before enabling this.
// Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5
// If compaction is disabled, don't stall.
@@ -2877,8 +2821,6 @@ impl Timeline {
heatmap_layers_downloader: Mutex::new(None),
rel_size_v2_status: ArcSwapOption::from_pointee(rel_size_v2_status),
wait_lsn_log_slow: tokio::sync::Semaphore::new(1),
};
result.repartition_threshold =
@@ -5446,10 +5388,9 @@ impl Timeline {
self: &Arc<Timeline>,
tenant: &crate::tenant::Tenant,
options: detach_ancestor::Options,
behavior: DetachBehavior,
ctx: &RequestContext,
) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {
detach_ancestor::prepare(self, tenant, behavior, options, ctx).await
detach_ancestor::prepare(self, tenant, options, ctx).await
}
/// Second step of detach from ancestor; detaches the `self` from it's current ancestor and
@@ -5465,21 +5406,9 @@ impl Timeline {
self: &Arc<Timeline>,
tenant: &crate::tenant::Tenant,
prepared: detach_ancestor::PreparedTimelineDetach,
ancestor_timeline_id: TimelineId,
ancestor_lsn: Lsn,
behavior: DetachBehavior,
ctx: &RequestContext,
) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {
detach_ancestor::detach_and_reparent(
self,
tenant,
prepared,
ancestor_timeline_id,
ancestor_lsn,
behavior,
ctx,
)
.await
detach_ancestor::detach_and_reparent(self, tenant, prepared, ctx).await
}
/// Final step which unblocks the GC.

View File

@@ -3189,11 +3189,7 @@ impl Timeline {
}
// TODO: move the below part to the loop body
let Some(last_key) = last_key else {
return Err(CompactionError::Other(anyhow!(
"no keys produced during compaction"
)));
};
let last_key = last_key.expect("no keys produced during compaction");
stat.on_unique_key_visited();
let retention = self

View File

@@ -3,7 +3,6 @@ use std::sync::Arc;
use anyhow::Context;
use http_utils::error::ApiError;
use pageserver_api::models::DetachBehavior;
use pageserver_api::models::detach_ancestor::AncestorDetached;
use pageserver_api::shard::ShardIdentity;
use tokio::sync::Semaphore;
@@ -33,9 +32,6 @@ pub(crate) enum Error {
#[error("too many ancestors")]
TooManyAncestors,
#[error("ancestor is not empty")]
AncestorNotEmpty,
#[error("shutting down, please retry later")]
ShuttingDown,
@@ -93,9 +89,7 @@ impl From<Error> for ApiError {
fn from(value: Error) -> Self {
match value {
Error::NoAncestor => ApiError::Conflict(value.to_string()),
Error::TooManyAncestors | Error::AncestorNotEmpty => {
ApiError::BadRequest(anyhow::anyhow!("{value}"))
}
Error::TooManyAncestors => ApiError::BadRequest(anyhow::anyhow!("{value}")),
Error::ShuttingDown => ApiError::ShuttingDown,
Error::Archived(_) => ApiError::BadRequest(anyhow::anyhow!("{value}")),
Error::OtherTimelineDetachOngoing(_) | Error::FailedToReparentAll => {
@@ -133,7 +127,7 @@ pub(crate) struct PreparedTimelineDetach {
layers: Vec<Layer>,
}
// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
#[derive(Debug)]
pub(crate) struct Options {
pub(crate) rewrite_concurrency: std::num::NonZeroUsize,
@@ -153,8 +147,7 @@ impl Default for Options {
#[derive(Debug)]
pub(crate) struct Attempt {
pub(crate) timeline_id: TimelineId,
pub(crate) ancestor_timeline_id: TimelineId,
pub(crate) ancestor_lsn: Lsn,
_guard: completion::Completion,
gate_entered: Option<utils::sync::gate::GateGuard>,
}
@@ -174,30 +167,25 @@ impl Attempt {
pub(super) async fn prepare(
detached: &Arc<Timeline>,
tenant: &Tenant,
behavior: DetachBehavior,
options: Options,
ctx: &RequestContext,
) -> Result<Progress, Error> {
use Error::*;
let Some((mut ancestor, mut ancestor_lsn)) = detached
let Some((ancestor, ancestor_lsn)) = detached
.ancestor_timeline
.as_ref()
.map(|tl| (tl.clone(), detached.ancestor_lsn))
else {
let ancestor_id;
let ancestor_lsn;
let still_in_progress = {
let accessor = detached.remote_client.initialized_upload_queue()?;
// we are safe to inspect the latest uploaded, because we can only witness this after
// restart is complete and ancestor is no more.
let latest = accessor.latest_uploaded_index_part();
let Some((id, lsn)) = latest.lineage.detached_previous_ancestor() else {
if latest.lineage.detached_previous_ancestor().is_none() {
return Err(NoAncestor);
};
ancestor_id = id;
ancestor_lsn = lsn;
latest
.gc_blocking
@@ -208,8 +196,7 @@ pub(super) async fn prepare(
if still_in_progress {
// gc is still blocked, we can still reparent and complete.
// we are safe to reparent remaining, because they were locked in in the beginning.
let attempt =
continue_with_blocked_gc(detached, tenant, ancestor_id, ancestor_lsn).await?;
let attempt = continue_with_blocked_gc(detached, tenant).await?;
// because the ancestor of detached is already set to none, we have published all
// of the layers, so we are still "prepared."
@@ -237,34 +224,13 @@ pub(super) async fn prepare(
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
if let DetachBehavior::MultiLevelAndNoReparent = behavior {
// If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.
while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() {
if ancestor_lsn != ancestor.ancestor_lsn {
// non-technical requirement; we could flatten still if ancestor LSN does not match but that needs
// us to copy and cut more layers.
return Err(AncestorNotEmpty);
}
// Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same)
ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable
ancestor = ancestor_of_ancestor;
// TODO: do we still need to check if we don't want to reparent?
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
}
} else if ancestor.ancestor_timeline.is_some() {
if ancestor.ancestor_timeline.is_some() {
// non-technical requirement; we could flatten N ancestors just as easily but we chose
// not to, at least initially
return Err(TooManyAncestors);
}
tracing::info!(
"attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}",
ancestor.timeline_id,
ancestor_lsn,
behavior
);
let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?;
let attempt = start_new_attempt(detached, tenant).await?;
utils::pausable_failpoint!("timeline-detach-ancestor::before_starting_after_locking-pausable");
@@ -484,13 +450,8 @@ pub(super) async fn prepare(
Ok(Progress::Prepared(attempt, prepared))
}
async fn start_new_attempt(
detached: &Timeline,
tenant: &Tenant,
ancestor_timeline_id: TimelineId,
ancestor_lsn: Lsn,
) -> Result<Attempt, Error> {
let attempt = obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)?;
async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
let attempt = obtain_exclusive_attempt(detached, tenant)?;
// insert the block in the index_part.json, if not already there.
let _dont_care = tenant
@@ -505,23 +466,13 @@ async fn start_new_attempt(
Ok(attempt)
}
async fn continue_with_blocked_gc(
detached: &Timeline,
tenant: &Tenant,
ancestor_timeline_id: TimelineId,
ancestor_lsn: Lsn,
) -> Result<Attempt, Error> {
async fn continue_with_blocked_gc(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
// FIXME: it would be nice to confirm that there is an in-memory version, since we've just
// verified there is a persistent one?
obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)
obtain_exclusive_attempt(detached, tenant)
}
fn obtain_exclusive_attempt(
detached: &Timeline,
tenant: &Tenant,
ancestor_timeline_id: TimelineId,
ancestor_lsn: Lsn,
) -> Result<Attempt, Error> {
fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
use Error::{OtherTimelineDetachOngoing, ShuttingDown};
// ensure we are the only active attempt for this tenant
@@ -542,8 +493,6 @@ fn obtain_exclusive_attempt(
Ok(Attempt {
timeline_id: detached.timeline_id,
ancestor_timeline_id,
ancestor_lsn,
_guard: guard,
gate_entered: Some(_gate_entered),
})
@@ -846,9 +795,6 @@ pub(super) async fn detach_and_reparent(
detached: &Arc<Timeline>,
tenant: &Tenant,
prepared: PreparedTimelineDetach,
ancestor_timeline_id: TimelineId,
ancestor_lsn: Lsn,
behavior: DetachBehavior,
_ctx: &RequestContext,
) -> Result<DetachingAndReparenting, Error> {
let PreparedTimelineDetach { layers } = prepared;
@@ -876,30 +822,7 @@ pub(super) async fn detach_and_reparent(
"cannot (detach? reparent)? complete if the operation is not still ongoing"
);
let ancestor_to_detach = match detached.ancestor_timeline.as_ref() {
Some(mut ancestor) => {
while ancestor.timeline_id != ancestor_timeline_id {
match ancestor.ancestor_timeline.as_ref() {
Some(found) => {
if ancestor_lsn != ancestor.ancestor_lsn {
return Err(Error::DetachReparent(anyhow::anyhow!(
"cannot find the ancestor timeline to detach from: wrong ancestor lsn"
)));
}
ancestor = found;
}
None => {
return Err(Error::DetachReparent(anyhow::anyhow!(
"cannot find the ancestor timeline to detach from"
)));
}
}
}
Some(ancestor)
}
None => None,
};
let ancestor = match (ancestor_to_detach, recorded_branchpoint) {
let ancestor = match (detached.ancestor_timeline.as_ref(), recorded_branchpoint) {
(Some(ancestor), None) => {
assert!(
!layers.is_empty(),
@@ -972,11 +895,6 @@ pub(super) async fn detach_and_reparent(
Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false),
};
if let DetachBehavior::MultiLevelAndNoReparent = behavior {
// Do not reparent if the user requests to behave so.
return Ok(DetachingAndReparenting::Reparented(HashSet::new()));
}
let mut tasks = tokio::task::JoinSet::new();
// Returns a single permit semaphore which will be used to make one reparenting succeed,
@@ -1114,11 +1032,6 @@ pub(super) async fn complete(
}
/// Query against a locked `Tenant::timelines`.
///
/// A timeline is reparentable if:
///
/// - It is not the timeline being detached.
/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor.
fn reparentable_timelines<'a, I>(
timelines: I,
detached: &'a Arc<Timeline>,

View File

@@ -1299,8 +1299,9 @@ impl OwnedAsyncWriter for VirtualFile {
buf: FullSlice<Buf>,
offset: u64,
ctx: &RequestContext,
) -> (FullSlice<Buf>, std::io::Result<()>) {
VirtualFile::write_all_at(self, buf, offset, ctx).await
) -> std::io::Result<FullSlice<Buf>> {
let (buf, res) = VirtualFile::write_all_at(self, buf, offset, ctx).await;
res.map(|_| buf)
}
}

View File

@@ -31,7 +31,7 @@ pub trait OwnedAsyncWriter {
buf: FullSlice<Buf>,
offset: u64,
ctx: &RequestContext,
) -> impl std::future::Future<Output = (FullSlice<Buf>, std::io::Result<()>)> + Send;
) -> impl std::future::Future<Output = std::io::Result<FullSlice<Buf>>> + Send;
}
/// A wrapper aorund an [`OwnedAsyncWriter`] that uses a [`Buffer`] to batch
@@ -66,7 +66,6 @@ where
buf_new: impl Fn() -> B,
gate_guard: utils::sync::gate::GateGuard,
ctx: &RequestContext,
flush_task_span: tracing::Span,
) -> Self {
Self {
writer: writer.clone(),
@@ -76,7 +75,6 @@ where
buf_new(),
gate_guard,
ctx.attached_child(),
flush_task_span,
),
bytes_submitted: 0,
}
@@ -271,12 +269,12 @@ mod tests {
buf: FullSlice<Buf>,
offset: u64,
_: &RequestContext,
) -> (FullSlice<Buf>, std::io::Result<()>) {
) -> std::io::Result<FullSlice<Buf>> {
self.writes
.lock()
.unwrap()
.push((Vec::from(&buf[..]), offset));
(buf, Ok(()))
Ok(buf)
}
}
@@ -295,7 +293,6 @@ mod tests {
|| IoBufferMut::with_capacity(2),
gate.enter()?,
ctx,
tracing::Span::none(),
);
writer.write_buffered_borrowed(b"abc", ctx).await?;

View File

@@ -1,14 +1,9 @@
use std::ops::ControlFlow;
use std::sync::Arc;
use once_cell::sync::Lazy;
use tokio_util::sync::CancellationToken;
use tracing::{Instrument, info, info_span, warn};
use utils::sync::duplex;
use super::{Buffer, CheapCloneForRead, OwnedAsyncWriter};
use crate::context::RequestContext;
use crate::virtual_file::MaybeFatalIo;
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAligned;
use crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice;
@@ -123,7 +118,6 @@ where
buf: B,
gate_guard: utils::sync::gate::GateGuard,
ctx: RequestContext,
span: tracing::Span,
) -> Self
where
B: Buffer<IoBuf = Buf> + Send + 'static,
@@ -131,14 +125,11 @@ where
// It is fine to buffer up to only 1 message. We only 1 message in-flight at a time.
let (front, back) = duplex::mpsc::channel(1);
let join_handle = tokio::spawn(
async move {
FlushBackgroundTask::new(back, file, gate_guard, ctx)
.run(buf.flush())
.await
}
.instrument(span),
);
let join_handle = tokio::spawn(async move {
FlushBackgroundTask::new(back, file, gate_guard, ctx)
.run(buf.flush())
.await
});
FlushHandle {
inner: Some(FlushHandleInner {
@@ -245,7 +236,6 @@ where
/// The passed in slice is immediately sent back to the flush handle through the duplex channel.
async fn run(mut self, slice: FullSlice<Buf>) -> std::io::Result<Arc<W>> {
// Sends the extra buffer back to the handle.
// TODO: can this ever await and or fail? I think not.
self.channel.send(slice).await.map_err(|_| {
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "flush handle closed early")
})?;
@@ -261,47 +251,10 @@ where
}
// Write slice to disk at `offset`.
//
// Error handling happens according to the current policy of crashing
// on fatal IO errors and retrying in place otherwise (deeming all other errors retryable).
// (The upper layers of the Pageserver write path are not equipped to retry write errors
// becasuse they often deallocate the buffers that were already written).
//
// TODO: cancellation sensitiity.
// Without it, if we hit a bug where retrying is never successful,
// then we can't shut down the timeline/tenant/pageserver cleanly because
// layers of the Pageserver write path are holding the gate open for EphemeralFile.
//
// TODO: use utils::backoff::retry once async closures are actually usable
//
let mut slice_storage = Some(request.slice);
for attempt in 1.. {
let result = async {
if attempt > 1 {
info!("retrying flush");
}
let slice = slice_storage.take().expect(
"likely previous invocation of this future didn't get polled to completion",
);
let (slice, res) = self.writer.write_all_at(slice, request.offset, &self.ctx).await;
slice_storage = Some(slice);
let res = res.maybe_fatal_err("owned_buffers_io flush");
let Err(err) = res else {
return ControlFlow::Break(());
};
warn!(%err, "error flushing buffered writer buffer to disk, retrying after backoff");
static NO_CANCELLATION: Lazy<CancellationToken> = Lazy::new(CancellationToken::new);
utils::backoff::exponential_backoff(attempt, 1.0, 10.0, &NO_CANCELLATION).await;
ControlFlow::Continue(())
}
.instrument(info_span!("flush_attempt", %attempt))
.await;
match result {
ControlFlow::Break(()) => break,
ControlFlow::Continue(()) => continue,
}
}
let slice = slice_storage.expect("loop must have run at least once");
let slice = self
.writer
.write_all_at(request.slice, request.offset, &self.ctx)
.await?;
#[cfg(test)]
{

View File

@@ -50,6 +50,20 @@
#define MIN_RECONNECT_INTERVAL_USEC 1000
#define MAX_RECONNECT_INTERVAL_USEC 1000000
enum NeonEndpointType {
EP_TYPE_PRIMARY = 1,
EP_TYPE_REPLICA,
EP_TYPE_STATIC
};
static const struct config_enum_entry neon_endpoint_types[] = {
{"primary", EP_TYPE_PRIMARY, false},
{"replica", EP_TYPE_REPLICA, false},
{"static", EP_TYPE_STATIC, false},
{NULL, 0, false}
};
/* GUCs */
char *neon_timeline;
char *neon_tenant;
@@ -62,6 +76,8 @@ int flush_every_n_requests = 8;
int neon_protocol_version = 2;
static int neon_endpoint_type = 0;
static int max_reconnect_attempts = 60;
static int stripe_size;
@@ -392,7 +408,7 @@ pageserver_connect(shardno_t shard_no, int elevel)
{
const char *keywords[4];
const char *values[4];
char pid_str[16];
char pid_str[24];
int n_pgsql_params;
TimestampTz now;
int64 us_since_last_attempt;
@@ -445,7 +461,22 @@ pageserver_connect(shardno_t shard_no, int elevel)
*/
keywords[n_pgsql_params] = "application_name";
{
int ret = snprintf(pid_str, sizeof(pid_str), "%d", MyProcPid);
int ret;
switch (neon_endpoint_type)
{
case EP_TYPE_PRIMARY:
ret = snprintf(pid_str, sizeof(pid_str), "%d-%s", MyProcPid, "primary");
break;
case EP_TYPE_REPLICA:
ret = snprintf(pid_str, sizeof(pid_str), "%d-%s", MyProcPid, "replica");
break;
case EP_TYPE_STATIC:
ret = snprintf(pid_str, sizeof(pid_str), "%d-%s", MyProcPid, "static");
break;
default:
ret = snprintf(pid_str, sizeof(pid_str), "%d", MyProcPid);
break;
}
if (ret < 0 || ret >= (int)(sizeof(pid_str)))
elog(FATAL, "stack-allocated buffer too small to hold pid");
}
@@ -1370,6 +1401,17 @@ pg_init_libpagestore(void)
GUC_UNIT_MS,
NULL, NULL, NULL);
DefineCustomEnumVariable(
"neon.endpoint_type",
"The compute endpoint node type",
NULL,
&neon_endpoint_type,
EP_TYPE_PRIMARY,
neon_endpoint_types,
PGC_POSTMASTER,
0,
NULL, NULL, NULL);
relsize_hash_init();
if (page_server != NULL)

View File

@@ -497,15 +497,6 @@ _PG_init(void)
GUC_UNIT_KB,
NULL, NULL, NULL);
DefineCustomStringVariable("neon.ancestor_lsn",
"LSN of the ancestor timeline",
"Zero if there is no arent branch",
&neon_ancestor_lsn,
"0/0",
PGC_SIGHUP,
0,
NULL, NULL, NULL);
/*
* Important: This must happen after other parts of the extension are
* loaded, otherwise any settings to GUCs that were set before the

View File

@@ -76,10 +76,6 @@
#include "access/xlogrecovery.h"
#endif
#if PG_VERSION_NUM < 160000
typedef PGAlignedBlock PGIOAlignedBlock;
#endif
/*
* If DEBUG_COMPARE_LOCAL is defined, we pass through all the SMGR API
* calls to md.c, and *also* do the calls to the Page Server. On every
@@ -1807,7 +1803,7 @@ static XLogRecPtr
log_newpage_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
Page page, bool page_std)
{
PGIOAlignedBlock copied_buffer;
PGAlignedBlock copied_buffer;
memcpy(copied_buffer.data, page, BLCKSZ);
return log_newpage(rinfo, forkNum, blkno, copied_buffer.data, page_std);
@@ -1824,7 +1820,7 @@ static XLogRecPtr
log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
BlockNumber nblocks, Page *pages, bool page_std)
{
PGIOAlignedBlock copied_buffer[XLR_MAX_BLOCK_ID];
PGAlignedBlock copied_buffer[XLR_MAX_BLOCK_ID];
BlockNumber blknos[XLR_MAX_BLOCK_ID];
Page pageptrs[XLR_MAX_BLOCK_ID];
int nregistered = 0;
@@ -1862,7 +1858,7 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
static bool
PageIsEmptyHeapPage(char *buffer)
{
PGIOAlignedBlock empty_page;
PGAlignedBlock empty_page;
PageInit((Page) empty_page.data, BLCKSZ, 0);
@@ -2851,7 +2847,7 @@ static void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
const PGIOAlignedBlock buffer = {0};
const PGAlignedBlock buffer = {0};
int remblocks = nblocks;
XLogRecPtr lsn = 0;
@@ -2898,11 +2894,6 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
relpath(reln->smgr_rlocator, forkNum),
InvalidBlockNumber)));
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
#endif
/* Don't log any pages if we're not allowed to do so. */
if (!XLogInsertAllowed())
return;
@@ -3398,16 +3389,15 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
{
char pageserver_masked[BLCKSZ];
PGIOAlignedBlock mdbuf;
PGIOAlignedBlock mdbuf_masked;
XLogRecPtr request_lsn = request_lsns.request_lsn;
char mdbuf[BLCKSZ];
char mdbuf_masked[BLCKSZ];
mdread(reln, forkNum, blkno, mdbuf.data);
mdread(reln, forkNum, blkno, mdbuf);
memcpy(pageserver_masked, buffer, BLCKSZ);
memcpy(mdbuf_masked.data, mdbuf.data, BLCKSZ);
memcpy(mdbuf_masked, mdbuf, BLCKSZ);
if (PageIsNew((Page) mdbuf.data))
if (PageIsNew((Page) mdbuf))
{
if (!PageIsNew((Page) pageserver_masked))
{
@@ -3426,41 +3416,41 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf.data));
hexdump_page(mdbuf));
}
else if (PageGetSpecialSize(mdbuf.data) == 0)
else if (PageGetSpecialSize(mdbuf) == 0)
{
/* assume heap */
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked.data, blkno);
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked.data),
hexdump_page(mdbuf_masked),
hexdump_page(pageserver_masked));
}
}
else if (PageGetSpecialSize(mdbuf.data) == MAXALIGN(sizeof(BTPageOpaqueData)))
else if (PageGetSpecialSize(mdbuf) == MAXALIGN(sizeof(BTPageOpaqueData)))
{
if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf.data))->btpo_cycleid < MAX_BT_CYCLE_ID)
if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf))->btpo_cycleid < MAX_BT_CYCLE_ID)
{
/* assume btree */
RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked.data, blkno);
RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked, blkno);
RmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno);
if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked.data),
hexdump_page(mdbuf_masked),
hexdump_page(pageserver_masked));
}
}
@@ -3552,85 +3542,77 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
prefetch_pump_state(false);
#ifdef DEBUG_COMPARE_LOCAL
if (forknum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
{
char pageserver_masked[BLCKSZ];
PGIOAlignedBlock mdbuf;
PGIOAlignedBlock mdbuf_masked;
XLogRecPtr request_lsn = request_lsns->request_lsn;
char mdbuf[BLCKSZ];
char mdbuf_masked[BLCKSZ];
for (int i = 0; i < nblocks; i++)
{
BlockNumber blkno = blocknum + i;
if (!BITMAP_ISSET(read, i))
continue;
#if PG_MAJORVERSION_NUM >= 17
{
void* mdbuffers[1] = { mdbuf.data };
mdreadv(reln, forknum, blkno, mdbuffers, 1);
}
mdreadv(reln, forkNum, blkno + i, &mdbuf, 1);
#else
mdread(reln, forknum, blkno, mdbuf.data);
mdread(reln, forkNum, blkno + i, mdbuf);
#endif
memcpy(pageserver_masked, buffers[i], BLCKSZ);
memcpy(mdbuf_masked.data, mdbuf.data, BLCKSZ);
memcpy(pageserver_masked, buffer, BLCKSZ);
memcpy(mdbuf_masked, mdbuf, BLCKSZ);
if (PageIsNew((Page) mdbuf.data))
if (PageIsNew((Page) mdbuf))
{
if (!PageIsNew((Page) pageserver_masked))
{
neon_log(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(buffers[i]));
hexdump_page(buffer));
}
}
else if (PageIsNew((Page) buffers[i]))
else if (PageIsNew((Page) buffer))
{
neon_log(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf.data));
hexdump_page(mdbuf));
}
else if (PageGetSpecialSize(mdbuf.data) == 0)
else if (PageGetSpecialSize(mdbuf) == 0)
{
/* assume heap */
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked.data, blkno);
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked.data),
hexdump_page(mdbuf_masked),
hexdump_page(pageserver_masked));
}
}
else if (PageGetSpecialSize(mdbuf.data) == MAXALIGN(sizeof(BTPageOpaqueData)))
else if (PageGetSpecialSize(mdbuf) == MAXALIGN(sizeof(BTPageOpaqueData)))
{
if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf.data))->btpo_cycleid < MAX_BT_CYCLE_ID)
if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf))->btpo_cycleid < MAX_BT_CYCLE_ID)
{
/* assume btree */
RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked.data, blkno);
RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked, blkno);
RmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno);
if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0)
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked.data),
hexdump_page(mdbuf_masked),
hexdump_page(pageserver_masked));
}
}
@@ -3682,7 +3664,6 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
switch (reln->smgr_relpersistence)
{
case 0:
#ifndef DEBUG_COMPARE_LOCAL
/* This is a bit tricky. Check if the relation exists locally */
if (mdexists(reln, forknum))
{
@@ -3701,7 +3682,6 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
*/
return;
}
#endif
break;
case RELPERSISTENCE_PERMANENT:
@@ -3752,7 +3732,6 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
switch (reln->smgr_relpersistence)
{
case 0:
#ifndef DEBUG_COMPARE_LOCAL
/* This is a bit tricky. Check if the relation exists locally */
if (mdexists(reln, forknum))
{
@@ -3768,7 +3747,6 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
*/
return;
}
#endif
break;
case RELPERSISTENCE_PERMANENT:
@@ -3790,7 +3768,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
mdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);
mdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);
#endif
}
@@ -4176,10 +4154,8 @@ neon_start_unlogged_build(SMgrRelation reln)
* FIXME: should we pass isRedo true to create the tablespace dir if it
* doesn't exist? Is it needed?
*/
#ifndef DEBUG_COMPARE_LOCAL
if (!IsParallelWorker())
if (!IsParallelWorker())
mdcreate(reln, MAIN_FORKNUM, false);
#endif
}
/*
@@ -4254,10 +4230,8 @@ neon_end_unlogged_build(SMgrRelation reln)
forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
mdclose(reln, forknum);
#ifndef DEBUG_COMPARE_LOCAL
/* use isRedo == true, so that we drop it immediately */
mdunlink(rinfob, forknum, true);
#endif
}
}

34
poetry.lock generated
View File

@@ -1491,38 +1491,14 @@ files = [
[[package]]
name = "jsonnet"
version = "0.21.0rc2"
description = "Python bindings for Jsonnet - The data templating language "
version = "0.20.0"
description = "Python bindings for Jsonnet - The data templating language"
optional = false
python-versions = "*"
groups = ["main"]
markers = "python_version < \"3.13\""
files = [
{file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8779ac6820fee44ef736df2baedc3ae93e8cd5d672ee105015c2a47fe627a727"},
{file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99affe8c71e2551465064a8039bb3d1cba27a0b73b2b9ff1b652e06f17d4ea8b"},
{file = "jsonnet-0.21.0rc2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a9dffb9aa01013d100ddfb7230d1eeb80f2a8eef712b1825a60cad57106d8bd"},
{file = "jsonnet-0.21.0rc2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cca6c95f2879dcab52650b7aa09a4e82a139b084931b1f6f8c840f834fecc08a"},
{file = "jsonnet-0.21.0rc2-cp310-cp310-win_amd64.whl", hash = "sha256:016d6afdb302a6d00bf3bce6a0c3d9c093b992e33f9bc67c64a868035892258e"},
{file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e893ab2c9bf10d8ec9e9b0cee8961879c88d0619cc6d8f75ea284a78e06ae32b"},
{file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06b353cd3daa2781e6cd308e05f2f116396376994bcb5f59aaadbc6a752c7f2"},
{file = "jsonnet-0.21.0rc2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eb2bc8e62b73101329072da322f7e2a1bdb3ac530b94669128d1b480e311e55"},
{file = "jsonnet-0.21.0rc2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:113766fd0c25620807bcf04d4c739f461c971a4f0e4aece9ba62b4e762de9598"},
{file = "jsonnet-0.21.0rc2-cp311-cp311-win_amd64.whl", hash = "sha256:8dab208c2c2760be60f87d1ceb8b28c86b51ed0e31129a7d90cd5fe890b41225"},
{file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:95f5b9dd26a41d6f258d1baa8d22e557051beeed8c52a6202584f1becca9dcb5"},
{file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cecc6d76e2b377260fae0a060097c113e6ac361b8f739903ea7f3f5f64cdebdf"},
{file = "jsonnet-0.21.0rc2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaa2d18224af7e63872ef4a101e93962505456cf5f5439c3cfc25dad6845f8b1"},
{file = "jsonnet-0.21.0rc2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2a9063f811554487ed552445e964aeec969cafb266b965029c8d6b091ce47950"},
{file = "jsonnet-0.21.0rc2-cp312-cp312-win_amd64.whl", hash = "sha256:80d171182c169761f744ba50068a4ad35d48e52b91d25bf4c7bb9a72f0a04f71"},
{file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3657938f87cb6bc6da20ca631d437b5faf469ca060a7c7def9c8fd2f25a5e06"},
{file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3dcebc30cb991b58bc416ee05e9387004d04716d5c0b89714ff042bd069af5c8"},
{file = "jsonnet-0.21.0rc2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac52c95482df3ed93c908468ca2f40d4825b6baba284b395ddc47bd663b8c3a"},
{file = "jsonnet-0.21.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b34450823a7a1861de892fef9f29de1b4c19e1a79e27d81ffe7e57646cc89d6"},
{file = "jsonnet-0.21.0rc2-cp313-cp313-win_amd64.whl", hash = "sha256:573fd2580e46f4875ec505f1732f9e804b7063cba790342ed6fdafe9a6b30556"},
{file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:871ca1411de3626499bda60b330d37f85a592918f99ba4809089bbb8d4f5bfe4"},
{file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d33b25a9c5bf9099100b9b16cb385a2876d891fbe639ee9d476fc75c861903a"},
{file = "jsonnet-0.21.0rc2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2bac374565c7f89a4675f19fd2b624ed1376519267f4e444f49b6fc0368f6e5"},
{file = "jsonnet-0.21.0rc2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fab7bbd88f9159f88a7350701a97bda24de9e3b9eef14c2501ba8b9224160d60"},
{file = "jsonnet-0.21.0rc2-cp39-cp39-win_amd64.whl", hash = "sha256:ed71ffba0fd233a1bca7b0f7be79730792c5383e562a9dc7da152478d9ee1612"},
{file = "jsonnet-0.21.0rc2.tar.gz", hash = "sha256:2b83ec4b5a771c3732e0972be23a71f042ad2940db6918d3a52aade69bc394fb"},
{file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"},
]
[[package]]
@@ -3844,4 +3820,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = "^3.11"
content-hash = "715fc8c896dcfa1b15054deeddcdec557ef93af91b26e1c8e4688fe4dbef5296"
content-hash = "010ffce959bb256880ab5a267048c182e4612b3151f9a94e3bf5d3a7807962fe"

View File

@@ -5,7 +5,6 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, bail, ensure};
use arc_swap::ArcSwapOption;
use camino::{Utf8Path, Utf8PathBuf};
use clap::Parser;
use compute_api::spec::LocalProxySpec;
@@ -28,7 +27,6 @@ use crate::config::{
};
use crate::control_plane::locks::ApiLocks;
use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings};
use crate::ext::TaskExt;
use crate::http::health_server::AppMetrics;
use crate::intern::RoleNameInt;
use crate::metrics::{Metrics, ThreadPoolMetrics};
@@ -192,11 +190,7 @@ pub async fn run() -> anyhow::Result<()> {
// 2. The config file is written but the signal hook is not yet received
// 3. local_proxy completes startup but has no config loaded, despite there being a registerd config.
refresh_config_notify.notify_one();
tokio::spawn(refresh_config_loop(
config,
args.config_path,
refresh_config_notify,
));
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
maintenance_tasks.spawn(crate::http::health_server::task_main(
metrics_listener,
@@ -275,7 +269,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
};
Ok(Box::leak(Box::new(ProxyConfig {
tls_config: ArcSwapOption::from(None),
tls_config: None,
metric_collection: None,
http_config,
authentication_config: AuthenticationConfig {
@@ -317,16 +311,14 @@ enum RefreshConfigError {
Parse(#[from] serde_json::Error),
#[error(transparent)]
Validate(anyhow::Error),
#[error(transparent)]
Tls(anyhow::Error),
}
async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<Notify>) {
async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
let mut init = true;
loop {
rx.notified().await;
match refresh_config_inner(config, &path).await {
match refresh_config_inner(&path).await {
Ok(()) => {}
// don't log for file not found errors if this is the first time we are checking
// for computes that don't use local_proxy, this is not an error.
@@ -335,9 +327,6 @@ async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<No
{
debug!(error=?e, ?path, "could not read config file");
}
Err(RefreshConfigError::Tls(e)) => {
error!(error=?e, ?path, "could not read TLS certificates");
}
Err(e) => {
error!(error=?e, ?path, "could not read config file");
}
@@ -347,10 +336,7 @@ async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<No
}
}
async fn refresh_config_inner(
config: &ProxyConfig,
path: &Utf8Path,
) -> Result<(), RefreshConfigError> {
async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> {
let bytes = tokio::fs::read(&path).await?;
let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
@@ -420,20 +406,5 @@ async fn refresh_config_inner(
info!("successfully loaded new config");
JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
if let Some(tls_config) = data.tls {
let tls_config = tokio::task::spawn_blocking(move || {
crate::tls::server_config::configure_tls(
&tls_config.key_path,
&tls_config.cert_path,
None,
false,
)
})
.await
.propagate_task_panic()
.map_err(RefreshConfigError::Tls)?;
config.tls_config.store(Some(Arc::new(tls_config)));
}
Ok(())
}

View File

@@ -4,7 +4,6 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::bail;
use arc_swap::ArcSwapOption;
use futures::future::Either;
use remote_storage::RemoteStorageConfig;
use tokio::net::TcpListener;
@@ -564,7 +563,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
(None, None) => None,
_ => bail!("either both or neither tls-key and tls-cert must be specified"),
};
let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));
let backup_metric_collection_config = config::MetricBackupCollectionConfig {
remote_storage_config: args.metric_backup_collection_remote_storage.clone(),

View File

@@ -3,7 +3,6 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Ok, bail, ensure};
use arc_swap::ArcSwapOption;
use clap::ValueEnum;
use remote_storage::RemoteStorageConfig;
@@ -18,7 +17,7 @@ pub use crate::tls::server_config::{TlsConfig, configure_tls};
use crate::types::Host;
pub struct ProxyConfig {
pub tls_config: ArcSwapOption<TlsConfig>,
pub tls_config: Option<TlsConfig>,
pub metric_collection: Option<MetricCollectionConfig>,
pub http_config: HttpConfig,
pub authentication_config: AuthenticationConfig,

View File

@@ -177,8 +177,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let proto = ctx.protocol();
let request_gauge = metrics.connection_requests.guard(proto);
let tls = config.tls_config.load();
let tls = tls.as_deref();
let tls = config.tls_config.as_ref();
let record_handshake_error = !ctx.has_private_peer_addr();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

View File

@@ -46,8 +46,7 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
.expect("this should be a valid filter directive"),
);
let otlp_layer =
tracing_utils::init_tracing("proxy", tracing_utils::ExportConfig::default()).await;
let otlp_layer = tracing_utils::init_tracing("proxy").await;
let json_log_layer = if logfmt == LogFormat::Json {
Some(JsonLoggingLayer::new(

View File

@@ -30,16 +30,7 @@ pub struct Metrics {
static SELF: OnceLock<Metrics> = OnceLock::new();
impl Metrics {
pub fn install(thread_pool: Arc<ThreadPoolMetrics>) {
let mut metrics = Metrics::new(thread_pool);
metrics.proxy.errors_total.init_all_dense();
metrics.proxy.redis_errors_total.init_all_dense();
metrics.proxy.redis_events_count.init_all_dense();
metrics.proxy.retries_metric.init_all_dense();
metrics.proxy.invalid_endpoints_total.init_all_dense();
metrics.proxy.connection_failures_total.init_all_dense();
SELF.set(metrics)
SELF.set(Metrics::new(thread_pool))
.ok()
.expect("proxy metrics must not be installed more than once");
}

View File

@@ -114,7 +114,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
let mut read_buf = read_buf.reader();
let mut res = Ok(());
let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())
let accept = tokio_rustls::TlsAcceptor::from(tls.to_server_config())
.accept_with(raw, |session| {
// push the early data to the tls session
while !read_buf.get_ref().is_empty() {

View File

@@ -278,8 +278,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let proto = ctx.protocol();
let request_gauge = metrics.connection_requests.guard(proto);
let tls = config.tls_config.load();
let tls = tls.as_deref();
let tls = config.tls_config.as_ref();
let record_handshake_error = !ctx.has_private_peer_addr();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

View File

@@ -10,7 +10,7 @@ use crate::config::ComputeConfig;
use crate::control_plane::messages::MetricsAuxInfo;
use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
use crate::stream::Stream;
use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};
use crate::usage_metrics::{Ids, MetricCounterRecorder, TrafficDirection, USAGE_METRICS};
/// Forward bytes in both directions (client <-> compute).
#[tracing::instrument(skip_all)]
@@ -24,6 +24,7 @@ pub(crate) async fn proxy_pass(
let usage_tx = USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id,
branch_id: aux.branch_id,
direction: TrafficDirection::Egress,
private_link_id,
});
@@ -46,7 +47,6 @@ pub(crate) async fn proxy_pass(
|cnt| {
// Number of bytes the client sent to the compute node (inbound).
metrics.get_metric(m_recv).inc_by(cnt as u64);
usage_tx.record_ingress(cnt as u64);
},
);

View File

@@ -96,18 +96,16 @@ fn generate_tls_config<'a>(
.with_safe_default_protocol_versions()
.context("ring should support the default protocol versions")?
.with_no_client_auth()
.with_single_cert(vec![cert.clone()], key.clone_key())?;
.with_single_cert(vec![cert.clone()], key.clone_key())?
.into();
let mut cert_resolver = CertResolver::new();
cert_resolver.add_cert(key, vec![cert], true)?;
let common_names = cert_resolver.get_common_names();
let config = Arc::new(config);
TlsConfig {
http_config: config.clone(),
pg_config: config,
config,
common_names,
cert_resolver: Arc::new(cert_resolver),
}

View File

@@ -22,7 +22,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::protocol2::ConnectionInfoExtra;
use crate::types::{DbName, EndpointCacheKey, RoleName};
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
#[derive(Debug, Clone)]
pub(crate) struct ConnInfo {
@@ -639,7 +639,11 @@ impl<C: ClientInnerExt> Client<C> {
(&mut inner.inner, Discard { conn_info, pool })
}
pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
pub(crate) fn metrics(
&self,
direction: TrafficDirection,
ctx: &RequestContext,
) -> Arc<MetricCounter> {
let aux = &self
.inner
.as_ref()
@@ -655,6 +659,7 @@ impl<C: ClientInnerExt> Client<C> {
USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id,
branch_id: aux.branch_id,
direction,
private_link_id,
})
}

View File

@@ -19,7 +19,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::protocol2::ConnectionInfoExtra;
use crate::types::EndpointCacheKey;
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>;
pub(crate) type Connect = http2::Connection<TokioIo<AsyncRW>, hyper::body::Incoming, TokioExecutor>;
@@ -265,7 +265,11 @@ impl<C: ClientInnerExt + Clone> Client<C> {
Self { inner }
}
pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
pub(crate) fn metrics(
&self,
direction: TrafficDirection,
ctx: &RequestContext,
) -> Arc<MetricCounter> {
let aux = &self.inner.aux;
let private_link_id = match ctx.extra() {
@@ -277,6 +281,7 @@ impl<C: ClientInnerExt + Clone> Client<C> {
USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id,
branch_id: aux.branch_id,
direction,
private_link_id,
})
}

View File

@@ -19,7 +19,6 @@ use std::pin::{Pin, pin};
use std::sync::Arc;
use anyhow::Context;
use arc_swap::ArcSwapOption;
use async_trait::async_trait;
use atomic_take::AtomicTake;
use bytes::Bytes;
@@ -118,7 +117,18 @@ pub async fn task_main(
auth_backend,
endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
});
let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = Arc::new(&config.tls_config);
let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = match config.tls_config.as_ref() {
Some(config) => {
let mut tls_server_config = rustls::ServerConfig::clone(&config.to_server_config());
// prefer http2, but support http/1.1
tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
Arc::new(tls_server_config)
}
None => {
warn!("TLS config is missing");
Arc::new(NoTls)
}
};
let connections = tokio_util::task::task_tracker::TaskTracker::new();
connections.close(); // allows `connections.wait to complete`
@@ -206,20 +216,22 @@ pub(crate) type AsyncRW = Pin<Box<dyn AsyncReadWrite>>;
#[async_trait]
trait MaybeTlsAcceptor: Send + Sync + 'static {
async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
}
#[async_trait]
impl MaybeTlsAcceptor for &'static ArcSwapOption<crate::config::TlsConfig> {
async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
match &*self.load() {
Some(config) => Ok(Box::pin(
TlsAcceptor::from(config.http_config.clone())
.accept(conn)
.await?,
)),
None => Ok(Box::pin(conn)),
}
impl MaybeTlsAcceptor for rustls::ServerConfig {
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
Ok(Box::pin(TlsAcceptor::from(self).accept(conn).await?))
}
}
struct NoTls;
#[async_trait]
impl MaybeTlsAcceptor for NoTls {
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
Ok(Box::pin(conn))
}
}

View File

@@ -42,7 +42,7 @@ use crate::metrics::{HttpDirection, Metrics};
use crate::proxy::{NeonOptions, run_until_cancelled};
use crate::serverless::backend::HttpConnError;
use crate::types::{DbName, RoleName};
use crate::usage_metrics::{MetricCounter, MetricCounterRecorder};
use crate::usage_metrics::{MetricCounter, MetricCounterRecorder, TrafficDirection};
#[derive(serde::Deserialize)]
#[serde(rename_all = "camelCase")]
@@ -614,9 +614,7 @@ async fn handle_inner(
&config.authentication_config,
ctx,
request.headers(),
// todo: race condition?
// we're unlikely to change the common names.
config.tls_config.load().as_deref(),
config.tls_config.as_ref(),
)?;
info!(
user = conn_info.conn_info.user_info.user.as_str(),
@@ -663,7 +661,6 @@ async fn handle_db_inner(
let parsed_headers = HttpHeaders::try_parse(headers)?;
let mut request_len = 0;
let fetch_and_process_request = Box::pin(
async {
let body = read_body_with_limit(
@@ -672,8 +669,6 @@ async fn handle_db_inner(
)
.await?;
request_len = body.len();
Metrics::get()
.proxy
.http_conn_content_length_bytes
@@ -768,7 +763,7 @@ async fn handle_db_inner(
}
};
let metrics = client.metrics(ctx);
let metrics = client.metrics(TrafficDirection::Egress, ctx);
let len = json_output.len();
let response = response
@@ -784,8 +779,6 @@ async fn handle_db_inner(
// count the egress bytes - we miss the TLS and header overhead but oh well...
// moving this later in the stack is going to be a lot of effort and ehhhh
metrics.record_egress(len as u64);
metrics.record_ingress(request_len as u64);
Metrics::get()
.proxy
.http_conn_content_length_bytes
@@ -843,7 +836,7 @@ async fn handle_auth_broker_inner(
.expect("all headers and params received via hyper should be valid for request");
// todo: map body to count egress
let _metrics = client.metrics(ctx);
let _metrics = client.metrics(TrafficDirection::Egress, ctx);
Ok(client
.inner
@@ -867,13 +860,7 @@ impl QueryData {
let cancel_token = inner.cancel_token();
let res = match select(
pin!(query_to_json(
config,
&mut *inner,
self,
&mut 0,
parsed_headers
)),
pin!(query_to_json(config, &*inner, self, &mut 0, parsed_headers)),
pin!(cancel.cancelled()),
)
.await
@@ -957,7 +944,7 @@ impl BatchQueryData {
builder = builder.deferrable(true);
}
let mut transaction = builder
let transaction = builder
.start()
.await
.inspect_err(|_| {
@@ -970,7 +957,7 @@ impl BatchQueryData {
let json_output = match query_batch(
config,
cancel.child_token(),
&mut transaction,
&transaction,
self,
parsed_headers,
)
@@ -1022,7 +1009,7 @@ impl BatchQueryData {
async fn query_batch(
config: &'static HttpConfig,
cancel: CancellationToken,
transaction: &mut Transaction<'_>,
transaction: &Transaction<'_>,
queries: BatchQueryData,
parsed_headers: HttpHeaders,
) -> Result<String, SqlOverHttpError> {
@@ -1060,7 +1047,7 @@ async fn query_batch(
async fn query_to_json<T: GenericClient>(
config: &'static HttpConfig,
client: &mut T,
client: &T,
data: QueryData,
current_size: &mut usize,
parsed_headers: HttpHeaders,
@@ -1173,10 +1160,10 @@ enum Discard<'a> {
}
impl Client {
fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
fn metrics(&self, direction: TrafficDirection, ctx: &RequestContext) -> Arc<MetricCounter> {
match self {
Client::Remote(client) => client.metrics(ctx),
Client::Local(local_client) => local_client.metrics(ctx),
Client::Remote(client) => client.metrics(direction, ctx),
Client::Local(local_client) => local_client.metrics(direction, ctx),
}
}

Some files were not shown because too many files have changed in this diff Show More