mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-15 20:20:38 +00:00
Compare commits
1 Commits
release-co
...
tristan957
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32ea0d3a11 |
33
.github/actionlint.yml
vendored
33
.github/actionlint.yml
vendored
@@ -8,7 +8,6 @@ self-hosted-runner:
|
||||
- small-arm64
|
||||
- us-east-2
|
||||
config-variables:
|
||||
- AWS_ECR_REGION
|
||||
- AZURE_DEV_CLIENT_ID
|
||||
- AZURE_DEV_REGISTRY_NAME
|
||||
- AZURE_DEV_SUBSCRIPTION_ID
|
||||
@@ -16,25 +15,23 @@ config-variables:
|
||||
- AZURE_PROD_REGISTRY_NAME
|
||||
- AZURE_PROD_SUBSCRIPTION_ID
|
||||
- AZURE_TENANT_ID
|
||||
- BENCHMARK_INGEST_TARGET_PROJECTID
|
||||
- BENCHMARK_LARGE_OLTP_PROJECTID
|
||||
- BENCHMARK_PROJECT_ID_PUB
|
||||
- BENCHMARK_PROJECT_ID_SUB
|
||||
- DEV_AWS_OIDC_ROLE_ARN
|
||||
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
|
||||
- HETZNER_CACHE_BUCKET
|
||||
- HETZNER_CACHE_ENDPOINT
|
||||
- HETZNER_CACHE_REGION
|
||||
- NEON_DEV_AWS_ACCOUNT_ID
|
||||
- NEON_PROD_AWS_ACCOUNT_ID
|
||||
- PGREGRESS_PG16_PROJECT_ID
|
||||
- PGREGRESS_PG17_PROJECT_ID
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_CICD_CHANNEL_ID
|
||||
- SLACK_ON_CALL_DEVPROD_STREAM
|
||||
- SLACK_ON_CALL_QA_STAGING_STREAM
|
||||
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
|
||||
- SLACK_RUST_CHANNEL_ID
|
||||
- SLACK_STORAGE_CHANNEL_ID
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
- DEV_AWS_OIDC_ROLE_ARN
|
||||
- BENCHMARK_INGEST_TARGET_PROJECTID
|
||||
- PGREGRESS_PG16_PROJECT_ID
|
||||
- PGREGRESS_PG17_PROJECT_ID
|
||||
- SLACK_ON_CALL_QA_STAGING_STREAM
|
||||
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
|
||||
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
|
||||
- SLACK_CICD_CHANNEL_ID
|
||||
- SLACK_STORAGE_CHANNEL_ID
|
||||
- NEON_DEV_AWS_ACCOUNT_ID
|
||||
- NEON_PROD_AWS_ACCOUNT_ID
|
||||
- AWS_ECR_REGION
|
||||
- BENCHMARK_LARGE_OLTP_PROJECTID
|
||||
- SLACK_ON_CALL_DEVPROD_STREAM
|
||||
- SLACK_RUST_CHANNEL_ID
|
||||
|
||||
12
.github/scripts/generate_image_maps.py
vendored
12
.github/scripts/generate_image_maps.py
vendored
@@ -39,18 +39,12 @@ registries = {
|
||||
],
|
||||
}
|
||||
|
||||
release_branches = ["release", "release-proxy", "release-compute"]
|
||||
|
||||
outputs: dict[str, dict[str, list[str]]] = {}
|
||||
|
||||
target_tags = (
|
||||
[target_tag, "latest"]
|
||||
if branch == "main"
|
||||
else [target_tag, "released"]
|
||||
if branch in release_branches
|
||||
else [target_tag]
|
||||
target_tags = [target_tag, "latest"] if branch == "main" else [target_tag]
|
||||
target_stages = (
|
||||
["dev", "prod"] if branch in ["release", "release-proxy", "release-compute"] else ["dev"]
|
||||
)
|
||||
target_stages = ["dev", "prod"] if branch in release_branches else ["dev"]
|
||||
|
||||
for component_name, component_images in components.items():
|
||||
for stage in target_stages:
|
||||
|
||||
31
.github/scripts/push_with_image_map.py
vendored
31
.github/scripts/push_with_image_map.py
vendored
@@ -11,27 +11,12 @@ try:
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError("Failed to parse IMAGE_MAP as JSON") from e
|
||||
|
||||
failures = []
|
||||
for source, targets in parsed_image_map.items():
|
||||
for target in targets:
|
||||
cmd = ["docker", "buildx", "imagetools", "create", "-t", target, source]
|
||||
print(f"Running: {' '.join(cmd)}")
|
||||
result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
|
||||
pending = [(source, target) for source, targets in parsed_image_map.items() for target in targets]
|
||||
|
||||
while len(pending) > 0:
|
||||
if len(failures) > 10:
|
||||
print("Error: more than 10 failures!")
|
||||
for failure in failures:
|
||||
print(f'"{failure[0]}" failed with the following output:')
|
||||
print(failure[1])
|
||||
raise RuntimeError("Retry limit reached.")
|
||||
|
||||
source, target = pending.pop(0)
|
||||
cmd = ["docker", "buildx", "imagetools", "create", "-t", target, source]
|
||||
print(f"Running: {' '.join(cmd)}")
|
||||
result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
|
||||
if result.returncode != 0:
|
||||
failures.append((" ".join(cmd), result.stdout))
|
||||
pending.append((source, target))
|
||||
|
||||
if len(failures) > 0 and (github_output := os.getenv("GITHUB_OUTPUT")):
|
||||
with open(github_output, "a") as f:
|
||||
f.write("slack_notify=true\n")
|
||||
if result.returncode != 0:
|
||||
print(f"Error: {result.stdout}")
|
||||
raise RuntimeError(f"Command failed: {' '.join(cmd)}")
|
||||
|
||||
28
.github/workflows/_build-and-test-locally.yml
vendored
28
.github/workflows/_build-and-test-locally.yml
vendored
@@ -128,49 +128,29 @@ jobs:
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
|
||||
@@ -37,14 +37,8 @@ jobs:
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Cache poetry deps
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
- uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
|
||||
7
.github/workflows/_check-codestyle-rust.yml
vendored
7
.github/workflows/_check-codestyle-rust.yml
vendored
@@ -48,13 +48,8 @@ jobs:
|
||||
submodules: true
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
|
||||
30
.github/workflows/_meta.yml
vendored
30
.github/workflows/_meta.yml
vendored
@@ -5,9 +5,6 @@ on:
|
||||
github-event-name:
|
||||
type: string
|
||||
required: true
|
||||
github-event-json:
|
||||
type: string
|
||||
required: true
|
||||
outputs:
|
||||
build-tag:
|
||||
description: "Tag for the current workflow run"
|
||||
@@ -30,9 +27,6 @@ on:
|
||||
release-pr-run-id:
|
||||
description: "Only available if `run-kind in [storage-release, proxy-release, compute-release]`. Contains the run ID of the `Build and Test` workflow, assuming one with the current commit can be found."
|
||||
value: ${{ jobs.tags.outputs.release-pr-run-id }}
|
||||
sha:
|
||||
description: "github.event.pull_request.head.sha on release PRs, github.sha otherwise"
|
||||
value: ${{ jobs.tags.outputs.sha }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -51,7 +45,6 @@ jobs:
|
||||
storage: ${{ steps.previous-releases.outputs.storage }}
|
||||
run-kind: ${{ steps.run-kind.outputs.run-kind }}
|
||||
release-pr-run-id: ${{ steps.release-pr-run-id.outputs.release-pr-run-id }}
|
||||
sha: ${{ steps.sha.outputs.sha }}
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
@@ -61,6 +54,10 @@ jobs:
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get run kind
|
||||
id: run-kind
|
||||
env:
|
||||
@@ -81,23 +78,6 @@ jobs:
|
||||
run: |
|
||||
echo "run-kind=$RUN_KIND" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
- name: Get the right SHA
|
||||
id: sha
|
||||
env:
|
||||
SHA: >
|
||||
${{
|
||||
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), steps.run-kind.outputs.run-kind)
|
||||
&& fromJSON(inputs.github-event-json).pull_request.head.sha
|
||||
|| github.sha
|
||||
}}
|
||||
run: |
|
||||
echo "sha=$SHA" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ steps.sha.outputs.sha }}
|
||||
|
||||
- name: Get build tag
|
||||
id: build-tag
|
||||
env:
|
||||
@@ -163,7 +143,7 @@ jobs:
|
||||
if: ${{ contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CURRENT_SHA: ${{ github.sha }}
|
||||
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
run: |
|
||||
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!" | halt_error(1))')
|
||||
echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
@@ -104,18 +104,6 @@ jobs:
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Copy docker images to target registries
|
||||
id: push
|
||||
run: python3 .github/scripts/push_with_image_map.py
|
||||
env:
|
||||
IMAGE_MAP: ${{ inputs.image-map }}
|
||||
|
||||
- name: Notify Slack if container image pushing fails
|
||||
if: steps.push.outputs.slack_notify == 'true' || failure()
|
||||
uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }}
|
||||
text: |
|
||||
Pushing container images failed in <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
|
||||
63
.github/workflows/build-macos.yml
vendored
63
.github/workflows/build-macos.yml
vendored
@@ -63,13 +63,8 @@ jobs:
|
||||
|
||||
- name: Cache postgres ${{ matrix.postgres-version }} build
|
||||
id: cache_pg
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -134,25 +129,15 @@ jobs:
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -218,57 +203,32 @@ jobs:
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_v15
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_v16
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_v17
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache cargo deps (only for v17)
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
@@ -278,13 +238,8 @@ jobs:
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
|
||||
36
.github/workflows/build_and_test.yml
vendored
36
.github/workflows/build_and_test.yml
vendored
@@ -80,7 +80,6 @@ jobs:
|
||||
uses: ./.github/workflows/_meta.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
github-event-json: ${{ toJSON(github.event) }}
|
||||
|
||||
build-build-tools-image:
|
||||
needs: [ check-permissions ]
|
||||
@@ -89,8 +88,8 @@ jobs:
|
||||
|
||||
check-codestyle-python:
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
# No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
# No need to run on `main` because we this in the merge queue
|
||||
if: ${{ needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/_check-codestyle-python.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -98,8 +97,7 @@ jobs:
|
||||
|
||||
check-codestyle-jsonnet:
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
# We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["pr", "push-main"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
@@ -182,8 +180,8 @@ jobs:
|
||||
|
||||
check-codestyle-rust:
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
# No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
# No need to run on `main` because we this in the merge queue
|
||||
if: ${{ needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -192,8 +190,7 @@ jobs:
|
||||
|
||||
check-dependencies-rust:
|
||||
needs: [ meta, files-changed, build-build-tools-image ]
|
||||
# No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' && contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' && needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/cargo-deny.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -201,8 +198,7 @@ jobs:
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
# We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["pr", "push-main"]'), needs.meta.outputs.run-kind) }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -252,13 +248,8 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Cache poetry deps
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
@@ -549,7 +540,6 @@ jobs:
|
||||
uses: ./.github/workflows/trigger-e2e-tests.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
github-event-json: ${{ toJSON(github.event) }}
|
||||
secrets: inherit
|
||||
|
||||
neon-image-arch:
|
||||
@@ -573,7 +563,6 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
ref: ${{ needs.meta.outputs.sha }}
|
||||
|
||||
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
|
||||
- uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
@@ -683,7 +672,6 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
ref: ${{ needs.meta.outputs.sha }}
|
||||
|
||||
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
|
||||
- uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
@@ -1568,10 +1556,10 @@ jobs:
|
||||
if: |
|
||||
contains(needs.*.result, 'failure')
|
||||
|| contains(needs.*.result, 'cancelled')
|
||||
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true' && contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.build-and-test-locally.result == 'skipped' && contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.check-codestyle-python.result == 'skipped' && contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.check-codestyle-rust.result == 'skipped' && contains(fromJSON('["pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.build-and-test-locally.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.check-codestyle-rust.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| needs.files-changed.result == 'skipped'
|
||||
|| (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
echo tag=${tag} >> ${GITHUB_OUTPUT}
|
||||
|
||||
- name: Test extension upgrade
|
||||
timeout-minutes: 60
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
NEW_COMPUTE_TAG: latest
|
||||
OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
|
||||
|
||||
@@ -23,7 +23,7 @@ jobs:
|
||||
egress-policy: audit
|
||||
|
||||
- name: Export Workflow Run for the past 2 hours
|
||||
uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2
|
||||
uses: neondatabase/gh-workflow-stats-action@4c998b25ab5cc6588b52a610b749531f6a566b6b # v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
egress-policy: audit
|
||||
|
||||
- name: Export Workflow Run for the past 48 hours
|
||||
uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2
|
||||
uses: neondatabase/gh-workflow-stats-action@4c998b25ab5cc6588b52a610b749531f6a566b6b # v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
egress-policy: audit
|
||||
|
||||
- name: Export Workflow Run for the past 30 days
|
||||
uses: neondatabase/gh-workflow-stats-action@701b1f202666d0b82e67b4d387e909af2b920127 # v0.2.2
|
||||
uses: neondatabase/gh-workflow-stats-action@4c998b25ab5cc6588b52a610b749531f6a566b6b # v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
|
||||
4
.github/workflows/trigger-e2e-tests.yml
vendored
4
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -9,9 +9,6 @@ on:
|
||||
github-event-name:
|
||||
type: string
|
||||
required: true
|
||||
github-event-json:
|
||||
type: string
|
||||
required: true
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -51,7 +48,6 @@ jobs:
|
||||
uses: ./.github/workflows/_meta.yml
|
||||
with:
|
||||
github-event-name: ${{ inputs.github-event-name || github.event_name }}
|
||||
github-event-json: ${{ inputs.github-event-json || toJSON(github.event) }}
|
||||
|
||||
trigger-e2e-tests:
|
||||
needs: [ meta ]
|
||||
|
||||
64
Cargo.lock
generated
64
Cargo.lock
generated
@@ -148,9 +148,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arc-swap"
|
||||
version = "1.7.1"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
|
||||
checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
|
||||
|
||||
[[package]]
|
||||
name = "archery"
|
||||
@@ -1167,6 +1167,18 @@ dependencies = [
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cicc"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"glob",
|
||||
"serde",
|
||||
"toml",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.6.1"
|
||||
@@ -2498,9 +2510,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
||||
|
||||
[[package]]
|
||||
name = "governor"
|
||||
@@ -2809,7 +2821,6 @@ name = "http-utils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arc-swap",
|
||||
"bytes",
|
||||
"camino",
|
||||
"fail",
|
||||
@@ -2822,7 +2833,6 @@ dependencies = [
|
||||
"pprof",
|
||||
"regex",
|
||||
"routerify",
|
||||
"rustls 0.23.18",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3861,10 +3871,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.6"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
|
||||
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
@@ -3913,10 +3924,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
version = "0.1.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
@@ -3945,9 +3957,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
@@ -4329,7 +4341,6 @@ dependencies = [
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tracing-utils",
|
||||
"utils",
|
||||
]
|
||||
|
||||
@@ -4693,7 +4704,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"byteorder",
|
||||
@@ -4727,7 +4738,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -5361,25 +5372,26 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "redis"
|
||||
version = "0.29.2"
|
||||
version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b110459d6e323b7cda23980c46c77157601199c9da6241552b284cd565a7a133"
|
||||
checksum = "71d64e978fd98a0e6b105d066ba4889a7301fca65aeac850a877d8797343feeb"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"combine",
|
||||
"futures-util",
|
||||
"itoa",
|
||||
"num-bigint",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.23.18",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls 0.22.4",
|
||||
"rustls-native-certs 0.7.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"rustls-pki-types",
|
||||
"ryu",
|
||||
"sha1_smol",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-util",
|
||||
"url",
|
||||
]
|
||||
@@ -6605,7 +6617,6 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"camino",
|
||||
"chrono",
|
||||
"clap",
|
||||
"clashmap",
|
||||
@@ -6649,7 +6660,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"utils",
|
||||
@@ -7172,7 +7182,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.10"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#394851e467755562b4173ff68f9eb0e7f737be13"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -7215,14 +7225,15 @@ dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"futures-util",
|
||||
"log",
|
||||
"parking_lot 0.12.1",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol2",
|
||||
"postgres-types2",
|
||||
"serde",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7604,7 +7615,6 @@ dependencies = [
|
||||
"opentelemetry-otlp",
|
||||
"opentelemetry-semantic-conventions",
|
||||
"opentelemetry_sdk",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
|
||||
10
Cargo.toml
10
Cargo.toml
@@ -1,6 +1,7 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = [
|
||||
"cicc",
|
||||
"compute_tools",
|
||||
"control_plane",
|
||||
"control_plane/storcon_cli",
|
||||
@@ -50,7 +51,7 @@ license = "Apache-2.0"
|
||||
[workspace.dependencies]
|
||||
ahash = "0.8"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.7"
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
atomic-take = "1.1.0"
|
||||
flate2 = "1.0.26"
|
||||
@@ -95,6 +96,7 @@ futures = "0.3"
|
||||
futures-core = "0.3"
|
||||
futures-util = "0.3"
|
||||
git-version = "0.3"
|
||||
glob = "0.3.2"
|
||||
governor = "0.8"
|
||||
hashbrown = "0.14"
|
||||
hashlink = "0.9.1"
|
||||
@@ -130,7 +132,7 @@ nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal"
|
||||
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
|
||||
notify = "6.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.19"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
opentelemetry = "0.27"
|
||||
opentelemetry_sdk = "0.27"
|
||||
@@ -146,7 +148,7 @@ procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.13"
|
||||
rand = "0.8"
|
||||
redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
regex = "1.10.2"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_27"] }
|
||||
@@ -210,7 +212,7 @@ tracing-subscriber = { version = "0.3", default-features = false, features = ["s
|
||||
try-lock = "0.2.5"
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
typed-json = "0.1"
|
||||
url = "2.2"
|
||||
url = { version = "2.2", features = ["serde"] }
|
||||
urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
|
||||
@@ -292,7 +292,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.86.0
|
||||
ENV RUSTC_VERSION=1.85.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
|
||||
12
cicc/Cargo.toml
Normal file
12
cicc/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "cicc"
|
||||
edition = "2024"
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
clap.workspace = true
|
||||
glob.workspace = true
|
||||
serde.workspace = true
|
||||
toml.workspace = true
|
||||
url.workspace = true
|
||||
22
cicc/extension.schema.json
Normal file
22
cicc/extension.schema.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"$id": "extension",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"version": {
|
||||
"type": "string"
|
||||
},
|
||||
"build-system": {
|
||||
"enum": [
|
||||
"pgxs"
|
||||
]
|
||||
},
|
||||
"tarball": {
|
||||
"type": "string"
|
||||
},
|
||||
"checksum": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
145
cicc/src/main.rs
Normal file
145
cicc/src/main.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
use std::{io::Write, path::PathBuf};
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use glob::glob;
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
directory: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
enum BuildSystem {
|
||||
#[serde(rename = "pgxs")]
|
||||
PGXS,
|
||||
#[serde(rename = "cmake-ninja")]
|
||||
CmakeNinja,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
struct ExtensionManifest {
|
||||
pub name: String,
|
||||
pub version: String,
|
||||
pub build_system: BuildSystem,
|
||||
#[serde(default)]
|
||||
pub build_arguments: Vec<String>,
|
||||
pub tarball: String,
|
||||
pub checksum: String,
|
||||
pub trusted: bool,
|
||||
}
|
||||
|
||||
impl TryFrom<PathBuf> for ExtensionManifest {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(path: PathBuf) -> std::result::Result<Self, Self::Error> {
|
||||
let content = std::fs::read_to_string(path)?;
|
||||
|
||||
let manifest: ExtensionManifest = toml::from_str(&content)?;
|
||||
|
||||
Ok(manifest)
|
||||
}
|
||||
}
|
||||
|
||||
impl ExtensionManifest {
|
||||
pub fn compile(self) {
|
||||
let mut stdout = std::io::stdout().lock();
|
||||
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
r"FROM build-deps AS {name}-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
RUN wget '{tarball}' -O '{name}.tar.gz' && \
|
||||
echo '{checksum} {name}.tar.gz' | sha256sum --check && \
|
||||
mkdir '{name}-src' && cd '{name}-src' && tar xzf '../{name}.tar.gz' --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS {name}-build
|
||||
COPY --from={name}-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/{name}-src
|
||||
",
|
||||
name = self.name,
|
||||
tarball = self.tarball,
|
||||
checksum = self.checksum
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
match self.build_system {
|
||||
BuildSystem::PGXS => writeln!(
|
||||
&mut stdout,
|
||||
r#"RUN make -j "$(getconf _NPROCESSORS_ONLN)" install && \"#,
|
||||
)
|
||||
.unwrap(),
|
||||
BuildSystem::CmakeNinja => writeln!(
|
||||
&mut stdout,
|
||||
r"RUN cmake -G Ninja -B build -DCMAKE_BUILD_TYPE=Release {build_args}
|
||||
ninja -C install && \",
|
||||
build_args = self.build_arguments.join(" ")
|
||||
)
|
||||
.unwrap(),
|
||||
}
|
||||
|
||||
if self.trusted {
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
" echo 'trusted = true' >> '/usr/local/pgsql/share/extension/{name}.control",
|
||||
name = self.name
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(&mut stdout, " true").unwrap();
|
||||
}
|
||||
|
||||
write!(&mut stdout, "\n").unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
struct DependencyManifest {}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
if !cli.directory.exists() {
|
||||
eprintln!("Directory does not exist");
|
||||
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
for entry in glob(cli.directory.join("*.toml").to_str().unwrap()).unwrap() {
|
||||
let path = match entry {
|
||||
Ok(entry) => entry,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let manifest: ExtensionManifest = match path.clone().try_into() {
|
||||
Ok(manifest) => manifest,
|
||||
Err(e) => {
|
||||
eprintln!("Failed to read {}: {}", path.display(), e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
manifest.compile();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use clap::CommandFactory;
|
||||
|
||||
use super::Cli;
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
Cli::command().debug_assert()
|
||||
}
|
||||
}
|
||||
@@ -369,7 +369,7 @@ FROM build-deps AS plv8-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
|
||||
COPY compute/patches/plv8* .
|
||||
COPY compute/patches/plv8-3.1.10.patch .
|
||||
|
||||
# plv8 3.2.3 supports v17
|
||||
# last release v3.2.3 - Sep 7, 2024
|
||||
@@ -393,7 +393,7 @@ RUN case "${PG_VERSION:?}" in \
|
||||
git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \
|
||||
tar -czf plv8.tar.gz --exclude .git plv8-src && \
|
||||
cd plv8-src && \
|
||||
if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8_v3.1.10.patch; else patch -p1 < /ext-src/plv8_v3.2.3.patch; fi
|
||||
if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi
|
||||
|
||||
# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use
|
||||
# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds.
|
||||
@@ -1055,6 +1055,34 @@ RUN if [ -d pg_embedding-src ]; then \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install; \
|
||||
fi
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_anon-build"
|
||||
# compile anon extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg_anon-src
|
||||
ARG PG_VERSION
|
||||
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION:?}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pg_anon-build
|
||||
COPY --from=pg_anon-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src
|
||||
RUN if [ -d pg_anon-src ]; then \
|
||||
cd pg_anon-src && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control; \
|
||||
fi
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg build with nonroot user and cargo installed"
|
||||
@@ -1338,8 +1366,8 @@ ARG PG_VERSION
|
||||
# Do not update without approve from proxy team
|
||||
# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs
|
||||
WORKDIR /ext-src
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.3.0.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "19be2dc0b3834d643706ed430af998bb4c2cdf24b3c45e7b102bb3a550e8660c pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.2.0.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "5ace028e591f2e000ca10afa5b1ca62203ebff014c2907c0ec3b29c36f28a1bb pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/version = "0.12.6"/version = "0.12.9"/g' pgrx-tests/Cargo.toml && \
|
||||
@@ -1649,6 +1677,7 @@ COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_embedding-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
|
||||
COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1887,30 +1916,26 @@ RUN apt update && \
|
||||
;; \
|
||||
esac && \
|
||||
apt install --no-install-recommends -y \
|
||||
ca-certificates \
|
||||
gdb \
|
||||
iproute2 \
|
||||
liblz4-1 \
|
||||
libreadline8 \
|
||||
libboost-iostreams1.74.0 \
|
||||
libboost-regex1.74.0 \
|
||||
libboost-serialization1.74.0 \
|
||||
libboost-system1.74.0 \
|
||||
libcurl4 \
|
||||
libevent-2.1-7 \
|
||||
libgeos-c1v5 \
|
||||
liblz4-1 \
|
||||
libossp-uuid16 \
|
||||
libgeos-c1v5 \
|
||||
libprotobuf-c1 \
|
||||
libreadline8 \
|
||||
libsfcgal1 \
|
||||
libxml2 \
|
||||
libxslt1.1 \
|
||||
libzstd1 \
|
||||
libcurl4 \
|
||||
libevent-2.1-7 \
|
||||
locales \
|
||||
lsof \
|
||||
procps \
|
||||
ca-certificates \
|
||||
rsyslog \
|
||||
screen \
|
||||
tcpdump \
|
||||
$VERSION_INSTALLS && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
import 'sql_exporter/lfc_hits.libsonnet',
|
||||
import 'sql_exporter/lfc_misses.libsonnet',
|
||||
import 'sql_exporter/lfc_used.libsonnet',
|
||||
import 'sql_exporter/lfc_used_pages.libsonnet',
|
||||
import 'sql_exporter/lfc_writes.libsonnet',
|
||||
import 'sql_exporter/logical_slot_restart_lsn.libsonnet',
|
||||
import 'sql_exporter/max_cluster_size.libsonnet',
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
metric_name: 'lfc_used_pages',
|
||||
type: 'gauge',
|
||||
help: 'LFC pages used',
|
||||
key_labels: null,
|
||||
values: [
|
||||
'lfc_used_pages',
|
||||
],
|
||||
query: importstr 'sql_exporter/lfc_used_pages.sql',
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
SELECT lfc_value AS lfc_used_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_used_pages';
|
||||
20
compute/extensions/timescaledb.yaml
Normal file
20
compute/extensions/timescaledb.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
name: ip4r
|
||||
|
||||
build-system: "pgxs"
|
||||
|
||||
trusted: true
|
||||
|
||||
pg14: &pg14
|
||||
version: "2.10.1"
|
||||
tarball: "https://github.com/timescale/timescaledb/archive/refs/tags/{version}.tar.gz"
|
||||
checksum: 6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73
|
||||
|
||||
pg15:
|
||||
<<: *pg14
|
||||
|
||||
pg16:
|
||||
<<: *pg14
|
||||
|
||||
|
||||
pg17:
|
||||
<<: *pg14
|
||||
0
compute/extensions/v14/dependencies/sfcgal.toml
Normal file
0
compute/extensions/v14/dependencies/sfcgal.toml
Normal file
9
compute/extensions/v14/ip4r.toml
Normal file
9
compute/extensions/v14/ip4r.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
name = "ip4r"
|
||||
version = "2.4.2"
|
||||
|
||||
trusted = true
|
||||
|
||||
build-system = "pgxs"
|
||||
|
||||
tarball = "https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz"
|
||||
checksum = "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b"
|
||||
0
compute/extensions/v15/dependencies/sfcgal.toml
Normal file
0
compute/extensions/v15/dependencies/sfcgal.toml
Normal file
9
compute/extensions/v15/ip4r.toml
Normal file
9
compute/extensions/v15/ip4r.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
name = "ip4r"
|
||||
version = "2.4.2"
|
||||
|
||||
trusted = true
|
||||
|
||||
build-system = "pgxs"
|
||||
|
||||
tarball = "https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz"
|
||||
checksum = "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b"
|
||||
0
compute/extensions/v16/dependencies/sfcgal.toml
Normal file
0
compute/extensions/v16/dependencies/sfcgal.toml
Normal file
9
compute/extensions/v16/ip4r.toml
Normal file
9
compute/extensions/v16/ip4r.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
name = "ip4r"
|
||||
version = "2.4.2"
|
||||
|
||||
trusted = true
|
||||
|
||||
build-system = "pgxs"
|
||||
|
||||
tarball = "https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz"
|
||||
checksum = "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b"
|
||||
0
compute/extensions/v17/dependencies/sfcgal.toml
Normal file
0
compute/extensions/v17/dependencies/sfcgal.toml
Normal file
9
compute/extensions/v17/ip4r.toml
Normal file
9
compute/extensions/v17/ip4r.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
name = "ip4r"
|
||||
version = "2.4.2"
|
||||
|
||||
trusted = true
|
||||
|
||||
build-system = "pgxs"
|
||||
|
||||
tarball = "https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz"
|
||||
checksum = "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b"
|
||||
14
compute/extensions/v17/timescaledb.toml
Normal file
14
compute/extensions/v17/timescaledb.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
name = "timescaledb"
|
||||
version = "2.17.1"
|
||||
|
||||
trusted = true
|
||||
|
||||
build-system = "cmake-ninja"
|
||||
build-arguments = [
|
||||
"-DSEND_TELEMETRY_DEFAULT:BOOL=OFF",
|
||||
"-DUSE_TELEMETRY:BOOL=OFF",
|
||||
"-DAPACHE_ONLY:BOOL=ON",
|
||||
]
|
||||
|
||||
tarball = "https://github.com/timescale/timescaledb/archive/refs/tags/2.17.1.tar.gz"
|
||||
checksum = "6277cf43f5695e23dae1c5cfeba00474d730b66ed53665a84b787a6bb1a57e28"
|
||||
@@ -2,6 +2,23 @@ diff --git a/expected/ut-A.out b/expected/ut-A.out
|
||||
index da723b8..5328114 100644
|
||||
--- a/expected/ut-A.out
|
||||
+++ b/expected/ut-A.out
|
||||
@@ -9,13 +9,16 @@ SET search_path TO public;
|
||||
----
|
||||
-- No.A-1-1-3
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
-- No.A-1-2-3
|
||||
DROP EXTENSION pg_hint_plan;
|
||||
-- No.A-1-1-4
|
||||
CREATE SCHEMA other_schema;
|
||||
CREATE EXTENSION pg_hint_plan SCHEMA other_schema;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan"
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
DROP SCHEMA other_schema;
|
||||
----
|
||||
---- No. A-5-1 comment pattern
|
||||
@@ -3175,6 +3178,7 @@ SELECT s.query, s.calls
|
||||
FROM public.pg_stat_statements s
|
||||
JOIN pg_catalog.pg_database d
|
||||
@@ -10,6 +27,18 @@ index da723b8..5328114 100644
|
||||
ORDER BY 1;
|
||||
query | calls
|
||||
--------------------------------------+-------
|
||||
diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out
|
||||
index d372459..6282afe 100644
|
||||
--- a/expected/ut-fdw.out
|
||||
+++ b/expected/ut-fdw.out
|
||||
@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on;
|
||||
SET client_min_messages TO LOG;
|
||||
SET pg_hint_plan.enable_hint TO on;
|
||||
CREATE EXTENSION file_fdw;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/file_fdw
|
||||
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
|
||||
CREATE USER MAPPING FOR PUBLIC SERVER file_server;
|
||||
CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename');
|
||||
diff --git a/sql/ut-A.sql b/sql/ut-A.sql
|
||||
index 7c7d58a..4fd1a07 100644
|
||||
--- a/sql/ut-A.sql
|
||||
|
||||
@@ -1,3 +1,24 @@
|
||||
diff --git a/expected/ut-A.out b/expected/ut-A.out
|
||||
index e7d68a1..65a056c 100644
|
||||
--- a/expected/ut-A.out
|
||||
+++ b/expected/ut-A.out
|
||||
@@ -9,13 +9,16 @@ SET search_path TO public;
|
||||
----
|
||||
-- No.A-1-1-3
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
-- No.A-1-2-3
|
||||
DROP EXTENSION pg_hint_plan;
|
||||
-- No.A-1-1-4
|
||||
CREATE SCHEMA other_schema;
|
||||
CREATE EXTENSION pg_hint_plan SCHEMA other_schema;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan"
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan
|
||||
DROP SCHEMA other_schema;
|
||||
----
|
||||
---- No. A-5-1 comment pattern
|
||||
diff --git a/expected/ut-J.out b/expected/ut-J.out
|
||||
index 2fa3c70..314e929 100644
|
||||
--- a/expected/ut-J.out
|
||||
@@ -139,3 +160,15 @@ index a09bd34..0ad227c 100644
|
||||
error hint:
|
||||
|
||||
explain_filter
|
||||
diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out
|
||||
index 017fa4b..98d989b 100644
|
||||
--- a/expected/ut-fdw.out
|
||||
+++ b/expected/ut-fdw.out
|
||||
@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on;
|
||||
SET client_min_messages TO LOG;
|
||||
SET pg_hint_plan.enable_hint TO on;
|
||||
CREATE EXTENSION file_fdw;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/file_fdw
|
||||
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
|
||||
CREATE USER MAPPING FOR PUBLIC SERVER file_server;
|
||||
CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename');
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
commit 46b38d3e46f9cd6c70d9b189dd6ff4abaa17cf5e
|
||||
Author: Alexander Bayandin <alexander@neon.tech>
|
||||
Date: Sat Nov 30 18:29:32 2024 +0000
|
||||
|
||||
Fix v8 9.7.37 compilation on Debian 12
|
||||
|
||||
diff --git a/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch b/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch
|
||||
new file mode 100644
|
||||
index 0000000..fae1cb3
|
||||
index 0000000..f0a5dc7
|
||||
--- /dev/null
|
||||
+++ b/patches/code/84cf3230a9680aac3b73c410c2b758760b6d3066.patch
|
||||
@@ -0,0 +1,30 @@
|
||||
@@ -29,21 +35,8 @@ index 0000000..fae1cb3
|
||||
+@@ -5,6 +5,7 @@
|
||||
+ #ifndef V8_HEAP_CPPGC_PREFINALIZER_HANDLER_H_
|
||||
+ #define V8_HEAP_CPPGC_PREFINALIZER_HANDLER_H_
|
||||
+
|
||||
+
|
||||
++#include <utility>
|
||||
+ #include <vector>
|
||||
+
|
||||
+
|
||||
+ #include "include/cppgc/prefinalizer.h"
|
||||
diff --git a/plv8.cc b/plv8.cc
|
||||
index c1ce883..6e47e94 100644
|
||||
--- a/plv8.cc
|
||||
+++ b/plv8.cc
|
||||
@@ -379,7 +379,7 @@ _PG_init(void)
|
||||
NULL,
|
||||
&plv8_v8_flags,
|
||||
NULL,
|
||||
- PGC_USERSET, 0,
|
||||
+ PGC_SUSET, 0,
|
||||
#if PG_VERSION_NUM >= 90100
|
||||
NULL,
|
||||
#endif
|
||||
@@ -1,13 +0,0 @@
|
||||
diff --git a/plv8.cc b/plv8.cc
|
||||
index edfa2aa..623e7f2 100644
|
||||
--- a/plv8.cc
|
||||
+++ b/plv8.cc
|
||||
@@ -385,7 +385,7 @@ _PG_init(void)
|
||||
NULL,
|
||||
&plv8_v8_flags,
|
||||
NULL,
|
||||
- PGC_USERSET, 0,
|
||||
+ PGC_SUSET, 0,
|
||||
#if PG_VERSION_NUM >= 90100
|
||||
NULL,
|
||||
#endif
|
||||
@@ -45,9 +45,7 @@ use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use compute_tools::compute::{
|
||||
BUILD_TAG, ComputeNode, ComputeNodeParams, forward_termination_signal,
|
||||
};
|
||||
use compute_tools::compute::{ComputeNode, ComputeNodeParams, forward_termination_signal};
|
||||
use compute_tools::extension_server::get_pg_version_string;
|
||||
use compute_tools::logger::*;
|
||||
use compute_tools::params::*;
|
||||
@@ -59,6 +57,10 @@ use tracing::{error, info};
|
||||
use url::Url;
|
||||
use utils::failpoint_support;
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
@@ -145,7 +147,7 @@ fn main() -> Result<()> {
|
||||
.build()?;
|
||||
let _rt_guard = runtime.enter();
|
||||
|
||||
runtime.block_on(init())?;
|
||||
let build_tag = runtime.block_on(init())?;
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
@@ -172,6 +174,8 @@ fn main() -> Result<()> {
|
||||
cgroup: cli.cgroup,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor_addr: cli.vm_monitor_addr,
|
||||
build_tag,
|
||||
|
||||
live_config_allowed: cli_spec.live_config_allowed,
|
||||
},
|
||||
cli_spec.spec,
|
||||
@@ -185,7 +189,7 @@ fn main() -> Result<()> {
|
||||
deinit_and_exit(exit_code);
|
||||
}
|
||||
|
||||
async fn init() -> Result<()> {
|
||||
async fn init() -> Result<String> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
@@ -195,9 +199,12 @@ async fn init() -> Result<()> {
|
||||
}
|
||||
});
|
||||
|
||||
info!("compute build_tag: {}", &BUILD_TAG.to_string());
|
||||
let build_tag = option_env!("BUILD_TAG")
|
||||
.unwrap_or(BUILD_TAG_DEFAULT)
|
||||
.to_string();
|
||||
info!("build_tag: {build_tag}");
|
||||
|
||||
Ok(())
|
||||
Ok(build_tag)
|
||||
}
|
||||
|
||||
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
|
||||
@@ -31,7 +31,6 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::{Parser, Subcommand};
|
||||
use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
|
||||
use nix::unistd::Pid;
|
||||
use std::ops::Not;
|
||||
use tracing::{Instrument, error, info, info_span, warn};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
@@ -45,7 +44,7 @@ mod s3_uri;
|
||||
const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
|
||||
const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
|
||||
|
||||
#[derive(Subcommand, Debug, Clone, serde::Serialize)]
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum Command {
|
||||
/// Runs local postgres (neon binary), restores into it,
|
||||
/// uploads pgdata to s3 to be consumed by pageservers
|
||||
@@ -85,15 +84,6 @@ enum Command {
|
||||
},
|
||||
}
|
||||
|
||||
impl Command {
|
||||
fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Command::Pgdata { .. } => "pgdata",
|
||||
Command::DumpRestore { .. } => "dump-restore",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, env = "NEON_IMPORTER_WORKDIR")]
|
||||
@@ -447,7 +437,7 @@ async fn run_dump_restore(
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn cmd_pgdata(
|
||||
s3_client: Option<&aws_sdk_s3::Client>,
|
||||
s3_client: Option<aws_sdk_s3::Client>,
|
||||
kms_client: Option<aws_sdk_kms::Client>,
|
||||
maybe_s3_prefix: Option<s3_uri::S3Uri>,
|
||||
maybe_spec: Option<Spec>,
|
||||
@@ -516,14 +506,14 @@ async fn cmd_pgdata(
|
||||
if let Some(s3_prefix) = maybe_s3_prefix {
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::upload_dir_recursive(
|
||||
s3_client.unwrap(),
|
||||
s3_client.as_ref().unwrap(),
|
||||
Utf8Path::new(&pgdata_dir),
|
||||
&s3_prefix.append("/pgdata/"),
|
||||
)
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write pgdata status to s3");
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = workdir.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
@@ -560,15 +550,13 @@ async fn cmd_dumprestore(
|
||||
&key_id,
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
)
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
.await?;
|
||||
|
||||
let dest = if let Some(dest_ciphertext) =
|
||||
spec.destination_connstring_ciphertext_base64
|
||||
{
|
||||
decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
|
||||
.await
|
||||
.context("decrypt destination connection string")?
|
||||
.await?
|
||||
} else {
|
||||
bail!(
|
||||
"destination connection string must be provided in spec for dump_restore command"
|
||||
@@ -613,18 +601,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (s3_client, kms_client) = if args.s3_prefix.is_some() {
|
||||
// Create AWS config with enhanced retry settings
|
||||
let config = aws_config::defaults(BehaviorVersion::v2024_03_28())
|
||||
.retry_config(
|
||||
aws_config::retry::RetryConfig::standard()
|
||||
.with_max_attempts(5) // Retry up to 5 times
|
||||
.with_initial_backoff(std::time::Duration::from_millis(200)) // Start with 200ms delay
|
||||
.with_max_backoff(std::time::Duration::from_secs(5)), // Cap at 5 seconds
|
||||
)
|
||||
.load()
|
||||
.await;
|
||||
|
||||
// Create clients from the config with enhanced retry settings
|
||||
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let s3_client = aws_sdk_s3::Client::new(&config);
|
||||
let kms = aws_sdk_kms::Client::new(&config);
|
||||
(Some(s3_client), Some(kms))
|
||||
@@ -632,108 +609,79 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Capture everything from spec assignment onwards to handle errors
|
||||
let res = async {
|
||||
let spec: Option<Spec> = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let object = s3_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let spec: Option<Spec> = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let object = s3_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&args.working_directory).await {
|
||||
Ok(()) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
|
||||
if !is_directory_empty(&args.working_directory)
|
||||
.await
|
||||
.context("check if working directory is empty")?
|
||||
{
|
||||
bail!("working directory is not empty");
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
match tokio::fs::create_dir(&args.working_directory).await {
|
||||
Ok(()) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
|
||||
if !is_directory_empty(&args.working_directory)
|
||||
.await
|
||||
.context("check if working directory is empty")?
|
||||
{
|
||||
bail!("working directory is not empty");
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
|
||||
}
|
||||
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
|
||||
}
|
||||
|
||||
match args.command.clone() {
|
||||
Command::Pgdata {
|
||||
match args.command {
|
||||
Command::Pgdata {
|
||||
source_connection_string,
|
||||
interactive,
|
||||
pg_port,
|
||||
num_cpus,
|
||||
memory_mb,
|
||||
} => {
|
||||
cmd_pgdata(
|
||||
s3_client,
|
||||
kms_client,
|
||||
args.s3_prefix,
|
||||
spec,
|
||||
source_connection_string,
|
||||
interactive,
|
||||
pg_port,
|
||||
args.working_directory,
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
num_cpus,
|
||||
memory_mb,
|
||||
} => {
|
||||
cmd_pgdata(
|
||||
s3_client.as_ref(),
|
||||
kms_client,
|
||||
args.s3_prefix.clone(),
|
||||
spec,
|
||||
source_connection_string,
|
||||
interactive,
|
||||
pg_port,
|
||||
args.working_directory.clone(),
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
num_cpus,
|
||||
memory_mb,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Command::DumpRestore {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::DumpRestore {
|
||||
source_connection_string,
|
||||
destination_connection_string,
|
||||
} => {
|
||||
cmd_dumprestore(
|
||||
kms_client,
|
||||
spec,
|
||||
source_connection_string,
|
||||
destination_connection_string,
|
||||
} => {
|
||||
cmd_dumprestore(
|
||||
kms_client,
|
||||
spec,
|
||||
source_connection_string,
|
||||
destination_connection_string,
|
||||
args.working_directory.clone(),
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
.await;
|
||||
|
||||
if let Some(s3_prefix) = args.s3_prefix {
|
||||
info!("write job status to s3");
|
||||
{
|
||||
let status_dir = args.working_directory.join("status");
|
||||
if std::fs::exists(&status_dir)?.not() {
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
}
|
||||
let status_file = status_dir.join("fast_import");
|
||||
let res_obj = match res {
|
||||
Ok(_) => serde_json::json!({"command": args.command.as_str(), "done": true}),
|
||||
Err(err) => {
|
||||
serde_json::json!({"command": args.command.as_str(), "done": false, "error": err.to_string()})
|
||||
}
|
||||
};
|
||||
std::fs::write(&status_file, res_obj.to_string()).context("write status file")?;
|
||||
aws_s3_sync::upload_dir_recursive(
|
||||
s3_client.as_ref().unwrap(),
|
||||
&status_dir,
|
||||
&s3_prefix.append("/status/"),
|
||||
args.working_directory,
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
)
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ use futures::future::join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use nix::sys::signal::{Signal, kill};
|
||||
use nix::unistd::Pid;
|
||||
use once_cell::sync::Lazy;
|
||||
use postgres;
|
||||
use postgres::NoTls;
|
||||
use postgres::error::SqlState;
|
||||
@@ -36,7 +35,6 @@ use crate::disk_quota::set_disk_quota;
|
||||
use crate::installed_extensions::get_installed_extensions;
|
||||
use crate::logger::startup_context_from_env;
|
||||
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use crate::metrics::COMPUTE_CTL_UP;
|
||||
use crate::monitor::launch_monitor;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::rsyslog::{
|
||||
@@ -51,17 +49,6 @@ use crate::{config, extension_server, local_proxy};
|
||||
|
||||
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
|
||||
pub static PG_PID: AtomicU32 = AtomicU32::new(0);
|
||||
// This is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
/// Build tag/version of the compute node binaries/image. It's tricky and ugly
|
||||
/// to pass it everywhere as a part of `ComputeNodeParams`, so we use a
|
||||
/// global static variable.
|
||||
pub static BUILD_TAG: Lazy<String> = Lazy::new(|| {
|
||||
option_env!("BUILD_TAG")
|
||||
.unwrap_or(BUILD_TAG_DEFAULT)
|
||||
.to_string()
|
||||
});
|
||||
|
||||
/// Static configuration params that don't change after startup. These mostly
|
||||
/// come from the CLI args, or are derived from them.
|
||||
@@ -85,6 +72,7 @@ pub struct ComputeNodeParams {
|
||||
pub pgdata: String,
|
||||
pub pgbin: String,
|
||||
pub pgversion: String,
|
||||
pub build_tag: String,
|
||||
|
||||
/// The port that the compute's external HTTP server listens on
|
||||
pub external_http_port: u16,
|
||||
@@ -185,11 +173,6 @@ impl ComputeState {
|
||||
info!("Changing compute status from {} to {}", prev, status);
|
||||
self.status = status;
|
||||
state_changed.notify_all();
|
||||
|
||||
COMPUTE_CTL_UP.reset();
|
||||
COMPUTE_CTL_UP
|
||||
.with_label_values(&[&BUILD_TAG, status.to_string().as_str()])
|
||||
.set(1);
|
||||
}
|
||||
|
||||
pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
|
||||
@@ -360,14 +343,6 @@ impl ComputeNode {
|
||||
this.prewarm_postgres()?;
|
||||
}
|
||||
|
||||
// Set the up metric with Empty status before starting the HTTP server.
|
||||
// That way on the first metric scrape, an external observer will see us
|
||||
// as 'up' and 'empty' (unless the compute was started with a spec or
|
||||
// already configured by control plane).
|
||||
COMPUTE_CTL_UP
|
||||
.with_label_values(&[&BUILD_TAG, ComputeStatus::Empty.to_string().as_str()])
|
||||
.set(1);
|
||||
|
||||
// Launch the external HTTP server first, so that we can serve control plane
|
||||
// requests while configuration is still in progress.
|
||||
crate::http::server::Server::External {
|
||||
@@ -903,14 +878,6 @@ impl ComputeNode {
|
||||
info!("Storage auth token not set");
|
||||
}
|
||||
|
||||
config.application_name("compute_ctl");
|
||||
if let Some(spec) = &compute_state.pspec {
|
||||
config.options(&format!(
|
||||
"-c neon.compute_mode={}",
|
||||
spec.spec.mode.to_type_str()
|
||||
));
|
||||
}
|
||||
|
||||
// Connect to pageserver
|
||||
let mut client = config.connect(NoTls)?;
|
||||
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
|
||||
@@ -2057,8 +2024,12 @@ LIMIT 100",
|
||||
|
||||
let mut download_tasks = Vec::new();
|
||||
for library in &libs_vec {
|
||||
let (ext_name, ext_path) =
|
||||
remote_extensions.get_ext(library, true, &BUILD_TAG, &self.params.pgversion)?;
|
||||
let (ext_name, ext_path) = remote_extensions.get_ext(
|
||||
library,
|
||||
true,
|
||||
&self.params.build_tag,
|
||||
&self.params.pgversion,
|
||||
)?;
|
||||
download_tasks.push(self.download_extension(ext_name, ext_path));
|
||||
}
|
||||
let results = join_all(download_tasks).await;
|
||||
|
||||
@@ -117,7 +117,6 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "lc_numeric='C.UTF-8'")?;
|
||||
}
|
||||
|
||||
writeln!(file, "neon.compute_mode={}", spec.mode.to_type_str())?;
|
||||
match spec.mode {
|
||||
ComputeMode::Primary => {}
|
||||
ComputeMode::Static(lsn) => {
|
||||
|
||||
@@ -59,12 +59,9 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
|
||||
Box::pin(async move {
|
||||
let request_id = request.extract_parts::<RequestId>().await.unwrap();
|
||||
|
||||
// TODO: Remove this stanza after teaching neon_local and the
|
||||
// regression tests to use a JWT + JWKS.
|
||||
//
|
||||
// https://github.com/neondatabase/neon/issues/11316
|
||||
if cfg!(feature = "testing") {
|
||||
warn!(%request_id, "Skipping compute_ctl authorization check");
|
||||
// TODO: Remove this check after a successful rollout
|
||||
if jwks.keys.is_empty() {
|
||||
warn!(%request_id, "Authorization has not been configured");
|
||||
|
||||
return Ok(request);
|
||||
}
|
||||
@@ -113,6 +110,8 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
|
||||
impl Authorize {
|
||||
/// Verify the token using the JSON Web Key set and return the token data.
|
||||
fn verify(jwks: &JwkSet, token: &str, validation: &Validation) -> Result<TokenData<Claims>> {
|
||||
debug_assert!(!jwks.keys.is_empty());
|
||||
|
||||
for jwk in jwks.keys.iter() {
|
||||
let decoding_key = match DecodingKey::from_jwk(jwk) {
|
||||
Ok(key) => key,
|
||||
|
||||
@@ -5,7 +5,7 @@ use axum::response::{IntoResponse, Response};
|
||||
use http::StatusCode;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::compute::{BUILD_TAG, ComputeNode};
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::{Path, Query};
|
||||
|
||||
@@ -47,7 +47,7 @@ pub(in crate::http) async fn download_extension(
|
||||
remote_extensions.get_ext(
|
||||
&filename,
|
||||
ext_server_params.is_library,
|
||||
&BUILD_TAG,
|
||||
&compute.params.build_tag,
|
||||
&compute.params.pgversion,
|
||||
)
|
||||
};
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use metrics::core::{AtomicF64, Collector, GenericGauge};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{
|
||||
IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter_vec,
|
||||
register_int_gauge_vec, register_uint_gauge_vec,
|
||||
IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
@@ -71,19 +70,8 @@ pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Report that `compute_ctl` is up and what's the current compute status.
|
||||
pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"compute_ctl_up",
|
||||
"Whether compute_ctl is running",
|
||||
&["build_tag", "status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
let mut metrics = COMPUTE_CTL_UP.collect();
|
||||
metrics.extend(INSTALLED_EXTENSIONS.collect());
|
||||
let mut metrics = INSTALLED_EXTENSIONS.collect();
|
||||
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
|
||||
@@ -419,7 +419,7 @@ impl ComputeNode {
|
||||
.iter()
|
||||
.filter_map(|val| val.parse::<usize>().ok())
|
||||
.map(|val| if val > 1 { val - 1 } else { 1 })
|
||||
.next_back()
|
||||
.last()
|
||||
.unwrap_or(3)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -165,11 +165,8 @@ pub struct NeonStorageControllerConf {
|
||||
/// Database url used when running multiple storage controller instances
|
||||
pub database_url: Option<SocketAddr>,
|
||||
|
||||
/// Thresholds for auto-splitting a tenant into shards.
|
||||
/// Threshold for auto-splitting a tenant into shards
|
||||
pub split_threshold: Option<u64>,
|
||||
pub max_split_shards: Option<u8>,
|
||||
pub initial_split_threshold: Option<u64>,
|
||||
pub initial_split_shards: Option<u8>,
|
||||
|
||||
pub max_secondary_lag_bytes: Option<u64>,
|
||||
|
||||
@@ -184,8 +181,6 @@ pub struct NeonStorageControllerConf {
|
||||
pub timelines_onto_safekeepers: bool,
|
||||
|
||||
pub use_https_safekeeper_api: bool,
|
||||
|
||||
pub use_local_compute_notifications: bool,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -206,16 +201,12 @@ impl Default for NeonStorageControllerConf {
|
||||
start_as_candidate: false,
|
||||
database_url: None,
|
||||
split_threshold: None,
|
||||
max_split_shards: None,
|
||||
initial_split_threshold: None,
|
||||
initial_split_shards: None,
|
||||
max_secondary_lag_bytes: None,
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
use_https_pageserver_api: false,
|
||||
timelines_onto_safekeepers: false,
|
||||
use_https_safekeeper_api: false,
|
||||
use_local_compute_notifications: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,19 +51,11 @@ impl PageServerNode {
|
||||
parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
|
||||
let port = port.unwrap_or(5432);
|
||||
|
||||
let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let ssl_ca_cert = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist");
|
||||
Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
|
||||
Certificate::from_pem(&buf).expect("CA certificate should be valid")
|
||||
});
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client
|
||||
.build()
|
||||
.expect("Client constructs with no errors");
|
||||
|
||||
let endpoint = if env.storage_controller.use_https_pageserver_api {
|
||||
format!(
|
||||
"https://{}",
|
||||
@@ -80,7 +72,6 @@ impl PageServerNode {
|
||||
conf: conf.clone(),
|
||||
env: env.clone(),
|
||||
http_client: mgmt_api::Client::new(
|
||||
http_client,
|
||||
endpoint,
|
||||
{
|
||||
match conf.http_auth_type {
|
||||
@@ -92,7 +83,9 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
.as_deref(),
|
||||
),
|
||||
ssl_ca_cert,
|
||||
)
|
||||
.expect("Client constructs with no errors"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,10 +142,6 @@ impl PageServerNode {
|
||||
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
|
||||
}
|
||||
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
overrides.push(format!("ssl_ca_file='{}'", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
|
||||
// Apply the user-provided overrides
|
||||
overrides.push({
|
||||
let mut doc =
|
||||
@@ -428,6 +417,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_delay_threshold' as an integer")?,
|
||||
l0_flush_wait_upload: settings
|
||||
.remove("l0_flush_wait_upload")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_wait_upload' as a boolean")?,
|
||||
l0_flush_stall_threshold: settings
|
||||
.remove("l0_flush_stall_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use std::str::FromStr;
|
||||
@@ -17,7 +18,7 @@ use pageserver_api::models::{TenantConfigRequest, TimelineCreateRequest, Timelin
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Certificate, Method};
|
||||
use reqwest::Method;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::Command;
|
||||
@@ -37,9 +38,9 @@ pub struct StorageController {
|
||||
client: reqwest::Client,
|
||||
config: NeonStorageControllerConf,
|
||||
|
||||
// The listen port is learned when starting the storage controller,
|
||||
// The listen addresses is learned when starting the storage controller,
|
||||
// hence the use of OnceLock to init it at the right time.
|
||||
listen_port: OnceLock<u16>,
|
||||
listen: OnceLock<SocketAddr>,
|
||||
}
|
||||
|
||||
const COMMAND: &str = "storage_controller";
|
||||
@@ -143,26 +144,15 @@ impl StorageController {
|
||||
}
|
||||
};
|
||||
|
||||
let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist");
|
||||
Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
|
||||
});
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client
|
||||
.build()
|
||||
.expect("HTTP client should construct with no error");
|
||||
|
||||
Self {
|
||||
env: env.clone(),
|
||||
private_key,
|
||||
public_key,
|
||||
client: http_client,
|
||||
client: reqwest::ClientBuilder::new()
|
||||
.build()
|
||||
.expect("Failed to construct http client"),
|
||||
config: env.storage_controller.clone(),
|
||||
listen_port: OnceLock::default(),
|
||||
listen: OnceLock::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,34 +337,34 @@ impl StorageController {
|
||||
}
|
||||
}
|
||||
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
&instance_dir.join("server.crt"),
|
||||
&instance_dir.join("server.key"),
|
||||
)?;
|
||||
}
|
||||
let (listen, postgres_port) = {
|
||||
if let Some(base_port) = start_args.base_port {
|
||||
(
|
||||
format!("127.0.0.1:{base_port}"),
|
||||
self.config
|
||||
.database_url
|
||||
.expect("--base-port requires NeonStorageControllerConf::database_url")
|
||||
.port(),
|
||||
)
|
||||
} else {
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
|
||||
let listen_url = &self.env.control_plane_api;
|
||||
let listen = format!(
|
||||
"{}:{}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
);
|
||||
|
||||
let scheme = listen_url.scheme();
|
||||
let host = listen_url.host_str().unwrap();
|
||||
|
||||
let (listen_port, postgres_port) = if let Some(base_port) = start_args.base_port {
|
||||
(
|
||||
base_port,
|
||||
self.config
|
||||
.database_url
|
||||
.expect("--base-port requires NeonStorageControllerConf::database_url")
|
||||
.port(),
|
||||
)
|
||||
} else {
|
||||
let port = listen_url.port().unwrap();
|
||||
(port, port + 1)
|
||||
(listen, listen_url.port().unwrap() + 1)
|
||||
}
|
||||
};
|
||||
|
||||
self.listen_port
|
||||
.set(listen_port)
|
||||
.expect("StorageController::listen_port is only set here");
|
||||
let socket_addr = listen
|
||||
.parse()
|
||||
.expect("listen address is a valid socket address");
|
||||
self.listen
|
||||
.set(socket_addr)
|
||||
.expect("StorageController::listen is only set here");
|
||||
|
||||
// Do we remove the pid file on stop?
|
||||
let pg_started = self.is_postgres_running().await?;
|
||||
@@ -510,15 +500,20 @@ impl StorageController {
|
||||
drop(client);
|
||||
conn.await??;
|
||||
|
||||
let addr = format!("{}:{}", host, listen_port);
|
||||
let listen = self
|
||||
.listen
|
||||
.get()
|
||||
.expect("cell is set earlier in this function");
|
||||
let address_for_peers = Uri::builder()
|
||||
.scheme(scheme)
|
||||
.authority(addr.clone())
|
||||
.scheme("http")
|
||||
.authority(format!("{}:{}", listen.ip(), listen.port()))
|
||||
.path_and_query("")
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut args = vec![
|
||||
"-l",
|
||||
&listen.to_string(),
|
||||
"--dev",
|
||||
"--database-url",
|
||||
&database_url,
|
||||
@@ -535,14 +530,6 @@ impl StorageController {
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
match scheme {
|
||||
"http" => args.extend(["--listen".to_string(), addr]),
|
||||
"https" => args.extend(["--listen-https".to_string(), addr]),
|
||||
_ => {
|
||||
panic!("Unexpected url scheme in control_plane_api: {scheme}");
|
||||
}
|
||||
}
|
||||
|
||||
if self.config.start_as_candidate {
|
||||
args.push("--start-as-candidate".to_string());
|
||||
}
|
||||
@@ -555,10 +542,6 @@ impl StorageController {
|
||||
args.push("--use-https-safekeeper-api".to_string());
|
||||
}
|
||||
|
||||
if self.config.use_local_compute_notifications {
|
||||
args.push("--use-local-compute-notifications".to_string());
|
||||
}
|
||||
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
@@ -587,20 +570,6 @@ impl StorageController {
|
||||
args.push(format!("--split-threshold={split_threshold}"))
|
||||
}
|
||||
|
||||
if let Some(max_split_shards) = self.config.max_split_shards.as_ref() {
|
||||
args.push(format!("--max-split-shards={max_split_shards}"))
|
||||
}
|
||||
|
||||
if let Some(initial_split_threshold) = self.config.initial_split_threshold.as_ref() {
|
||||
args.push(format!(
|
||||
"--initial-split-threshold={initial_split_threshold}"
|
||||
))
|
||||
}
|
||||
|
||||
if let Some(initial_split_shards) = self.config.initial_split_shards.as_ref() {
|
||||
args.push(format!("--initial-split-shards={initial_split_shards}"))
|
||||
}
|
||||
|
||||
if let Some(lag) = self.config.max_secondary_lag_bytes.as_ref() {
|
||||
args.push(format!("--max-secondary-lag-bytes={lag}"))
|
||||
}
|
||||
@@ -621,8 +590,6 @@ impl StorageController {
|
||||
args.push("--timelines-onto-safekeepers".to_string());
|
||||
}
|
||||
|
||||
println!("Starting storage controller");
|
||||
|
||||
background_process::start_process(
|
||||
COMMAND,
|
||||
&instance_dir,
|
||||
@@ -749,26 +716,30 @@ impl StorageController {
|
||||
{
|
||||
// In the special case of the `storage_controller start` subcommand, we wish
|
||||
// to use the API endpoint of the newly started storage controller in order
|
||||
// to pass the readiness check. In this scenario [`Self::listen_port`] will
|
||||
// be set (see [`Self::start`]).
|
||||
// to pass the readiness check. In this scenario [`Self::listen`] will be set
|
||||
// (see [`Self::start`]).
|
||||
//
|
||||
// Otherwise, we infer the storage controller api endpoint from the configured
|
||||
// control plane API.
|
||||
let port = if let Some(port) = self.listen_port.get() {
|
||||
*port
|
||||
let url = if let Some(socket_addr) = self.listen.get() {
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
socket_addr.ip().to_canonical(),
|
||||
socket_addr.port()
|
||||
))
|
||||
.unwrap()
|
||||
} else {
|
||||
self.env.control_plane_api.port().unwrap()
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
))
|
||||
.unwrap()
|
||||
};
|
||||
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let url = Url::from_str(&format!(
|
||||
"{}://{}:{port}/{path}",
|
||||
self.env.control_plane_api.scheme(),
|
||||
self.env.control_plane_api.host_str().unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let mut builder = self.client.request(method, url);
|
||||
if let Some(body) = body {
|
||||
builder = builder.json(&body)
|
||||
|
||||
@@ -20,7 +20,7 @@ use pageserver_api::models::{
|
||||
};
|
||||
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
|
||||
use pageserver_client::mgmt_api::{self};
|
||||
use reqwest::{Certificate, Method, StatusCode, Url};
|
||||
use reqwest::{Method, StatusCode, Url};
|
||||
use storage_controller_client::control_api::Client;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
@@ -274,7 +274,7 @@ struct Cli {
|
||||
jwt: Option<String>,
|
||||
|
||||
#[arg(long)]
|
||||
/// Trusted root CA certificates to use in https APIs.
|
||||
/// Trusted root CA certificate to use in https APIs.
|
||||
ssl_ca_file: Option<PathBuf>,
|
||||
|
||||
#[command(subcommand)]
|
||||
@@ -385,25 +385,19 @@ where
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let ssl_ca_certs = match &cli.ssl_ca_file {
|
||||
let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone());
|
||||
|
||||
let ssl_ca_cert = match &cli.ssl_ca_file {
|
||||
Some(ssl_ca_file) => {
|
||||
let buf = tokio::fs::read(ssl_ca_file).await?;
|
||||
Certificate::from_pem_bundle(&buf)?
|
||||
Some(reqwest::Certificate::from_pem(&buf)?)
|
||||
}
|
||||
None => Vec::new(),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client.build()?;
|
||||
|
||||
let storcon_client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());
|
||||
|
||||
let mut trimmed = cli.api.to_string();
|
||||
trimmed.pop();
|
||||
let vps_client = mgmt_api::Client::new(http_client.clone(), trimmed, cli.jwt.as_deref());
|
||||
let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref(), ssl_ca_cert)?;
|
||||
|
||||
match cli.command {
|
||||
Command::NodeRegister {
|
||||
@@ -1056,7 +1050,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
const DEFAULT_MIGRATE_CONCURRENCY: usize = 8;
|
||||
let mut stream = futures::stream::iter(moves)
|
||||
.map(|mv| {
|
||||
let client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());
|
||||
let client = Client::new(cli.api.clone(), cli.jwt.clone());
|
||||
async move {
|
||||
client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
|
||||
@@ -21,7 +21,6 @@ in this repository.
|
||||
- [WAL Redo](./pageserver-walredo.md)
|
||||
- [Page cache](./pageserver-pagecache.md)
|
||||
- [Storage](./pageserver-storage.md)
|
||||
- [Compaction](./pageserver-compaction.md)
|
||||
- [Processing a GetPage request](./pageserver-processing-getpage.md)
|
||||
- [Processing WAL](./pageserver-processing-wal.md)
|
||||
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
# Pageserver Compaction
|
||||
|
||||
Lifted from <https://www.notion.so/neondatabase/Rough-Notes-on-Compaction-1baf189e004780859e65ef63b85cfa81?pvs=4>.
|
||||
|
||||
Updated 2025-03-26.
|
||||
|
||||
## Pages and WAL
|
||||
|
||||
Postgres stores data in 8 KB pages, identified by a page number.
|
||||
|
||||
The WAL contains a sequence of page writes: either images (complete page contents) or deltas (patches applied to images). Each write is identified by its byte position in the WAL, aka LSN.
|
||||
|
||||
Each page version is thus identified by page@LSN. Postgres may read pages at past LSNs.
|
||||
|
||||
Pageservers ingest WAL by writing WAL records into a key/value store keyed by page@LSN.
|
||||
|
||||
Pageservers materialize pages for Postgres reads by finding the most recent page image and applying all subsequent page deltas, up to the read LSN.
|
||||
|
||||
## Compaction: Why?
|
||||
|
||||
Pageservers store page@LSN keys in a key/value store using a custom variant of an LSM tree. Each timeline on each tenant shard has its own LSM tree.
|
||||
|
||||
When Pageservers write new page@LSN entries, they are appended unordered to an ephemeral layer file. When the ephemeral layer file exceeds `checkpoint_distance` (default 256 MB), the key/value pairs are sorted by key and written out to a layer file (for efficient lookups).
|
||||
|
||||
As WAL writes continue, more layer files accumulate.
|
||||
|
||||
Reads must search through the layer files to find the page’s image and deltas. The more layer files accumulate, the more la yer files reads must search through before they find a page image, aka read amplification.
|
||||
|
||||
Compaction’s job is to:
|
||||
|
||||
- Reduce read amplification by reorganizing and combining layer files.
|
||||
- Remove old garbage from layer files.
|
||||
|
||||
As part of this, it may combine several page deltas into a single page image where possible.
|
||||
|
||||
## Compaction: How?
|
||||
|
||||
Neon uses a non-standard variant of an LSM tree made up of two levels of layer files: L0 and L1.
|
||||
|
||||
Compaction runs in two phases: L0→L1 compaction, and L1 image compaction.
|
||||
|
||||
L0 contains a stack of L0 layers at decreasing LSN ranges. These have been flushed sequentially from ephemeral layers. Each L0 layer covers the entire page space (page 0 to ~infinity) and the LSN range that was ingested into it. L0 layers are therefore particularly bad for read amp, since every read must search all L0 layers below the read LSN. For example:
|
||||
|
||||
```
|
||||
| Page 0-99 @ LSN 0400-04ff |
|
||||
| Page 0-99 @ LSN 0300-03ff |
|
||||
| Page 0-99 @ LSN 0200-02ff |
|
||||
| Page 0-99 @ LSN 0100-01ff |
|
||||
| Page 0-99 @ LSN 0000-00ff |
|
||||
```
|
||||
|
||||
L0→L1 compaction takes the bottom-most chunk of L0 layer files of between `compaction_threshold` (default 10) and `compaction_upper_limit` (default 20) layers. It uses merge-sort to write out sorted L1 delta layers of size `compaction_target_size` (default 128 MB).
|
||||
|
||||
L1 typically consists of a “bed” of image layers with materialized page images at a specific LSN, and then delta layers of various page/LSN ranges above them with page deltas. For example:
|
||||
|
||||
```
|
||||
Delta layers: | 30-84@0310-04ff |
|
||||
Delta layers: | 10-42@0200-02ff | | 65-92@0174-02aa |
|
||||
Image layers: | 0-39@0100 | 40-79@0100 | 80-99@0100 |
|
||||
```
|
||||
|
||||
L1 image compaction scans across the L1 keyspace at some LSN, materializes page images by reading the image and delta layers below the LSN (via vectored reads), and writes out new sorted image layers of roughly size `compaction_target_size` (default 128 MB) at that LSN.
|
||||
|
||||
Layer files below the new image files’ LSN can be garbage collected when they are no longer needed for PITR.
|
||||
|
||||
Even though the old layer files are not immediately garbage collected, the new image layers help with read amp because reads can stop traversing the layer stack as soon as they encounter a page image.
|
||||
|
||||
## Compaction: When?
|
||||
|
||||
Pageservers run a `compaction_loop` background task for each tenant shard. Every `compaction_period` (default 20 seconds) it will wake up and check if any of the shard’s timelines need compaction. Additionally, L0 layer flushes will eagerly wake the compaction loop if the L0 count exceeds `compaction_threshold` (default 10).
|
||||
|
||||
L0 compaction runs if the number of L0 layers exceeds `compaction_threshold` (default 10).
|
||||
|
||||
L1 image compaction runs across sections of the L1 keyspace that have at least `image_creation_threshold` (default 3) delta layers overlapping image layers.
|
||||
|
||||
At most `CONCURRENT_BACKGROUND_TASKS` (default 3 / 4 * CPUs = 6) background tasks can run concurrently on a Pageserver, including compaction. Further compaction tasks must wait.
|
||||
|
||||
Because L0 layers cause the most read amp (they overlap the entire keyspace and only contain page deltas), they are aggressively compacted down:
|
||||
|
||||
- L0 is compacted down across all tenant timelines before L1 compaction is attempted (`compaction_l0_first`).
|
||||
- L0 compaction uses a separate concurrency limit of `CONCURRENT_L0_COMPACTION_TASKS` (default 3 / 4 * CPUs = 6) to avoid waiting for other tasks (`compaction_l0_semaphore`).
|
||||
- If L0 compaction is needed on any tenant timeline, L1 image compaction will yield to start an immediate L0 compaction run (except for compaction run via admin APIs).
|
||||
|
||||
## Backpressure
|
||||
|
||||
With sustained heavy write loads, new L0 layers may be flushed faster than they can be compacted down. This can cause an unbounded buildup of read amplification and compaction debt, which can take hours to resolve even after the writes stop.
|
||||
|
||||
To avoid this and allow compaction to keep up, layer flushes will slow writes down to apply backpressure on the workload:
|
||||
|
||||
- At `l0_flush_delay_threshold` (default 30) L0 layers, layer flushes are delayed by the flush duration, such that they take 2x as long.
|
||||
- At `l0_flush_stall_threshold` (default disabled) L0 layers, layer flushes stall entirely until the L0 count falls back below the threshold. This is currently disabled because we don’t trust L0 compaction to be responsive enough.
|
||||
|
||||
This backpressure is propagated to the compute by waiting for layer flushes when WAL ingestion rolls the ephemeral layer. The compute will significantly slow down WAL writes at:
|
||||
|
||||
- `max_replication_write_lag` (default 500 MB), when Pageserver WAL ingestion lags
|
||||
- `max_replication_flush_lag` (default 10 GB), when Pageserver L0 flushes lag
|
||||
|
||||
Combined, this means that the compute will backpressure when there are 30 L0 layers (30 * 256 MB = 7.7 GB) and the Pageserver WAL ingestion lags the compute by 500 MB, for a total of ~8 GB L0+ephemeral compaction debt on a single shard.
|
||||
|
||||
Since we only delay L0 flushes by 2x when backpressuring, and haven’t enabled stalls, it is still possible for read amp to increase unbounded if compaction is too slow (although we haven’t seen this in practice). But this is considered better than stalling flushes and causing unavailability for as long as it takes L0 compaction to react, since we don’t trust it to be fast enough — at the expense of continually increasing read latency and CPU usage for this tenant. We should either enable stalls when we have enough confidence in L0 compaction, or scale the flush delay by the number of L0 layers to apply increasing backpressure.
|
||||
|
||||
## Circuit Breaker
|
||||
|
||||
Compaction can fail, often repeatedly. This can happen e.g. due to data corruption, faulty hardware, S3 outages, etc.
|
||||
|
||||
If compaction fails, the compaction loop will naïvely try and fail again almost immediately. It may only fail after doing a significant amount of wasted work, while holding onto the background task semaphore.
|
||||
|
||||
To avoid repeatedly doing wasted work and starving out other compaction jobs, each tenant has a compaction circuit breaker. After 5 repeated compaction failures, the circuit breaker trips and disables compaction for the next 24 hours, before resetting the breaker and trying again. This disables compaction across all tenant timelines (faulty or not).
|
||||
|
||||
Disabling compaction for a long time is dangerous, since it can lead to unbounded read amp and compaction debt, and continuous workload backpressure. However, continually failing would not help either. Tripped circuit breakers trigger an alert and must be investigated promptly.
|
||||
@@ -1,196 +0,0 @@
|
||||
|
||||
## Summary
|
||||
|
||||
This is a retrospective RFC to document the design of the `storage-controller` service.
|
||||
|
||||
This service manages the physical mapping of Tenants and Timelines to Pageservers and Safekeepers. It
|
||||
acts as the API for "storage" as an abstract concept: enabling other parts of the system to reason
|
||||
about things like creating/deleting tenants and timelines without having to understand exactly which
|
||||
pageserver and safekeeper to communicate, or any subtle rules about how to orchestrate these things.
|
||||
|
||||
The storage controller was implemented in the first half of 2024 as an essential part
|
||||
of storage sharding, especially [shard splitting](032-shard-splitting.md).
|
||||
|
||||
It initially managed only pageservers, but has extended in 2025 to also manage safekeepers. In
|
||||
some places you may seen unqualified references to 'nodes' -- those are pageservers.
|
||||
|
||||
## Design Choices
|
||||
|
||||
### Durability
|
||||
|
||||
We rely on an external postgres for all durable state. No local storage is used.
|
||||
|
||||
We avoid any unnecessary I/O to durable storage. For example:
|
||||
- most tracking of in-flight changes to the system is done in-memory rather than recording progress/steps in a database
|
||||
- When migrating tenant shards between pageservers we only touch the database to increment generation numbers,
|
||||
we do not persist the total state of a tenant shard.
|
||||
|
||||
Being frugal with database I/O has two benefits:
|
||||
- It avoids the database becoming a practical scaling bottleneck (we expect in-memory scale issues to be hit
|
||||
before we hit e.g. transactions-per-second issues)
|
||||
- It reduces cost when using a cloud database service to run the controller's postgres database.
|
||||
|
||||
The trade-off is that there is a "bootstrapping" problem: a controller can't be deployed in isolation, one
|
||||
must first have some existing database system. In practice, we expect that Neon is deployed in one of the
|
||||
following ways:
|
||||
- into a cloud which has a postgres service that can be used to run the controller
|
||||
- into a mature on-prem environment that has existing facilities for running databases
|
||||
- into a test/dev environment where a simple one-node vanilla postgres installation is sufficient
|
||||
|
||||
### Consensus
|
||||
|
||||
The controller does _not_ implement any strong consensus mechanism of its own. Instead:
|
||||
- Where strong consistency is required (for example, for pageserver generation numbers), this
|
||||
responsibility is delegated to a transaction in our postgres database.
|
||||
- Highly available deploys are done using a simple in-database record of what controller instances
|
||||
are available, distinguished by timestamps, rather than having controllers directly negotiate a leader.
|
||||
|
||||
Avoiding strong consensus among controller processes is a cost saving (we avoid running three controllers
|
||||
all the time), and simplifies implementation (we do not have to phrase all configuration changes as e.g raft
|
||||
transactions).
|
||||
|
||||
The trade-off is that under some circumstances a controller with partial network isolation can cause availability
|
||||
issues in the cluster, by making changes to pageserver state that might disagree with what the "true" active
|
||||
controller is trying to do. The impact of this is bounded by our `controllers` database table, that enables
|
||||
a rogue node to eventually realise that it is not the leader and step down. If a rogue node can't reach
|
||||
the database, then it implicitly stops making progress. A rogue controller cannot durably damage the system
|
||||
because pageserver data and safekeeper configs are protected by generation numbers that are only updated
|
||||
via postgres transactions (i.e. no controller "trusts itself" to independently make decisions about generations).
|
||||
|
||||
### Scale
|
||||
|
||||
We design for high but not unlimited scale. The memory footprint of each tenant shard is small (~8kB), so
|
||||
it is realistic to scale up to a million attached shards on a server with modest resources. Tenants in
|
||||
a detached state (i.e. not active on pageservers) do not need to be managed by storage controller, and can
|
||||
be relegated from memory to the database.
|
||||
|
||||
Typically, a tenant shard is updated about once a week, when we do a deploy. During deploys, we relocate
|
||||
a few thousand tenants from each pageserver while it is restarted, so it is extremely rare for the controller
|
||||
to have to do O(N) work (on all shards at once).
|
||||
|
||||
There are places where we do O(N) work:
|
||||
- On normal startup, when loading from the database into memory
|
||||
- On unclean startup (with no handover of observed state from a previous controller), where we will
|
||||
scan all shards on all pageservers.
|
||||
|
||||
It is important that these locations are written efficiently. At high scale we should still expect runtimes
|
||||
of the order tens of seconds to complete a storage controller start.
|
||||
|
||||
When the practical scale limit of a single storage controller is reached, just deploy another one with its
|
||||
own pageservers & safekeepers: each controller+its storage servers should be thought of as a logical cluster
|
||||
or "cell" of storage.
|
||||
|
||||
# High Level Design
|
||||
|
||||
The storage controller is an in-memory system (i.e. state for all attached
|
||||
tenants is held in memory _as well as_ being represented in durable postgres storage).
|
||||
|
||||
## Infrastructure
|
||||
|
||||
The storage controller is an async rust binary using tokio.
|
||||
|
||||
The storage controller is built around the `Service` type. This implements
|
||||
all the entry points for the outside world's interaction with the controller (HTTP handlers are mostly thin wrappers of service functions),
|
||||
and holds most in-memory state (e.g. the list of tenant shards).
|
||||
|
||||
The state is held in a `ServiceInner` wrapped in a RwLock. This monolithic
|
||||
lock is used to simplify reasoning about code that mutates state: each function that takes a write lock may be thought of as a serializable transaction on the in-memory state. This lock is clearly a bottleneck, but
|
||||
nevertheless is scalable to managing millions of tenants.
|
||||
|
||||
Persistent state is held in a postgres database, and we use the `diesel` crate to provide database client functionality. All database access is wrapped in the `Persistence` type -- this makes it easy to understand which
|
||||
code is touching the database. The database is only used when necessary, i.e. for state that cannot be recovered another way. For example, we do not store the secondary pageserver locations of tenant shards in the database, rather we learn these at startup from running pageservers, and/or make scheduling decisions to fill in the gaps. This adds some complexity, but massively reduces the load on the database, and enables running the storage controller with a very cheap postgres instance.
|
||||
|
||||
## Pageserver tenant scheduling & reconciliation
|
||||
|
||||
### Intent & observed state
|
||||
|
||||
Each tenant shard is represented by type `TenantShard`, which has an 'intent' and 'observed' state. Setting the
|
||||
intent state is called _scheduling_, and doing remote I/O to make observed
|
||||
state match intent state is called _reconciliation_.
|
||||
|
||||
The `Scheduler` type is responsible for making choices about the intent
|
||||
state, such as choosing a pageserver for a new tenant shard, or assigning
|
||||
a replacement pageserver when the original one fails.
|
||||
|
||||
The observed state is updated after tenant reconciliation (see below), and
|
||||
has the concept of a `None` state for a pageserver, indicating unknown state. This is used to ensure that we can safely clean up after we start
|
||||
but do not finish a remote call to a pageserver, or if a pageserver restarts and we are uncertain of its state.
|
||||
|
||||
### Tenant Reconciliation
|
||||
|
||||
The `Reconciler` type is responsible for updating pageservers to achieve
|
||||
the intent state. It is instantiated when `Service` determines that a shard requires reconciliation, and owned by a background tokio task that
|
||||
runs it to completion. Reconciler does not have access to the `Service` state: it is populated with a snapshot of relevant information when constructed, and submits is results to a channel that `Service` consumes
|
||||
to update the tenant shard's observed state.
|
||||
|
||||
The Reconciler does have access to the database, but only uses it for
|
||||
a single purpose: updating shards' generation numbers immediately before
|
||||
attaching them to a pageserver.
|
||||
|
||||
Operations that change a tenant's scheduling will spawn a reconciler if
|
||||
necessary, and there is also a background loop which checks every shard
|
||||
for the need to reconcile -- this background loop ensures eventual progress
|
||||
if some earlier reconciliations failed for some reason.
|
||||
|
||||
The reconciler has a general purpose code path which will attach/detach from pageservers as necessary, and a special case path for live migrations. The live migration case is more common in practice, and is taken whenever the current observed state indicates that we have a healthy attached location to migrate from. This implements live migration as described in the earlier [live migration RFC](028-pageserver-migration.md).
|
||||
|
||||
### Scheduling optimisation
|
||||
|
||||
During the periodic background reconciliation loop, the controller also
|
||||
performance _scheduling optimization_. This is the process of looking for
|
||||
shards that are in sub-optimal locations, and moving them.
|
||||
|
||||
Typically, this means:
|
||||
- Shards attached outside their preferred AZ (e.g. after a node failure), to migrate them back to their preferred AZ
|
||||
- Shards attached on the same pageserver as some other shards in the same
|
||||
tenant, to migrate them elsewhere (e.g. after a shard split)
|
||||
|
||||
Scheduling optimisation is a multi-step process to ensure graceful cutovers, e.g. by creating new secondary location, waiting for it to
|
||||
warm up, then cutting over. This is not done as an explicit queue
|
||||
of operations, but rather by iteratively calling the optimisation
|
||||
function, which will recognise each intervening state as something
|
||||
that can generate the next optimisation.
|
||||
|
||||
### Pageserver heartbeats and failure
|
||||
|
||||
The `Heartbeater` type is responsible for detecting when a pageserver
|
||||
becomes unavailable. This is fed back into `Service` for action: when
|
||||
a pageserver is marked unavailable, tenant shards on that pageserver are
|
||||
rescheduled and Reconcilers are spawned to cut them over to their new location.
|
||||
|
||||
## Pageserver timeline CRUD operations
|
||||
|
||||
By CRUD operations, we mean creating and deleting timelines. The authoritative storage for which timelines exist on the pageserver
|
||||
is in S3, and is governed by the pageserver's system of generation
|
||||
numbers. Because a shard can be attached to multiple pageservers
|
||||
concurrently, we need to handle this when doing timeline CRUD operations:
|
||||
- A timeline operation is only persistent if _after_ the ack from a pageserver, that pageserver's generation is still the latest.
|
||||
- For deletions in particular, they are only persistent if _all_ attached
|
||||
locations have acked the deletion operation, since if only the latest one
|
||||
has acked then the timeline could still return from the dead if some old-generation attachment writes an index for it.
|
||||
|
||||
## Zero-downtime controller deployments
|
||||
|
||||
When two storage controllers run at the same time, they coordinate via
|
||||
the database to establish one leader, and the other controller may proxy
|
||||
requests to this leader
|
||||
|
||||
See [Storage controller restarts RFC](037-storage-controller-restarts.md).
|
||||
|
||||
Note that this is not a strong consensus mechanism: the controller must also survive split-brain situations. This is respected by code that
|
||||
e.g. increments version numbers, which uses database transactions that
|
||||
check the expected value before modifying it. A split-brain situation can
|
||||
impact availability (e.g. if two controllers are fighting over where to
|
||||
attach a shard), but it should never impact durability and data integrity.
|
||||
|
||||
## Graceful drain & fill of pageservers during deploys
|
||||
|
||||
The storage controller has functionality for draining + filling pageservers
|
||||
while deploying new pageserver binaries, so that clients are not actively
|
||||
using a pageserver while it restarts.
|
||||
|
||||
See [Graceful restarts RFC](033-storage-controller-drain-and-fill.md)
|
||||
|
||||
## Safekeeper timeline scheduling
|
||||
|
||||
This is currently under development, see [Safekeeper dynamic membership change RFC](035-safekeeper-dynamic-membership-change.md).
|
||||
@@ -275,18 +275,6 @@ pub enum ComputeMode {
|
||||
Replica,
|
||||
}
|
||||
|
||||
impl ComputeMode {
|
||||
/// Convert the compute mode to a string that can be used to identify the type of compute,
|
||||
/// which means that if it's a static compute, the LSN will not be included.
|
||||
pub fn to_type_str(&self) -> &'static str {
|
||||
match self {
|
||||
ComputeMode::Primary => "primary",
|
||||
ComputeMode::Static(_) => "static",
|
||||
ComputeMode::Replica => "replica",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Log level for audit logging
|
||||
/// Disabled, log, hipaa
|
||||
/// Default is Disabled
|
||||
|
||||
@@ -6,7 +6,6 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
arc-swap.workspace = true
|
||||
bytes.workspace = true
|
||||
camino.workspace = true
|
||||
fail.workspace = true
|
||||
@@ -19,15 +18,14 @@ pprof.workspace = true
|
||||
regex.workspace = true
|
||||
routerify.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
rustls.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_path_to_error.workspace = true
|
||||
serde.workspace = true
|
||||
thiserror.workspace = true
|
||||
tracing.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-rustls.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
|
||||
@@ -91,14 +91,14 @@ impl Server {
|
||||
Ok(tls_stream) => tls_stream,
|
||||
Err(err) => {
|
||||
if !suppress_io_error(&err) {
|
||||
info!(%remote_addr, "Failed to accept TLS connection: {err:#}");
|
||||
info!("Failed to accept TLS connection: {err:#}");
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(err) = Self::serve_connection(tls_stream, service, cancel).await {
|
||||
if !suppress_hyper_error(&err) {
|
||||
info!(%remote_addr, "Failed to serve HTTPS connection: {err:#}");
|
||||
info!("Failed to serve HTTPS connection: {err:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -106,7 +106,7 @@ impl Server {
|
||||
// Handle HTTP connection.
|
||||
if let Err(err) = Self::serve_connection(tcp_stream, service, cancel).await {
|
||||
if !suppress_hyper_error(&err) {
|
||||
info!(%remote_addr, "Failed to serve HTTP connection: {err:#}");
|
||||
info!("Failed to serve HTTP connection: {err:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,124 +1,21 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use anyhow::Context;
|
||||
use arc_swap::ArcSwap;
|
||||
use camino::Utf8Path;
|
||||
use rustls::{
|
||||
pki_types::{CertificateDer, PrivateKeyDer},
|
||||
server::{ClientHello, ResolvesServerCert},
|
||||
sign::CertifiedKey,
|
||||
};
|
||||
use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
|
||||
pub async fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
|
||||
let cert_data = tokio::fs::read(filename)
|
||||
.await
|
||||
.context(format!("failed reading certificate file {filename:?}"))?;
|
||||
let mut reader = std::io::Cursor::new(&cert_data);
|
||||
pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
let cert_chain = rustls_pemfile::certs(&mut reader)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.context(format!("failed parsing certificate from file {filename:?}"))?;
|
||||
|
||||
Ok(cert_chain)
|
||||
Ok(rustls_pemfile::certs(&mut reader).collect::<Result<Vec<_>, _>>()?)
|
||||
}
|
||||
|
||||
pub async fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
|
||||
let key_data = tokio::fs::read(filename)
|
||||
.await
|
||||
.context(format!("failed reading private key file {filename:?}"))?;
|
||||
let mut reader = std::io::Cursor::new(&key_data);
|
||||
pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
let key = rustls_pemfile::private_key(&mut reader)
|
||||
.context(format!("failed parsing private key from file {filename:?}"))?;
|
||||
let key = rustls_pemfile::private_key(&mut reader)?;
|
||||
|
||||
key.ok_or(anyhow::anyhow!(
|
||||
"no private key found in {}",
|
||||
filename.as_str(),
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn load_certified_key(
|
||||
key_filename: &Utf8Path,
|
||||
cert_filename: &Utf8Path,
|
||||
) -> anyhow::Result<CertifiedKey> {
|
||||
let cert_chain = load_cert_chain(cert_filename).await?;
|
||||
let key = load_private_key(key_filename).await?;
|
||||
|
||||
let key = rustls::crypto::ring::default_provider()
|
||||
.key_provider
|
||||
.load_private_key(key)?;
|
||||
|
||||
let certified_key = CertifiedKey::new(cert_chain, key);
|
||||
certified_key.keys_match()?;
|
||||
Ok(certified_key)
|
||||
}
|
||||
|
||||
/// Implementation of [`rustls::server::ResolvesServerCert`] which reloads certificates from
|
||||
/// the disk periodically.
|
||||
#[derive(Debug)]
|
||||
pub struct ReloadingCertificateResolver {
|
||||
certified_key: ArcSwap<CertifiedKey>,
|
||||
}
|
||||
|
||||
impl ReloadingCertificateResolver {
|
||||
/// Creates a new Resolver by loading certificate and private key from FS and
|
||||
/// creating tokio::task to reload them with provided reload_period.
|
||||
pub async fn new(
|
||||
key_filename: &Utf8Path,
|
||||
cert_filename: &Utf8Path,
|
||||
reload_period: Duration,
|
||||
) -> anyhow::Result<Arc<Self>> {
|
||||
let this = Arc::new(Self {
|
||||
certified_key: ArcSwap::from_pointee(
|
||||
load_certified_key(key_filename, cert_filename).await?,
|
||||
),
|
||||
});
|
||||
|
||||
tokio::spawn({
|
||||
let weak_this = Arc::downgrade(&this);
|
||||
let key_filename = key_filename.to_owned();
|
||||
let cert_filename = cert_filename.to_owned();
|
||||
async move {
|
||||
let start = tokio::time::Instant::now() + reload_period;
|
||||
let mut interval = tokio::time::interval_at(start, reload_period);
|
||||
let mut last_reload_failed = false;
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let this = match weak_this.upgrade() {
|
||||
Some(this) => this,
|
||||
None => break, // Resolver has been destroyed, exit.
|
||||
};
|
||||
match load_certified_key(&key_filename, &cert_filename).await {
|
||||
Ok(new_certified_key) => {
|
||||
if new_certified_key.cert == this.certified_key.load().cert {
|
||||
tracing::debug!("Certificate has not changed since last reloading");
|
||||
} else {
|
||||
tracing::info!("Certificate has been reloaded");
|
||||
this.certified_key.store(Arc::new(new_certified_key));
|
||||
}
|
||||
last_reload_failed = false;
|
||||
}
|
||||
Err(err) => {
|
||||
// Note: Reloading certs may fail if it conflicts with the script updating
|
||||
// the files at the same time. Warn only if the error is persistent.
|
||||
if last_reload_failed {
|
||||
tracing::warn!("Error reloading certificate: {err:#}");
|
||||
} else {
|
||||
tracing::info!("Error reloading certificate: {err:#}");
|
||||
}
|
||||
last_reload_failed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(this)
|
||||
}
|
||||
}
|
||||
|
||||
impl ResolvesServerCert for ReloadingCertificateResolver {
|
||||
fn resolve(&self, _client_hello: ClientHello<'_>) -> Option<Arc<CertifiedKey>> {
|
||||
Some(self.certified_key.load_full())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@ postgres_backend.workspace = true
|
||||
nix = {workspace = true, optional = true}
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
bincode.workspace = true
|
||||
|
||||
@@ -61,9 +61,6 @@ pub struct ConfigToml {
|
||||
pub listen_https_addr: Option<String>,
|
||||
pub ssl_key_file: Utf8PathBuf,
|
||||
pub ssl_cert_file: Utf8PathBuf,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub ssl_cert_reload_period: Duration,
|
||||
pub ssl_ca_file: Option<Utf8PathBuf>,
|
||||
pub availability_zone: Option<String>,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub wait_lsn_timeout: Duration,
|
||||
@@ -134,7 +131,6 @@ pub struct ConfigToml {
|
||||
pub load_previous_heatmap: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub generate_unarchival_heatmap: Option<bool>,
|
||||
pub tracing: Option<Tracing>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -192,54 +188,6 @@ pub enum GetVectoredConcurrentIo {
|
||||
SidecarTask,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct Ratio {
|
||||
pub numerator: usize,
|
||||
pub denominator: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct OtelExporterConfig {
|
||||
pub endpoint: String,
|
||||
pub protocol: OtelExporterProtocol,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum OtelExporterProtocol {
|
||||
Grpc,
|
||||
HttpBinary,
|
||||
HttpJson,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct Tracing {
|
||||
pub sampling_ratio: Ratio,
|
||||
pub export_config: OtelExporterConfig,
|
||||
}
|
||||
|
||||
impl From<&OtelExporterConfig> for tracing_utils::ExportConfig {
|
||||
fn from(val: &OtelExporterConfig) -> Self {
|
||||
tracing_utils::ExportConfig {
|
||||
endpoint: Some(val.endpoint.clone()),
|
||||
protocol: val.protocol.into(),
|
||||
timeout: val.timeout,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OtelExporterProtocol> for tracing_utils::Protocol {
|
||||
fn from(val: OtelExporterProtocol) -> Self {
|
||||
match val {
|
||||
OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,
|
||||
OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,
|
||||
OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod statvfs {
|
||||
pub mod mock {
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -334,6 +282,12 @@ pub struct TenantConfigToml {
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
|
||||
/// to avoid deadlock. 0 to disable. Disabled by default.
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
|
||||
/// layer. This is a temporary backpressure mechanism which should be removed once
|
||||
/// l0_flush_{delay,stall}_threshold is fully enabled.
|
||||
///
|
||||
/// TODO: this is no longer enabled, remove it when the config option is no longer set.
|
||||
pub l0_flush_wait_upload: bool,
|
||||
// Determines how much history is retained, to allow
|
||||
// branching and read replicas at an older point in time.
|
||||
// The unit is #of bytes of WAL.
|
||||
@@ -485,8 +439,6 @@ impl Default for ConfigToml {
|
||||
listen_https_addr: (None),
|
||||
ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
|
||||
ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
|
||||
ssl_cert_reload_period: Duration::from_secs(60),
|
||||
ssl_ca_file: None,
|
||||
availability_zone: (None),
|
||||
wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
|
||||
.expect("cannot parse default wait lsn timeout")),
|
||||
@@ -586,7 +538,6 @@ impl Default for ConfigToml {
|
||||
validate_wal_contiguity: None,
|
||||
load_previous_heatmap: None,
|
||||
generate_unarchival_heatmap: None,
|
||||
tracing: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -623,6 +574,8 @@ pub mod tenant_conf_defaults {
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
|
||||
crate::models::CompactionAlgorithm::Legacy;
|
||||
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
|
||||
// Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
|
||||
@@ -669,6 +622,7 @@ impl Default for TenantConfigToml {
|
||||
compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
|
||||
l0_flush_delay_threshold: None,
|
||||
l0_flush_stall_threshold: None,
|
||||
l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
|
||||
gc_horizon: DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
.expect("cannot parse default gc period"),
|
||||
|
||||
@@ -523,6 +523,8 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_stall_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_wait_upload: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_horizon: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_period: FieldPatch<String>,
|
||||
@@ -612,6 +614,9 @@ pub struct TenantConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub l0_flush_wait_upload: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_horizon: Option<u64>,
|
||||
|
||||
@@ -707,6 +712,7 @@ impl TenantConfig {
|
||||
mut compaction_l0_semaphore,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut l0_flush_wait_upload,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
@@ -759,6 +765,7 @@ impl TenantConfig {
|
||||
patch
|
||||
.l0_flush_stall_threshold
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch
|
||||
.gc_period
|
||||
@@ -837,6 +844,7 @@ impl TenantConfig {
|
||||
compaction_l0_semaphore,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
@@ -903,6 +911,9 @@ impl TenantConfig {
|
||||
l0_flush_stall_threshold: self
|
||||
.l0_flush_stall_threshold
|
||||
.or(global_conf.l0_flush_stall_threshold),
|
||||
l0_flush_wait_upload: self
|
||||
.l0_flush_wait_upload
|
||||
.unwrap_or(global_conf.l0_flush_wait_upload),
|
||||
gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
|
||||
gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
|
||||
image_creation_threshold: self
|
||||
@@ -1353,12 +1364,6 @@ pub enum TimelineArchivalState {
|
||||
Unarchived,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub enum TimelineVisibilityState {
|
||||
Visible,
|
||||
Invisible,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub struct TimelineArchivalConfigRequest {
|
||||
pub state: TimelineArchivalState,
|
||||
@@ -1418,6 +1423,11 @@ pub struct TimelineInfo {
|
||||
pub last_record_lsn: Lsn,
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
|
||||
/// Legacy field, retained for one version to enable old storage controller to
|
||||
/// decode (it was a mandatory field).
|
||||
#[serde(default, rename = "latest_gc_cutoff_lsn")]
|
||||
pub _unused: Lsn,
|
||||
|
||||
/// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
|
||||
/// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
|
||||
/// as it is easier to reason about.
|
||||
@@ -1486,9 +1496,6 @@ pub struct TimelineInfo {
|
||||
|
||||
/// The status of the rel_size migration.
|
||||
pub rel_size_migration: Option<RelSizeMigration>,
|
||||
|
||||
/// Whether the timeline is invisible in synthetic size calculations.
|
||||
pub is_invisible: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
||||
@@ -8,9 +8,10 @@ license = "MIT/Apache-2.0"
|
||||
bytes.workspace = true
|
||||
fallible-iterator.workspace = true
|
||||
futures-util = { workspace = true, features = ["sink"] }
|
||||
tracing.workspace = true
|
||||
log = "0.4"
|
||||
parking_lot.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
phf = "0.11"
|
||||
postgres-protocol2 = { path = "../postgres-protocol2" }
|
||||
postgres-types2 = { path = "../postgres-types2" }
|
||||
tokio = { workspace = true, features = ["io-util", "time", "net"] }
|
||||
|
||||
@@ -6,13 +6,13 @@ use std::task::{Context, Poll};
|
||||
use bytes::BytesMut;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{Sink, Stream, ready};
|
||||
use log::{info, trace};
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::codec::Framed;
|
||||
use tokio_util::sync::PollSender;
|
||||
use tracing::{info, trace};
|
||||
|
||||
use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec};
|
||||
use crate::error::DbError;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,9 +5,9 @@ use std::sync::Arc;
|
||||
use bytes::Bytes;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{TryStreamExt, pin_mut};
|
||||
use log::debug;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::client::{CachedTypeInfo, InnerClient};
|
||||
use crate::codec::FrontendMessage;
|
||||
|
||||
@@ -7,11 +7,11 @@ use std::task::{Context, Poll};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{Stream, ready};
|
||||
use log::{Level, debug, log_enabled};
|
||||
use pin_project_lite::pin_project;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use postgres_types2::{Format, ToSql, Type};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::client::{InnerClient, Responses};
|
||||
use crate::codec::FrontendMessage;
|
||||
@@ -36,7 +36,7 @@ where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let buf = if tracing::enabled!(tracing::Level::DEBUG) {
|
||||
let buf = if log_enabled!(Level::Debug) {
|
||||
let params = params.into_iter().collect::<Vec<_>>();
|
||||
debug!(
|
||||
"executing statement {} with parameters: {:?}",
|
||||
|
||||
@@ -6,10 +6,10 @@ use std::task::{Context, Poll};
|
||||
use bytes::Bytes;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{Stream, ready};
|
||||
use log::debug;
|
||||
use pin_project_lite::pin_project;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::client::{InnerClient, Responses};
|
||||
use crate::codec::FrontendMessage;
|
||||
|
||||
@@ -558,7 +558,7 @@ async fn upload_large_enough_file(
|
||||
) -> usize {
|
||||
let header = bytes::Bytes::from_static("remote blob data content".as_bytes());
|
||||
let body = bytes::Bytes::from(vec![0u8; 1024]);
|
||||
let contents = std::iter::once(header).chain(std::iter::repeat_n(body, 128));
|
||||
let contents = std::iter::once(header).chain(std::iter::repeat(body).take(128));
|
||||
|
||||
let len = contents.clone().fold(0, |acc, next| acc + next.len());
|
||||
|
||||
|
||||
@@ -71,7 +71,6 @@ pub struct PeerInfo {
|
||||
pub ts: Instant,
|
||||
pub pg_connstr: String,
|
||||
pub http_connstr: String,
|
||||
pub https_connstr: Option<String>,
|
||||
}
|
||||
|
||||
pub type FullTransactionId = u64;
|
||||
@@ -228,8 +227,6 @@ pub struct TimelineDeleteResult {
|
||||
pub dir_existed: bool,
|
||||
}
|
||||
|
||||
pub type TenantDeleteResult = std::collections::HashMap<String, TimelineDeleteResult>;
|
||||
|
||||
fn lsn_invalid() -> Lsn {
|
||||
Lsn::INVALID
|
||||
}
|
||||
@@ -262,8 +259,6 @@ pub struct SkTimelineInfo {
|
||||
pub safekeeper_connstr: Option<String>,
|
||||
#[serde(default)]
|
||||
pub http_connstr: Option<String>,
|
||||
#[serde(default)]
|
||||
pub https_connstr: Option<String>,
|
||||
// Minimum of all active RO replicas flush LSN
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub standby_horizon: Lsn,
|
||||
|
||||
@@ -14,7 +14,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tracing.workspace = true
|
||||
tracing-opentelemetry.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
tracing-subscriber.workspace = true # For examples in docs
|
||||
|
||||
@@ -31,10 +31,10 @@
|
||||
//! .init();
|
||||
//! }
|
||||
//! ```
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
pub mod http;
|
||||
pub mod perf_span;
|
||||
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::TracerProvider;
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
//! Crutch module to work around tracing infrastructure deficiencies
|
||||
//!
|
||||
//! We wish to collect granular request spans without impacting performance
|
||||
//! by much. Ideally, we should have zero overhead for a sampling rate of 0.
|
||||
//!
|
||||
//! The approach taken by the pageserver crate is to use a completely different
|
||||
//! span hierarchy for the performance spans. Spans are explicitly stored in
|
||||
//! the request context and use a different [`tracing::Subscriber`] in order
|
||||
//! to avoid expensive filtering.
|
||||
//!
|
||||
//! [`tracing::Span`] instances record their [`tracing::Dispatch`] and, implcitly,
|
||||
//! their [`tracing::Subscriber`] at creation time. However, upon exiting the span,
|
||||
//! the global default [`tracing::Dispatch`] is used. This is problematic if one
|
||||
//! wishes to juggle different subscribers.
|
||||
//!
|
||||
//! In order to work around this, this module provides a [`PerfSpan`] type which
|
||||
//! wraps a [`Span`] and sets the default subscriber when exiting the span. This
|
||||
//! achieves the correct routing.
|
||||
//!
|
||||
//! There's also a modified version of [`tracing::Instrument`] which works with
|
||||
//! [`PerfSpan`].
|
||||
|
||||
use core::{
|
||||
future::Future,
|
||||
marker::Sized,
|
||||
mem::ManuallyDrop,
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use pin_project_lite::pin_project;
|
||||
use tracing::{Dispatch, field, span::Span};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerfSpan {
|
||||
inner: ManuallyDrop<Span>,
|
||||
dispatch: Dispatch,
|
||||
}
|
||||
|
||||
#[must_use = "once a span has been entered, it should be exited"]
|
||||
pub struct PerfSpanEntered<'a> {
|
||||
span: &'a PerfSpan,
|
||||
}
|
||||
|
||||
impl PerfSpan {
|
||||
pub fn new(span: Span, dispatch: Dispatch) -> Self {
|
||||
Self {
|
||||
inner: ManuallyDrop::new(span),
|
||||
dispatch,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record<Q: field::AsField + ?Sized, V: field::Value>(
|
||||
&self,
|
||||
field: &Q,
|
||||
value: V,
|
||||
) -> &Self {
|
||||
self.inner.record(field, value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn enter(&self) -> PerfSpanEntered {
|
||||
if let Some(ref id) = self.inner.id() {
|
||||
self.dispatch.enter(id);
|
||||
}
|
||||
|
||||
PerfSpanEntered { span: self }
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &Span {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PerfSpan {
|
||||
fn drop(&mut self) {
|
||||
// Bring the desired dispatch into scope before explicitly calling
|
||||
// the span destructor. This routes the span exit to the correct
|
||||
// [`tracing::Subscriber`].
|
||||
let _dispatch_guard = tracing::dispatcher::set_default(&self.dispatch);
|
||||
// SAFETY: ManuallyDrop in Drop implementation
|
||||
unsafe { ManuallyDrop::drop(&mut self.inner) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PerfSpanEntered<'_> {
|
||||
fn drop(&mut self) {
|
||||
assert!(self.span.inner.id().is_some());
|
||||
|
||||
let _dispatch_guard = tracing::dispatcher::set_default(&self.span.dispatch);
|
||||
self.span.dispatch.exit(&self.span.inner.id().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
pub trait PerfInstrument: Sized {
|
||||
fn instrument(self, span: PerfSpan) -> PerfInstrumented<Self> {
|
||||
PerfInstrumented {
|
||||
inner: ManuallyDrop::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
#[project = PerfInstrumentedProj]
|
||||
#[derive(Debug, Clone)]
|
||||
#[must_use = "futures do nothing unless you `.await` or poll them"]
|
||||
pub struct PerfInstrumented<T> {
|
||||
// `ManuallyDrop` is used here to to enter instrument `Drop` by entering
|
||||
// `Span` and executing `ManuallyDrop::drop`.
|
||||
#[pin]
|
||||
inner: ManuallyDrop<T>,
|
||||
span: PerfSpan,
|
||||
}
|
||||
|
||||
impl<T> PinnedDrop for PerfInstrumented<T> {
|
||||
fn drop(this: Pin<&mut Self>) {
|
||||
let this = this.project();
|
||||
let _enter = this.span.enter();
|
||||
// SAFETY: 1. `Pin::get_unchecked_mut()` is safe, because this isn't
|
||||
// different from wrapping `T` in `Option` and calling
|
||||
// `Pin::set(&mut this.inner, None)`, except avoiding
|
||||
// additional memory overhead.
|
||||
// 2. `ManuallyDrop::drop()` is safe, because
|
||||
// `PinnedDrop::drop()` is guaranteed to be called only
|
||||
// once.
|
||||
unsafe { ManuallyDrop::drop(this.inner.get_unchecked_mut()) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> PerfInstrumentedProj<'a, T> {
|
||||
/// Get a mutable reference to the [`Span`] a pinned mutable reference to
|
||||
/// the wrapped type.
|
||||
fn span_and_inner_pin_mut(self) -> (&'a mut PerfSpan, Pin<&'a mut T>) {
|
||||
// SAFETY: As long as `ManuallyDrop<T>` does not move, `T` won't move
|
||||
// and `inner` is valid, because `ManuallyDrop::drop` is called
|
||||
// only inside `Drop` of the `Instrumented`.
|
||||
let inner = unsafe { self.inner.map_unchecked_mut(|v| &mut **v) };
|
||||
(self.span, inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Future> Future for PerfInstrumented<T> {
|
||||
type Output = T::Output;
|
||||
|
||||
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let (span, inner) = self.project().span_and_inner_pin_mut();
|
||||
let _enter = span.enter();
|
||||
inner.poll(cx)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Sized> PerfInstrument for T {}
|
||||
@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
|
||||
use pageserver_api::models::*;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
pub use reqwest::Body as ReqwestBody;
|
||||
use reqwest::{IntoUrl, Method, StatusCode, Url};
|
||||
use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -39,8 +39,8 @@ pub enum Error {
|
||||
#[error("Cancelled")]
|
||||
Cancelled,
|
||||
|
||||
#[error("request timed out: {0}")]
|
||||
Timeout(String),
|
||||
#[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
|
||||
CreateClient(reqwest::Error),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -72,7 +72,24 @@ pub enum ForceAwaitLogicalSize {
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(client: reqwest::Client, mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
|
||||
pub fn new(
|
||||
mgmt_api_endpoint: String,
|
||||
jwt: Option<&str>,
|
||||
ssl_ca_cert: Option<Certificate>,
|
||||
) -> Result<Self> {
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client.build().map_err(Error::CreateClient)?;
|
||||
Ok(Self::from_client(http_client, mgmt_api_endpoint, jwt))
|
||||
}
|
||||
|
||||
pub fn from_client(
|
||||
client: reqwest::Client,
|
||||
mgmt_api_endpoint: String,
|
||||
jwt: Option<&str>,
|
||||
) -> Self {
|
||||
Self {
|
||||
mgmt_api_endpoint,
|
||||
authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
|
||||
@@ -86,17 +103,17 @@ impl Client {
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
/// Send an HTTP request to an arbitrary path with a desired HTTP method and returning a streaming
|
||||
/// Response. This function is suitable for pass-through/proxy use cases where we don't care
|
||||
/// what the response content looks like.
|
||||
/// Get an arbitrary path and returning a streaming Response. This function is suitable
|
||||
/// for pass-through/proxy use cases where we don't care what the response content looks
|
||||
/// like.
|
||||
///
|
||||
/// Use/add one of the properly typed methods below if you know aren't proxying, and
|
||||
/// know what kind of response you expect.
|
||||
pub async fn op_raw(&self, method: Method, path: String) -> Result<reqwest::Response> {
|
||||
pub async fn get_raw(&self, path: String) -> Result<reqwest::Response> {
|
||||
debug_assert!(path.starts_with('/'));
|
||||
let uri = format!("{}{}", self.mgmt_api_endpoint, path);
|
||||
|
||||
let mut req = self.client.request(method, uri);
|
||||
let mut req = self.client.request(Method::GET, uri);
|
||||
if let Some(value) = &self.authorization_header {
|
||||
req = req.header(reqwest::header::AUTHORIZATION, value);
|
||||
}
|
||||
|
||||
@@ -34,10 +34,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let args: &'static Args = Box::leak(Box::new(args));
|
||||
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
));
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -75,10 +75,10 @@ async fn main_impl(
|
||||
let args: &'static Args = Box::leak(Box::new(args));
|
||||
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
));
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -123,10 +123,10 @@ async fn main_impl(
|
||||
let args: &'static Args = Box::leak(Box::new(args));
|
||||
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
));
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
|
||||
if let Some(engine_str) = &args.set_io_engine {
|
||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||
|
||||
@@ -81,10 +81,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let args: &'static Args = Box::leak(Box::new(args));
|
||||
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
));
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
|
||||
if let Some(engine_str) = &args.set_io_engine {
|
||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||
|
||||
@@ -38,10 +38,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
let args: &'static Args = Box::leak(Box::new(args));
|
||||
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
args.mgmt_api_endpoint.clone(),
|
||||
args.pageserver_jwt.as_deref(),
|
||||
));
|
||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||
)?);
|
||||
|
||||
// discover targets
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
|
||||
@@ -12,7 +12,6 @@ use std::time::Duration;
|
||||
use anyhow::{Context, anyhow};
|
||||
use camino::Utf8Path;
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use http_utils::tls_certs::ReloadingCertificateResolver;
|
||||
use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric};
|
||||
use metrics::set_build_info_metric;
|
||||
use nix::sys::socket::{setsockopt, sockopt};
|
||||
@@ -35,7 +34,6 @@ use tokio::signal::unix::SignalKind;
|
||||
use tokio::time::Instant;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use tracing_utils::OtelGuard;
|
||||
use utils::auth::{JwtAuth, SwappableJwtAuth};
|
||||
use utils::crashsafe::syncfs;
|
||||
use utils::logging::TracingErrorLayerEnablement;
|
||||
@@ -119,21 +117,6 @@ fn main() -> anyhow::Result<()> {
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
let otel_enablement = match &conf.tracing {
|
||||
Some(cfg) => tracing_utils::OtelEnablement::Enabled {
|
||||
service_name: "pageserver".to_string(),
|
||||
export_config: (&cfg.export_config).into(),
|
||||
runtime: *COMPUTE_REQUEST_RUNTIME,
|
||||
},
|
||||
None => tracing_utils::OtelEnablement::Disabled,
|
||||
};
|
||||
|
||||
let otel_guard = tracing_utils::init_performance_tracing(otel_enablement);
|
||||
|
||||
if otel_guard.is_some() {
|
||||
info!(?conf.tracing, "starting with OTEL tracing enabled");
|
||||
}
|
||||
|
||||
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
|
||||
// disarming this hook on pageserver, because we never tear down tracing.
|
||||
logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
@@ -207,7 +190,7 @@ fn main() -> anyhow::Result<()> {
|
||||
tracing::info!("Initializing page_cache...");
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
start_pageserver(launch_ts, conf, otel_guard).context("Failed to start pageserver")?;
|
||||
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
@@ -251,7 +234,6 @@ fn initialize_config(
|
||||
.context("build toml deserializer")?,
|
||||
)
|
||||
.context("deserialize config toml")?;
|
||||
|
||||
let conf = PageServerConf::parse_and_validate(identity.id, config_toml, workdir)
|
||||
.context("runtime-validation of config toml")?;
|
||||
|
||||
@@ -306,7 +288,6 @@ fn startup_checkpoint(started_at: Instant, phase: &str, human_phase: &str) {
|
||||
fn start_pageserver(
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
conf: &'static PageServerConf,
|
||||
otel_guard: Option<OtelGuard>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Monotonic time for later calculating startup duration
|
||||
let started_startup_at = Instant::now();
|
||||
@@ -446,7 +427,7 @@ fn start_pageserver(
|
||||
// Set up deletion queue
|
||||
let (deletion_queue, deletion_workers) = DeletionQueue::new(
|
||||
remote_storage.clone(),
|
||||
StorageControllerUpcallClient::new(conf, &shutdown_pageserver)?,
|
||||
StorageControllerUpcallClient::new(conf, &shutdown_pageserver),
|
||||
conf,
|
||||
);
|
||||
deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());
|
||||
@@ -640,15 +621,12 @@ fn start_pageserver(
|
||||
|
||||
let https_task = match https_listener {
|
||||
Some(https_listener) => {
|
||||
let resolver = MGMT_REQUEST_RUNTIME.block_on(ReloadingCertificateResolver::new(
|
||||
&conf.ssl_key_file,
|
||||
&conf.ssl_cert_file,
|
||||
conf.ssl_cert_reload_period,
|
||||
))?;
|
||||
let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
|
||||
let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
|
||||
|
||||
let server_config = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_cert_resolver(resolver);
|
||||
.with_single_cert(certs, key)?;
|
||||
|
||||
let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));
|
||||
|
||||
@@ -692,21 +670,13 @@ fn start_pageserver(
|
||||
|
||||
// Spawn a task to listen for libpq connections. It will spawn further tasks
|
||||
// for each connection. We created the listener earlier already.
|
||||
let perf_trace_dispatch = otel_guard.as_ref().map(|g| g.dispatch.clone());
|
||||
let page_service = page_service::spawn(
|
||||
conf,
|
||||
tenant_manager.clone(),
|
||||
pg_auth,
|
||||
perf_trace_dispatch,
|
||||
{
|
||||
let _entered = COMPUTE_REQUEST_RUNTIME.enter(); // TcpListener::from_std requires it
|
||||
pageserver_listener
|
||||
.set_nonblocking(true)
|
||||
.context("set listener to nonblocking")?;
|
||||
tokio::net::TcpListener::from_std(pageserver_listener)
|
||||
.context("create tokio listener")?
|
||||
},
|
||||
);
|
||||
let page_service = page_service::spawn(conf, tenant_manager.clone(), pg_auth, {
|
||||
let _entered = COMPUTE_REQUEST_RUNTIME.enter(); // TcpListener::from_std requires it
|
||||
pageserver_listener
|
||||
.set_nonblocking(true)
|
||||
.context("set listener to nonblocking")?;
|
||||
tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")?
|
||||
});
|
||||
|
||||
// All started up! Now just sit and wait for shutdown signal.
|
||||
BACKGROUND_RUNTIME.block_on(async move {
|
||||
|
||||
@@ -17,7 +17,7 @@ use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::AuthType;
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use reqwest::{Certificate, Url};
|
||||
use reqwest::Url;
|
||||
use storage_broker::Uri;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::logging::{LogFormat, SecretString};
|
||||
@@ -43,7 +43,7 @@ use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};
|
||||
///
|
||||
/// For fields that require additional validation or filling in of defaults at runtime,
|
||||
/// check for examples in the [`PageServerConf::parse_and_validate`] method.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct PageServerConf {
|
||||
// Identifier of that particular pageserver so e g safekeepers
|
||||
// can safely distinguish different pageservers
|
||||
@@ -56,17 +56,8 @@ pub struct PageServerConf {
|
||||
/// Example: 127.0.0.1:9899
|
||||
pub listen_https_addr: Option<String>,
|
||||
|
||||
/// Path to a file with certificate's private key for https API.
|
||||
/// Default: server.key
|
||||
pub ssl_key_file: Utf8PathBuf,
|
||||
/// Path to a file with a X509 certificate for https API.
|
||||
/// Default: server.crt
|
||||
pub ssl_cert_file: Utf8PathBuf,
|
||||
/// Period to reload certificate and private key from files.
|
||||
/// Default: 60s.
|
||||
pub ssl_cert_reload_period: Duration,
|
||||
/// Trusted root CA certificates to use in https APIs.
|
||||
pub ssl_ca_certs: Vec<Certificate>,
|
||||
|
||||
/// Current availability zone. Used for traffic metrics.
|
||||
pub availability_zone: Option<String>,
|
||||
@@ -215,8 +206,6 @@ pub struct PageServerConf {
|
||||
|
||||
/// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
|
||||
pub generate_unarchival_heatmap: bool,
|
||||
|
||||
pub tracing: Option<pageserver_api::config::Tracing>,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -336,8 +325,6 @@ impl PageServerConf {
|
||||
listen_https_addr,
|
||||
ssl_key_file,
|
||||
ssl_cert_file,
|
||||
ssl_cert_reload_period,
|
||||
ssl_ca_file,
|
||||
availability_zone,
|
||||
wait_lsn_timeout,
|
||||
wal_redo_timeout,
|
||||
@@ -388,7 +375,6 @@ impl PageServerConf {
|
||||
validate_wal_contiguity,
|
||||
load_previous_heatmap,
|
||||
generate_unarchival_heatmap,
|
||||
tracing,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -400,7 +386,6 @@ impl PageServerConf {
|
||||
listen_https_addr,
|
||||
ssl_key_file,
|
||||
ssl_cert_file,
|
||||
ssl_cert_reload_period,
|
||||
availability_zone,
|
||||
wait_lsn_timeout,
|
||||
wal_redo_timeout,
|
||||
@@ -438,7 +423,6 @@ impl PageServerConf {
|
||||
wal_receiver_protocol,
|
||||
page_service_pipelining,
|
||||
get_vectored_concurrent_io,
|
||||
tracing,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
@@ -485,13 +469,6 @@ impl PageServerConf {
|
||||
validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
|
||||
load_previous_heatmap: load_previous_heatmap.unwrap_or(true),
|
||||
generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true),
|
||||
ssl_ca_certs: match ssl_ca_file {
|
||||
Some(ssl_ca_file) => {
|
||||
let buf = std::fs::read(ssl_ca_file)?;
|
||||
Certificate::from_pem_bundle(&buf)?
|
||||
}
|
||||
None => Vec::new(),
|
||||
},
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
@@ -510,17 +487,6 @@ impl PageServerConf {
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(tracing_config) = conf.tracing.as_ref() {
|
||||
let ratio = &tracing_config.sampling_ratio;
|
||||
ensure!(
|
||||
ratio.denominator != 0 && ratio.denominator >= ratio.numerator,
|
||||
format!(
|
||||
"Invalid sampling ratio: {}/{}",
|
||||
ratio.numerator, ratio.denominator
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)
|
||||
.map_err(anyhow::Error::msg)
|
||||
.with_context(|| {
|
||||
|
||||
@@ -100,12 +100,6 @@ use crate::{
|
||||
task_mgr::TaskKind,
|
||||
tenant::Timeline,
|
||||
};
|
||||
use futures::FutureExt;
|
||||
use futures::future::BoxFuture;
|
||||
use std::future::Future;
|
||||
use tracing_utils::perf_span::{PerfInstrument, PerfSpan};
|
||||
|
||||
use tracing::{Dispatch, Span};
|
||||
|
||||
// The main structure of this module, see module-level comment.
|
||||
pub struct RequestContext {
|
||||
@@ -115,8 +109,6 @@ pub struct RequestContext {
|
||||
page_content_kind: PageContentKind,
|
||||
read_path_debug: bool,
|
||||
scope: Scope,
|
||||
perf_span: Option<PerfSpan>,
|
||||
perf_span_dispatch: Option<Dispatch>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -271,15 +263,22 @@ impl RequestContextBuilder {
|
||||
page_content_kind: PageContentKind::Unknown,
|
||||
read_path_debug: false,
|
||||
scope: Scope::new_global(),
|
||||
perf_span: None,
|
||||
perf_span_dispatch: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from(original: &RequestContext) -> Self {
|
||||
pub fn extend(original: &RequestContext) -> Self {
|
||||
Self {
|
||||
inner: original.clone(),
|
||||
// This is like a Copy, but avoid implementing Copy because ordinary users of
|
||||
// RequestContext should always move or ref it.
|
||||
inner: RequestContext {
|
||||
task_kind: original.task_kind,
|
||||
download_behavior: original.download_behavior,
|
||||
access_stats_behavior: original.access_stats_behavior,
|
||||
page_content_kind: original.page_content_kind,
|
||||
read_path_debug: original.read_path_debug,
|
||||
scope: original.scope.clone(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -317,74 +316,12 @@ impl RequestContextBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn perf_span_dispatch(mut self, dispatch: Option<Dispatch>) -> Self {
|
||||
self.inner.perf_span_dispatch = dispatch;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn root_perf_span<Fn>(mut self, make_span: Fn) -> Self
|
||||
where
|
||||
Fn: FnOnce() -> Span,
|
||||
{
|
||||
assert!(self.inner.perf_span.is_none());
|
||||
assert!(self.inner.perf_span_dispatch.is_some());
|
||||
|
||||
let dispatcher = self.inner.perf_span_dispatch.as_ref().unwrap();
|
||||
let new_span = tracing::dispatcher::with_default(dispatcher, make_span);
|
||||
|
||||
self.inner.perf_span = Some(PerfSpan::new(new_span, dispatcher.clone()));
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
pub fn perf_span<Fn>(mut self, make_span: Fn) -> Self
|
||||
where
|
||||
Fn: FnOnce(&Span) -> Span,
|
||||
{
|
||||
if let Some(ref perf_span) = self.inner.perf_span {
|
||||
assert!(self.inner.perf_span_dispatch.is_some());
|
||||
let dispatcher = self.inner.perf_span_dispatch.as_ref().unwrap();
|
||||
|
||||
let new_span =
|
||||
tracing::dispatcher::with_default(dispatcher, || make_span(perf_span.inner()));
|
||||
|
||||
self.inner.perf_span = Some(PerfSpan::new(new_span, dispatcher.clone()));
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
pub fn root(self) -> RequestContext {
|
||||
self.inner
|
||||
}
|
||||
|
||||
pub fn attached_child(self) -> RequestContext {
|
||||
self.inner
|
||||
}
|
||||
|
||||
pub fn detached_child(self) -> RequestContext {
|
||||
pub fn build(self) -> RequestContext {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl RequestContext {
|
||||
/// Private clone implementation
|
||||
///
|
||||
/// Callers should use the [`RequestContextBuilder`] or child spaning APIs of
|
||||
/// [`RequestContext`].
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
task_kind: self.task_kind,
|
||||
download_behavior: self.download_behavior,
|
||||
access_stats_behavior: self.access_stats_behavior,
|
||||
page_content_kind: self.page_content_kind,
|
||||
read_path_debug: self.read_path_debug,
|
||||
scope: self.scope.clone(),
|
||||
perf_span: self.perf_span.clone(),
|
||||
perf_span_dispatch: self.perf_span_dispatch.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new RequestContext that has no parent.
|
||||
///
|
||||
/// The function is called `new` because, once we add children
|
||||
@@ -400,7 +337,7 @@ impl RequestContext {
|
||||
pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
|
||||
RequestContextBuilder::new(task_kind)
|
||||
.download_behavior(download_behavior)
|
||||
.root()
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Create a detached child context for a task that may outlive `self`.
|
||||
@@ -421,10 +358,7 @@ impl RequestContext {
|
||||
///
|
||||
/// We could make new calls to this function fail if `self` is already canceled.
|
||||
pub fn detached_child(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
.task_kind(task_kind)
|
||||
.download_behavior(download_behavior)
|
||||
.detached_child()
|
||||
self.child_impl(task_kind, download_behavior)
|
||||
}
|
||||
|
||||
/// Create a child of context `self` for a task that shall not outlive `self`.
|
||||
@@ -448,7 +382,7 @@ impl RequestContext {
|
||||
/// The method to wait for child tasks would return an error, indicating
|
||||
/// that the child task was not started because the context was canceled.
|
||||
pub fn attached_child(&self) -> Self {
|
||||
RequestContextBuilder::from(self).attached_child()
|
||||
self.child_impl(self.task_kind(), self.download_behavior())
|
||||
}
|
||||
|
||||
/// Use this function when you should be creating a child context using
|
||||
@@ -463,10 +397,17 @@ impl RequestContext {
|
||||
Self::new(task_kind, download_behavior)
|
||||
}
|
||||
|
||||
fn child_impl(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
|
||||
RequestContextBuilder::extend(self)
|
||||
.task_kind(task_kind)
|
||||
.download_behavior(download_behavior)
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn with_scope_timeline(&self, timeline: &Arc<Timeline>) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
RequestContextBuilder::extend(self)
|
||||
.scope(Scope::new_timeline(timeline))
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
pub(crate) fn with_scope_page_service_pagestream(
|
||||
@@ -475,9 +416,9 @@ impl RequestContext {
|
||||
crate::page_service::TenantManagerTypes,
|
||||
>,
|
||||
) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
RequestContextBuilder::extend(self)
|
||||
.scope(Scope::new_page_service_pagestream(timeline_handle))
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn with_scope_secondary_timeline(
|
||||
@@ -485,30 +426,28 @@ impl RequestContext {
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
RequestContextBuilder::extend(self)
|
||||
.scope(Scope::new_secondary_timeline(tenant_shard_id, timeline_id))
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn with_scope_secondary_tenant(&self, tenant_shard_id: &TenantShardId) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
RequestContextBuilder::extend(self)
|
||||
.scope(Scope::new_secondary_tenant(tenant_shard_id))
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn with_scope_unit_test(&self) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
.task_kind(TaskKind::UnitTest)
|
||||
RequestContextBuilder::new(TaskKind::UnitTest)
|
||||
.scope(Scope::new_unit_test())
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn with_scope_debug_tools(&self) -> Self {
|
||||
RequestContextBuilder::from(self)
|
||||
.task_kind(TaskKind::DebugTool)
|
||||
RequestContextBuilder::new(TaskKind::DebugTool)
|
||||
.scope(Scope::new_debug_tools())
|
||||
.attached_child()
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn task_kind(&self) -> TaskKind {
|
||||
@@ -565,61 +504,4 @@ impl RequestContext {
|
||||
Scope::DebugTools { io_size_metrics } => io_size_metrics,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn perf_follows_from(&self, from: &RequestContext) {
|
||||
if let (Some(span), Some(from_span)) = (&self.perf_span, &from.perf_span) {
|
||||
span.inner().follows_from(from_span.inner());
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn perf_span_record<
|
||||
Q: tracing::field::AsField + ?Sized,
|
||||
V: tracing::field::Value,
|
||||
>(
|
||||
&self,
|
||||
field: &Q,
|
||||
value: V,
|
||||
) {
|
||||
if let Some(span) = &self.perf_span {
|
||||
span.record(field, value);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn has_perf_span(&self) -> bool {
|
||||
self.perf_span.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// [`Future`] extension trait that allow for creating performance
|
||||
/// spans on sampled requests
|
||||
pub(crate) trait PerfInstrumentFutureExt<'a>: Future + Send {
|
||||
/// Instrument this future with a new performance span when the
|
||||
/// provided request context indicates the originator request
|
||||
/// was sampled. Otherwise, just box the future and return it as is.
|
||||
fn maybe_perf_instrument<Fn>(
|
||||
self,
|
||||
ctx: &RequestContext,
|
||||
make_span: Fn,
|
||||
) -> BoxFuture<'a, Self::Output>
|
||||
where
|
||||
Self: Sized + 'a,
|
||||
Fn: FnOnce(&Span) -> Span,
|
||||
{
|
||||
match &ctx.perf_span {
|
||||
Some(perf_span) => {
|
||||
assert!(ctx.perf_span_dispatch.is_some());
|
||||
let dispatcher = ctx.perf_span_dispatch.as_ref().unwrap();
|
||||
|
||||
let new_span =
|
||||
tracing::dispatcher::with_default(dispatcher, || make_span(perf_span.inner()));
|
||||
|
||||
let new_perf_span = PerfSpan::new(new_span, dispatcher.clone());
|
||||
self.instrument(new_perf_span).boxed()
|
||||
}
|
||||
None => self.boxed(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Implement the trait for all types that satisfy the trait bounds
|
||||
impl<'a, T: Future + Send + 'a> PerfInstrumentFutureExt<'a> for T {}
|
||||
|
||||
@@ -50,13 +50,10 @@ pub trait StorageControllerUpcallApi {
|
||||
impl StorageControllerUpcallClient {
|
||||
/// A None return value indicates that the input `conf` object does not have control
|
||||
/// plane API enabled.
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Option<Self>, reqwest::Error> {
|
||||
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
|
||||
let mut url = match conf.control_plane_api.as_ref() {
|
||||
Some(u) => u.clone(),
|
||||
None => return Ok(None),
|
||||
None => return None,
|
||||
};
|
||||
|
||||
if let Ok(mut segs) = url.path_segments_mut() {
|
||||
@@ -76,16 +73,12 @@ impl StorageControllerUpcallClient {
|
||||
client = client.default_headers(headers);
|
||||
}
|
||||
|
||||
for ssl_ca_cert in &conf.ssl_ca_certs {
|
||||
client = client.add_root_certificate(ssl_ca_cert.clone());
|
||||
}
|
||||
|
||||
Ok(Some(Self {
|
||||
http_client: client.build()?,
|
||||
Some(Self {
|
||||
http_client: client.build().expect("Failed to construct HTTP client"),
|
||||
base_url: url,
|
||||
node_id: conf.id,
|
||||
cancel: cancel.clone(),
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
|
||||
@@ -669,13 +669,6 @@ paths:
|
||||
Detach a timeline from its ancestor and reparent all ancestors timelines with lower `ancestor_lsn`.
|
||||
Current implementation might not be retryable across failure cases, but will be enhanced in future.
|
||||
Detaching should be expected to be expensive operation. Timeouts should be retried.
|
||||
parameters:
|
||||
- name: detach_behavior
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
description: Currently valid values are `v1`, `v2`
|
||||
type: string
|
||||
responses:
|
||||
"200":
|
||||
description: |
|
||||
@@ -1086,7 +1079,6 @@ components:
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
- state
|
||||
- min_readable_lsn
|
||||
properties:
|
||||
timeline_id:
|
||||
type: string
|
||||
@@ -1133,40 +1125,6 @@ components:
|
||||
applied_gc_cutoff_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
safekeepers:
|
||||
$ref: "#/components/schemas/TimelineSafekeepersInfo"
|
||||
|
||||
TimelineSafekeepersInfo:
|
||||
type: object
|
||||
required:
|
||||
- tenant_id
|
||||
- timeline_id
|
||||
- generation
|
||||
- safekeepers
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
generation:
|
||||
type: integer
|
||||
safekeepers:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/TimelineSafekeeperInfo"
|
||||
|
||||
TimelineSafekeeperInfo:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- hostname
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
hostname:
|
||||
type: string
|
||||
|
||||
SyntheticSizeResponse:
|
||||
type: object
|
||||
|
||||
@@ -37,8 +37,8 @@ use pageserver_api::models::{
|
||||
TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest,
|
||||
TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode,
|
||||
TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo,
|
||||
TimelinePatchIndexPartRequest, TimelineVisibilityState, TimelinesInfoAndOffloaded,
|
||||
TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse,
|
||||
TimelinePatchIndexPartRequest, TimelinesInfoAndOffloaded, TopTenantShardItem,
|
||||
TopTenantShardsRequest, TopTenantShardsResponse,
|
||||
};
|
||||
use pageserver_api::shard::{ShardCount, TenantShardId};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};
|
||||
@@ -74,8 +74,8 @@ use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
|
||||
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
|
||||
use crate::tenant::timeline::{
|
||||
CompactFlags, CompactOptions, CompactRequest, CompactionError, MarkInvisibleRequest, Timeline,
|
||||
WaitLsnTimeout, WaitLsnWaiter, import_pgdata,
|
||||
CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
|
||||
WaitLsnWaiter, import_pgdata,
|
||||
};
|
||||
use crate::tenant::{
|
||||
GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError,
|
||||
@@ -439,15 +439,11 @@ async fn build_timeline_info_common(
|
||||
let remote_consistent_lsn_visible = timeline
|
||||
.get_remote_consistent_lsn_visible()
|
||||
.unwrap_or(Lsn(0));
|
||||
let is_invisible = timeline.remote_client.is_invisible().unwrap_or(false);
|
||||
|
||||
let walreceiver_status = timeline.walreceiver_status();
|
||||
|
||||
let (pitr_history_size, within_ancestor_pitr) = timeline.get_pitr_history_stats();
|
||||
|
||||
// Externally, expose the lowest LSN that can be used to create a branch.
|
||||
// Internally we distinguish between the planned GC cutoff (PITR point) and the "applied" GC cutoff (where we
|
||||
// actually trimmed data to), which can pass each other when PITR is changed.
|
||||
let min_readable_lsn = std::cmp::max(
|
||||
timeline.get_gc_cutoff_lsn(),
|
||||
*timeline.get_applied_gc_cutoff_lsn(),
|
||||
@@ -464,6 +460,7 @@ async fn build_timeline_info_common(
|
||||
initdb_lsn,
|
||||
last_record_lsn,
|
||||
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
|
||||
_unused: Default::default(), // Unused, for legacy decode only
|
||||
min_readable_lsn,
|
||||
applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(),
|
||||
current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
|
||||
@@ -485,7 +482,6 @@ async fn build_timeline_info_common(
|
||||
state,
|
||||
is_archived: Some(is_archived),
|
||||
rel_size_migration: Some(timeline.get_rel_size_v2_status()),
|
||||
is_invisible: Some(is_invisible),
|
||||
|
||||
walreceiver_status,
|
||||
};
|
||||
@@ -2258,6 +2254,7 @@ async fn timeline_compact_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
flags |= CompactFlags::NoYield; // run compaction to completion
|
||||
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
@@ -2336,38 +2333,6 @@ async fn timeline_compact_handler(
|
||||
.await
|
||||
}
|
||||
|
||||
async fn timeline_mark_invisible_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let compact_request = json_request_maybe::<Option<MarkInvisibleRequest>>(&mut request).await?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let visibility = match compact_request {
|
||||
Some(req) => match req.is_visible {
|
||||
Some(true) => TimelineVisibilityState::Visible,
|
||||
Some(false) | None => TimelineVisibilityState::Invisible,
|
||||
},
|
||||
None => TimelineVisibilityState::Invisible,
|
||||
};
|
||||
|
||||
async {
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
let timeline = tenant.get_timeline(timeline_id, true)?;
|
||||
timeline.remote_client.schedule_index_upload_for_timeline_invisible_state(visibility).map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
.instrument(info_span!("manual_timeline_mark_invisible", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
// Run offload immediately on given timeline.
|
||||
async fn timeline_offload_handler(
|
||||
request: Request<Body>,
|
||||
@@ -2428,6 +2393,7 @@ async fn timeline_checkpoint_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
flags |= CompactFlags::NoYield; // run compaction to completion
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
}
|
||||
@@ -2697,12 +2663,11 @@ async fn getpage_at_lsn_handler_inner(
|
||||
let lsn: Option<Lsn> = parse_query_param(&request, "lsn")?;
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
// Enable read path debugging
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
|
||||
let ctx = RequestContextBuilder::new(TaskKind::MgmtRequest)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.scope(context::Scope::new_timeline(&timeline))
|
||||
.read_path_debug(true)
|
||||
.root();
|
||||
let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true)
|
||||
.scope(context::Scope::new_timeline(&timeline)).build();
|
||||
|
||||
// Use last_record_lsn if no lsn is provided
|
||||
let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
|
||||
@@ -3189,8 +3154,7 @@ async fn list_aux_files(
|
||||
timeline.gate.enter().map_err(|_| ApiError::Cancelled)?,
|
||||
);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download)
|
||||
.with_scope_timeline(&timeline);
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let files = timeline
|
||||
.list_aux_files(body.lsn, &ctx, io_concurrency)
|
||||
.await?;
|
||||
@@ -3434,15 +3398,14 @@ async fn put_tenant_timeline_import_wal(
|
||||
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let span = info_span!("import_wal", tenant_id=%tenant_id, timeline_id=%timeline_id, start_lsn=%start_lsn, end_lsn=%end_lsn);
|
||||
async move {
|
||||
let state = get_state(&request);
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, TenantShardId::unsharded(tenant_id), timeline_id).await?;
|
||||
let ctx = RequestContextBuilder::new(TaskKind::MgmtRequest)
|
||||
.download_behavior(DownloadBehavior::Warn)
|
||||
.scope(context::Scope::new_timeline(&timeline))
|
||||
.root();
|
||||
let ctx = RequestContextBuilder::extend(&ctx).scope(context::Scope::new_timeline(&timeline)).build();
|
||||
|
||||
let mut body = StreamReader::new(request.into_body().map(|res| {
|
||||
res.map_err(|error| {
|
||||
@@ -3787,10 +3750,6 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload",
|
||||
|r| testing_api_handler("attempt timeline offload", r, timeline_offload_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/mark_invisible",
|
||||
|r| api_handler( r, timeline_mark_invisible_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
|
||||
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
|
||||
|
||||
@@ -55,9 +55,6 @@ pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||
pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
|
||||
|
||||
// Target used for performance traces.
|
||||
pub const PERF_TRACE_TARGET: &str = "P";
|
||||
|
||||
static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
|
||||
|
||||
pub use crate::metrics::preinitialize_metrics;
|
||||
|
||||
@@ -10,7 +10,7 @@ use std::time::{Duration, Instant};
|
||||
use enum_map::{Enum as _, EnumMap};
|
||||
use futures::Future;
|
||||
use metrics::{
|
||||
Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
|
||||
@@ -499,6 +499,15 @@ pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::n
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_flush_wait_upload_seconds",
|
||||
"Time spent waiting for preceding uploads during layer flush",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_last_record_lsn",
|
||||
@@ -2855,6 +2864,7 @@ pub(crate) struct TimelineMetrics {
|
||||
timeline_id: String,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub flush_delay_histo: StorageTimeMetrics,
|
||||
pub flush_wait_upload_time_gauge: Gauge,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
pub logical_size_histo: StorageTimeMetrics,
|
||||
@@ -2906,6 +2916,9 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
let compact_time_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::Compact,
|
||||
&tenant_id,
|
||||
@@ -3033,6 +3046,7 @@ impl TimelineMetrics {
|
||||
timeline_id,
|
||||
flush_time_histo,
|
||||
flush_delay_histo,
|
||||
flush_wait_upload_time_gauge,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
logical_size_histo,
|
||||
@@ -3082,6 +3096,14 @@ impl TimelineMetrics {
|
||||
self.resident_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
|
||||
self.flush_wait_upload_time_gauge.add(duration);
|
||||
crate::metrics::FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
|
||||
.unwrap()
|
||||
.add(duration);
|
||||
}
|
||||
|
||||
/// Generates TIMELINE_LAYER labels for a persistent layer.
|
||||
fn make_layer_labels(&self, layer_desc: &PersistentLayerDesc) -> [&str; 5] {
|
||||
let level = match LayerMap::is_l0(&layer_desc.key_range, layer_desc.is_delta()) {
|
||||
@@ -3185,6 +3207,7 @@ impl TimelineMetrics {
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
{
|
||||
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
|
||||
|
||||
@@ -9,7 +9,6 @@ use std::sync::Arc;
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
use std::{io, str};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use anyhow::{Context, bail};
|
||||
use async_compression::tokio::write::GzipEncoder;
|
||||
use bytes::Buf;
|
||||
@@ -18,7 +17,7 @@ use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::config::{
|
||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
PageServiceProtocolPipelinedExecutionStrategy, Tracing,
|
||||
PageServiceProtocolPipelinedExecutionStrategy,
|
||||
};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::models::{
|
||||
@@ -37,7 +36,6 @@ use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use pq_proto::framed::ConnectionError;
|
||||
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
|
||||
use rand::Rng;
|
||||
use strum_macros::IntoStaticStr;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
|
||||
use tokio::task::JoinHandle;
|
||||
@@ -55,9 +53,7 @@ use utils::sync::spsc_fold;
|
||||
use crate::auth::check_permission;
|
||||
use crate::basebackup::BasebackupError;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{
|
||||
DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
|
||||
};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics::{
|
||||
self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer,
|
||||
TimelineMetrics,
|
||||
@@ -104,7 +100,6 @@ pub fn spawn(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
pg_auth: Option<Arc<SwappableJwtAuth>>,
|
||||
perf_trace_dispatch: Option<Dispatch>,
|
||||
tcp_listener: tokio::net::TcpListener,
|
||||
) -> Listener {
|
||||
let cancel = CancellationToken::new();
|
||||
@@ -122,7 +117,6 @@ pub fn spawn(
|
||||
conf,
|
||||
tenant_manager,
|
||||
pg_auth,
|
||||
perf_trace_dispatch,
|
||||
tcp_listener,
|
||||
conf.pg_auth_type,
|
||||
conf.page_service_pipelining.clone(),
|
||||
@@ -179,7 +173,6 @@ pub async fn libpq_listener_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
perf_trace_dispatch: Option<Dispatch>,
|
||||
listener: tokio::net::TcpListener,
|
||||
auth_type: AuthType,
|
||||
pipelining_config: PageServicePipeliningConfig,
|
||||
@@ -212,12 +205,8 @@ pub async fn libpq_listener_main(
|
||||
// Connection established. Spawn a new task to handle it.
|
||||
debug!("accepted connection from {}", peer_addr);
|
||||
let local_auth = auth.clone();
|
||||
let connection_ctx = RequestContextBuilder::from(&listener_ctx)
|
||||
.task_kind(TaskKind::PageRequestHandler)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.perf_span_dispatch(perf_trace_dispatch.clone())
|
||||
.detached_child();
|
||||
|
||||
let connection_ctx = listener_ctx
|
||||
.detached_child(TaskKind::PageRequestHandler, DownloadBehavior::Download);
|
||||
connection_handler_tasks.spawn(page_service_conn_main(
|
||||
conf,
|
||||
tenant_manager.clone(),
|
||||
@@ -248,7 +237,7 @@ pub async fn libpq_listener_main(
|
||||
|
||||
type ConnectionHandlerResult = anyhow::Result<()>;
|
||||
|
||||
#[instrument(skip_all, fields(peer_addr, application_name, compute_mode))]
|
||||
#[instrument(skip_all, fields(peer_addr, application_name))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn page_service_conn_main(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -618,7 +607,6 @@ impl std::fmt::Display for BatchedPageStreamError {
|
||||
struct BatchedGetPageRequest {
|
||||
req: PagestreamGetPageRequest,
|
||||
timer: SmgrOpTimer,
|
||||
ctx: RequestContext,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
@@ -755,7 +743,6 @@ impl PageServerHandler {
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
timeline_handles: &mut TimelineHandles,
|
||||
tracing_config: Option<&Tracing>,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
@@ -915,51 +902,10 @@ impl PageServerHandler {
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(req.rel, req.blkno);
|
||||
|
||||
let sampled = match tracing_config {
|
||||
Some(conf) => {
|
||||
let ratio = &conf.sampling_ratio;
|
||||
|
||||
if ratio.numerator == 0 {
|
||||
false
|
||||
} else {
|
||||
rand::thread_rng().gen_range(0..ratio.denominator) < ratio.numerator
|
||||
}
|
||||
}
|
||||
None => false,
|
||||
};
|
||||
|
||||
let ctx = if sampled {
|
||||
RequestContextBuilder::from(ctx)
|
||||
.root_perf_span(|| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
"GET_PAGE",
|
||||
tenant_id = %tenant_id,
|
||||
shard_id = field::Empty,
|
||||
timeline_id = %timeline_id,
|
||||
lsn = %req.hdr.request_lsn,
|
||||
request_id = %req.hdr.reqid,
|
||||
key = %key,
|
||||
)
|
||||
})
|
||||
.attached_child()
|
||||
} else {
|
||||
ctx.attached_child()
|
||||
};
|
||||
|
||||
let res = timeline_handles
|
||||
let shard = match timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Page(key))
|
||||
.maybe_perf_instrument(&ctx, |current_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: current_perf_span,
|
||||
"SHARD_SELECTION",
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
let shard = match res {
|
||||
.await
|
||||
{
|
||||
Ok(tl) => tl,
|
||||
Err(e) => {
|
||||
let span = mkspan!(before shard routing);
|
||||
@@ -986,60 +932,26 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// This ctx travels as part of the BatchedFeMessage through
|
||||
// batching into the request handler.
|
||||
// The request handler needs to do some per-request work
|
||||
// (relsize check) before dispatching the batch as a single
|
||||
// get_vectored call to the Timeline.
|
||||
// This ctx will be used for the reslize check, whereas the
|
||||
// get_vectored call will be a different ctx with separate
|
||||
// perf span.
|
||||
let ctx = ctx.with_scope_page_service_pagestream(&shard);
|
||||
|
||||
// Similar game for this `span`: we funnel it through so that
|
||||
// request handler log messages contain the request-specific fields.
|
||||
let span = mkspan!(shard.tenant_shard_id.shard_slug());
|
||||
|
||||
// Enrich the perf span with shard_id now that shard routing is done.
|
||||
ctx.perf_span_record(
|
||||
"shard_id",
|
||||
tracing::field::display(shard.get_shard_identity().shard_slug()),
|
||||
);
|
||||
|
||||
let timer = record_op_start_and_throttle(
|
||||
&shard,
|
||||
metrics::SmgrQueryType::GetPageAtLsn,
|
||||
received_at,
|
||||
)
|
||||
.maybe_perf_instrument(&ctx, |current_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: current_perf_span,
|
||||
"THROTTLE",
|
||||
)
|
||||
})
|
||||
.await?;
|
||||
|
||||
// We're holding the Handle
|
||||
// TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait
|
||||
let res = Self::wait_or_get_last_lsn(
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&shard.get_applied_gc_cutoff_lsn(),
|
||||
&ctx,
|
||||
ctx,
|
||||
)
|
||||
.maybe_perf_instrument(&ctx, |current_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: current_perf_span,
|
||||
"WAIT_LSN",
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
let effective_request_lsn = match res {
|
||||
// TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait
|
||||
.await
|
||||
{
|
||||
Ok(lsn) => lsn,
|
||||
Err(e) => {
|
||||
return respond_error!(span, e);
|
||||
@@ -1049,7 +961,7 @@ impl PageServerHandler {
|
||||
span,
|
||||
shard: shard.downgrade(),
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![BatchedGetPageRequest { req, timer, ctx }],
|
||||
pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
@@ -1602,15 +1514,12 @@ impl PageServerHandler {
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,
|
||||
{
|
||||
let cancel = self.cancel.clone();
|
||||
let tracing_config = self.conf.tracing.clone();
|
||||
|
||||
let err = loop {
|
||||
let msg = Self::pagestream_read_message(
|
||||
&mut pgb_reader,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&mut timeline_handles,
|
||||
tracing_config.as_ref(),
|
||||
&cancel,
|
||||
ctx,
|
||||
protocol_version,
|
||||
@@ -1744,8 +1653,6 @@ impl PageServerHandler {
|
||||
// Batcher
|
||||
//
|
||||
|
||||
let tracing_config = self.conf.tracing.clone();
|
||||
|
||||
let cancel_batcher = self.cancel.child_token();
|
||||
let (mut batch_tx, mut batch_rx) = spsc_fold::channel();
|
||||
let batcher = pipeline_stage!("batcher", cancel_batcher.clone(), move |cancel_batcher| {
|
||||
@@ -1759,7 +1666,6 @@ impl PageServerHandler {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&mut timeline_handles,
|
||||
tracing_config.as_ref(),
|
||||
&cancel_batcher,
|
||||
&ctx,
|
||||
protocol_version,
|
||||
@@ -2098,9 +2004,7 @@ impl PageServerHandler {
|
||||
|
||||
let results = timeline
|
||||
.get_rel_page_at_lsn_batched(
|
||||
requests
|
||||
.iter()
|
||||
.map(|p| (&p.req.rel, &p.req.blkno, p.ctx.attached_child())),
|
||||
requests.iter().map(|p| (&p.req.rel, &p.req.blkno)),
|
||||
effective_lsn,
|
||||
io_concurrency,
|
||||
ctx,
|
||||
@@ -2608,58 +2512,6 @@ impl PageServiceCmd {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the startup options from the postgres wire protocol startup packet.
|
||||
///
|
||||
/// It takes a sequence of `-c option=X` or `-coption=X`. It parses the options string
|
||||
/// by best effort and returns all the options parsed (key-value pairs) and a bool indicating
|
||||
/// whether all options are successfully parsed. There could be duplicates in the options
|
||||
/// if the caller passed such parameters.
|
||||
fn parse_options(options: &str) -> (Vec<(String, String)>, bool) {
|
||||
let mut parsing_config = false;
|
||||
let mut has_error = false;
|
||||
let mut config = Vec::new();
|
||||
for item in options.split_whitespace() {
|
||||
if item == "-c" {
|
||||
if !parsing_config {
|
||||
parsing_config = true;
|
||||
} else {
|
||||
// "-c" followed with another "-c"
|
||||
tracing::warn!("failed to parse the startup options: {options}");
|
||||
has_error = true;
|
||||
break;
|
||||
}
|
||||
} else if item.starts_with("-c") || parsing_config {
|
||||
let Some((mut key, value)) = item.split_once('=') else {
|
||||
// "-c" followed with an invalid option
|
||||
tracing::warn!("failed to parse the startup options: {options}");
|
||||
has_error = true;
|
||||
break;
|
||||
};
|
||||
if !parsing_config {
|
||||
// Parse "-coptions=X"
|
||||
let Some(stripped_key) = key.strip_prefix("-c") else {
|
||||
tracing::warn!("failed to parse the startup options: {options}");
|
||||
has_error = true;
|
||||
break;
|
||||
};
|
||||
key = stripped_key;
|
||||
}
|
||||
config.push((key.to_string(), value.to_string()));
|
||||
parsing_config = false;
|
||||
} else {
|
||||
tracing::warn!("failed to parse the startup options: {options}");
|
||||
has_error = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if parsing_config {
|
||||
// "-c" without the option
|
||||
tracing::warn!("failed to parse the startup options: {options}");
|
||||
has_error = true;
|
||||
}
|
||||
(config, has_error)
|
||||
}
|
||||
|
||||
impl<IO> postgres_backend::Handler<IO> for PageServerHandler
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,
|
||||
@@ -2704,14 +2556,6 @@ where
|
||||
if let Some(app_name) = params.get("application_name") {
|
||||
Span::current().record("application_name", field::display(app_name));
|
||||
}
|
||||
if let Some(options) = params.get("options") {
|
||||
let (config, _) = parse_options(options);
|
||||
for (key, value) in config {
|
||||
if key == "neon.compute_mode" {
|
||||
Span::current().record("compute_mode", field::display(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
@@ -2825,7 +2669,6 @@ where
|
||||
PageServiceCmd::Set => {
|
||||
// important because psycopg2 executes "SET datestyle TO 'ISO'"
|
||||
// on connect
|
||||
// TODO: allow setting options, i.e., application_name/compute_mode via SET commands
|
||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
}
|
||||
PageServiceCmd::LeaseLsn(LeaseLsnCmd {
|
||||
@@ -3100,46 +2943,4 @@ mod tests {
|
||||
let cmd = PageServiceCmd::parse(&format!("lease {tenant_id} {timeline_id} gzip 0/16ABCDE"));
|
||||
assert!(cmd.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_options() {
|
||||
let (config, has_error) = parse_options(" -c neon.compute_mode=primary ");
|
||||
assert!(!has_error);
|
||||
assert_eq!(
|
||||
config,
|
||||
vec![("neon.compute_mode".to_string(), "primary".to_string())]
|
||||
);
|
||||
|
||||
let (config, has_error) = parse_options(" -c neon.compute_mode=primary -c foo=bar ");
|
||||
assert!(!has_error);
|
||||
assert_eq!(
|
||||
config,
|
||||
vec![
|
||||
("neon.compute_mode".to_string(), "primary".to_string()),
|
||||
("foo".to_string(), "bar".to_string()),
|
||||
]
|
||||
);
|
||||
|
||||
let (config, has_error) = parse_options(" -c neon.compute_mode=primary -cfoo=bar");
|
||||
assert!(!has_error);
|
||||
assert_eq!(
|
||||
config,
|
||||
vec![
|
||||
("neon.compute_mode".to_string(), "primary".to_string()),
|
||||
("foo".to_string(), "bar".to_string()),
|
||||
]
|
||||
);
|
||||
|
||||
let (_, has_error) = parse_options("-c");
|
||||
assert!(has_error);
|
||||
|
||||
let (_, has_error) = parse_options("-c foo=bar -c -c");
|
||||
assert!(has_error);
|
||||
|
||||
let (_, has_error) = parse_options(" ");
|
||||
assert!(!has_error);
|
||||
|
||||
let (_, has_error) = parse_options(" -c neon.compute_mode");
|
||||
assert!(has_error);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
use std::collections::{BTreeMap, HashMap, HashSet, hash_map};
|
||||
use std::ops::{ControlFlow, Range};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use anyhow::{Context, ensure};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
@@ -32,7 +31,7 @@ use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::IntoEnumIterator;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, info_span, trace, warn};
|
||||
use tracing::{debug, info, trace, warn};
|
||||
use utils::bin_ser::{BeSer, DeserializeError};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::pausable_failpoint;
|
||||
@@ -40,7 +39,7 @@ use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
|
||||
|
||||
use super::tenant::{PageReconstructError, Timeline};
|
||||
use crate::aux_file;
|
||||
use crate::context::{PerfInstrumentFutureExt, RequestContext, RequestContextBuilder};
|
||||
use crate::context::RequestContext;
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::metrics::{
|
||||
RELSIZE_CACHE_ENTRIES, RELSIZE_CACHE_HITS, RELSIZE_CACHE_MISSES, RELSIZE_CACHE_MISSES_OLD,
|
||||
@@ -210,9 +209,7 @@ impl Timeline {
|
||||
let pages: smallvec::SmallVec<[_; 1]> = smallvec::smallvec![(tag, blknum)];
|
||||
let res = self
|
||||
.get_rel_page_at_lsn_batched(
|
||||
pages
|
||||
.iter()
|
||||
.map(|(tag, blknum)| (tag, blknum, ctx.attached_child())),
|
||||
pages.iter().map(|(tag, blknum)| (tag, blknum)),
|
||||
effective_lsn,
|
||||
io_concurrency.clone(),
|
||||
ctx,
|
||||
@@ -251,7 +248,7 @@ impl Timeline {
|
||||
/// The ordering of the returned vec corresponds to the ordering of `pages`.
|
||||
pub(crate) async fn get_rel_page_at_lsn_batched(
|
||||
&self,
|
||||
pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, RequestContext)>,
|
||||
pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber)>,
|
||||
effective_lsn: Lsn,
|
||||
io_concurrency: IoConcurrency,
|
||||
ctx: &RequestContext,
|
||||
@@ -265,11 +262,8 @@ impl Timeline {
|
||||
let mut result = Vec::with_capacity(pages.len());
|
||||
let result_slots = result.spare_capacity_mut();
|
||||
|
||||
let mut keys_slots: BTreeMap<Key, smallvec::SmallVec<[(usize, RequestContext); 1]>> =
|
||||
BTreeMap::default();
|
||||
|
||||
let mut perf_instrument = false;
|
||||
for (response_slot_idx, (tag, blknum, ctx)) in pages.enumerate() {
|
||||
let mut keys_slots: BTreeMap<Key, smallvec::SmallVec<[usize; 1]>> = BTreeMap::default();
|
||||
for (response_slot_idx, (tag, blknum)) in pages.enumerate() {
|
||||
if tag.relnode == 0 {
|
||||
result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
@@ -280,16 +274,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
let nblocks = match self
|
||||
.get_rel_size(*tag, Version::Lsn(effective_lsn), &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"GET_REL_SIZE",
|
||||
reltag=%tag,
|
||||
lsn=%effective_lsn,
|
||||
)
|
||||
})
|
||||
.get_rel_size(*tag, Version::Lsn(effective_lsn), ctx)
|
||||
.await
|
||||
{
|
||||
Ok(nblocks) => nblocks,
|
||||
@@ -312,12 +297,8 @@ impl Timeline {
|
||||
|
||||
let key = rel_block_to_key(*tag, *blknum);
|
||||
|
||||
if ctx.has_perf_span() {
|
||||
perf_instrument = true;
|
||||
}
|
||||
|
||||
let key_slots = keys_slots.entry(key).or_default();
|
||||
key_slots.push((response_slot_idx, ctx));
|
||||
key_slots.push(response_slot_idx);
|
||||
}
|
||||
|
||||
let keyspace = {
|
||||
@@ -333,34 +314,16 @@ impl Timeline {
|
||||
acc.to_keyspace()
|
||||
};
|
||||
|
||||
let ctx = match perf_instrument {
|
||||
true => RequestContextBuilder::from(ctx)
|
||||
.root_perf_span(|| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
"GET_VECTORED",
|
||||
tenant_id = %self.tenant_shard_id.tenant_id,
|
||||
timeline_id = %self.timeline_id,
|
||||
lsn = %effective_lsn,
|
||||
shard = %self.tenant_shard_id.shard_slug(),
|
||||
)
|
||||
})
|
||||
.attached_child(),
|
||||
false => ctx.attached_child(),
|
||||
};
|
||||
|
||||
let res = self
|
||||
.get_vectored(keyspace, effective_lsn, io_concurrency, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |current_perf_span| current_perf_span.clone())
|
||||
.await;
|
||||
|
||||
match res {
|
||||
match self
|
||||
.get_vectored(keyspace, effective_lsn, io_concurrency, ctx)
|
||||
.await
|
||||
{
|
||||
Ok(results) => {
|
||||
for (key, res) in results {
|
||||
let mut key_slots = keys_slots.remove(&key).unwrap().into_iter();
|
||||
let (first_slot, first_req_ctx) = key_slots.next().unwrap();
|
||||
let first_slot = key_slots.next().unwrap();
|
||||
|
||||
for (slot, req_ctx) in key_slots {
|
||||
for slot in key_slots {
|
||||
let clone = match &res {
|
||||
Ok(buf) => Ok(buf.clone()),
|
||||
Err(err) => Err(match err {
|
||||
@@ -378,22 +341,17 @@ impl Timeline {
|
||||
};
|
||||
|
||||
result_slots[slot].write(clone);
|
||||
// There is no standardized way to express that the batched span followed from N request spans.
|
||||
// So, abuse the system and mark the request contexts as follows_from the batch span, so we get
|
||||
// some linkage in our trace viewer. It allows us to answer: which GET_VECTORED did this GET_PAGE wait for.
|
||||
req_ctx.perf_follows_from(&ctx);
|
||||
slots_filled += 1;
|
||||
}
|
||||
|
||||
result_slots[first_slot].write(res);
|
||||
first_req_ctx.perf_follows_from(&ctx);
|
||||
slots_filled += 1;
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
// this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size
|
||||
// (We enforce the max batch size outside of this function, in the code that constructs the batch request.)
|
||||
for (slot, req_ctx) in keys_slots.values().flatten() {
|
||||
for slot in keys_slots.values().flatten() {
|
||||
// this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`
|
||||
// but without taking ownership of the GetVectoredError
|
||||
let err = match &err {
|
||||
@@ -425,7 +383,6 @@ impl Timeline {
|
||||
}
|
||||
};
|
||||
|
||||
req_ctx.perf_follows_from(&ctx);
|
||||
result_slots[*slot].write(err);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,6 @@ use std::panic::AssertUnwindSafe;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::FutureExt;
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -219,7 +218,8 @@ pageserver_runtime!(MGMT_REQUEST_RUNTIME, "mgmt request worker");
|
||||
pageserver_runtime!(WALRECEIVER_RUNTIME, "walreceiver worker");
|
||||
pageserver_runtime!(BACKGROUND_RUNTIME, "background op worker");
|
||||
// Bump this number when adding a new pageserver_runtime!
|
||||
const NUM_MULTIPLE_RUNTIMES: NonZeroUsize = NonZeroUsize::new(4).unwrap();
|
||||
// SAFETY: it's obviously correct
|
||||
const NUM_MULTIPLE_RUNTIMES: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4) };
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PageserverTaskId(u64);
|
||||
@@ -584,25 +584,18 @@ pub async fn shutdown_tasks(
|
||||
// warn to catch these in tests; there shouldn't be any
|
||||
warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
||||
}
|
||||
const INITIAL_COMPLAIN_TIMEOUT: Duration = Duration::from_secs(1);
|
||||
const PERIODIC_COMPLAIN_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
if tokio::time::timeout(INITIAL_COMPLAIN_TIMEOUT, &mut join_handle)
|
||||
if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
// allow some time to elapse before logging to cut down the number of log
|
||||
// lines.
|
||||
info!("waiting for task {} to shut down", task.name);
|
||||
loop {
|
||||
tokio::select! {
|
||||
// we never handled this return value, but:
|
||||
// - we don't deschedule which would lead to is_cancelled
|
||||
// - panics are already logged (is_panicked)
|
||||
// - task errors are already logged in the wrapper
|
||||
_ = &mut join_handle => break,
|
||||
_ = tokio::time::sleep(PERIODIC_COMPLAIN_TIMEOUT) => info!("still waiting for task {} to shut down", task.name),
|
||||
}
|
||||
}
|
||||
// we never handled this return value, but:
|
||||
// - we don't deschedule which would lead to is_cancelled
|
||||
// - panics are already logged (is_panicked)
|
||||
// - task errors are already logged in the wrapper
|
||||
let _ = join_handle.await;
|
||||
info!("task {} completed", task.name);
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -3080,7 +3080,6 @@ impl Tenant {
|
||||
let mut has_pending_l0 = false;
|
||||
for timeline in compact_l0 {
|
||||
let ctx = &ctx.with_scope_timeline(&timeline);
|
||||
// NB: don't set CompactFlags::YieldForL0, since this is an L0-only compaction pass.
|
||||
let outcome = timeline
|
||||
.compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx)
|
||||
.instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
|
||||
@@ -3098,9 +3097,14 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2: image compaction and timeline offloading. If any timelines have accumulated more
|
||||
// L0 layers, they may also be compacted here. Image compaction will yield if there is
|
||||
// pending L0 compaction on any tenant timeline.
|
||||
// Pass 2: image compaction and timeline offloading. If any timelines have accumulated
|
||||
// more L0 layers, they may also be compacted here.
|
||||
//
|
||||
// NB: image compaction may yield if there is pending L0 compaction.
|
||||
//
|
||||
// TODO: it will only yield if there is pending L0 compaction on the same timeline. If a
|
||||
// different timeline needs compaction, it won't. It should check `l0_compaction_trigger`.
|
||||
// We leave this for a later PR.
|
||||
//
|
||||
// TODO: consider ordering timelines by some priority, e.g. time since last full compaction,
|
||||
// amount of L1 delta debt or garbage, offload-eligible timelines first, etc.
|
||||
@@ -3111,14 +3115,8 @@ impl Tenant {
|
||||
}
|
||||
let ctx = &ctx.with_scope_timeline(&timeline);
|
||||
|
||||
// Yield for L0 if the separate L0 pass is enabled (otherwise there's no point).
|
||||
let mut flags = EnumSet::default();
|
||||
if self.get_compaction_l0_first() {
|
||||
flags |= CompactFlags::YieldForL0;
|
||||
}
|
||||
|
||||
let mut outcome = timeline
|
||||
.compact(cancel, flags, ctx)
|
||||
.compact(cancel, EnumSet::default(), ctx)
|
||||
.instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id))
|
||||
.await
|
||||
.inspect_err(|err| self.maybe_trip_compaction_breaker(err))?;
|
||||
@@ -3248,23 +3246,17 @@ impl Tenant {
|
||||
async fn housekeeping(&self) {
|
||||
// Call through to all timelines to freeze ephemeral layers as needed. This usually happens
|
||||
// during ingest, but we don't want idle timelines to hold open layers for too long.
|
||||
//
|
||||
// We don't do this if the tenant can't upload layers (i.e. it's in stale attachment mode).
|
||||
// We don't run compaction in this case either, and don't want to keep flushing tiny L0
|
||||
// layers that won't be compacted down.
|
||||
if self.tenant_conf.load().location.may_upload_layers_hint() {
|
||||
let timelines = self
|
||||
.timelines
|
||||
.lock()
|
||||
.unwrap()
|
||||
.values()
|
||||
.filter(|tli| tli.is_active())
|
||||
.cloned()
|
||||
.collect_vec();
|
||||
let timelines = self
|
||||
.timelines
|
||||
.lock()
|
||||
.unwrap()
|
||||
.values()
|
||||
.filter(|tli| tli.is_active())
|
||||
.cloned()
|
||||
.collect_vec();
|
||||
|
||||
for timeline in timelines {
|
||||
timeline.maybe_freeze_ephemeral_layer().await;
|
||||
}
|
||||
for timeline in timelines {
|
||||
timeline.maybe_freeze_ephemeral_layer().await;
|
||||
}
|
||||
|
||||
// Shut down walredo if idle.
|
||||
@@ -3689,7 +3681,7 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
}
|
||||
TenantState::Active => {
|
||||
TenantState::Active { .. } => {
|
||||
return Ok(());
|
||||
}
|
||||
TenantState::Broken { reason, .. } => {
|
||||
@@ -4205,9 +4197,9 @@ impl Tenant {
|
||||
self.cancel.child_token(),
|
||||
);
|
||||
|
||||
let timeline_ctx = RequestContextBuilder::from(ctx)
|
||||
let timeline_ctx = RequestContextBuilder::extend(ctx)
|
||||
.scope(context::Scope::new_timeline(&timeline))
|
||||
.detached_child();
|
||||
.build();
|
||||
|
||||
Ok((timeline, timeline_ctx))
|
||||
}
|
||||
@@ -6524,7 +6516,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::default(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6541,7 +6537,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::default(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6558,7 +6558,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::default(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6575,7 +6579,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::default(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
@@ -6658,7 +6666,9 @@ mod tests {
|
||||
timeline.freeze_and_flush().await?;
|
||||
if compact {
|
||||
// this requires timeline to be &Arc<Timeline>
|
||||
timeline.compact(&cancel, EnumSet::default(), ctx).await?;
|
||||
timeline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), ctx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// this doesn't really need to use the timeline_id target, but it is closer to what it
|
||||
@@ -6985,6 +6995,7 @@ mod tests {
|
||||
child_timeline.freeze_and_flush().await?;
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
child_timeline
|
||||
.compact(&CancellationToken::new(), flags, &ctx)
|
||||
.await?;
|
||||
@@ -7363,7 +7374,9 @@ mod tests {
|
||||
|
||||
// Perform a cycle of flush, compact, and GC
|
||||
tline.freeze_and_flush().await?;
|
||||
tline.compact(&cancel, EnumSet::default(), &ctx).await?;
|
||||
tline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
|
||||
.await?;
|
||||
tenant
|
||||
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
|
||||
.await?;
|
||||
@@ -7692,6 +7705,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
} else {
|
||||
EnumSet::empty()
|
||||
@@ -7742,7 +7756,9 @@ mod tests {
|
||||
let before_num_l0_delta_files =
|
||||
tline.layers.read().await.layer_map()?.level0_deltas().len();
|
||||
|
||||
tline.compact(&cancel, EnumSet::default(), &ctx).await?;
|
||||
tline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
|
||||
.await?;
|
||||
|
||||
let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();
|
||||
|
||||
@@ -7907,6 +7923,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
@@ -8369,6 +8386,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
@@ -8436,6 +8454,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
@@ -11532,255 +11551,4 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_synthetic_size_calculation_with_invisible_branches() -> anyhow::Result<()> {
|
||||
use pageserver_api::models::TimelineVisibilityState;
|
||||
|
||||
use crate::tenant::size::gather_inputs;
|
||||
|
||||
let tenant_conf = pageserver_api::models::TenantConfig {
|
||||
// Ensure that we don't compute gc_cutoffs (which needs reading the layer files)
|
||||
pitr_interval: Some(Duration::ZERO),
|
||||
..Default::default()
|
||||
};
|
||||
let harness = TenantHarness::create_custom(
|
||||
"test_synthetic_size_calculation_with_invisible_branches",
|
||||
tenant_conf,
|
||||
TenantId::generate(),
|
||||
ShardIdentity::unsharded(),
|
||||
Generation::new(0xdeadbeef),
|
||||
)
|
||||
.await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let main_tline = tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x10),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x100),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let snapshot1 = TimelineId::from_array(hex!("11223344556677881122334455667790"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot1,
|
||||
Some(Lsn(0x20)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let snapshot2 = TimelineId::from_array(hex!("11223344556677881122334455667791"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot2,
|
||||
Some(Lsn(0x30)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let snapshot3 = TimelineId::from_array(hex!("11223344556677881122334455667792"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot3,
|
||||
Some(Lsn(0x40)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let limit = Arc::new(Semaphore::new(1));
|
||||
let max_retention_period = None;
|
||||
let mut logical_size_cache = HashMap::new();
|
||||
let cause = LogicalSizeCalculationCause::EvictionTaskImitation;
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let inputs = gather_inputs(
|
||||
&tenant,
|
||||
&limit,
|
||||
max_retention_period,
|
||||
&mut logical_size_cache,
|
||||
cause,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"gather_inputs",
|
||||
tenant_id = "unknown",
|
||||
shard_id = "unknown",
|
||||
))
|
||||
.await?;
|
||||
use crate::tenant::size::{LsnKind, ModelInputs, SegmentMeta};
|
||||
use LsnKind::*;
|
||||
use tenant_size_model::Segment;
|
||||
let ModelInputs { mut segments, .. } = inputs;
|
||||
segments.retain(|s| s.timeline_id == TIMELINE_ID);
|
||||
for segment in segments.iter_mut() {
|
||||
segment.segment.parent = None; // We don't care about the parent for the test
|
||||
segment.segment.size = None; // We don't care about the size for the test
|
||||
}
|
||||
assert_eq!(
|
||||
segments,
|
||||
[
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x10,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchStart,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x20,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x30,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x100,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: GcCutOff,
|
||||
}, // we need to retain everything above the last branch point
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x100,
|
||||
size: None,
|
||||
needed: true,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchEnd,
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
main_tline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_timeline_invisible_state(
|
||||
TimelineVisibilityState::Invisible,
|
||||
)?;
|
||||
main_tline.remote_client.wait_completion().await?;
|
||||
let inputs = gather_inputs(
|
||||
&tenant,
|
||||
&limit,
|
||||
max_retention_period,
|
||||
&mut logical_size_cache,
|
||||
cause,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"gather_inputs",
|
||||
tenant_id = "unknown",
|
||||
shard_id = "unknown",
|
||||
))
|
||||
.await?;
|
||||
let ModelInputs { mut segments, .. } = inputs;
|
||||
segments.retain(|s| s.timeline_id == TIMELINE_ID);
|
||||
for segment in segments.iter_mut() {
|
||||
segment.segment.parent = None; // We don't care about the parent for the test
|
||||
segment.segment.size = None; // We don't care about the size for the test
|
||||
}
|
||||
assert_eq!(
|
||||
segments,
|
||||
[
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x10,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchStart,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x20,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x30,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40, // Branch end LSN == last branch point LSN
|
||||
size: None,
|
||||
needed: true,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchEnd,
|
||||
},
|
||||
]
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ impl<Value: Clone> LayerCoverage<Value> {
|
||||
///
|
||||
/// Complexity: O(log N)
|
||||
fn add_node(&mut self, key: i128) {
|
||||
let value = match self.nodes.range(..=key).next_back() {
|
||||
let value = match self.nodes.range(..=key).last() {
|
||||
Some((_, Some(v))) => Some(v.clone()),
|
||||
Some((_, None)) => None,
|
||||
None => None,
|
||||
|
||||
@@ -58,7 +58,7 @@ use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
||||
|
||||
/// For a tenant that appears in TenantsMap, it may either be
|
||||
/// - `Attached`: has a full Tenant object, is elegible to service
|
||||
/// reads and ingest WAL.
|
||||
/// reads and ingest WAL.
|
||||
/// - `Secondary`: is only keeping a local cache warm.
|
||||
///
|
||||
/// Secondary is a totally distinct state rather than being a mode of a `Tenant`, because
|
||||
@@ -344,7 +344,7 @@ async fn init_load_generations(
|
||||
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
||||
);
|
||||
emergency_generations(tenant_confs)
|
||||
} else if let Some(client) = StorageControllerUpcallClient::new(conf, cancel)? {
|
||||
} else if let Some(client) = StorageControllerUpcallClient::new(conf, cancel) {
|
||||
info!("Calling {} API to re-attach tenants", client.base_url());
|
||||
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
||||
match client.re_attach(conf).await {
|
||||
|
||||
@@ -194,7 +194,7 @@ pub(crate) use download::{
|
||||
};
|
||||
use index::GcCompactionState;
|
||||
pub(crate) use index::LayerFileMetadata;
|
||||
use pageserver_api::models::{RelSizeMigration, TimelineArchivalState, TimelineVisibilityState};
|
||||
use pageserver_api::models::{RelSizeMigration, TimelineArchivalState};
|
||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
||||
use regex::Regex;
|
||||
use remote_storage::{
|
||||
@@ -573,16 +573,6 @@ impl RemoteTimelineClient {
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Returns true if the timeline is invisible in synthetic size calculations.
|
||||
pub(crate) fn is_invisible(&self) -> Option<bool> {
|
||||
self.upload_queue
|
||||
.lock()
|
||||
.unwrap()
|
||||
.initialized_mut()
|
||||
.map(|q| q.clean.0.marked_invisible_at.is_some())
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Returns `Ok(Some(timestamp))` if the timeline has been archived, `Ok(None)` if the timeline hasn't been archived.
|
||||
///
|
||||
/// Return Err(_) if the remote index_part hasn't been downloaded yet, or the timeline hasn't been stopped yet.
|
||||
@@ -855,37 +845,6 @@ impl RemoteTimelineClient {
|
||||
Ok(need_wait)
|
||||
}
|
||||
|
||||
pub(crate) fn schedule_index_upload_for_timeline_invisible_state(
|
||||
self: &Arc<Self>,
|
||||
state: TimelineVisibilityState,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
fn need_change(
|
||||
marked_invisible_at: &Option<NaiveDateTime>,
|
||||
state: TimelineVisibilityState,
|
||||
) -> Option<bool> {
|
||||
match (marked_invisible_at, state) {
|
||||
(Some(_), TimelineVisibilityState::Invisible) => Some(false),
|
||||
(None, TimelineVisibilityState::Invisible) => Some(true),
|
||||
(Some(_), TimelineVisibilityState::Visible) => Some(false),
|
||||
(None, TimelineVisibilityState::Visible) => Some(true),
|
||||
}
|
||||
}
|
||||
|
||||
let need_upload_scheduled = need_change(&upload_queue.dirty.marked_invisible_at, state);
|
||||
|
||||
if let Some(marked_invisible_at_set) = need_upload_scheduled {
|
||||
let intended_marked_invisible_at =
|
||||
marked_invisible_at_set.then(|| Utc::now().naive_utc());
|
||||
upload_queue.dirty.marked_invisible_at = intended_marked_invisible_at;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Shuts the timeline client down, but only if the timeline is archived.
|
||||
///
|
||||
/// This function and [`Self::schedule_index_upload_for_timeline_archival_state`] use the
|
||||
@@ -1968,7 +1927,9 @@ impl RemoteTimelineClient {
|
||||
/// Pick next tasks from the queue, and start as many of them as possible without violating
|
||||
/// the ordering constraints.
|
||||
///
|
||||
/// The number of inprogress tasks is limited by `Self::inprogress_tasks`, see `next_ready`.
|
||||
/// TODO: consider limiting the number of in-progress tasks, beyond what remote_storage does.
|
||||
/// This can launch an unbounded number of queued tasks. `UploadQueue::next_ready()` also has
|
||||
/// worst-case quadratic cost in the number of tasks, and may struggle beyond 10,000 tasks.
|
||||
fn launch_queued_tasks(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {
|
||||
while let Some((mut next_op, coalesced_ops)) = upload_queue.next_ready() {
|
||||
debug!("starting op: {next_op}");
|
||||
@@ -2216,11 +2177,6 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
res
|
||||
}
|
||||
// TODO: this should wait for the deletion to be executed by the deletion queue.
|
||||
// Otherwise, the deletion may race with an upload and wrongfully delete a newer
|
||||
// file. Some of the above logic attempts to work around this, it should be replaced
|
||||
// by the upload queue ordering guarantees (see `can_bypass`). See:
|
||||
// <https://github.com/neondatabase/neon/issues/10283>.
|
||||
UploadOp::Delete(delete) => {
|
||||
if self.config.read().unwrap().block_deletions {
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
|
||||
@@ -110,10 +110,6 @@ pub struct IndexPart {
|
||||
/// just the specific use case here; it needs a new name.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) gc_compaction: Option<GcCompactionState>,
|
||||
|
||||
/// The timestamp when the timeline was marked invisible in synthetic size calculations.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) marked_invisible_at: Option<NaiveDateTime>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
@@ -130,7 +126,7 @@ impl IndexPart {
|
||||
/// Version history
|
||||
/// - 2: added `deleted_at`
|
||||
/// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers
|
||||
/// is always generated from the keys of `layer_metadata`)
|
||||
/// is always generated from the keys of `layer_metadata`)
|
||||
/// - 4: timeline_layers is fully removed.
|
||||
/// - 5: lineage was added
|
||||
/// - 6: last_aux_file_policy is added.
|
||||
@@ -141,11 +137,10 @@ impl IndexPart {
|
||||
/// - 11: +rel_size_migration
|
||||
/// - 12: +l2_lsn
|
||||
/// - 13: +gc_compaction
|
||||
/// - 14: +marked_invisible_at
|
||||
const LATEST_VERSION: usize = 14;
|
||||
const LATEST_VERSION: usize = 13;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
@@ -164,7 +159,6 @@ impl IndexPart {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -474,7 +468,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -523,7 +516,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -573,7 +565,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -626,7 +617,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -674,7 +664,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -725,7 +714,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -781,7 +769,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -842,7 +829,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -904,7 +890,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -971,7 +956,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1051,7 +1035,6 @@ mod tests {
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1132,7 +1115,6 @@ mod tests {
|
||||
rel_size_migration: Some(RelSizeMigration::Legacy),
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1142,7 +1124,7 @@ mod tests {
|
||||
#[test]
|
||||
fn v12_v13_l2_gc_ompaction_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 13,
|
||||
"version": 12,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
@@ -1178,7 +1160,7 @@ mod tests {
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 13,
|
||||
version: 12,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
@@ -1219,95 +1201,6 @@ mod tests {
|
||||
gc_compaction: Some(GcCompactionState {
|
||||
last_completed_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
}),
|
||||
marked_invisible_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v14_marked_invisible_at_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 14,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rel_size_migration": "legacy",
|
||||
"l2_lsn": "0/16960E8",
|
||||
"gc_compaction": {
|
||||
"last_completed_lsn": "0/16960E8"
|
||||
},
|
||||
"marked_invisible_at": "2023-07-31T09:00:00.123"
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 14,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
}))),
|
||||
rel_size_migration: Some(RelSizeMigration::Legacy),
|
||||
l2_lsn: Some("0/16960E8".parse::<Lsn>().unwrap()),
|
||||
gc_compaction: Some(GcCompactionState {
|
||||
last_completed_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
}),
|
||||
marked_invisible_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
|
||||
@@ -33,7 +33,7 @@ pub struct ModelInputs {
|
||||
}
|
||||
|
||||
/// A [`Segment`], with some extra information for display purposes
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct SegmentMeta {
|
||||
pub segment: Segment,
|
||||
pub timeline_id: TimelineId,
|
||||
@@ -248,8 +248,6 @@ pub(super) async fn gather_inputs(
|
||||
None
|
||||
};
|
||||
|
||||
let branch_is_invisible = timeline.is_invisible() == Some(true);
|
||||
|
||||
let lease_points = gc_info
|
||||
.leases
|
||||
.keys()
|
||||
@@ -273,10 +271,7 @@ pub(super) async fn gather_inputs(
|
||||
.map(|(lsn, _child_id, _is_offloaded)| (lsn, LsnKind::BranchPoint))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !branch_is_invisible {
|
||||
// Do not count lease points for invisible branches.
|
||||
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
|
||||
}
|
||||
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
|
||||
|
||||
drop(gc_info);
|
||||
|
||||
@@ -292,9 +287,7 @@ pub(super) async fn gather_inputs(
|
||||
|
||||
// Add a point for the PITR cutoff
|
||||
let branch_start_needed = next_pitr_cutoff <= branch_start_lsn;
|
||||
if !branch_start_needed && !branch_is_invisible {
|
||||
// Only add the GcCutOff point when the timeline is visible; otherwise, do not compute the size for the LSN
|
||||
// range from the last branch point to the latest data.
|
||||
if !branch_start_needed {
|
||||
lsns.push((next_pitr_cutoff, LsnKind::GcCutOff));
|
||||
}
|
||||
|
||||
@@ -380,19 +373,11 @@ pub(super) async fn gather_inputs(
|
||||
}
|
||||
}
|
||||
|
||||
let branch_end_lsn = if branch_is_invisible {
|
||||
// If the branch is invisible, the branch end is the last requested LSN (likely a branch cutoff point).
|
||||
segments.last().unwrap().segment.lsn
|
||||
} else {
|
||||
// Otherwise, the branch end is the last record LSN.
|
||||
last_record_lsn.0
|
||||
};
|
||||
|
||||
// Current end of the timeline
|
||||
segments.push(SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: Some(parent),
|
||||
lsn: branch_end_lsn,
|
||||
lsn: last_record_lsn.0,
|
||||
size: None, // Filled in later, if necessary
|
||||
needed: true,
|
||||
},
|
||||
@@ -624,7 +609,6 @@ async fn calculate_logical_size(
|
||||
Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[test]
|
||||
fn verify_size_for_multiple_branches() {
|
||||
// this is generated from integration test test_tenant_size_with_multiple_branches, but this way
|
||||
@@ -782,7 +766,6 @@ fn verify_size_for_multiple_branches() {
|
||||
assert_eq!(inputs.calculate(), 37_851_408);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[test]
|
||||
fn verify_size_for_one_branch() {
|
||||
let doc = r#"
|
||||
|
||||
@@ -13,13 +13,13 @@ pub mod merge_iterator;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::future::Future;
|
||||
use std::ops::Range;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter};
|
||||
use bytes::Bytes;
|
||||
pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
|
||||
@@ -34,7 +34,7 @@ use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::value::Value;
|
||||
use tracing::{Instrument, info_span, trace};
|
||||
use tracing::{Instrument, trace};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::sync::gate::GateGuard;
|
||||
|
||||
@@ -43,9 +43,7 @@ use super::PageReconstructError;
|
||||
use super::layer_map::InMemoryLayerDesc;
|
||||
use super::timeline::{GetVectoredError, ReadPath};
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{
|
||||
AccessStatsBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
|
||||
};
|
||||
use crate::context::{AccessStatsBehavior, RequestContext};
|
||||
|
||||
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
|
||||
where
|
||||
@@ -876,37 +874,13 @@ impl ReadableLayer {
|
||||
) -> Result<(), GetVectoredError> {
|
||||
match self {
|
||||
ReadableLayer::PersistentLayer(layer) => {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"PLAN_LAYER",
|
||||
layer = %layer
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
layer
|
||||
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
|
||||
.await
|
||||
}
|
||||
ReadableLayer::InMemoryLayer(layer) => {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"PLAN_LAYER",
|
||||
layer = %layer
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
layer
|
||||
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -896,9 +896,9 @@ impl DeltaLayerInner {
|
||||
where
|
||||
Reader: BlockReader + Clone,
|
||||
{
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(PageContentKind::DeltaLayerBtreeNode)
|
||||
.attached_child();
|
||||
.build();
|
||||
|
||||
for range in keyspace.ranges.iter() {
|
||||
let mut range_end_handled = false;
|
||||
@@ -1105,9 +1105,9 @@ impl DeltaLayerInner {
|
||||
all_keys.push(entry);
|
||||
true
|
||||
},
|
||||
&RequestContextBuilder::from(ctx)
|
||||
&RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(PageContentKind::DeltaLayerBtreeNode)
|
||||
.attached_child(),
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
if let Some(last) = all_keys.last_mut() {
|
||||
|
||||
@@ -481,9 +481,9 @@ impl ImageLayerInner {
|
||||
let tree_reader =
|
||||
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);
|
||||
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(PageContentKind::ImageLayerBtreeNode)
|
||||
.attached_child();
|
||||
.build();
|
||||
|
||||
for range in keyspace.ranges.iter() {
|
||||
let mut range_end_handled = false;
|
||||
|
||||
@@ -421,9 +421,9 @@ impl InMemoryLayer {
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(PageContentKind::InMemoryLayer)
|
||||
.attached_child();
|
||||
.build();
|
||||
|
||||
let inner = self.inner.read().await;
|
||||
|
||||
|
||||
@@ -3,13 +3,12 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Weak};
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use anyhow::Context;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::HistoricLayerInfo;
|
||||
use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId};
|
||||
use tracing::{Instrument, info_span};
|
||||
use tracing::Instrument;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -19,7 +18,7 @@ use super::delta_layer::{self};
|
||||
use super::image_layer::{self};
|
||||
use super::{
|
||||
AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,
|
||||
LayerVisibilityHint, PerfInstrumentFutureExt, PersistentLayerDesc, ValuesReconstructState,
|
||||
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState,
|
||||
};
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
|
||||
@@ -325,29 +324,16 @@ impl Layer {
|
||||
reconstruct_data: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
let downloaded = {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"GET_LAYER",
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
let downloaded =
|
||||
self.0
|
||||
.get_or_maybe_download(true, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_context| crnt_perf_context.clone())
|
||||
.get_or_maybe_download(true, ctx)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
DownloadError::TimelineShutdown | DownloadError::DownloadCancelled => {
|
||||
GetVectoredError::Cancelled
|
||||
}
|
||||
other => GetVectoredError::Other(anyhow::anyhow!(other)),
|
||||
})?
|
||||
};
|
||||
|
||||
})?;
|
||||
let this = ResidentLayer {
|
||||
downloaded: downloaded.clone(),
|
||||
owner: self.clone(),
|
||||
@@ -355,20 +341,9 @@ impl Layer {
|
||||
|
||||
self.record_access(ctx);
|
||||
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"VISIT_LAYER",
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
downloaded
|
||||
.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, &ctx)
|
||||
.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, ctx)
|
||||
.instrument(tracing::debug_span!("get_values_reconstruct_data", layer=%self))
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
GetVectoredError::Other(err) => GetVectoredError::Other(
|
||||
@@ -1070,34 +1045,15 @@ impl LayerInner {
|
||||
return Err(DownloadError::DownloadRequired);
|
||||
}
|
||||
|
||||
let ctx = if ctx.has_perf_span() {
|
||||
let dl_ctx = RequestContextBuilder::from(ctx)
|
||||
.task_kind(TaskKind::LayerDownload)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.root_perf_span(|| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
"DOWNLOAD_LAYER",
|
||||
layer = %self,
|
||||
reason = %reason
|
||||
)
|
||||
})
|
||||
.detached_child();
|
||||
ctx.perf_follows_from(&dl_ctx);
|
||||
dl_ctx
|
||||
} else {
|
||||
ctx.attached_child()
|
||||
};
|
||||
let download_ctx = ctx.detached_child(TaskKind::LayerDownload, DownloadBehavior::Download);
|
||||
|
||||
async move {
|
||||
tracing::info!(%reason, "downloading on-demand");
|
||||
|
||||
let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
|
||||
let res = self
|
||||
.download_init_and_wait(timeline, permit, ctx.attached_child())
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.download_init_and_wait(timeline, permit, download_ctx)
|
||||
.await?;
|
||||
|
||||
scopeguard::ScopeGuard::into_inner(init_cancelled);
|
||||
Ok(res)
|
||||
}
|
||||
@@ -1764,9 +1720,9 @@ impl DownloadedLayer {
|
||||
);
|
||||
|
||||
let res = if owner.desc.is_delta {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(crate::context::PageContentKind::DeltaLayerSummary)
|
||||
.attached_child();
|
||||
.build();
|
||||
let summary = Some(delta_layer::Summary::expected(
|
||||
owner.desc.tenant_shard_id.tenant_id,
|
||||
owner.desc.timeline_id,
|
||||
@@ -1782,9 +1738,9 @@ impl DownloadedLayer {
|
||||
.await
|
||||
.map(LayerKind::Delta)
|
||||
} else {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(crate::context::PageContentKind::ImageLayerSummary)
|
||||
.attached_child();
|
||||
.build();
|
||||
let lsn = owner.desc.image_layer_lsn();
|
||||
let summary = Some(image_layer::Summary::expected(
|
||||
owner.desc.tenant_shard_id.tenant_id,
|
||||
|
||||
@@ -119,10 +119,6 @@ async fn smoke_test() {
|
||||
let e = layer.evict_and_wait(FOREVER).await.unwrap_err();
|
||||
assert!(matches!(e, EvictionError::NotFound));
|
||||
|
||||
let dl_ctx = RequestContextBuilder::from(ctx)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.attached_child();
|
||||
|
||||
// on accesses when the layer is evicted, it will automatically be downloaded.
|
||||
let img_after = {
|
||||
let mut data = ValuesReconstructState::new(io_concurrency.clone());
|
||||
@@ -131,7 +127,7 @@ async fn smoke_test() {
|
||||
controlfile_keyspace.clone(),
|
||||
Lsn(0x10)..Lsn(0x11),
|
||||
&mut data,
|
||||
&dl_ctx,
|
||||
ctx,
|
||||
)
|
||||
.instrument(download_span.clone())
|
||||
.await
|
||||
@@ -181,7 +177,7 @@ async fn smoke_test() {
|
||||
|
||||
// plain downloading is rarely needed
|
||||
layer
|
||||
.download_and_keep_resident(&dl_ctx)
|
||||
.download_and_keep_resident(ctx)
|
||||
.instrument(download_span)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -649,10 +645,9 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {
|
||||
let ctx = ctx.with_scope_timeline(&timeline);
|
||||
|
||||
// This test does downloads
|
||||
let ctx = RequestContextBuilder::from(&ctx)
|
||||
let ctx = RequestContextBuilder::extend(&ctx)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.attached_child();
|
||||
|
||||
.build();
|
||||
let layer = {
|
||||
let mut layers = {
|
||||
let layers = timeline.layers.read().await;
|
||||
@@ -735,9 +730,9 @@ async fn evict_and_wait_does_not_wait_for_download() {
|
||||
let ctx = ctx.with_scope_timeline(&timeline);
|
||||
|
||||
// This test does downloads
|
||||
let ctx = RequestContextBuilder::from(&ctx)
|
||||
let ctx = RequestContextBuilder::extend(&ctx)
|
||||
.download_behavior(DownloadBehavior::Download)
|
||||
.attached_child();
|
||||
.build();
|
||||
|
||||
let layer = {
|
||||
let mut layers = {
|
||||
|
||||
@@ -23,7 +23,6 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering};
|
||||
use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use anyhow::{Context, Result, anyhow, bail, ensure};
|
||||
use arc_swap::{ArcSwap, ArcSwapOption};
|
||||
use bytes::Bytes;
|
||||
@@ -85,8 +84,8 @@ use self::eviction_task::EvictionTaskTimelineState;
|
||||
use self::layer_manager::LayerManager;
|
||||
use self::logical_size::LogicalSize;
|
||||
use self::walreceiver::{WalReceiver, WalReceiverConf};
|
||||
use super::remote_timeline_client::RemoteTimelineClient;
|
||||
use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
|
||||
use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
|
||||
use super::secondary::heatmap::HeatMapLayer;
|
||||
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
||||
use super::tasks::log_compaction_error;
|
||||
@@ -97,9 +96,7 @@ use super::{
|
||||
};
|
||||
use crate::aux_file::AuxFileSizeEstimator;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{
|
||||
DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
|
||||
};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};
|
||||
use crate::keyspace::{KeyPartitioning, KeySpace};
|
||||
use crate::l0_flush::{self, L0FlushGlobalState};
|
||||
@@ -873,14 +870,9 @@ pub(crate) enum CompactFlags {
|
||||
OnlyL0Compaction,
|
||||
EnhancedGcBottomMostCompaction,
|
||||
DryRun,
|
||||
/// Makes image compaction yield if there's pending L0 compaction. This should always be used in
|
||||
/// the background compaction task, since we want to aggressively compact down L0 to bound
|
||||
/// read amplification.
|
||||
///
|
||||
/// It only makes sense to use this when `compaction_l0_first` is enabled (such that we yield to
|
||||
/// an L0 compaction pass), and without `OnlyL0Compaction` (L0 compaction shouldn't yield for L0
|
||||
/// compaction).
|
||||
YieldForL0,
|
||||
/// Disables compaction yielding e.g. due to high L0 count. This is set e.g. when requesting
|
||||
/// compaction via HTTP API.
|
||||
NoYield,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
@@ -898,12 +890,6 @@ pub(crate) struct CompactRequest {
|
||||
pub sub_compaction_max_job_size_mb: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Deserialize)]
|
||||
pub(crate) struct MarkInvisibleRequest {
|
||||
#[serde(default)]
|
||||
pub is_visible: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct CompactOptions {
|
||||
pub flags: EnumSet<CompactFlags>,
|
||||
@@ -1292,22 +1278,9 @@ impl Timeline {
|
||||
};
|
||||
reconstruct_state.read_path = read_path;
|
||||
|
||||
let traversal_res: Result<(), _> = {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"PLAN_IO",
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
self.get_vectored_reconstruct_data(keyspace.clone(), lsn, reconstruct_state, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.await
|
||||
};
|
||||
|
||||
let traversal_res: Result<(), _> = self
|
||||
.get_vectored_reconstruct_data(keyspace.clone(), lsn, reconstruct_state, ctx)
|
||||
.await;
|
||||
if let Err(err) = traversal_res {
|
||||
// Wait for all the spawned IOs to complete.
|
||||
// See comments on `spawn_io` inside `storage_layer` for more details.
|
||||
@@ -1321,46 +1294,14 @@ impl Timeline {
|
||||
|
||||
let layers_visited = reconstruct_state.get_layers_visited();
|
||||
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"RECONSTRUCT",
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
let futs = FuturesUnordered::new();
|
||||
for (key, state) in std::mem::take(&mut reconstruct_state.keys) {
|
||||
futs.push({
|
||||
let walredo_self = self.myself.upgrade().expect("&self method holds the arc");
|
||||
let ctx = RequestContextBuilder::from(&ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"RECONSTRUCT_KEY",
|
||||
key = %key,
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
async move {
|
||||
assert_eq!(state.situation, ValueReconstructSituation::Complete);
|
||||
|
||||
let res = state
|
||||
.collect_pending_ios()
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"WAIT_FOR_IO_COMPLETIONS",
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
let converted = match res {
|
||||
let converted = match state.collect_pending_ios().await {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
return (key, Err(err));
|
||||
@@ -1377,27 +1318,16 @@ impl Timeline {
|
||||
"{converted:?}"
|
||||
);
|
||||
|
||||
let walredo_deltas = converted.num_deltas();
|
||||
let walredo_res = walredo_self
|
||||
.reconstruct_value(key, lsn, converted)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"WALREDO",
|
||||
deltas = %walredo_deltas,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
(key, walredo_res)
|
||||
(
|
||||
key,
|
||||
walredo_self.reconstruct_value(key, lsn, converted).await,
|
||||
)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let results = futs
|
||||
.collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.await;
|
||||
|
||||
// For aux file keys (v1 or v2) the vectored read path does not return an error
|
||||
@@ -1961,19 +1891,18 @@ impl Timeline {
|
||||
// out by other background tasks (including image compaction). We request this via
|
||||
// `BackgroundLoopKind::L0Compaction`.
|
||||
//
|
||||
// Yield for pending L0 compaction while waiting for the semaphore.
|
||||
// If this is a regular compaction pass, and L0-only compaction is enabled in the config,
|
||||
// then we should yield for immediate L0 compaction if necessary while we're waiting for the
|
||||
// background task semaphore. There's no point yielding otherwise, since we'd just end up
|
||||
// right back here.
|
||||
let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction);
|
||||
let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() {
|
||||
true => BackgroundLoopKind::L0Compaction,
|
||||
false => BackgroundLoopKind::Compaction,
|
||||
};
|
||||
let yield_for_l0 = options.flags.contains(CompactFlags::YieldForL0);
|
||||
if yield_for_l0 {
|
||||
// If this is an L0 pass, it doesn't make sense to yield for L0.
|
||||
debug_assert!(!is_l0_only, "YieldForL0 during L0 pass");
|
||||
// If `compaction_l0_first` is disabled, there's no point yielding.
|
||||
debug_assert!(self.get_compaction_l0_first(), "YieldForL0 without L0 pass");
|
||||
}
|
||||
let yield_for_l0 = !is_l0_only
|
||||
&& self.get_compaction_l0_first()
|
||||
&& !options.flags.contains(CompactFlags::NoYield);
|
||||
|
||||
let acquire = async move {
|
||||
let guard = self.compaction_lock.lock().await;
|
||||
@@ -2280,10 +2209,6 @@ impl Timeline {
|
||||
self.remote_client.is_archived()
|
||||
}
|
||||
|
||||
pub(crate) fn is_invisible(&self) -> Option<bool> {
|
||||
self.remote_client.is_invisible()
|
||||
}
|
||||
|
||||
pub(crate) fn is_stopping(&self) -> bool {
|
||||
self.current_state() == TimelineState::Stopping
|
||||
}
|
||||
@@ -2306,7 +2231,7 @@ impl Timeline {
|
||||
.await
|
||||
.expect("holding a reference to self");
|
||||
}
|
||||
TimelineState::Active => {
|
||||
TimelineState::Active { .. } => {
|
||||
return Ok(());
|
||||
}
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping => {
|
||||
@@ -2637,6 +2562,14 @@ impl Timeline {
|
||||
Some(max(l0_flush_stall_threshold, compaction_threshold))
|
||||
}
|
||||
|
||||
fn get_l0_flush_wait_upload(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.l0_flush_wait_upload
|
||||
.unwrap_or(self.conf.default_tenant_conf.l0_flush_wait_upload)
|
||||
}
|
||||
|
||||
fn get_image_creation_threshold(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -3934,30 +3867,15 @@ impl Timeline {
|
||||
let TimelineVisitOutcome {
|
||||
completed_keyspace: completed,
|
||||
image_covered_keyspace,
|
||||
} = {
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"PLAN_IO_TIMELINE",
|
||||
timeline = %timeline.timeline_id,
|
||||
lsn = %cont_lsn,
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
Self::get_vectored_reconstruct_data_timeline(
|
||||
timeline,
|
||||
keyspace.clone(),
|
||||
cont_lsn,
|
||||
reconstruct_state,
|
||||
&self.cancel,
|
||||
&ctx,
|
||||
)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.await?
|
||||
};
|
||||
} = Self::get_vectored_reconstruct_data_timeline(
|
||||
timeline,
|
||||
keyspace.clone(),
|
||||
cont_lsn,
|
||||
reconstruct_state,
|
||||
&self.cancel,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
keyspace.remove_overlapping_with(&completed);
|
||||
|
||||
@@ -4001,24 +3919,8 @@ impl Timeline {
|
||||
|
||||
// Take the min to avoid reconstructing a page with data newer than request Lsn.
|
||||
cont_lsn = std::cmp::min(Lsn(request_lsn.0 + 1), Lsn(timeline.ancestor_lsn.0 + 1));
|
||||
|
||||
let ctx = RequestContextBuilder::from(ctx)
|
||||
.perf_span(|crnt_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: crnt_perf_span,
|
||||
"GET_ANCESTOR",
|
||||
timeline = %timeline.timeline_id,
|
||||
lsn = %cont_lsn,
|
||||
ancestor = %ancestor_timeline.timeline_id,
|
||||
ancestor_lsn = %timeline.ancestor_lsn
|
||||
)
|
||||
})
|
||||
.attached_child();
|
||||
|
||||
timeline_owned = timeline
|
||||
.get_ready_ancestor_timeline(ancestor_timeline, &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
|
||||
.get_ready_ancestor_timeline(ancestor_timeline, ctx)
|
||||
.await?;
|
||||
timeline = &*timeline_owned;
|
||||
};
|
||||
@@ -4689,6 +4591,27 @@ impl Timeline {
|
||||
// release lock on 'layers'
|
||||
};
|
||||
|
||||
// Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files.
|
||||
// This makes us refuse ingest until the new layers have been persisted to the remote
|
||||
// TODO: remove this, and rely on l0_flush_{delay,stall}_threshold instead.
|
||||
if self.get_l0_flush_wait_upload() {
|
||||
let start = Instant::now();
|
||||
self.remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
WaitCompletionError::UploadQueueShutDownOrStopped
|
||||
| WaitCompletionError::NotInitialized(
|
||||
NotInitialized::ShuttingDown | NotInitialized::Stopped,
|
||||
) => FlushLayerError::Cancelled,
|
||||
WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => {
|
||||
FlushLayerError::Other(anyhow!(e).into())
|
||||
}
|
||||
})?;
|
||||
let duration = start.elapsed().as_secs_f64();
|
||||
self.metrics.flush_wait_upload_time_gauge_add(duration);
|
||||
}
|
||||
|
||||
// FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,
|
||||
// a compaction can delete the file and then it won't be available for uploads any more.
|
||||
// We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this
|
||||
@@ -7349,9 +7272,9 @@ mod tests {
|
||||
|
||||
eprintln!("Downloading {layer} and re-generating heatmap");
|
||||
|
||||
let ctx = &RequestContextBuilder::from(ctx)
|
||||
let ctx = &RequestContextBuilder::extend(ctx)
|
||||
.download_behavior(crate::context::DownloadBehavior::Download)
|
||||
.attached_child();
|
||||
.build();
|
||||
|
||||
let _resident = layer
|
||||
.download_and_keep_resident(ctx)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user