mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 21:10:38 +00:00
Compare commits
20 Commits
release-co
...
devin/1742
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8edf6d326c | ||
|
|
1cae803c28 | ||
|
|
4360e0d4c7 | ||
|
|
6af974548e | ||
|
|
9fb77d6cdd | ||
|
|
99639c26b4 | ||
|
|
86fe26c676 | ||
|
|
eb6efda98b | ||
|
|
fd41ab9bb6 | ||
|
|
2dfff6a2a3 | ||
|
|
2cf6ae76fc | ||
|
|
57d51e949d | ||
|
|
0d3d639ef3 | ||
|
|
05ca27c981 | ||
|
|
bb64beffbb | ||
|
|
24f41bee5c | ||
|
|
a05c99f487 | ||
|
|
486ffeef6d | ||
|
|
56149a046a | ||
|
|
db30e1669c |
4
.github/scripts/generate_image_maps.py
vendored
4
.github/scripts/generate_image_maps.py
vendored
@@ -49,10 +49,10 @@ target_stages = (
|
||||
for component_name, component_images in components.items():
|
||||
for stage in target_stages:
|
||||
outputs[f"{component_name}-{stage}"] = {
|
||||
f"docker.io/neondatabase/{component_image}:{source_tag}": [
|
||||
f"ghcr.io/neondatabase/{component_image}:{source_tag}": [
|
||||
f"{registry}/{component_image}:{tag}"
|
||||
for registry, tag in itertools.product(registries[stage], target_tags)
|
||||
if not (registry == "docker.io/neondatabase" and tag == source_tag)
|
||||
if not (registry == "ghcr.io/neondatabase" and tag == source_tag)
|
||||
]
|
||||
for component_image in component_images
|
||||
}
|
||||
|
||||
@@ -27,10 +27,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
12
.github/workflows/_build-and-test-locally.yml
vendored
12
.github/workflows/_build-and-test-locally.yml
vendored
@@ -39,15 +39,15 @@ env:
|
||||
|
||||
jobs:
|
||||
build-neon:
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
contents: read
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
# Raise locked memory limit for tokio-epoll-uring.
|
||||
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
|
||||
# io_uring will account the memory of the CQ and SQ as locked.
|
||||
@@ -318,12 +318,12 @@ jobs:
|
||||
contents: read
|
||||
statuses: write
|
||||
needs: [ build-neon ]
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
# for changed limits, see comments on `options:` earlier in this file
|
||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||
strategy:
|
||||
|
||||
@@ -15,11 +15,15 @@ defaults:
|
||||
jobs:
|
||||
check-codestyle-python:
|
||||
runs-on: [ self-hosted, small ]
|
||||
|
||||
permissions:
|
||||
packages: read
|
||||
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
11
.github/workflows/_check-codestyle-rust.yml
vendored
11
.github/workflows/_check-codestyle-rust.yml
vendored
@@ -23,14 +23,17 @@ jobs:
|
||||
check-codestyle-rust:
|
||||
strategy:
|
||||
matrix:
|
||||
arch: ${{ fromJson(inputs.archs) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
arch: ${{ fromJSON(inputs.archs) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
|
||||
permissions:
|
||||
packages: read
|
||||
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
2
.github/workflows/_meta.yml
vendored
2
.github/workflows/_meta.yml
vendored
@@ -120,7 +120,7 @@ jobs:
|
||||
|
||||
- name: Get the release PR run ID
|
||||
id: release-pr-run-id
|
||||
if: ${{ contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }}
|
||||
if: ${{ contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
@@ -89,7 +89,7 @@ jobs:
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
|
||||
84
.github/workflows/benchmarking.yml
vendored
84
.github/workflows/benchmarking.yml
vendored
@@ -87,10 +87,10 @@ jobs:
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -190,10 +190,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -245,10 +245,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -352,7 +352,7 @@ jobs:
|
||||
region_id_default=${{ env.DEFAULT_REGION_ID }}
|
||||
runner_default='["self-hosted", "us-east-2", "x64"]'
|
||||
runner_azure='["self-hosted", "eastus2", "x64"]'
|
||||
image_default="neondatabase/build-tools:pinned-bookworm"
|
||||
image_default="ghcr.io/neondatabase/build-tools:pinned-bookworm"
|
||||
matrix='{
|
||||
"pg_version" : [
|
||||
16
|
||||
@@ -368,18 +368,18 @@ jobs:
|
||||
"db_size": [ "10gb" ],
|
||||
"runner": ['"$runner_default"'],
|
||||
"image": [ "'"$image_default"'" ],
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "ghcr.io/neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "ghcr.io/neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "ghcr.io/neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
|
||||
}'
|
||||
|
||||
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
|
||||
@@ -441,7 +441,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{fromJson(needs.generate-matrices.outputs.pgbench-compare-matrix)}}
|
||||
matrix: ${{fromJSON(needs.generate-matrices.outputs.pgbench-compare-matrix)}}
|
||||
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
|
||||
@@ -457,8 +457,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ matrix.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
@@ -483,7 +483,7 @@ jobs:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
if: contains(fromJSON('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
@@ -523,7 +523,7 @@ jobs:
|
||||
# without (neonvm-captest-new)
|
||||
# and with (neonvm-captest-new-many-tables) many relations in the database
|
||||
- name: Create many relations before the run
|
||||
if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
|
||||
if: contains(fromJSON('["neonvm-captest-new-many-tables"]'), matrix.platform)
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
@@ -642,10 +642,10 @@ jobs:
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -753,7 +753,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
|
||||
matrix: ${{ fromJSON(needs.generate-matrices.outputs.olap-compare-matrix) }}
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
@@ -767,10 +767,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 12h, default timeout is 6h
|
||||
@@ -880,7 +880,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.generate-matrices.outputs.tpch-compare-matrix) }}
|
||||
matrix: ${{ fromJSON(needs.generate-matrices.outputs.tpch-compare-matrix) }}
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
@@ -892,10 +892,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -999,7 +999,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
|
||||
matrix: ${{ fromJSON(needs.generate-matrices.outputs.olap-compare-matrix) }}
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
@@ -1011,10 +1011,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
57
.github/workflows/build-build-tools-image.yml
vendored
57
.github/workflows/build-build-tools-image.yml
vendored
@@ -19,7 +19,7 @@ on:
|
||||
value: ${{ jobs.check-image.outputs.tag }}
|
||||
image:
|
||||
description: "build-tools image"
|
||||
value: neondatabase/build-tools:${{ jobs.check-image.outputs.tag }}
|
||||
value: ghcr.io/neondatabase/build-tools:${{ jobs.check-image.outputs.tag }}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -49,9 +49,18 @@ jobs:
|
||||
everything: ${{ steps.set-more-variables.outputs.everything }}
|
||||
found: ${{ steps.set-more-variables.outputs.found }}
|
||||
|
||||
permissions:
|
||||
packages: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set variables
|
||||
id: set-variables
|
||||
env:
|
||||
@@ -70,12 +79,12 @@ jobs:
|
||||
env:
|
||||
IMAGE_TAG: ${{ steps.set-variables.outputs.image-tag }}
|
||||
EVERYTHING: |
|
||||
${{ contains(fromJson(steps.set-variables.outputs.archs), 'x64') &&
|
||||
contains(fromJson(steps.set-variables.outputs.archs), 'arm64') &&
|
||||
contains(fromJson(steps.set-variables.outputs.debians), 'bullseye') &&
|
||||
contains(fromJson(steps.set-variables.outputs.debians), 'bookworm') }}
|
||||
${{ contains(fromJSON(steps.set-variables.outputs.archs), 'x64') &&
|
||||
contains(fromJSON(steps.set-variables.outputs.archs), 'arm64') &&
|
||||
contains(fromJSON(steps.set-variables.outputs.debians), 'bullseye') &&
|
||||
contains(fromJSON(steps.set-variables.outputs.debians), 'bookworm') }}
|
||||
run: |
|
||||
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
|
||||
if docker manifest inspect ghcr.io/neondatabase/build-tools:${IMAGE_TAG}; then
|
||||
found=true
|
||||
else
|
||||
found=false
|
||||
@@ -90,10 +99,13 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
arch: ${{ fromJson(needs.check-image.outputs.archs) }}
|
||||
debian: ${{ fromJson(needs.check-image.outputs.debians) }}
|
||||
arch: ${{ fromJSON(needs.check-image.outputs.archs) }}
|
||||
debian: ${{ fromJSON(needs.check-image.outputs.debians) }}
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -108,6 +120,12 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: cache.neon.build
|
||||
@@ -126,35 +144,44 @@ jobs:
|
||||
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian }}-${{ matrix.arch }}
|
||||
ghcr.io/neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian }}-${{ matrix.arch }}
|
||||
|
||||
merge-images:
|
||||
needs: [ check-image, build-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create multi-arch image
|
||||
env:
|
||||
DEFAULT_DEBIAN_VERSION: bookworm
|
||||
ARCHS: ${{ join(fromJson(needs.check-image.outputs.archs), ' ') }}
|
||||
DEBIANS: ${{ join(fromJson(needs.check-image.outputs.debians), ' ') }}
|
||||
ARCHS: ${{ join(fromJSON(needs.check-image.outputs.archs), ' ') }}
|
||||
DEBIANS: ${{ join(fromJSON(needs.check-image.outputs.debians), ' ') }}
|
||||
EVERYTHING: ${{ needs.check-image.outputs.everything }}
|
||||
IMAGE_TAG: ${{ needs.check-image.outputs.tag }}
|
||||
run: |
|
||||
for debian in ${DEBIANS}; do
|
||||
tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian}")
|
||||
tags=("-t" "ghcr.io/neondatabase/build-tools:${IMAGE_TAG}-${debian}")
|
||||
|
||||
if [ "${EVERYTHING}" == "true" ] && [ "${debian}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
|
||||
tags+=("-t" "neondatabase/build-tools:${IMAGE_TAG}")
|
||||
tags+=("-t" "ghcr.io/neondatabase/build-tools:${IMAGE_TAG}")
|
||||
fi
|
||||
|
||||
for arch in ${ARCHS}; do
|
||||
tags+=("neondatabase/build-tools:${IMAGE_TAG}-${debian}-${arch}")
|
||||
tags+=("ghcr.io/neondatabase/build-tools:${IMAGE_TAG}-${debian}-${arch}")
|
||||
done
|
||||
|
||||
docker buildx imagetools create "${tags[@]}"
|
||||
|
||||
2
.github/workflows/build-macos.yml
vendored
2
.github/workflows/build-macos.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
runs-on: macos-15
|
||||
strategy:
|
||||
matrix:
|
||||
postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
|
||||
postgres-version: ${{ inputs.rebuild_everything && fromJSON('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
|
||||
202
.github/workflows/build_and_test.yml
vendored
202
.github/workflows/build_and_test.yml
vendored
@@ -77,20 +77,23 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
check-codestyle-python:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
# No need to run on `main` because we this in the merge queue
|
||||
if: ${{ needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/_check-codestyle-python.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
secrets: inherit
|
||||
|
||||
check-codestyle-jsonnet:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
if: ${{ contains(fromJSON('["pr", "push-main"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -156,7 +159,9 @@ jobs:
|
||||
pass_if_unchanged: true
|
||||
|
||||
check-codestyle-rust:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
needs: [ meta, check-permissions, build-build-tools-image ]
|
||||
# No need to run on `main` because we this in the merge queue
|
||||
if: ${{ needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -164,8 +169,8 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
check-dependencies-rust:
|
||||
needs: [ files-changed, build-build-tools-image ]
|
||||
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }}
|
||||
needs: [ meta, files-changed, build-build-tools-image ]
|
||||
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' && needs.meta.outputs.run-kind == 'pr' }}
|
||||
uses: ./.github/workflows/cargo-deny.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -173,12 +178,13 @@ jobs:
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
if: ${{ contains(fromJSON('["pr", "push-main"]'), needs.meta.outputs.run-kind) }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
# Do not build or run tests in debug for release branches
|
||||
build-type: ${{ fromJson((startsWith(github.ref_name, 'release') && github.event_name == 'push') && '["release"]' || '["debug", "release"]') }}
|
||||
build-type: ${{ fromJSON((startsWith(github.ref_name, 'release') && github.event_name == 'push') && '["release"]' || '["debug", "release"]') }}
|
||||
include:
|
||||
- build-type: release
|
||||
arch: arm64
|
||||
@@ -209,8 +215,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -248,8 +254,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
# for changed limits, see comments on `options:` earlier in this file
|
||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||
strategy:
|
||||
@@ -314,8 +320,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -367,8 +373,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -470,14 +476,20 @@ jobs:
|
||||
})
|
||||
|
||||
trigger-e2e-tests:
|
||||
# Depends on jobs that can get skipped
|
||||
# !failure() && !cancelled() because it depends on jobs that can get skipped
|
||||
if: >-
|
||||
${{
|
||||
(
|
||||
!github.event.pull_request.draft
|
||||
|| contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
|
||||
|| needs.meta.outputs.run-kind == 'push-main'
|
||||
) && !failure() && !cancelled()
|
||||
(
|
||||
needs.meta.outputs.run-kind == 'pr'
|
||||
&& (
|
||||
!github.event.pull_request.draft
|
||||
|| contains(github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
|
||||
)
|
||||
)
|
||||
|| contains(fromJSON('["push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
|
||||
)
|
||||
&& !failure() && !cancelled()
|
||||
}}
|
||||
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
|
||||
uses: ./.github/workflows/trigger-e2e-tests.yml
|
||||
@@ -492,7 +504,10 @@ jobs:
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -509,6 +524,12 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: cache.neon.build
|
||||
@@ -533,7 +554,7 @@ jobs:
|
||||
cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}
|
||||
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}
|
||||
|
||||
neon-image:
|
||||
needs: [ neon-image-arch, meta ]
|
||||
@@ -543,19 +564,21 @@ jobs:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create multi-arch image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \
|
||||
neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
||||
docker buildx imagetools create -t ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }} \
|
||||
-t ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \
|
||||
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \
|
||||
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
||||
|
||||
compute-node-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
@@ -564,6 +587,7 @@ jobs:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -582,7 +606,7 @@ jobs:
|
||||
debian: bookworm
|
||||
arch: [ x64, arm64 ]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -604,6 +628,12 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: cache.neon.build
|
||||
@@ -627,7 +657,7 @@ jobs:
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
- name: Build neon extensions test image
|
||||
if: matrix.version.pg >= 'v16'
|
||||
@@ -647,7 +677,7 @@ jobs:
|
||||
target: extension-tests
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
tags: |
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, meta ]
|
||||
@@ -656,6 +686,7 @@ jobs:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
packages: write
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
@@ -674,28 +705,32 @@ jobs:
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create multi-arch compute-node image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
docker buildx imagetools create -t ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- name: Create multi-arch neon-test-extensions image
|
||||
if: matrix.version.pg >= 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
docker buildx imagetools create -t ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-t ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
ghcr.io/neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
vm-compute-node-image-arch:
|
||||
needs: [ check-permissions, meta, compute-node-image ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -723,31 +758,34 @@ jobs:
|
||||
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Note: we need a separate pull step here because otherwise vm-builder will try to pull, and
|
||||
# it won't have the proper authentication (written at v0.6.0)
|
||||
- name: Pulling compute-node image
|
||||
run: |
|
||||
docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}
|
||||
docker pull ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}
|
||||
|
||||
- name: Build vm image
|
||||
run: |
|
||||
./vm-builder \
|
||||
-size=2G \
|
||||
-spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
|
||||
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \
|
||||
-src=ghcr.io/neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
-dst=ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \
|
||||
-target-arch=linux/${{ matrix.arch }}
|
||||
|
||||
- name: Pushing vm-compute-node image
|
||||
run: |
|
||||
docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}
|
||||
docker push ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}
|
||||
|
||||
vm-compute-node-image:
|
||||
needs: [ vm-compute-node-image-arch, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
permissions:
|
||||
packages: write
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -760,14 +798,15 @@ jobs:
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create multi-arch compute-node image
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \
|
||||
neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64
|
||||
docker buildx imagetools create -t ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
|
||||
ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \
|
||||
ghcr.io/neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64
|
||||
|
||||
|
||||
test-images:
|
||||
@@ -785,18 +824,28 @@ jobs:
|
||||
arch: [ x64, arm64 ]
|
||||
pg_version: [v16, v17]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
permissions:
|
||||
packages: read
|
||||
|
||||
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# `ghcr.io/neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
|
||||
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
|
||||
# Regular pageserver version string looks like
|
||||
# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
|
||||
@@ -807,7 +856,7 @@ jobs:
|
||||
shell: bash # ensure no set -e for better error messages
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
run: |
|
||||
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
pageserver_version=$(docker run --rm ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
|
||||
echo "Pageserver version string: $pageserver_version"
|
||||
|
||||
@@ -892,7 +941,7 @@ jobs:
|
||||
env:
|
||||
SOURCE_TAG: >-
|
||||
${{
|
||||
contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind)
|
||||
contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind)
|
||||
&& needs.meta.outputs.release-pr-run-id
|
||||
|| needs.meta.outputs.build-tag
|
||||
}}
|
||||
@@ -978,18 +1027,21 @@ jobs:
|
||||
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
|
||||
push-neon-test-extensions-image-ghcr:
|
||||
push-neon-test-extensions-image-dockerhub:
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
packages: write
|
||||
id-token: write
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
@@ -998,14 +1050,17 @@ jobs:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'push-main' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
packages: write
|
||||
id-token: write
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:latest"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:latest"
|
||||
]
|
||||
@@ -1014,16 +1069,19 @@ jobs:
|
||||
|
||||
add-release-tag-to-neon-test-extensions-image:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
needs: [ meta ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
permissions:
|
||||
packages: write
|
||||
id-token: write
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
@@ -1388,10 +1446,10 @@ jobs:
|
||||
if: |
|
||||
contains(needs.*.result, 'failure')
|
||||
|| contains(needs.*.result, 'cancelled')
|
||||
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true')
|
||||
|| needs.build-and-test-locally.result == 'skipped'
|
||||
|| needs.check-codestyle-python.result == 'skipped'
|
||||
|| needs.check-codestyle-rust.result == 'skipped'
|
||||
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.build-and-test-locally.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| (needs.check-codestyle-rust.result == 'skipped' && needs.meta.outputs.run-kind == 'pr')
|
||||
|| needs.files-changed.result == 'skipped'
|
||||
|| (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|| (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
|
||||
|
||||
@@ -94,8 +94,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
9
.github/workflows/cargo-deny.yml
vendored
9
.github/workflows/cargo-deny.yml
vendored
@@ -24,11 +24,14 @@ jobs:
|
||||
|
||||
runs-on: [self-hosted, small]
|
||||
|
||||
permissions:
|
||||
packages: read
|
||||
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }}
|
||||
image: ${{ inputs.build-tools-image || 'ghcr.io/neondatabase/build-tools:pinned' }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
6
.github/workflows/cloud-regress.yml
vendored
6
.github/workflows/cloud-regress.yml
vendored
@@ -37,10 +37,10 @@ jobs:
|
||||
|
||||
runs-on: us-east-2
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
6
.github/workflows/ingest_benchmark.yml
vendored
6
.github/workflows/ingest_benchmark.yml
vendored
@@ -67,10 +67,10 @@ jobs:
|
||||
PGCOPYDB_LIB_PATH: /pgcopydb/lib
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
timeout-minutes: 1440
|
||||
|
||||
|
||||
6
.github/workflows/large_oltp_benchmark.yml
vendored
6
.github/workflows/large_oltp_benchmark.yml
vendored
@@ -50,10 +50,10 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time
|
||||
|
||||
8
.github/workflows/neon_extra_builds.yml
vendored
8
.github/workflows/neon_extra_builds.yml
vendored
@@ -71,8 +71,8 @@ jobs:
|
||||
uses: ./.github/workflows/build-macos.yml
|
||||
with:
|
||||
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
|
||||
rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }}
|
||||
rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}
|
||||
rebuild_rust_code: ${{ fromJSON(needs.files-changed.outputs.rebuild_rust_code) }}
|
||||
rebuild_everything: ${{ fromJSON(needs.files-changed.outputs.rebuild_everything) }}
|
||||
|
||||
gather-rust-build-stats:
|
||||
needs: [ check-permissions, build-build-tools-image, files-changed ]
|
||||
@@ -90,8 +90,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
env:
|
||||
|
||||
6
.github/workflows/periodic_pagebench.yml
vendored
6
.github/workflows/periodic_pagebench.yml
vendored
@@ -34,10 +34,10 @@ jobs:
|
||||
pull-requests: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
timeout-minutes: 360 # Set the timeout to 6 hours
|
||||
env:
|
||||
|
||||
8
.github/workflows/pg-clients.yml
vendored
8
.github/workflows/pg-clients.yml
vendored
@@ -53,8 +53,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init --user root
|
||||
services:
|
||||
clickhouse:
|
||||
@@ -153,8 +153,8 @@ jobs:
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init --user root
|
||||
|
||||
steps:
|
||||
|
||||
8
.github/workflows/pin-build-tools-image.yml
vendored
8
.github/workflows/pin-build-tools-image.yml
vendored
@@ -46,8 +46,8 @@ jobs:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
TO_TAG: pinned
|
||||
run: |
|
||||
docker manifest inspect "docker.io/neondatabase/build-tools:${FROM_TAG}" > "${FROM_TAG}.json"
|
||||
docker manifest inspect "docker.io/neondatabase/build-tools:${TO_TAG}" > "${TO_TAG}.json"
|
||||
docker manifest inspect "ghcr.io/neondatabase/build-tools:${FROM_TAG}" > "${FROM_TAG}.json"
|
||||
docker manifest inspect "ghcr.io/neondatabase/build-tools:${TO_TAG}" > "${TO_TAG}.json"
|
||||
|
||||
if diff "${FROM_TAG}.json" "${TO_TAG}.json"; then
|
||||
skip=true
|
||||
@@ -71,13 +71,13 @@ jobs:
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
|
||||
"ghcr.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
|
||||
"docker.io/neondatabase/build-tools:pinned-bullseye",
|
||||
"ghcr.io/neondatabase/build-tools:pinned-bullseye",
|
||||
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye",
|
||||
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye"
|
||||
],
|
||||
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
|
||||
"ghcr.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
|
||||
"docker.io/neondatabase/build-tools:pinned-bookworm",
|
||||
"docker.io/neondatabase/build-tools:pinned",
|
||||
"ghcr.io/neondatabase/build-tools:pinned-bookworm",
|
||||
|
||||
13
.github/workflows/pre-merge-checks.yml
vendored
13
.github/workflows/pre-merge-checks.yml
vendored
@@ -19,6 +19,8 @@ permissions: {}
|
||||
jobs:
|
||||
meta:
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
outputs:
|
||||
python-changed: ${{ steps.python-src.outputs.any_changed }}
|
||||
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
|
||||
@@ -72,6 +74,9 @@ jobs:
|
||||
|| needs.meta.outputs.python-changed == 'true'
|
||||
|| needs.meta.outputs.rust-changed == 'true'
|
||||
needs: [ meta ]
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
with:
|
||||
# Build only one combination to save time
|
||||
@@ -82,6 +87,9 @@ jobs:
|
||||
check-codestyle-python:
|
||||
if: needs.meta.outputs.python-changed == 'true'
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
permissions:
|
||||
contents: read
|
||||
packages: read
|
||||
uses: ./.github/workflows/_check-codestyle-python.yml
|
||||
with:
|
||||
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
|
||||
@@ -91,6 +99,9 @@ jobs:
|
||||
check-codestyle-rust:
|
||||
if: needs.meta.outputs.rust-changed == 'true'
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
permissions:
|
||||
contents: read
|
||||
packages: read
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
|
||||
@@ -148,7 +159,7 @@ jobs:
|
||||
${{
|
||||
always()
|
||||
&& github.event_name == 'merge_group'
|
||||
&& contains(fromJson('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch)
|
||||
&& contains(fromJSON('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch)
|
||||
}}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
91
Cargo.lock
generated
91
Cargo.lock
generated
@@ -167,45 +167,6 @@ version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5493c3bedbacf7fd7382c6346bbd66687d12bbaad3a89a2d2c303ee6cf20b048"
|
||||
dependencies = [
|
||||
"asn1-rs-derive",
|
||||
"asn1-rs-impl",
|
||||
"displaydoc",
|
||||
"nom",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-derive"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-impl"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "assert-json-diff"
|
||||
version = "2.0.2"
|
||||
@@ -1813,20 +1774,6 @@ dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der-parser"
|
||||
version = "9.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cd0a5c643689626bec213c4d8bd4d96acc8ffdb4ad4bb6bc16abf27d5f4b553"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"displaydoc",
|
||||
"nom",
|
||||
"num-bigint",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der_derive"
|
||||
version = "0.7.3"
|
||||
@@ -4044,15 +3991,6 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oid-registry"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8d8034d9489cdaf79228eb9f6a3b8d7bb32ba00d6645ebd48eef4077ceb5bd9"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.20.2"
|
||||
@@ -5227,7 +5165,7 @@ dependencies = [
|
||||
"uuid",
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
"x509-parser",
|
||||
"x509-cert",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
@@ -5848,15 +5786,6 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rusticata-macros"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.41"
|
||||
@@ -8440,7 +8369,6 @@ dependencies = [
|
||||
"der 0.7.8",
|
||||
"deranged",
|
||||
"digest",
|
||||
"displaydoc",
|
||||
"ecdsa 0.16.9",
|
||||
"either",
|
||||
"elliptic-curve 0.13.8",
|
||||
@@ -8568,23 +8496,6 @@ dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "x509-parser"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcbc162f30700d6f3f82a24bf7cc62ffe7caea42c0b2cba8bf7f3ae50cf51f69"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"data-encoding",
|
||||
"der-parser",
|
||||
"lazy_static",
|
||||
"nom",
|
||||
"oid-registry",
|
||||
"rusticata-macros",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "1.0.0"
|
||||
|
||||
@@ -215,10 +215,10 @@ urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.8"
|
||||
x509-parser = "0.16"
|
||||
whoami = "1.5.1"
|
||||
zerocopy = { version = "0.7", features = ["derive"] }
|
||||
json-structural-diff = { version = "0.2.0" }
|
||||
x509-cert = { version = "0.2.5" }
|
||||
|
||||
## TODO replace this with tracing
|
||||
env_logger = "0.11"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
|
||||
### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
|
||||
### inside this image in the real deployments.
|
||||
ARG REPOSITORY=neondatabase
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG DEFAULT_PG_VERSION=17
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
# build_and_test.yml github workflow for how that's done.
|
||||
|
||||
ARG PG_VERSION
|
||||
ARG REPOSITORY=neondatabase
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG BUILD_TAG
|
||||
|
||||
@@ -39,6 +39,13 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
# Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
|
||||
# We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
|
||||
# use a different path for the socket. The symlink actually points to our custom path.
|
||||
- name: rsyslogd-socket-symlink
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -77,6 +84,9 @@ files:
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
# Syslock.Name specifies a non-default pipe location that is writeable for the postgres user.
|
||||
module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
|
||||
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
|
||||
@@ -39,6 +39,13 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
# Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
|
||||
# We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
|
||||
# use a different path for the socket. The symlink actually points to our custom path.
|
||||
- name: rsyslogd-socket-symlink
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -77,6 +84,9 @@ files:
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
# Syslock.Name specifies a non-default pipe location that is writeable for the postgres user.
|
||||
module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
|
||||
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
|
||||
@@ -61,7 +61,7 @@ thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
x509-cert = { version = "0.2.5" }
|
||||
x509-cert.workspace = true
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
|
||||
@@ -37,7 +37,10 @@ use crate::logger::startup_context_from_env;
|
||||
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use crate::monitor::launch_monitor;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::rsyslog::{configure_audit_rsyslog, launch_pgaudit_gc};
|
||||
use crate::rsyslog::{
|
||||
PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
|
||||
launch_pgaudit_gc,
|
||||
};
|
||||
use crate::spec::*;
|
||||
use crate::swap::resize_swap;
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
@@ -617,7 +620,7 @@ impl ComputeNode {
|
||||
});
|
||||
}
|
||||
|
||||
// Configure and start rsyslog if necessary
|
||||
// Configure and start rsyslog for HIPAA if necessary
|
||||
if let ComputeAudit::Hipaa = pspec.spec.audit_log_level {
|
||||
let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string());
|
||||
if remote_endpoint.is_empty() {
|
||||
@@ -632,6 +635,17 @@ impl ComputeNode {
|
||||
launch_pgaudit_gc(log_directory_path);
|
||||
}
|
||||
|
||||
// Configure and start rsyslog for Postgres logs export
|
||||
if self.has_feature(ComputeFeature::PostgresLogsExport) {
|
||||
if let Some(ref project_id) = pspec.spec.cluster.cluster_id {
|
||||
let host = PostgresLogsRsyslogConfig::default_host(project_id);
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some(&host));
|
||||
configure_postgres_logs_export(conf)?;
|
||||
} else {
|
||||
warn!("not configuring rsyslog for Postgres logs export: project ID is missing")
|
||||
}
|
||||
}
|
||||
|
||||
// Launch remaining service threads
|
||||
let _monitor_handle = launch_monitor(self);
|
||||
let _configurator_handle = launch_configurator(self);
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
|
||||
use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
use crate::pg_helpers::{
|
||||
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
|
||||
@@ -158,53 +158,89 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "# Managed by compute_ctl: end")?;
|
||||
}
|
||||
|
||||
// If audit logging is enabled, configure pgaudit.
|
||||
// If base audit logging is enabled, configure it.
|
||||
// In this setup, the audit log will be written to the standard postgresql log.
|
||||
//
|
||||
// If compliance audit logging is enabled, configure pgaudit.
|
||||
//
|
||||
// Note, that this is called after the settings from spec are written.
|
||||
// This way we always override the settings from the spec
|
||||
// and don't allow the user or the control plane admin to change them.
|
||||
if let ComputeAudit::Hipaa = spec.audit_log_level {
|
||||
writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
|
||||
// This log level is very verbose
|
||||
// but this is necessary for HIPAA compliance.
|
||||
// Exclude 'misc' category, because it doesn't contain anythig relevant.
|
||||
writeln!(file, "pgaudit.log='all, -misc'")?;
|
||||
writeln!(file, "pgaudit.log_parameter=on")?;
|
||||
// Disable logging of catalog queries
|
||||
// The catalog doesn't contain sensitive data, so we don't need to audit it.
|
||||
writeln!(file, "pgaudit.log_catalog=off")?;
|
||||
// Set log rotation to 5 minutes
|
||||
// TODO: tune this after performance testing
|
||||
writeln!(file, "pgaudit.log_rotation_age=5")?;
|
||||
match spec.audit_log_level {
|
||||
ComputeAudit::Disabled => {}
|
||||
ComputeAudit::Log => {
|
||||
writeln!(file, "# Managed by compute_ctl base audit settings: start")?;
|
||||
writeln!(file, "pgaudit.log='ddl,role'")?;
|
||||
// Disable logging of catalog queries to reduce the noise
|
||||
writeln!(file, "pgaudit.log_catalog=off")?;
|
||||
|
||||
// Add audit shared_preload_libraries, if they are not present.
|
||||
//
|
||||
// The caller who sets the flag is responsible for ensuring that the necessary
|
||||
// shared_preload_libraries are present in the compute image,
|
||||
// otherwise the compute start will fail.
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
let mut extra_shared_preload_libraries = String::new();
|
||||
if !libs.contains("pgaudit") {
|
||||
extra_shared_preload_libraries.push_str(",pgaudit");
|
||||
}
|
||||
if !libs.contains("pgauditlogtofile") {
|
||||
extra_shared_preload_libraries.push_str(",pgauditlogtofile");
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
let mut extra_shared_preload_libraries = String::new();
|
||||
if !libs.contains("pgaudit") {
|
||||
extra_shared_preload_libraries.push_str(",pgaudit");
|
||||
}
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='{}{}'",
|
||||
libs, extra_shared_preload_libraries
|
||||
)?;
|
||||
} else {
|
||||
// Typically, this should be unreacheable,
|
||||
// because we always set at least some shared_preload_libraries in the spec
|
||||
// but let's handle it explicitly anyway.
|
||||
writeln!(file, "shared_preload_libraries='neon,pgaudit'")?;
|
||||
}
|
||||
writeln!(file, "# Managed by compute_ctl base audit settings: end")?;
|
||||
}
|
||||
ComputeAudit::Hipaa => {
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='{}{}'",
|
||||
libs, extra_shared_preload_libraries
|
||||
"# Managed by compute_ctl compliance audit settings: begin"
|
||||
)?;
|
||||
} else {
|
||||
// Typically, this should be unreacheable,
|
||||
// because we always set at least some shared_preload_libraries in the spec
|
||||
// but let's handle it explicitly anyway.
|
||||
// This log level is very verbose
|
||||
// but this is necessary for HIPAA compliance.
|
||||
// Exclude 'misc' category, because it doesn't contain anythig relevant.
|
||||
writeln!(file, "pgaudit.log='all, -misc'")?;
|
||||
writeln!(file, "pgaudit.log_parameter=on")?;
|
||||
// Disable logging of catalog queries
|
||||
// The catalog doesn't contain sensitive data, so we don't need to audit it.
|
||||
writeln!(file, "pgaudit.log_catalog=off")?;
|
||||
// Set log rotation to 5 minutes
|
||||
// TODO: tune this after performance testing
|
||||
writeln!(file, "pgaudit.log_rotation_age=5")?;
|
||||
|
||||
// Add audit shared_preload_libraries, if they are not present.
|
||||
//
|
||||
// The caller who sets the flag is responsible for ensuring that the necessary
|
||||
// shared_preload_libraries are present in the compute image,
|
||||
// otherwise the compute start will fail.
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
let mut extra_shared_preload_libraries = String::new();
|
||||
if !libs.contains("pgaudit") {
|
||||
extra_shared_preload_libraries.push_str(",pgaudit");
|
||||
}
|
||||
if !libs.contains("pgauditlogtofile") {
|
||||
extra_shared_preload_libraries.push_str(",pgauditlogtofile");
|
||||
}
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='{}{}'",
|
||||
libs, extra_shared_preload_libraries
|
||||
)?;
|
||||
} else {
|
||||
// Typically, this should be unreacheable,
|
||||
// because we always set at least some shared_preload_libraries in the spec
|
||||
// but let's handle it explicitly anyway.
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='neon,pgaudit,pgauditlogtofile'"
|
||||
)?;
|
||||
}
|
||||
writeln!(
|
||||
file,
|
||||
"shared_preload_libraries='neon,pgaudit,pgauditlogtofile'"
|
||||
"# Managed by compute_ctl compliance audit settings: end"
|
||||
)?;
|
||||
}
|
||||
writeln!(file, "# Managed by compute_ctl audit settings: end")?;
|
||||
}
|
||||
|
||||
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
|
||||
@@ -216,6 +252,12 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
|
||||
}
|
||||
|
||||
// We need Postgres to send logs to rsyslog so that we can forward them
|
||||
// further to customers' log aggregation systems.
|
||||
if spec.features.contains(&ComputeFeature::PostgresLogsExport) {
|
||||
writeln!(file, "log_destination='stderr,syslog'")?;
|
||||
}
|
||||
|
||||
// This is essential to keep this line at the end of the file,
|
||||
// because it is intended to override any settings above.
|
||||
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
|
||||
|
||||
@@ -8,4 +8,4 @@ input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Fac
|
||||
global(workDirectory="/var/log/rsyslog")
|
||||
|
||||
# Forward logs to remote syslog server
|
||||
*.* @@{remote_endpoint}
|
||||
*.* @@{remote_endpoint}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
# Program name comes from postgres' syslog_facility configuration: https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-SYSLOG-IDENT
|
||||
# Default value is 'postgres'.
|
||||
if $programname == 'postgres' then {{
|
||||
# Forward Postgres logs to telemetry otel collector
|
||||
action(type="omfwd" target="{logs_export_target}" port="{logs_export_port}" protocol="tcp"
|
||||
template="RSYSLOG_SyslogProtocol23Format"
|
||||
action.resumeRetryCount="3"
|
||||
queue.type="linkedList" queue.size="1000")
|
||||
stop
|
||||
}}
|
||||
@@ -306,6 +306,36 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/configure_telemetry:
|
||||
post:
|
||||
tags:
|
||||
- Configure
|
||||
summary: Configure rsyslog
|
||||
description: |
|
||||
This API endpoint configures rsyslog to forward Postgres logs
|
||||
to a specified otel collector.
|
||||
operationId: configureTelemetry
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
logs_export_host:
|
||||
type: string
|
||||
description: |
|
||||
Hostname and the port of the otel collector. Leave empty to disable logs forwarding.
|
||||
Example: config-shy-breeze-123-collector-monitoring.neon-telemetry.svc.cluster.local:54526
|
||||
responses:
|
||||
204:
|
||||
description: "Telemetry configured successfully"
|
||||
500:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
JWT:
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::requests::{ConfigurationRequest, ConfigureTelemetryRequest};
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::ComputeFeature;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
@@ -11,6 +13,7 @@ use tracing::info;
|
||||
use crate::compute::{ComputeNode, ParsedSpec};
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::rsyslog::{PostgresLogsRsyslogConfig, configure_postgres_logs_export};
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If anything
|
||||
// goes wrong after we set the compute status to `ConfigurationPending` and
|
||||
@@ -92,3 +95,25 @@ pub(in crate::http) async fn configure(
|
||||
|
||||
JsonResponse::success(StatusCode::OK, body)
|
||||
}
|
||||
|
||||
pub(in crate::http) async fn configure_telemetry(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<ConfigureTelemetryRequest>,
|
||||
) -> Response {
|
||||
if !compute.has_feature(ComputeFeature::PostgresLogsExport) {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"Postgres logs export feature is not enabled".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let conf = PostgresLogsRsyslogConfig::new(request.logs_export_host.as_deref());
|
||||
if let Err(err) = configure_postgres_logs_export(conf) {
|
||||
return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string());
|
||||
}
|
||||
|
||||
Response::builder()
|
||||
.status(StatusCode::NO_CONTENT)
|
||||
.body(Body::from(""))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
let authenticated_router = Router::<Arc<ComputeNode>>::new()
|
||||
.route("/check_writability", post(check_writability::is_writable))
|
||||
.route("/configure", post(configure::configure))
|
||||
.route("/configure_telemetry", post(configure::configure_telemetry))
|
||||
.route("/database_schema", get(database_schema::get_schema_dump))
|
||||
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
|
||||
.route("/insights", get(insights::get_insights))
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
use std::fs;
|
||||
use std::io::ErrorKind;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
use std::{fs::OpenOptions, io::Write};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use tracing::{error, info, instrument, warn};
|
||||
|
||||
const POSTGRES_LOGS_CONF_PATH: &str = "/etc/rsyslog.d/postgres_logs.conf";
|
||||
|
||||
fn get_rsyslog_pid() -> Option<String> {
|
||||
let output = Command::new("pgrep")
|
||||
.arg("rsyslogd")
|
||||
@@ -79,6 +82,95 @@ pub fn configure_audit_rsyslog(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Configuration for enabling Postgres logs forwarding from rsyslogd
|
||||
pub struct PostgresLogsRsyslogConfig<'a> {
|
||||
pub host: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a> PostgresLogsRsyslogConfig<'a> {
|
||||
pub fn new(host: Option<&'a str>) -> Self {
|
||||
Self { host }
|
||||
}
|
||||
|
||||
pub fn build(&self) -> Result<String> {
|
||||
match self.host {
|
||||
Some(host) => {
|
||||
if let Some((target, port)) = host.split_once(":") {
|
||||
Ok(format!(
|
||||
include_str!(
|
||||
"config_template/compute_rsyslog_postgres_export_template.conf"
|
||||
),
|
||||
logs_export_target = target,
|
||||
logs_export_port = port,
|
||||
))
|
||||
} else {
|
||||
Err(anyhow!("Invalid host format for Postgres logs export"))
|
||||
}
|
||||
}
|
||||
None => Ok("".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn current_config() -> Result<String> {
|
||||
let config_content = match std::fs::read_to_string(POSTGRES_LOGS_CONF_PATH) {
|
||||
Ok(c) => c,
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => String::new(),
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
Ok(config_content)
|
||||
}
|
||||
|
||||
/// Returns the default host for otel collector that receives Postgres logs
|
||||
pub fn default_host(project_id: &str) -> String {
|
||||
format!(
|
||||
"config-{}-collector.neon-telemetry.svc.cluster.local:10514",
|
||||
project_id
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result<()> {
|
||||
let new_config = conf.build()?;
|
||||
let current_config = PostgresLogsRsyslogConfig::current_config()?;
|
||||
|
||||
if new_config == current_config {
|
||||
info!("postgres logs rsyslog configuration is up-to-date");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// When new config is empty we can simply remove the configuration file.
|
||||
if new_config.is_empty() {
|
||||
info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
|
||||
match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
|
||||
Ok(_) => {}
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => {}
|
||||
Err(err) => return Err(err.into()),
|
||||
}
|
||||
restart_rsyslog()?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!(
|
||||
"configuring rsyslog for postgres logs export to: {:?}",
|
||||
conf.host
|
||||
);
|
||||
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(POSTGRES_LOGS_CONF_PATH)?;
|
||||
file.write_all(new_config.as_bytes())?;
|
||||
|
||||
info!(
|
||||
"rsyslog configuration file {} added successfully. Starting rsyslogd",
|
||||
POSTGRES_LOGS_CONF_PATH
|
||||
);
|
||||
|
||||
restart_rsyslog()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {
|
||||
info!("running pgaudit GC main loop");
|
||||
@@ -136,3 +228,49 @@ pub fn launch_pgaudit_gc(log_directory: String) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::rsyslog::PostgresLogsRsyslogConfig;
|
||||
|
||||
#[test]
|
||||
fn test_postgres_logs_config() {
|
||||
{
|
||||
// Verify empty config
|
||||
let conf = PostgresLogsRsyslogConfig::new(None);
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert_eq!(&conf_str, "");
|
||||
}
|
||||
|
||||
{
|
||||
// Verify config
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some("collector.cvc.local:514"));
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert!(conf_str.contains("omfwd"));
|
||||
assert!(conf_str.contains(r#"target="collector.cvc.local""#));
|
||||
assert!(conf_str.contains(r#"port="514""#));
|
||||
}
|
||||
|
||||
{
|
||||
// Verify invalid config
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some("invalid"));
|
||||
let res = conf.build();
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// Verify config with default host
|
||||
let host = PostgresLogsRsyslogConfig::default_host("shy-breeze-123");
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some(&host));
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert!(conf_str.contains(r#"shy-breeze-123"#));
|
||||
assert!(conf_str.contains(r#"port="10514""#));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,13 +8,12 @@ use compute_api::responses::{
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use reqwest::StatusCode;
|
||||
use tokio_postgres::Client;
|
||||
use tracing::{error, info, instrument, warn};
|
||||
use tracing::{error, info, instrument};
|
||||
|
||||
use crate::config;
|
||||
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
// Do control plane request and return response if any. In case of error it
|
||||
// returns a bool flag indicating whether it makes sense to retry the request
|
||||
@@ -212,122 +211,3 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Connect to the database as superuser and pre-create anon extension
|
||||
/// if it is present in shared_preload_libraries
|
||||
#[instrument(skip_all)]
|
||||
pub async fn handle_extension_anon(
|
||||
spec: &ComputeSpec,
|
||||
db_owner: &str,
|
||||
db_client: &mut Client,
|
||||
grants_only: bool,
|
||||
) -> Result<()> {
|
||||
info!("handle extension anon");
|
||||
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
if libs.contains("anon") {
|
||||
if !grants_only {
|
||||
// check if extension is already initialized using anon.is_initialized()
|
||||
let query = "SELECT anon.is_initialized()";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(rows) => {
|
||||
if !rows.is_empty() {
|
||||
let is_initialized: bool = rows[0].get(0);
|
||||
if is_initialized {
|
||||
info!("anon extension is already initialized");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"anon extension is_installed check failed with expected error: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Create anon extension if this compute needs it
|
||||
// Users cannot create it themselves, because superuser is required.
|
||||
let mut query = "CREATE EXTENSION IF NOT EXISTS anon CASCADE";
|
||||
info!("creating anon extension with query: {}", query);
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("anon extension creation failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// check that extension is installed
|
||||
query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
|
||||
let rows = db_client.query(query, &[]).await?;
|
||||
if rows.is_empty() {
|
||||
error!("anon extension is not installed");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Initialize anon extension
|
||||
// This also requires superuser privileges, so users cannot do it themselves.
|
||||
query = "SELECT anon.init()";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("anon.init() failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check that extension is installed, if not bail early
|
||||
let query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(rows) => {
|
||||
if rows.is_empty() {
|
||||
error!("anon extension is not installed");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("anon extension check failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let query = format!("GRANT ALL ON SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// Grant permissions to db_owner to use anon extension functions
|
||||
let query = format!("GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// This is needed, because some functions are defined as SECURITY DEFINER.
|
||||
// In Postgres SECURITY DEFINER functions are executed with the privileges
|
||||
// of the owner.
|
||||
// In anon extension this it is needed to access some GUCs, which are only accessible to
|
||||
// superuser. But we've patched postgres to allow db_owner to access them as well.
|
||||
// So we need to change owner of these functions to db_owner.
|
||||
let query = format!("
|
||||
SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};'
|
||||
from pg_proc p
|
||||
join pg_namespace nsp ON p.pronamespace = nsp.oid
|
||||
where nsp.nspname = 'anon';", db_owner);
|
||||
|
||||
info!("change anon extension functions owner to db owner");
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// affects views as well
|
||||
let query = format!("GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
let query = format!("GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use compute_api::spec::{ComputeAudit, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
@@ -26,7 +26,7 @@ use crate::spec_apply::ApplySpecPhase::{
|
||||
RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions,
|
||||
};
|
||||
|
||||
impl ComputeNode {
|
||||
@@ -238,7 +238,6 @@ impl ComputeNode {
|
||||
let mut phases = vec![
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
];
|
||||
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
@@ -287,7 +286,10 @@ impl ComputeNode {
|
||||
phases.push(CreatePgauditlogtofileExtension);
|
||||
phases.push(DisablePostgresDBPgAudit);
|
||||
}
|
||||
ComputeAudit::Log => { /* not implemented yet */ }
|
||||
ComputeAudit::Log => {
|
||||
phases.push(CreatePgauditExtension);
|
||||
phases.push(DisablePostgresDBPgAudit);
|
||||
}
|
||||
ComputeAudit::Disabled => {}
|
||||
}
|
||||
|
||||
@@ -458,7 +460,6 @@ impl Debug for DB {
|
||||
pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
/// This is a shared phase, used for both i) dropping dangling LR subscriptions
|
||||
/// before dropping the DB, and ii) dropping all subscriptions after creating
|
||||
/// a fresh branch.
|
||||
@@ -1012,98 +1013,6 @@ async fn get_operations<'a>(
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
// TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
|
||||
PerDatabasePhase::HandleAnonExtension => {
|
||||
// Only install Anon into user databases
|
||||
let db = match &db {
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
// Never install Anon when it's not enabled as feature
|
||||
if !spec.features.contains(&ComputeFeature::AnonExtension) {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
// Only install Anon when it's added in preload libraries
|
||||
let opt_libs = spec.cluster.settings.find("shared_preload_libraries");
|
||||
|
||||
let libs = match opt_libs {
|
||||
Some(libs) => libs,
|
||||
None => return Ok(Box::new(empty())),
|
||||
};
|
||||
|
||||
if !libs.contains("anon") {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let db_owner = db.owner.pg_quote();
|
||||
|
||||
let operations = vec![
|
||||
// Create anon extension if this compute needs it
|
||||
// Users cannot create it themselves, because superuser is required.
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"),
|
||||
comment: Some(String::from("creating anon extension")),
|
||||
},
|
||||
// Initialize anon extension
|
||||
// This also requires superuser privileges, so users cannot do it themselves.
|
||||
Operation {
|
||||
query: String::from("SELECT anon.init()"),
|
||||
comment: Some(String::from("initializing anon extension data")),
|
||||
},
|
||||
Operation {
|
||||
query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}",
|
||||
db_owner
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema functions permissions",
|
||||
)),
|
||||
},
|
||||
// We need this, because some functions are defined as SECURITY DEFINER.
|
||||
// In Postgres SECURITY DEFINER functions are executed with the privileges
|
||||
// of the owner.
|
||||
// In anon extension this it is needed to access some GUCs, which are only accessible to
|
||||
// superuser. But we've patched postgres to allow db_owner to access them as well.
|
||||
// So we need to change owner of these functions to db_owner.
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/anon_ext_fn_reassign.sql"),
|
||||
db_owner = db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"change anon extension functions owner to database_owner",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension tables permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension sequences permissions",
|
||||
)),
|
||||
},
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use ring::digest;
|
||||
use spki::ObjectIdentifier;
|
||||
use spki::der::{Decode, PemReader};
|
||||
use x509_cert::Certificate;
|
||||
|
||||
@@ -91,13 +90,13 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
|
||||
}
|
||||
|
||||
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
|
||||
use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;
|
||||
|
||||
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
|
||||
.context("decode cert")?;
|
||||
|
||||
match cert.signature_algorithm.oid {
|
||||
ECDSA_WITH_SHA256 => {
|
||||
ECDSA_WITH_SHA_256 => {
|
||||
let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
|
||||
|
||||
let a = key.public_key().to_sec1_bytes();
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
ARG REPOSITORY=neondatabase
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG COMPUTE_IMAGE=compute-node-v14
|
||||
ARG TAG=latest
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ services:
|
||||
|
||||
pageserver:
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
@@ -45,7 +45,7 @@ services:
|
||||
|
||||
safekeeper1:
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
|
||||
- SAFEKEEPER_ID=1
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
|
||||
safekeeper2:
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
|
||||
- SAFEKEEPER_ID=2
|
||||
@@ -105,7 +105,7 @@ services:
|
||||
|
||||
safekeeper3:
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
|
||||
- SAFEKEEPER_ID=3
|
||||
@@ -135,7 +135,7 @@ services:
|
||||
|
||||
storage_broker:
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon:${TAG:-latest}
|
||||
ports:
|
||||
- 50051:50051
|
||||
command:
|
||||
@@ -147,7 +147,7 @@ services:
|
||||
build:
|
||||
context: ./compute_wrapper/
|
||||
args:
|
||||
- REPOSITORY=${REPOSITORY:-neondatabase}
|
||||
- REPOSITORY=${REPOSITORY:-ghcr.io/neondatabase}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
|
||||
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
|
||||
- http_proxy=${http_proxy:-}
|
||||
@@ -186,7 +186,7 @@ services:
|
||||
|
||||
neon-test-extensions:
|
||||
profiles: ["test-extensions"]
|
||||
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
|
||||
environment:
|
||||
- PGPASSWORD=cloud_admin
|
||||
entrypoint:
|
||||
|
||||
@@ -20,4 +20,4 @@ for d in ${LIST}; do
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
echo "${FAILED}"
|
||||
exit 1
|
||||
exit 1
|
||||
|
||||
@@ -30,3 +30,9 @@ pub struct SetRoleGrantsRequest {
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
/// Request of the /configure_telemetry API
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct ConfigureTelemetryRequest {
|
||||
pub logs_export_host: Option<String>,
|
||||
}
|
||||
|
||||
@@ -179,8 +179,8 @@ pub enum ComputeFeature {
|
||||
/// track short-lived connections as user activity.
|
||||
ActivityMonitorExperimental,
|
||||
|
||||
/// Pre-install and initialize anon extension for every database in the cluster
|
||||
AnonExtension,
|
||||
/// Allow to configure rsyslog for Postgres logs export
|
||||
PostgresLogsExport,
|
||||
|
||||
/// This is a special feature flag that is used to represent unknown feature flags.
|
||||
/// Basically all unknown to enum flags are represented as this one. See unit test
|
||||
|
||||
@@ -208,7 +208,6 @@
|
||||
],
|
||||
"remote_extensions": {
|
||||
"library_index": {
|
||||
"anon": "anon",
|
||||
"postgis-3": "postgis",
|
||||
"libpgrouting-3.4": "postgis",
|
||||
"postgis_raster-3": "postgis",
|
||||
@@ -217,12 +216,6 @@
|
||||
"address_standardizer-3": "postgis"
|
||||
},
|
||||
"extension_data": {
|
||||
"anon": {
|
||||
"archive_path": "5834329303/v15/extensions/anon.tar.zst",
|
||||
"control_data": {
|
||||
"anon.control": "# PostgreSQL Anonymizer (anon) extension\ncomment = ''Data anonymization tools''\ndefault_version = ''1.1.0''\ndirectory=''extension/anon''\nrelocatable = false\nrequires = ''pgcrypto''\nsuperuser = false\nmodule_pathname = ''$libdir/anon''\ntrusted = true\n"
|
||||
}
|
||||
},
|
||||
"postgis": {
|
||||
"archive_path": "5834329303/v15/extensions/postgis.tar.zst",
|
||||
"control_data": {
|
||||
@@ -238,7 +231,6 @@
|
||||
}
|
||||
},
|
||||
"custom_extensions": [
|
||||
"anon"
|
||||
],
|
||||
"public_extensions": [
|
||||
"postgis"
|
||||
|
||||
@@ -23,6 +23,7 @@ pub struct TimelineCreateRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub mconf: Configuration,
|
||||
/// In the PG_VERSION_NUM macro format, like 140017.
|
||||
pub pg_version: u32,
|
||||
pub system_id: Option<u64>,
|
||||
// By default WAL_SEGMENT_SIZE
|
||||
|
||||
@@ -26,6 +26,10 @@ impl<S: Send, R: Send> Duplex<S, R> {
|
||||
self.tx.send(x).await
|
||||
}
|
||||
|
||||
pub fn try_send(&self, x: S) -> Result<(), mpsc::error::TrySendError<S>> {
|
||||
self.tx.try_send(x)
|
||||
}
|
||||
|
||||
/// Receives the next value for this receiver.
|
||||
///
|
||||
/// This method returns `None` if the channel has been closed and there are
|
||||
|
||||
@@ -13,6 +13,7 @@ use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use wal_decoder::serialized_batch::SerializedValueBatch;
|
||||
@@ -57,11 +58,22 @@ async fn ingest(
|
||||
|
||||
tokio::fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id)).await?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();
|
||||
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let layer = InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, &gate, &ctx).await?;
|
||||
let layer = InMemoryLayer::create(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
lsn,
|
||||
&gate,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let data = Value::Image(Bytes::from(vec![0u8; put_size]));
|
||||
let data_ser_size = data.serialized_size().unwrap() as usize;
|
||||
|
||||
@@ -131,7 +131,8 @@ async fn get_holes(path: &Utf8Path, max_holes: usize, ctx: &RequestContext) -> R
|
||||
pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
|
||||
let storage_path = &cmd.path;
|
||||
let max_holes = cmd.max_holes.unwrap_or(DEFAULT_MAX_HOLES);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();
|
||||
|
||||
// Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
|
||||
pageserver::virtual_file::init(
|
||||
|
||||
@@ -76,7 +76,8 @@ async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
|
||||
}
|
||||
|
||||
pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();
|
||||
match cmd {
|
||||
LayerCmd::List { path } => {
|
||||
for tenant in fs::read_dir(path.join(TENANTS_SEGMENT_NAME))? {
|
||||
@@ -176,7 +177,8 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error)
|
||||
.with_scope_debug_tools();
|
||||
|
||||
macro_rules! rewrite_closure {
|
||||
($($summary_ty:tt)*) => {{
|
||||
|
||||
@@ -208,7 +208,8 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(100);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();
|
||||
dump_layerfile_from_path(path, true, &ctx).await
|
||||
}
|
||||
|
||||
|
||||
@@ -134,6 +134,9 @@ pub(crate) enum Scope {
|
||||
UnitTest {
|
||||
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
|
||||
},
|
||||
DebugTools {
|
||||
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
|
||||
},
|
||||
}
|
||||
|
||||
static GLOBAL_IO_SIZE_METRICS: Lazy<crate::metrics::StorageIoSizeMetrics> =
|
||||
@@ -195,6 +198,12 @@ impl Scope {
|
||||
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_debug_tools() -> Self {
|
||||
Scope::DebugTools {
|
||||
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of access to the page cache.
|
||||
@@ -435,6 +444,12 @@ impl RequestContext {
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn with_scope_debug_tools(&self) -> Self {
|
||||
RequestContextBuilder::new(TaskKind::DebugTool)
|
||||
.scope(Scope::new_debug_tools())
|
||||
.build()
|
||||
}
|
||||
|
||||
pub fn task_kind(&self) -> TaskKind {
|
||||
self.task_kind
|
||||
}
|
||||
@@ -486,6 +501,7 @@ impl RequestContext {
|
||||
Scope::SecondaryTenant { io_size_metrics } => io_size_metrics,
|
||||
#[cfg(test)]
|
||||
Scope::UnitTest { io_size_metrics } => io_size_metrics,
|
||||
Scope::DebugTools { io_size_metrics } => io_size_metrics,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use camino::Utf8PathBuf;
|
||||
use num_traits::Num;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio_epoll_uring::{BoundedBuf, Slice};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info_span};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
@@ -19,7 +20,7 @@ use crate::page_cache;
|
||||
use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
|
||||
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
|
||||
use crate::virtual_file::owned_buffers_io::write::Buffer;
|
||||
use crate::virtual_file::owned_buffers_io::write::{Buffer, FlushTaskError};
|
||||
use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};
|
||||
|
||||
pub struct EphemeralFile {
|
||||
@@ -40,6 +41,7 @@ impl EphemeralFile {
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<EphemeralFile> {
|
||||
static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
|
||||
@@ -75,6 +77,7 @@ impl EphemeralFile {
|
||||
file,
|
||||
|| IoBufferMut::with_capacity(TAIL_SZ),
|
||||
gate.enter()?,
|
||||
cancel.child_token(),
|
||||
ctx,
|
||||
info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
|
||||
),
|
||||
@@ -101,6 +104,14 @@ impl Drop for EphemeralFile {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum EphemeralFileWriteError {
|
||||
#[error("{0}")]
|
||||
TooLong(String),
|
||||
#[error("cancelled")]
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
impl EphemeralFile {
|
||||
pub(crate) fn len(&self) -> u64 {
|
||||
self.bytes_written
|
||||
@@ -133,7 +144,7 @@ impl EphemeralFile {
|
||||
&mut self,
|
||||
srcbuf: &[u8],
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<u64> {
|
||||
) -> Result<u64, EphemeralFileWriteError> {
|
||||
let (pos, control) = self.write_raw_controlled(srcbuf, ctx).await?;
|
||||
if let Some(control) = control {
|
||||
control.release().await;
|
||||
@@ -145,24 +156,24 @@ impl EphemeralFile {
|
||||
&mut self,
|
||||
srcbuf: &[u8],
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<(u64, Option<owned_buffers_io::write::FlushControl>)> {
|
||||
) -> Result<(u64, Option<owned_buffers_io::write::FlushControl>), EphemeralFileWriteError> {
|
||||
let pos = self.bytes_written;
|
||||
|
||||
let new_bytes_written = pos.checked_add(srcbuf.len().into_u64()).ok_or_else(|| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!(
|
||||
"write would grow EphemeralFile beyond u64::MAX: len={pos} writen={srcbuf_len}",
|
||||
srcbuf_len = srcbuf.len(),
|
||||
),
|
||||
)
|
||||
EphemeralFileWriteError::TooLong(format!(
|
||||
"write would grow EphemeralFile beyond u64::MAX: len={pos} writen={srcbuf_len}",
|
||||
srcbuf_len = srcbuf.len(),
|
||||
))
|
||||
})?;
|
||||
|
||||
// Write the payload
|
||||
let (nwritten, control) = self
|
||||
.buffered_writer
|
||||
.write_buffered_borrowed_controlled(srcbuf, ctx)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => EphemeralFileWriteError::Cancelled,
|
||||
})?;
|
||||
assert_eq!(
|
||||
nwritten,
|
||||
srcbuf.len(),
|
||||
@@ -184,8 +195,14 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
||||
) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {
|
||||
let submitted_offset = self.buffered_writer.bytes_submitted();
|
||||
|
||||
let mutable = self.buffered_writer.inspect_mutable();
|
||||
let mutable = &mutable[0..mutable.pending()];
|
||||
let mutable = match self.buffered_writer.inspect_mutable() {
|
||||
Some(mutable) => &mutable[0..mutable.pending()],
|
||||
None => {
|
||||
// Timeline::cancel and hence buffered writer flush was cancelled.
|
||||
// Remain read-available while timeline is shutting down.
|
||||
&[]
|
||||
}
|
||||
};
|
||||
|
||||
let maybe_flushed = self.buffered_writer.inspect_maybe_flushed();
|
||||
|
||||
@@ -216,7 +233,6 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
||||
let (written_range, maybe_flushed_range) = {
|
||||
if maybe_flushed.is_some() {
|
||||
// [ written ][ maybe_flushed ][ mutable ]
|
||||
// <- TAIL_SZ -><- TAIL_SZ ->
|
||||
// ^
|
||||
// `submitted_offset`
|
||||
// <++++++ on disk +++++++????????????????>
|
||||
@@ -232,7 +248,6 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
||||
)
|
||||
} else {
|
||||
// [ written ][ mutable ]
|
||||
// <- TAIL_SZ ->
|
||||
// ^
|
||||
// `submitted_offset`
|
||||
// <++++++ on disk +++++++++++++++++++++++>
|
||||
@@ -366,8 +381,9 @@ mod tests {
|
||||
harness("ephemeral_file_holds_gate_open").unwrap();
|
||||
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
|
||||
let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -397,12 +413,13 @@ mod tests {
|
||||
let (conf, tenant_id, timeline_id, ctx) = harness("test_ephemeral_file_basics").unwrap();
|
||||
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mutable = file.buffered_writer.inspect_mutable();
|
||||
let mutable = file.buffered_writer.mutable();
|
||||
let cap = mutable.capacity();
|
||||
let align = mutable.align();
|
||||
|
||||
@@ -445,7 +462,7 @@ mod tests {
|
||||
let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap();
|
||||
assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]);
|
||||
|
||||
let mutable_buffer_contents = file.buffered_writer.inspect_mutable();
|
||||
let mutable_buffer_contents = file.buffered_writer.mutable();
|
||||
assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]);
|
||||
}
|
||||
|
||||
@@ -454,13 +471,13 @@ mod tests {
|
||||
let (conf, tenant_id, timeline_id, ctx) = harness("test_flushes_do_happen").unwrap();
|
||||
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
|
||||
let cancel = CancellationToken::new();
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// mutable buffer and maybe_flushed buffer each has `cap` bytes.
|
||||
let cap = file.buffered_writer.inspect_mutable().capacity();
|
||||
let cap = file.buffered_writer.mutable().capacity();
|
||||
|
||||
let content: Vec<u8> = rand::thread_rng()
|
||||
.sample_iter(rand::distributions::Standard)
|
||||
@@ -470,10 +487,8 @@ mod tests {
|
||||
file.write_raw(&content, &ctx).await.unwrap();
|
||||
|
||||
// assert the state is as this test expects it to be
|
||||
assert_eq!(
|
||||
&file.load_to_io_buf(&ctx).await.unwrap(),
|
||||
&content[0..cap * 2 + cap / 2]
|
||||
);
|
||||
let load_io_buf_res = file.load_to_io_buf(&ctx).await.unwrap();
|
||||
assert_eq!(&load_io_buf_res[..], &content[0..cap * 2 + cap / 2]);
|
||||
let md = file.buffered_writer.as_inner().path().metadata().unwrap();
|
||||
assert_eq!(
|
||||
md.len(),
|
||||
@@ -485,7 +500,7 @@ mod tests {
|
||||
&content[cap..cap * 2]
|
||||
);
|
||||
assert_eq!(
|
||||
&file.buffered_writer.inspect_mutable()[0..cap / 2],
|
||||
&file.buffered_writer.mutable()[0..cap / 2],
|
||||
&content[cap * 2..cap * 2 + cap / 2]
|
||||
);
|
||||
}
|
||||
@@ -501,12 +516,13 @@ mod tests {
|
||||
harness("test_read_split_across_file_and_buffer").unwrap();
|
||||
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mutable = file.buffered_writer.inspect_mutable();
|
||||
let mutable = file.buffered_writer.mutable();
|
||||
let cap = mutable.capacity();
|
||||
let align = mutable.align();
|
||||
let content: Vec<u8> = rand::thread_rng()
|
||||
|
||||
@@ -1112,6 +1112,7 @@ mod tests {
|
||||
};
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::shard::TenantShardId;
|
||||
|
||||
@@ -1193,6 +1194,7 @@ mod tests {
|
||||
async fn ranged_search() {
|
||||
let harness = TenantHarness::create("ranged_search").await.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let cancel = CancellationToken::new();
|
||||
let timeline_id = TimelineId::generate();
|
||||
// Create the timeline such that the in-memory layers can be written
|
||||
// to the timeline directory.
|
||||
@@ -1209,6 +1211,7 @@ mod tests {
|
||||
harness.tenant_shard_id,
|
||||
lsn_range.start,
|
||||
&gate,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -205,6 +205,7 @@ async fn download_object(
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
crate::virtual_file::io_engine::IoEngine::TokioEpollUring => {
|
||||
use crate::virtual_file::owned_buffers_io::write::FlushTaskError;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::virtual_file::{IoBufferMut, owned_buffers_io};
|
||||
@@ -228,6 +229,7 @@ async fn download_object(
|
||||
destination_file,
|
||||
|| IoBufferMut::with_capacity(super::BUFFER_SIZE),
|
||||
gate.enter().map_err(|_| DownloadError::Cancelled)?,
|
||||
cancel.child_token(),
|
||||
ctx,
|
||||
tracing::info_span!(parent: None, "download_object_buffered_writer", %dst_path),
|
||||
);
|
||||
@@ -240,11 +242,21 @@ async fn download_object(
|
||||
{
|
||||
let chunk = match res {
|
||||
Ok(chunk) => chunk,
|
||||
Err(e) => return Err(e),
|
||||
Err(e) => return Err(DownloadError::from(e)),
|
||||
};
|
||||
buffered.write_buffered_borrowed(&chunk, ctx).await?;
|
||||
buffered
|
||||
.write_buffered_borrowed(&chunk, ctx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})?;
|
||||
}
|
||||
let inner = buffered.flush_and_into_inner(ctx).await?;
|
||||
let inner = buffered
|
||||
.flush_and_into_inner(ctx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})?;
|
||||
Ok(inner)
|
||||
}
|
||||
.await?;
|
||||
|
||||
@@ -19,6 +19,7 @@ use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::InMemoryLayerInfo;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -552,13 +553,15 @@ impl InMemoryLayer {
|
||||
tenant_shard_id: TenantShardId,
|
||||
start_lsn: Lsn,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!(
|
||||
"initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"
|
||||
);
|
||||
|
||||
let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?;
|
||||
let file =
|
||||
EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, cancel, ctx).await?;
|
||||
let key = InMemoryLayerFileId(file.page_cache_file_id());
|
||||
|
||||
Ok(InMemoryLayer {
|
||||
|
||||
@@ -268,7 +268,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
error_run += 1;
|
||||
let backoff =
|
||||
exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS);
|
||||
log_compaction_error(&err, error_run, backoff, cancel.is_cancelled());
|
||||
log_compaction_error(&err, Some((error_run, backoff)), cancel.is_cancelled());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -281,10 +281,9 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
}
|
||||
}
|
||||
|
||||
fn log_compaction_error(
|
||||
pub(crate) fn log_compaction_error(
|
||||
err: &CompactionError,
|
||||
error_count: u32,
|
||||
sleep_duration: Duration,
|
||||
retry_info: Option<(u32, Duration)>,
|
||||
task_cancelled: bool,
|
||||
) {
|
||||
use CompactionError::*;
|
||||
@@ -318,14 +317,26 @@ fn log_compaction_error(
|
||||
}
|
||||
};
|
||||
|
||||
match level {
|
||||
Level::ERROR => {
|
||||
error!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}")
|
||||
if let Some((error_count, sleep_duration)) = retry_info {
|
||||
match level {
|
||||
Level::ERROR => {
|
||||
error!(
|
||||
"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}"
|
||||
)
|
||||
}
|
||||
Level::INFO => {
|
||||
info!(
|
||||
"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}"
|
||||
)
|
||||
}
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
Level::INFO => {
|
||||
info!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}")
|
||||
} else {
|
||||
match level {
|
||||
Level::ERROR => error!("Compaction failed: {err:#}"),
|
||||
Level::INFO => info!("Compaction failed: {err:#}"),
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,6 +89,7 @@ use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
|
||||
use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
|
||||
use super::secondary::heatmap::HeatMapLayer;
|
||||
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
||||
use super::tasks::log_compaction_error;
|
||||
use super::upload_queue::NotInitialized;
|
||||
use super::{
|
||||
AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,
|
||||
@@ -1856,18 +1857,23 @@ impl Timeline {
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
self.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_key_range: None,
|
||||
compact_lsn_range: None,
|
||||
sub_compaction: false,
|
||||
sub_compaction_max_job_size_mb: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
let res = self
|
||||
.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_key_range: None,
|
||||
compact_lsn_range: None,
|
||||
sub_compaction: false,
|
||||
sub_compaction_max_job_size_mb: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
if let Err(err) = &res {
|
||||
log_compaction_error(err, None, cancel.is_cancelled());
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
/// Outermost timeline compaction operation; downloads needed layers.
|
||||
@@ -4180,6 +4186,7 @@ impl Timeline {
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
&self.gate,
|
||||
&self.cancel,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -6736,6 +6743,8 @@ impl Timeline {
|
||||
self.tenant_shard_id,
|
||||
in_memory.lsn_range.start,
|
||||
&self.gate,
|
||||
// TODO: if we ever use this function in production code, we need to pass the real cancellation token
|
||||
&CancellationToken::new(),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -56,6 +56,7 @@ use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
||||
use crate::tenant::storage_layer::{
|
||||
AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState,
|
||||
};
|
||||
use crate::tenant::tasks::log_compaction_error;
|
||||
use crate::tenant::timeline::{
|
||||
DeltaLayerWriter, ImageLayerCreationOutcome, ImageLayerWriter, IoConcurrency, Layer,
|
||||
ResidentLayer, drop_rlock,
|
||||
@@ -440,6 +441,20 @@ impl GcCompactionQueue {
|
||||
ctx: &RequestContext,
|
||||
gc_block: &GcBlock,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let res = self.iteration_inner(cancel, ctx, gc_block, timeline).await;
|
||||
if let Err(err) = &res {
|
||||
log_compaction_error(err, None, cancel.is_cancelled());
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
async fn iteration_inner(
|
||||
&self,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
gc_block: &GcBlock,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else {
|
||||
return Err(CompactionError::AlreadyRunning(
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use anyhow::{Context, bail, ensure};
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::trace;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::{AtomicLsn, Lsn};
|
||||
@@ -193,6 +194,7 @@ impl OpenLayerManager {
|
||||
|
||||
/// Open a new writable layer to append data if there is no open layer, otherwise return the
|
||||
/// current open layer, called within `get_layer_for_write`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn get_layer_for_write(
|
||||
&mut self,
|
||||
lsn: Lsn,
|
||||
@@ -200,6 +202,7 @@ impl OpenLayerManager {
|
||||
timeline_id: TimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<InMemoryLayer>> {
|
||||
ensure!(lsn.is_aligned());
|
||||
@@ -227,9 +230,16 @@ impl OpenLayerManager {
|
||||
timeline_id, start_lsn, lsn
|
||||
);
|
||||
|
||||
let new_layer =
|
||||
InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn, gate, ctx)
|
||||
.await?;
|
||||
let new_layer = InMemoryLayer::create(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
start_lsn,
|
||||
gate,
|
||||
cancel,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let layer = Arc::new(new_layer);
|
||||
|
||||
self.layer_map.open_layer = Some(layer.clone());
|
||||
|
||||
@@ -3,7 +3,9 @@ use std::sync::Arc;
|
||||
|
||||
pub(crate) use flush::FlushControl;
|
||||
use flush::FlushHandle;
|
||||
pub(crate) use flush::FlushTaskError;
|
||||
use tokio_epoll_uring::IoBuf;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use super::io_buf_aligned::IoBufAligned;
|
||||
use super::io_buf_ext::{FullSlice, IoBufExt};
|
||||
@@ -40,11 +42,19 @@ pub trait OwnedAsyncWriter {
|
||||
// since we would avoid copying majority of the data into the internal buffer.
|
||||
pub struct BufferedWriter<B: Buffer, W> {
|
||||
writer: Arc<W>,
|
||||
/// invariant: always remains Some(buf) except
|
||||
/// - while IO is ongoing => goes back to Some() once the IO completed successfully
|
||||
/// - after an IO error => stays `None` forever
|
||||
///
|
||||
/// In these exceptional cases, it's `None`.
|
||||
/// Clone of the buffer that was last submitted to the flush loop.
|
||||
/// `None` if no flush request has been submitted, Some forever after.
|
||||
pub(super) maybe_flushed: Option<FullSlice<B::IoBuf>>,
|
||||
/// New writes are accumulated here.
|
||||
/// `None` only during submission while we wait for flush loop to accept
|
||||
/// the full dirty buffer in exchange for a clean buffer.
|
||||
/// If that exchange fails with an [`FlushTaskError`], the write path
|
||||
/// bails and leaves this as `None`.
|
||||
/// Subsequent writes will panic if attempted.
|
||||
/// The read path continues to work without error because [`Self::maybe_flushed`]
|
||||
/// and [`Self::bytes_submitted`] are advanced before the flush loop exchange starts,
|
||||
/// so, they will never try to read from [`Self::mutable`] anyway, because it's past
|
||||
/// the [`Self::maybe_flushed`] point.
|
||||
mutable: Option<B>,
|
||||
/// A handle to the background flush task for writting data to disk.
|
||||
flush_handle: FlushHandle<B::IoBuf, W>,
|
||||
@@ -65,16 +75,19 @@ where
|
||||
writer: Arc<W>,
|
||||
buf_new: impl Fn() -> B,
|
||||
gate_guard: utils::sync::gate::GateGuard,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
flush_task_span: tracing::Span,
|
||||
) -> Self {
|
||||
Self {
|
||||
writer: writer.clone(),
|
||||
mutable: Some(buf_new()),
|
||||
maybe_flushed: None,
|
||||
flush_handle: FlushHandle::spawn_new(
|
||||
writer,
|
||||
buf_new(),
|
||||
gate_guard,
|
||||
cancel,
|
||||
ctx.attached_child(),
|
||||
flush_task_span,
|
||||
),
|
||||
@@ -92,25 +105,26 @@ where
|
||||
}
|
||||
|
||||
/// Panics if used after any of the write paths returned an error
|
||||
pub fn inspect_mutable(&self) -> &B {
|
||||
self.mutable()
|
||||
pub fn inspect_mutable(&self) -> Option<&B> {
|
||||
self.mutable.as_ref()
|
||||
}
|
||||
|
||||
/// Gets a reference to the maybe flushed read-only buffer.
|
||||
/// Returns `None` if the writer has not submitted any flush request.
|
||||
pub fn inspect_maybe_flushed(&self) -> Option<&FullSlice<Buf>> {
|
||||
self.flush_handle.maybe_flushed.as_ref()
|
||||
self.maybe_flushed.as_ref()
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "macos", allow(dead_code))]
|
||||
pub async fn flush_and_into_inner(
|
||||
mut self,
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<(u64, Arc<W>)> {
|
||||
) -> Result<(u64, Arc<W>), FlushTaskError> {
|
||||
self.flush(ctx).await?;
|
||||
|
||||
let Self {
|
||||
mutable: buf,
|
||||
maybe_flushed: _,
|
||||
writer,
|
||||
mut flush_handle,
|
||||
bytes_submitted: bytes_amount,
|
||||
@@ -120,12 +134,9 @@ where
|
||||
Ok((bytes_amount, writer))
|
||||
}
|
||||
|
||||
/// Gets a reference to the mutable in-memory buffer.
|
||||
#[inline(always)]
|
||||
fn mutable(&self) -> &B {
|
||||
self.mutable
|
||||
.as_ref()
|
||||
.expect("must not use after we returned an error")
|
||||
#[cfg(test)]
|
||||
pub(crate) fn mutable(&self) -> &B {
|
||||
self.mutable.as_ref().expect("must not use after an error")
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "macos", allow(dead_code))]
|
||||
@@ -133,7 +144,7 @@ where
|
||||
&mut self,
|
||||
chunk: &[u8],
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<usize> {
|
||||
) -> Result<usize, FlushTaskError> {
|
||||
let (len, control) = self.write_buffered_borrowed_controlled(chunk, ctx).await?;
|
||||
if let Some(control) = control {
|
||||
control.release().await;
|
||||
@@ -146,7 +157,7 @@ where
|
||||
&mut self,
|
||||
mut chunk: &[u8],
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<(usize, Option<FlushControl>)> {
|
||||
) -> Result<(usize, Option<FlushControl>), FlushTaskError> {
|
||||
let chunk_len = chunk.len();
|
||||
let mut control: Option<FlushControl> = None;
|
||||
while !chunk.is_empty() {
|
||||
@@ -167,17 +178,47 @@ where
|
||||
Ok((chunk_len, control))
|
||||
}
|
||||
|
||||
/// This function can only error if the flush task got cancelled.
|
||||
/// In that case, we leave [`Self::mutable`] intentionally as `None`.
|
||||
///
|
||||
/// The read path continues to function correctly; it can read up to the
|
||||
/// point where it could read before, i.e., including what was in [`Self::mutable`]
|
||||
/// before the call to this function, because that's now stored in [`Self::maybe_flushed`].
|
||||
///
|
||||
/// The write path becomes unavailable and will panic if used.
|
||||
/// The only correct solution to retry writes is to discard the entire [`BufferedWriter`],
|
||||
/// which upper layers of pageserver write path currently do not support.
|
||||
/// It is in fact quite hard to reason about what exactly happens in today's code.
|
||||
/// Best case we accumulate junk in the EphemeralFile, worst case is data corruption.
|
||||
#[must_use = "caller must explcitly check the flush control"]
|
||||
async fn flush(&mut self, _ctx: &RequestContext) -> std::io::Result<Option<FlushControl>> {
|
||||
async fn flush(
|
||||
&mut self,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<Option<FlushControl>, FlushTaskError> {
|
||||
let buf = self.mutable.take().expect("must not use after an error");
|
||||
let buf_len = buf.pending();
|
||||
if buf_len == 0 {
|
||||
self.mutable = Some(buf);
|
||||
return Ok(None);
|
||||
}
|
||||
let (recycled, flush_control) = self.flush_handle.flush(buf, self.bytes_submitted).await?;
|
||||
// Prepare the buffer for read while flushing.
|
||||
let slice = buf.flush();
|
||||
// NB: this assignment also drops thereference to the old buffer, allowing us to re-own & make it mutable below.
|
||||
self.maybe_flushed = Some(slice.cheap_clone());
|
||||
let offset = self.bytes_submitted;
|
||||
self.bytes_submitted += u64::try_from(buf_len).unwrap();
|
||||
|
||||
// If we return/panic here or later, we'll leave mutable = None, breaking further
|
||||
// writers, but the read path should still work.
|
||||
let (recycled, flush_control) = self.flush_handle.flush(slice, offset).await?;
|
||||
|
||||
// The only other place that could hold a reference to the recycled buffer
|
||||
// is in `Self::maybe_flushed`, but we have already replace it with the new buffer.
|
||||
let recycled = Buffer::reuse_after_flush(recycled.into_raw_slice().into_inner());
|
||||
|
||||
// We got back some recycled buffer, can open up for more writes again.
|
||||
self.mutable = Some(recycled);
|
||||
|
||||
Ok(Some(flush_control))
|
||||
}
|
||||
}
|
||||
@@ -290,10 +331,12 @@ mod tests {
|
||||
let ctx = &ctx;
|
||||
let recorder = Arc::new(RecorderWriter::default());
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
let mut writer = BufferedWriter::<_, RecorderWriter>::new(
|
||||
recorder,
|
||||
|| IoBufferMut::with_capacity(2),
|
||||
gate.enter()?,
|
||||
cancel,
|
||||
ctx,
|
||||
tracing::Span::none(),
|
||||
);
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::ops::ControlFlow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{Instrument, info, info_span, warn};
|
||||
use utils::sync::duplex;
|
||||
@@ -15,9 +14,6 @@ use crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice;
|
||||
/// A handle to the flush task.
|
||||
pub struct FlushHandle<Buf, W> {
|
||||
inner: Option<FlushHandleInner<Buf, W>>,
|
||||
/// Immutable buffer for serving tail reads.
|
||||
/// `None` if no flush request has been submitted.
|
||||
pub(super) maybe_flushed: Option<FullSlice<Buf>>,
|
||||
}
|
||||
|
||||
pub struct FlushHandleInner<Buf, W> {
|
||||
@@ -25,7 +21,7 @@ pub struct FlushHandleInner<Buf, W> {
|
||||
/// and receives recyled buffer.
|
||||
channel: duplex::mpsc::Duplex<FlushRequest<Buf>, FullSlice<Buf>>,
|
||||
/// Join handle for the background flush task.
|
||||
join_handle: tokio::task::JoinHandle<std::io::Result<Arc<W>>>,
|
||||
join_handle: tokio::task::JoinHandle<Result<Arc<W>, FlushTaskError>>,
|
||||
}
|
||||
|
||||
struct FlushRequest<Buf> {
|
||||
@@ -117,27 +113,31 @@ where
|
||||
{
|
||||
/// Spawns a new background flush task and obtains a handle.
|
||||
///
|
||||
/// Note: The background task so we do not need to explicitly maintain a queue of buffers.
|
||||
/// Handle and background task are connected through a duplex channel.
|
||||
/// Dirty buffers are sent to the background task for flushing.
|
||||
/// Clean buffers are sent back to the handle for reuse.
|
||||
///
|
||||
/// The queue depth is 1, and the passed-in `buf` seeds the queue depth.
|
||||
/// I.e., the passed-in buf is immediately available to the handle as a recycled buffer.
|
||||
pub fn spawn_new<B>(
|
||||
file: Arc<W>,
|
||||
buf: B,
|
||||
gate_guard: utils::sync::gate::GateGuard,
|
||||
cancel: CancellationToken,
|
||||
ctx: RequestContext,
|
||||
span: tracing::Span,
|
||||
) -> Self
|
||||
where
|
||||
B: Buffer<IoBuf = Buf> + Send + 'static,
|
||||
{
|
||||
// It is fine to buffer up to only 1 message. We only 1 message in-flight at a time.
|
||||
let (front, back) = duplex::mpsc::channel(1);
|
||||
back.try_send(buf.flush())
|
||||
.expect("we just created it with capacity 1");
|
||||
|
||||
let join_handle = tokio::spawn(
|
||||
async move {
|
||||
FlushBackgroundTask::new(back, file, gate_guard, ctx)
|
||||
.run(buf.flush())
|
||||
.await
|
||||
}
|
||||
.instrument(span),
|
||||
FlushBackgroundTask::new(back, file, gate_guard, cancel, ctx)
|
||||
.run()
|
||||
.instrument(span),
|
||||
);
|
||||
|
||||
FlushHandle {
|
||||
@@ -145,7 +145,6 @@ where
|
||||
channel: front,
|
||||
join_handle,
|
||||
}),
|
||||
maybe_flushed: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,15 +152,11 @@ where
|
||||
/// Returns a buffer that completed flushing for re-use, length reset to 0, capacity unchanged.
|
||||
/// If `save_buf_for_read` is true, then we save the buffer in `Self::maybe_flushed`, otherwise
|
||||
/// clear `maybe_flushed`.
|
||||
pub async fn flush<B>(&mut self, buf: B, offset: u64) -> std::io::Result<(B, FlushControl)>
|
||||
where
|
||||
B: Buffer<IoBuf = Buf> + Send + 'static,
|
||||
{
|
||||
let slice = buf.flush();
|
||||
|
||||
// Saves a buffer for read while flushing. This also removes reference to the old buffer.
|
||||
self.maybe_flushed = Some(slice.cheap_clone());
|
||||
|
||||
pub async fn flush(
|
||||
&mut self,
|
||||
slice: FullSlice<Buf>,
|
||||
offset: u64,
|
||||
) -> Result<(FullSlice<Buf>, FlushControl), FlushTaskError> {
|
||||
let (request, flush_control) = new_flush_op(slice, offset);
|
||||
|
||||
// Submits the buffer to the background task.
|
||||
@@ -177,13 +172,10 @@ where
|
||||
return self.handle_error().await;
|
||||
};
|
||||
|
||||
// The only other place that could hold a reference to the recycled buffer
|
||||
// is in `Self::maybe_flushed`, but we have already replace it with the new buffer.
|
||||
let recycled = Buffer::reuse_after_flush(recycled.into_raw_slice().into_inner());
|
||||
Ok((recycled, flush_control))
|
||||
}
|
||||
|
||||
async fn handle_error<T>(&mut self) -> std::io::Result<T> {
|
||||
async fn handle_error<T>(&mut self) -> Result<T, FlushTaskError> {
|
||||
Err(self
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -191,7 +183,7 @@ where
|
||||
}
|
||||
|
||||
/// Cleans up the channel, join the flush task.
|
||||
pub async fn shutdown(&mut self) -> std::io::Result<Arc<W>> {
|
||||
pub async fn shutdown(&mut self) -> Result<Arc<W>, FlushTaskError> {
|
||||
let handle = self
|
||||
.inner
|
||||
.take()
|
||||
@@ -217,10 +209,17 @@ pub struct FlushBackgroundTask<Buf, W> {
|
||||
/// A writter for persisting data to disk.
|
||||
writer: Arc<W>,
|
||||
ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
/// Prevent timeline from shuting down until the flush background task finishes flushing all remaining buffers to disk.
|
||||
_gate_guard: utils::sync::gate::GateGuard,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum FlushTaskError {
|
||||
#[error("flush task cancelled")]
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
impl<Buf, W> FlushBackgroundTask<Buf, W>
|
||||
where
|
||||
Buf: IoBufAligned + Send + Sync,
|
||||
@@ -231,25 +230,20 @@ where
|
||||
channel: duplex::mpsc::Duplex<FullSlice<Buf>, FlushRequest<Buf>>,
|
||||
file: Arc<W>,
|
||||
gate_guard: utils::sync::gate::GateGuard,
|
||||
cancel: CancellationToken,
|
||||
ctx: RequestContext,
|
||||
) -> Self {
|
||||
FlushBackgroundTask {
|
||||
channel,
|
||||
writer: file,
|
||||
_gate_guard: gate_guard,
|
||||
cancel,
|
||||
ctx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the background flush task.
|
||||
/// The passed in slice is immediately sent back to the flush handle through the duplex channel.
|
||||
async fn run(mut self, slice: FullSlice<Buf>) -> std::io::Result<Arc<W>> {
|
||||
// Sends the extra buffer back to the handle.
|
||||
// TODO: can this ever await and or fail? I think not.
|
||||
self.channel.send(slice).await.map_err(|_| {
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "flush handle closed early")
|
||||
})?;
|
||||
|
||||
async fn run(mut self) -> Result<Arc<W>, FlushTaskError> {
|
||||
// Exit condition: channel is closed and there is no remaining buffer to be flushed
|
||||
while let Some(request) = self.channel.recv().await {
|
||||
#[cfg(test)]
|
||||
@@ -267,15 +261,13 @@ where
|
||||
// (The upper layers of the Pageserver write path are not equipped to retry write errors
|
||||
// becasuse they often deallocate the buffers that were already written).
|
||||
//
|
||||
// TODO: cancellation sensitiity.
|
||||
// Without it, if we hit a bug where retrying is never successful,
|
||||
// then we can't shut down the timeline/tenant/pageserver cleanly because
|
||||
// layers of the Pageserver write path are holding the gate open for EphemeralFile.
|
||||
//
|
||||
// TODO: use utils::backoff::retry once async closures are actually usable
|
||||
//
|
||||
let mut slice_storage = Some(request.slice);
|
||||
for attempt in 1.. {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(FlushTaskError::Cancelled);
|
||||
}
|
||||
let result = async {
|
||||
if attempt > 1 {
|
||||
info!("retrying flush");
|
||||
@@ -283,6 +275,11 @@ where
|
||||
let slice = slice_storage.take().expect(
|
||||
"likely previous invocation of this future didn't get polled to completion",
|
||||
);
|
||||
// Don't cancel this write by doing tokio::select with self.cancel.cancelled().
|
||||
// The underlying tokio-epoll-uring slot / kernel operation is still ongoing and occupies resources.
|
||||
// If we retry indefinitely, we'll deplete those resources.
|
||||
// Future: teach tokio-epoll-uring io_uring operation cancellation, but still,
|
||||
// wait for cancelled ops to complete and discard their error.
|
||||
let (slice, res) = self.writer.write_all_at(slice, request.offset, &self.ctx).await;
|
||||
slice_storage = Some(slice);
|
||||
let res = res.maybe_fatal_err("owned_buffers_io flush");
|
||||
@@ -290,8 +287,7 @@ where
|
||||
return ControlFlow::Break(());
|
||||
};
|
||||
warn!(%err, "error flushing buffered writer buffer to disk, retrying after backoff");
|
||||
static NO_CANCELLATION: Lazy<CancellationToken> = Lazy::new(CancellationToken::new);
|
||||
utils::backoff::exponential_backoff(attempt, 1.0, 10.0, &NO_CANCELLATION).await;
|
||||
utils::backoff::exponential_backoff(attempt, 1.0, 10.0, &self.cancel).await;
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
.instrument(info_span!("flush_attempt", %attempt))
|
||||
|
||||
@@ -2778,6 +2778,9 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
case RELPERSISTENCE_TEMP:
|
||||
case RELPERSISTENCE_UNLOGGED:
|
||||
mdextend(reln, forkNum, blkno, buffer, skipFsync);
|
||||
/* Update LFC in case of unlogged index build */
|
||||
if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
|
||||
lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
|
||||
return;
|
||||
|
||||
default:
|
||||
@@ -2866,6 +2869,14 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
|
||||
case RELPERSISTENCE_TEMP:
|
||||
case RELPERSISTENCE_UNLOGGED:
|
||||
mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
|
||||
/* Update LFC in case of unlogged index build */
|
||||
if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
|
||||
{
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
@@ -3714,6 +3725,9 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
|
||||
#else
|
||||
mdwrite(reln, forknum, blocknum, buffer, skipFsync);
|
||||
#endif
|
||||
/* Update LFC in case of unlogged index build */
|
||||
if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
|
||||
lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
|
||||
return;
|
||||
default:
|
||||
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
@@ -3777,6 +3791,9 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
case RELPERSISTENCE_TEMP:
|
||||
case RELPERSISTENCE_UNLOGGED:
|
||||
mdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);
|
||||
/* Update LFC in case of unlogged index build */
|
||||
if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
|
||||
lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);
|
||||
return;
|
||||
default:
|
||||
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
|
||||
@@ -70,8 +70,9 @@ reqwest-middleware = { workspace = true, features = ["json"] }
|
||||
reqwest-retry.workspace = true
|
||||
reqwest-tracing.workspace = true
|
||||
rustc-hash.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
rustls.workspace = true
|
||||
rustls-native-certs.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
scopeguard.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
@@ -99,8 +100,7 @@ url.workspace = true
|
||||
urlencoding.workspace = true
|
||||
utils.workspace = true
|
||||
uuid.workspace = true
|
||||
rustls-native-certs.workspace = true
|
||||
x509-parser.workspace = true
|
||||
x509-cert.workspace = true
|
||||
redis.workspace = true
|
||||
zerocopy.workspace = true
|
||||
|
||||
|
||||
@@ -437,9 +437,11 @@ async fn request_handler(
|
||||
let testodrome_id = request
|
||||
.headers()
|
||||
.get("X-Neon-Query-ID")
|
||||
.map(|value| value.to_str().unwrap_or_default().to_string());
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
if let Some(query_id) = testodrome_id {
|
||||
info!(parent: &ctx.span(), "testodrome query ID: {query_id}");
|
||||
ctx.set_testodrome_id(query_id);
|
||||
}
|
||||
|
||||
@@ -481,6 +483,17 @@ async fn request_handler(
|
||||
);
|
||||
let span = ctx.span();
|
||||
|
||||
let testodrome_id = request
|
||||
.headers()
|
||||
.get("X-Neon-Query-ID")
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
if let Some(query_id) = testodrome_id {
|
||||
info!(parent: &ctx.span(), "testodrome query ID: {query_id}");
|
||||
ctx.set_testodrome_id(query_id);
|
||||
}
|
||||
|
||||
sql_over_http::handle(config, ctx, request, backend, http_cancellation_token)
|
||||
.instrument(span)
|
||||
.await
|
||||
|
||||
@@ -6,7 +6,7 @@ use anyhow::Context;
|
||||
use rustls::pki_types::CertificateDer;
|
||||
use sha2::{Digest, Sha256};
|
||||
use tracing::{error, info};
|
||||
use x509_parser::oid_registry;
|
||||
use x509_cert::der::{Reader, SliceReader, oid};
|
||||
|
||||
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L159>
|
||||
pub const PG_ALPN_PROTOCOL: &[u8] = b"postgresql";
|
||||
@@ -41,27 +41,27 @@ pub enum TlsServerEndPoint {
|
||||
|
||||
impl TlsServerEndPoint {
|
||||
pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result<Self> {
|
||||
let sha256_oids = [
|
||||
const SHA256_OIDS: &[oid::ObjectIdentifier] = &[
|
||||
// I'm explicitly not adding MD5 or SHA1 here... They're bad.
|
||||
oid_registry::OID_SIG_ECDSA_WITH_SHA256,
|
||||
oid_registry::OID_PKCS1_SHA256WITHRSA,
|
||||
oid::db::rfc5912::ECDSA_WITH_SHA_256,
|
||||
oid::db::rfc5912::SHA_256_WITH_RSA_ENCRYPTION,
|
||||
];
|
||||
|
||||
let pem = x509_parser::parse_x509_certificate(cert)
|
||||
.context("Failed to parse PEM object from cerficiate")?
|
||||
.1;
|
||||
let certificate = SliceReader::new(cert)
|
||||
.context("Failed to parse cerficiate")?
|
||||
.decode::<x509_cert::Certificate>()
|
||||
.context("Failed to parse cerficiate")?;
|
||||
|
||||
info!(subject = %pem.subject, "parsing TLS certificate");
|
||||
let subject = certificate.tbs_certificate.subject;
|
||||
info!(%subject, "parsing TLS certificate");
|
||||
|
||||
let reg = oid_registry::OidRegistry::default().with_all_crypto();
|
||||
let oid = pem.signature_algorithm.oid();
|
||||
let alg = reg.get(oid);
|
||||
if sha256_oids.contains(oid) {
|
||||
let oid = certificate.signature_algorithm.oid;
|
||||
if SHA256_OIDS.contains(&oid) {
|
||||
let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
|
||||
info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
|
||||
info!(%subject, tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
|
||||
Ok(Self::Sha256(tls_server_end_point))
|
||||
} else {
|
||||
error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
|
||||
error!(%subject, "unknown channel binding");
|
||||
Ok(Self::Undefined)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use anyhow::{Context, bail};
|
||||
use itertools::Itertools;
|
||||
use rustls::crypto::ring::{self, sign};
|
||||
use rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
use x509_cert::der::{Reader, SliceReader};
|
||||
|
||||
use super::{PG_ALPN_PROTOCOL, TlsServerEndPoint};
|
||||
|
||||
@@ -131,11 +132,13 @@ impl CertResolver {
|
||||
|
||||
let first_cert = &cert_chain[0];
|
||||
let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
|
||||
let pem = x509_parser::parse_x509_certificate(first_cert)
|
||||
.context("Failed to parse PEM object from cerficiate")?
|
||||
.1;
|
||||
|
||||
let common_name = pem.subject().to_string();
|
||||
let certificate = SliceReader::new(first_cert)
|
||||
.context("Failed to parse cerficiate")?
|
||||
.decode::<x509_cert::Certificate>()
|
||||
.context("Failed to parse cerficiate")?;
|
||||
|
||||
let common_name = certificate.tbs_certificate.subject.to_string();
|
||||
|
||||
// We need to get the canonical name for this certificate so we can match them against any domain names
|
||||
// seen within the proxy codebase.
|
||||
|
||||
@@ -81,13 +81,10 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<TimelineStatus> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}",
|
||||
self.mgmt_api_endpoint, req.tenant_id, req.timeline_id
|
||||
);
|
||||
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<reqwest::Response> {
|
||||
let uri = format!("{}/v1/tenant/timeline", self.mgmt_api_endpoint);
|
||||
let resp = self.post(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use safekeeper_api::models::{
|
||||
self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
TimelineStatus,
|
||||
};
|
||||
use safekeeper_client::mgmt_api::{Client, Result};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
@@ -60,7 +59,7 @@ impl SafekeeperClient {
|
||||
pub(crate) async fn create_timeline(
|
||||
&self,
|
||||
req: &TimelineCreateRequest,
|
||||
) -> Result<TimelineStatus> {
|
||||
) -> Result<reqwest::Response> {
|
||||
measured_request!(
|
||||
"create_timeline",
|
||||
crate::metrics::Method::Post,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod chaos_injector;
|
||||
mod context_iterator;
|
||||
pub(crate) mod safekeeper_reconciler;
|
||||
mod safekeeper_service;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering;
|
||||
@@ -27,16 +28,15 @@ use itertools::Itertools;
|
||||
use pageserver_api::controller_api::{
|
||||
AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability,
|
||||
NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy,
|
||||
SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest,
|
||||
ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse,
|
||||
TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard,
|
||||
TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
|
||||
TenantCreateRequest, TenantCreateResponse, TenantCreateResponseShard, TenantDescribeResponse,
|
||||
TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode,
|
||||
PageserverUtilization, SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters,
|
||||
TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
PageserverUtilization, SecondaryProgress, ShardParameters, TenantConfig,
|
||||
TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest,
|
||||
TenantShardSplitResponse, TenantSorting, TenantTimeTravelRequest,
|
||||
TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateResponseStorcon,
|
||||
@@ -51,18 +51,15 @@ use pageserver_api::upcall_api::{
|
||||
};
|
||||
use pageserver_client::{BlockUnblock, mgmt_api};
|
||||
use reqwest::{Certificate, StatusCode};
|
||||
use safekeeper_api::membership::{MemberSet, SafekeeperId};
|
||||
use safekeeper_api::models::SafekeeperUtilization;
|
||||
use safekeeper_reconciler::{SafekeeperReconcilers, ScheduleRequest};
|
||||
use safekeeper_reconciler::SafekeeperReconcilers;
|
||||
use tokio::sync::TryAcquireError;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
|
||||
use utils::completion::Barrier;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::logging::SecretString;
|
||||
use utils::sync::gate::Gate;
|
||||
use utils::{failpoint_support, pausable_failpoint};
|
||||
|
||||
@@ -83,8 +80,8 @@ use crate::peer_client::GlobalObservedState;
|
||||
use crate::persistence::split_state::SplitState;
|
||||
use crate::persistence::{
|
||||
AbortShardSplitStatus, ControllerPersistence, DatabaseError, DatabaseResult,
|
||||
MetadataHealthPersistence, Persistence, SafekeeperTimelineOpKind, ShardGenerationState,
|
||||
TenantFilter, TenantShardPersistence, TimelinePendingOpPersistence, TimelinePersistence,
|
||||
MetadataHealthPersistence, Persistence, ShardGenerationState, TenantFilter,
|
||||
TenantShardPersistence,
|
||||
};
|
||||
use crate::reconciler::{
|
||||
ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, ReconcilerPriority,
|
||||
@@ -864,11 +861,9 @@ impl Service {
|
||||
};
|
||||
|
||||
tracing::info!("Sending initial heartbeats...");
|
||||
// Put a small, but reasonable timeout to get the initial heartbeats of the safekeepers to avoid a storage controller downtime
|
||||
const SK_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
let (res_ps, res_sk) = tokio::join!(
|
||||
self.heartbeater_ps.heartbeat(Arc::new(nodes_to_heartbeat)),
|
||||
tokio::time::timeout(SK_TIMEOUT, self.heartbeater_sk.heartbeat(all_sks))
|
||||
self.heartbeater_sk.heartbeat(all_sks)
|
||||
);
|
||||
|
||||
let mut online_nodes = HashMap::new();
|
||||
@@ -887,7 +882,7 @@ impl Service {
|
||||
}
|
||||
|
||||
let mut online_sks = HashMap::new();
|
||||
if let Ok(Ok(deltas)) = res_sk {
|
||||
if let Ok(deltas) = res_sk {
|
||||
for (node_id, status) in deltas.0 {
|
||||
match status {
|
||||
SafekeeperState::Available {
|
||||
@@ -1123,10 +1118,9 @@ impl Service {
|
||||
locked.safekeepers.clone()
|
||||
};
|
||||
|
||||
const SK_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
let (res_ps, res_sk) = tokio::join!(
|
||||
self.heartbeater_ps.heartbeat(nodes),
|
||||
tokio::time::timeout(SK_TIMEOUT, self.heartbeater_sk.heartbeat(safekeepers))
|
||||
self.heartbeater_sk.heartbeat(safekeepers)
|
||||
);
|
||||
|
||||
if let Ok(deltas) = res_ps {
|
||||
@@ -1230,7 +1224,7 @@ impl Service {
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Ok(Ok(deltas)) = res_sk {
|
||||
if let Ok(deltas) = res_sk {
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
for (id, state) in deltas.0 {
|
||||
@@ -3649,281 +3643,6 @@ impl Service {
|
||||
.await?
|
||||
}
|
||||
|
||||
/// Timeline creation on safekeepers
|
||||
///
|
||||
/// Returns `Ok(left)` if the timeline has been created on a quorum of safekeepers,
|
||||
/// where `left` contains the list of safekeepers that didn't have a successful response.
|
||||
/// Assumes tenant lock is held while calling this function.
|
||||
async fn tenant_timeline_create_safekeepers_quorum(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
pg_version: u32,
|
||||
timeline_persistence: &TimelinePersistence,
|
||||
) -> Result<Vec<NodeId>, ApiError> {
|
||||
// If quorum is reached, return if we are outside of a specified timeout
|
||||
let jwt = self
|
||||
.config
|
||||
.safekeeper_jwt_token
|
||||
.clone()
|
||||
.map(SecretString::from);
|
||||
let mut joinset = JoinSet::new();
|
||||
|
||||
let safekeepers = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
locked.safekeepers.clone()
|
||||
};
|
||||
|
||||
let mut members = Vec::new();
|
||||
for sk_id in timeline_persistence.sk_set.iter() {
|
||||
let sk_id = NodeId(*sk_id as u64);
|
||||
let Some(safekeeper) = safekeepers.get(&sk_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"couldn't find entry for safekeeper with id {sk_id}"
|
||||
)))?;
|
||||
};
|
||||
members.push(SafekeeperId {
|
||||
id: sk_id,
|
||||
host: safekeeper.skp.host.clone(),
|
||||
pg_port: safekeeper.skp.port as u16,
|
||||
});
|
||||
}
|
||||
let mset = MemberSet::new(members).map_err(ApiError::InternalServerError)?;
|
||||
let mconf = safekeeper_api::membership::Configuration::new(mset);
|
||||
|
||||
let req = safekeeper_api::models::TimelineCreateRequest {
|
||||
commit_lsn: None,
|
||||
mconf,
|
||||
pg_version,
|
||||
start_lsn: timeline_persistence.start_lsn.0,
|
||||
system_id: None,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
wal_seg_size: None,
|
||||
};
|
||||
const SK_CREATE_TIMELINE_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
for sk in timeline_persistence.sk_set.iter() {
|
||||
let sk_id = NodeId(*sk as u64);
|
||||
let safekeepers = safekeepers.clone();
|
||||
let jwt = jwt.clone();
|
||||
let ssl_ca_cert = self.config.ssl_ca_cert.clone();
|
||||
let req = req.clone();
|
||||
joinset.spawn(async move {
|
||||
// Unwrap is fine as we already would have returned error above
|
||||
let sk_p = safekeepers.get(&sk_id).unwrap();
|
||||
let res = sk_p
|
||||
.with_client_retries(
|
||||
|client| {
|
||||
let req = req.clone();
|
||||
async move { client.create_timeline(&req).await }
|
||||
},
|
||||
&jwt,
|
||||
&ssl_ca_cert,
|
||||
3,
|
||||
3,
|
||||
SK_CREATE_TIMELINE_RECONCILE_TIMEOUT,
|
||||
&CancellationToken::new(),
|
||||
)
|
||||
.await;
|
||||
(sk_id, sk_p.skp.host.clone(), res)
|
||||
});
|
||||
}
|
||||
// After we have built the joinset, we now wait for the tasks to complete,
|
||||
// but with a specified timeout to make sure we return swiftly, either with
|
||||
// a failure or success.
|
||||
let reconcile_deadline = tokio::time::Instant::now() + SK_CREATE_TIMELINE_RECONCILE_TIMEOUT;
|
||||
|
||||
// Wait until all tasks finish or timeout is hit, whichever occurs
|
||||
// first.
|
||||
let mut reconcile_results = Vec::new();
|
||||
loop {
|
||||
if let Ok(res) = tokio::time::timeout_at(reconcile_deadline, joinset.join_next()).await
|
||||
{
|
||||
let Some(res) = res else { break };
|
||||
match res {
|
||||
Ok(res) => {
|
||||
tracing::info!(
|
||||
"response from safekeeper id:{} at {}: {:?}",
|
||||
res.0,
|
||||
res.1,
|
||||
res.2
|
||||
);
|
||||
reconcile_results.push(res);
|
||||
}
|
||||
Err(join_err) => {
|
||||
tracing::info!("join_err for task in joinset: {join_err}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::info!(
|
||||
"timeout for creation call after {} responses",
|
||||
reconcile_results.len()
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now check now if quorum was reached in reconcile_results.
|
||||
let total_result_count = reconcile_results.len();
|
||||
let remaining = reconcile_results
|
||||
.into_iter()
|
||||
.filter_map(|res| res.2.is_err().then_some(res.0))
|
||||
.collect::<Vec<_>>();
|
||||
tracing::info!(
|
||||
"Got {} non-successful responses from initial creation request of total {total_result_count} responses",
|
||||
remaining.len()
|
||||
);
|
||||
if remaining.len() >= 2 {
|
||||
// Failure
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"not enough successful reconciliations to reach quorum, please retry: {} errored",
|
||||
remaining.len()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(remaining)
|
||||
}
|
||||
|
||||
/// Create timeline in controller database and on safekeepers.
|
||||
/// `timeline_info` is result of timeline creation on pageserver.
|
||||
///
|
||||
/// All actions must be idempotent as the call is retried until success. It
|
||||
/// tries to create timeline in the db and on at least majority of
|
||||
/// safekeepers + queue creation for safekeepers which missed it in the db
|
||||
/// for infinite retries; after that, call returns Ok.
|
||||
///
|
||||
/// The idea is that once this is reached as long as we have alive majority
|
||||
/// of safekeepers it is expected to get eventually operational as storcon
|
||||
/// will be able to seed timeline on nodes which missed creation by making
|
||||
/// pull_timeline from peers. On the other hand we don't want to fail
|
||||
/// timeline creation if one safekeeper is down.
|
||||
async fn tenant_timeline_create_safekeepers(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
timeline_info: &TimelineInfo,
|
||||
create_mode: models::TimelineCreateRequestMode,
|
||||
) -> Result<SafekeepersInfo, ApiError> {
|
||||
let timeline_id = timeline_info.timeline_id;
|
||||
let pg_version = timeline_info.pg_version;
|
||||
// Initially start_lsn is determined by last_record_lsn in pageserver
|
||||
// response as it does initdb. However, later we persist it and in sk
|
||||
// creation calls replace with the value from the timeline row if it
|
||||
// previously existed as on retries in theory endpoint might have
|
||||
// already written some data and advanced last_record_lsn, while we want
|
||||
// safekeepers to have consistent start_lsn.
|
||||
let start_lsn = match create_mode {
|
||||
models::TimelineCreateRequestMode::Bootstrap { .. } => timeline_info.last_record_lsn,
|
||||
models::TimelineCreateRequestMode::Branch { .. } => timeline_info.last_record_lsn,
|
||||
models::TimelineCreateRequestMode::ImportPgdata { .. } => {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"import pgdata doesn't specify the start lsn, aborting creation on safekeepers"
|
||||
)))?;
|
||||
}
|
||||
};
|
||||
// Choose initial set of safekeepers respecting affinity
|
||||
let sks = self.safekeepers_for_new_timeline().await?;
|
||||
let sks_persistence = sks.iter().map(|sk| sk.id.0 as i64).collect::<Vec<_>>();
|
||||
// Add timeline to db
|
||||
let mut timeline_persist = TimelinePersistence {
|
||||
tenant_id: tenant_id.to_string(),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
start_lsn: start_lsn.into(),
|
||||
generation: 0,
|
||||
sk_set: sks_persistence.clone(),
|
||||
new_sk_set: None,
|
||||
cplane_notified_generation: 0,
|
||||
deleted_at: None,
|
||||
};
|
||||
let inserted = self
|
||||
.persistence
|
||||
.insert_timeline(timeline_persist.clone())
|
||||
.await?;
|
||||
if !inserted {
|
||||
if let Some(existent_persist) = self
|
||||
.persistence
|
||||
.get_timeline(tenant_id, timeline_id)
|
||||
.await?
|
||||
{
|
||||
// Replace with what we have in the db, to get stuff like the generation right.
|
||||
// We do still repeat the http calls to the safekeepers. After all, we could have
|
||||
// crashed right after the wrote to the DB.
|
||||
timeline_persist = existent_persist;
|
||||
} else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"insertion said timeline already in db, but looking it up, it was gone"
|
||||
)));
|
||||
}
|
||||
}
|
||||
// Create the timeline on a quorum of safekeepers
|
||||
let remaining = self
|
||||
.tenant_timeline_create_safekeepers_quorum(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
pg_version,
|
||||
&timeline_persist,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// For the remaining safekeepers, take care of their reconciliation asynchronously
|
||||
for &remaining_id in remaining.iter() {
|
||||
let pending_op = TimelinePendingOpPersistence {
|
||||
tenant_id: tenant_id.to_string(),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
generation: timeline_persist.generation,
|
||||
op_kind: crate::persistence::SafekeeperTimelineOpKind::Pull,
|
||||
sk_id: remaining_id.0 as i64,
|
||||
};
|
||||
tracing::info!("writing pending op for sk id {remaining_id}");
|
||||
self.persistence.insert_pending_op(pending_op).await?;
|
||||
}
|
||||
if !remaining.is_empty() {
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
for remaining_id in remaining {
|
||||
let Some(sk) = locked.safekeepers.get(&remaining_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {remaining_id}"
|
||||
)));
|
||||
};
|
||||
let Ok(host_list) = sks
|
||||
.iter()
|
||||
.map(|sk| {
|
||||
Ok((
|
||||
sk.id,
|
||||
locked
|
||||
.safekeepers
|
||||
.get(&sk.id)
|
||||
.ok_or_else(|| {
|
||||
ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {remaining_id} to pull from"
|
||||
))
|
||||
})?
|
||||
.base_url(),
|
||||
))
|
||||
})
|
||||
.collect::<Result<_, ApiError>>()
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let req = ScheduleRequest {
|
||||
safekeeper: Box::new(sk.clone()),
|
||||
host_list,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
generation: timeline_persist.generation as u32,
|
||||
kind: crate::persistence::SafekeeperTimelineOpKind::Pull,
|
||||
};
|
||||
locked.safekeeper_reconcilers.schedule_request(self, req);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SafekeepersInfo {
|
||||
generation: timeline_persist.generation as u32,
|
||||
safekeepers: sks,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_timeline_create(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
@@ -4617,62 +4336,6 @@ impl Service {
|
||||
|
||||
status_code
|
||||
}
|
||||
/// Perform timeline deletion on safekeepers. Will return success: we persist the deletion into the reconciler.
|
||||
async fn tenant_timeline_delete_safekeepers(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<(), ApiError> {
|
||||
let tl = self
|
||||
.persistence
|
||||
.get_timeline(tenant_id, timeline_id)
|
||||
.await?;
|
||||
let Some(tl) = tl else {
|
||||
tracing::info!(
|
||||
"timeline {tenant_id}/{timeline_id} doesn't exist in timelines table, no deletions on safekeepers needed"
|
||||
);
|
||||
return Ok(());
|
||||
};
|
||||
let all_sks = tl
|
||||
.new_sk_set
|
||||
.iter()
|
||||
.flat_map(|sks| {
|
||||
sks.iter()
|
||||
.map(|sk| (*sk, SafekeeperTimelineOpKind::Exclude))
|
||||
})
|
||||
.chain(
|
||||
tl.sk_set
|
||||
.iter()
|
||||
.map(|v| (*v, SafekeeperTimelineOpKind::Delete)),
|
||||
)
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// Schedule reconciliations
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
for (sk_id, kind) in all_sks {
|
||||
let sk_id = NodeId(sk_id as u64);
|
||||
let Some(sk) = locked.safekeepers.get(&sk_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {sk_id}"
|
||||
)));
|
||||
};
|
||||
|
||||
let req = ScheduleRequest {
|
||||
safekeeper: Box::new(sk.clone()),
|
||||
// we don't use this for this kind, put a dummy value
|
||||
host_list: Vec::new(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
generation: tl.generation as u32,
|
||||
kind,
|
||||
};
|
||||
locked.safekeeper_reconcilers.schedule_request(self, req);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// When you know the TenantId but not a specific shard, and would like to get the node holding shard 0.
|
||||
pub(crate) async fn tenant_shard0_node(
|
||||
&self,
|
||||
@@ -8719,168 +8382,6 @@ impl Service {
|
||||
global_observed
|
||||
}
|
||||
|
||||
/// Choose safekeepers for the new timeline: 3 in different azs.
|
||||
pub(crate) async fn safekeepers_for_new_timeline(
|
||||
&self,
|
||||
) -> Result<Vec<SafekeeperInfo>, ApiError> {
|
||||
let mut all_safekeepers = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
locked
|
||||
.safekeepers
|
||||
.iter()
|
||||
.filter_map(|sk| {
|
||||
if sk.1.scheduling_policy() != SkSchedulingPolicy::Active {
|
||||
// If we don't want to schedule stuff onto the safekeeper, respect that.
|
||||
return None;
|
||||
}
|
||||
let utilization_opt = if let SafekeeperState::Available {
|
||||
last_seen_at: _,
|
||||
utilization,
|
||||
} = sk.1.availability()
|
||||
{
|
||||
Some(utilization)
|
||||
} else {
|
||||
// non-available safekeepers still get a chance for new timelines,
|
||||
// but put them last in the list.
|
||||
None
|
||||
};
|
||||
let info = SafekeeperInfo {
|
||||
hostname: sk.1.skp.host.clone(),
|
||||
id: NodeId(sk.1.skp.id as u64),
|
||||
};
|
||||
Some((utilization_opt, info, sk.1.skp.availability_zone_id.clone()))
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
all_safekeepers.sort_by_key(|sk| {
|
||||
(
|
||||
sk.0.as_ref()
|
||||
.map(|ut| ut.timeline_count)
|
||||
.unwrap_or(u64::MAX),
|
||||
// Use the id to decide on equal scores for reliability
|
||||
sk.1.id.0,
|
||||
)
|
||||
});
|
||||
let mut sks = Vec::new();
|
||||
let mut azs = HashSet::new();
|
||||
for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
|
||||
if !azs.insert(az_id) {
|
||||
continue;
|
||||
}
|
||||
sks.push(sk_info.clone());
|
||||
if sks.len() == 3 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if sks.len() == 3 {
|
||||
Ok(sks)
|
||||
} else {
|
||||
Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"couldn't find three safekeepers in different AZs for new timeline"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn safekeepers_list(
|
||||
&self,
|
||||
) -> Result<Vec<SafekeeperDescribeResponse>, DatabaseError> {
|
||||
let locked = self.inner.read().unwrap();
|
||||
let mut list = locked
|
||||
.safekeepers
|
||||
.iter()
|
||||
.map(|sk| sk.1.describe_response())
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
list.sort_by_key(|v| v.id);
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_safekeeper(
|
||||
&self,
|
||||
id: i64,
|
||||
) -> Result<SafekeeperDescribeResponse, DatabaseError> {
|
||||
let locked = self.inner.read().unwrap();
|
||||
let sk = locked
|
||||
.safekeepers
|
||||
.get(&NodeId(id as u64))
|
||||
.ok_or(diesel::result::Error::NotFound)?;
|
||||
sk.describe_response()
|
||||
}
|
||||
|
||||
pub(crate) async fn upsert_safekeeper(
|
||||
&self,
|
||||
record: crate::persistence::SafekeeperUpsert,
|
||||
) -> Result<(), ApiError> {
|
||||
let node_id = NodeId(record.id as u64);
|
||||
let use_https = self.config.use_https_safekeeper_api;
|
||||
|
||||
if use_https && record.https_port.is_none() {
|
||||
return Err(ApiError::PreconditionFailed(
|
||||
format!(
|
||||
"cannot upsert safekeeper {node_id}: \
|
||||
https is enabled, but https port is not specified"
|
||||
)
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
|
||||
self.persistence.safekeeper_upsert(record.clone()).await?;
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
match safekeepers.entry(node_id) {
|
||||
std::collections::hash_map::Entry::Occupied(mut entry) => entry
|
||||
.get_mut()
|
||||
.update_from_record(record)
|
||||
.expect("all preconditions should be checked before upsert to database"),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(
|
||||
Safekeeper::from_persistence(
|
||||
crate::persistence::SafekeeperPersistence::from_upsert(
|
||||
record,
|
||||
SkSchedulingPolicy::Pause,
|
||||
),
|
||||
CancellationToken::new(),
|
||||
use_https,
|
||||
)
|
||||
.expect("all preconditions should be checked before upsert to database"),
|
||||
);
|
||||
}
|
||||
}
|
||||
locked.safekeepers = Arc::new(safekeepers);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn set_safekeeper_scheduling_policy(
|
||||
&self,
|
||||
id: i64,
|
||||
scheduling_policy: SkSchedulingPolicy,
|
||||
) -> Result<(), DatabaseError> {
|
||||
self.persistence
|
||||
.set_safekeeper_scheduling_policy(id, scheduling_policy)
|
||||
.await?;
|
||||
let node_id = NodeId(id as u64);
|
||||
// After the change has been persisted successfully, update the in-memory state
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
let sk = safekeepers
|
||||
.get_mut(&node_id)
|
||||
.ok_or(DatabaseError::Logical("Not found".to_string()))?;
|
||||
sk.set_scheduling_policy(scheduling_policy);
|
||||
|
||||
match scheduling_policy {
|
||||
SkSchedulingPolicy::Active => (),
|
||||
SkSchedulingPolicy::Decomissioned | SkSchedulingPolicy::Pause => {
|
||||
locked.safekeeper_reconcilers.cancel_safekeeper(node_id);
|
||||
}
|
||||
}
|
||||
|
||||
locked.safekeepers = Arc::new(safekeepers);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn update_shards_preferred_azs(
|
||||
&self,
|
||||
req: ShardsPreferredAzsRequest,
|
||||
|
||||
518
storage_controller/src/service/safekeeper_service.rs
Normal file
518
storage_controller/src/service/safekeeper_service.rs
Normal file
@@ -0,0 +1,518 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::safekeeper_reconciler::ScheduleRequest;
|
||||
use crate::heartbeater::SafekeeperState;
|
||||
use crate::persistence::{
|
||||
DatabaseError, SafekeeperTimelineOpKind, TimelinePendingOpPersistence, TimelinePersistence,
|
||||
};
|
||||
use crate::safekeeper::Safekeeper;
|
||||
use http_utils::error::ApiError;
|
||||
use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy};
|
||||
use pageserver_api::models::{self, SafekeeperInfo, SafekeepersInfo, TimelineInfo};
|
||||
use safekeeper_api::membership::{MemberSet, SafekeeperId};
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::logging::SecretString;
|
||||
|
||||
use super::Service;
|
||||
|
||||
impl Service {
|
||||
/// Timeline creation on safekeepers
|
||||
///
|
||||
/// Returns `Ok(left)` if the timeline has been created on a quorum of safekeepers,
|
||||
/// where `left` contains the list of safekeepers that didn't have a successful response.
|
||||
/// Assumes tenant lock is held while calling this function.
|
||||
pub(super) async fn tenant_timeline_create_safekeepers_quorum(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
pg_version: u32,
|
||||
timeline_persistence: &TimelinePersistence,
|
||||
) -> Result<Vec<NodeId>, ApiError> {
|
||||
// If quorum is reached, return if we are outside of a specified timeout
|
||||
let jwt = self
|
||||
.config
|
||||
.safekeeper_jwt_token
|
||||
.clone()
|
||||
.map(SecretString::from);
|
||||
let mut joinset = JoinSet::new();
|
||||
|
||||
let safekeepers = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
locked.safekeepers.clone()
|
||||
};
|
||||
|
||||
let mut members = Vec::new();
|
||||
for sk_id in timeline_persistence.sk_set.iter() {
|
||||
let sk_id = NodeId(*sk_id as u64);
|
||||
let Some(safekeeper) = safekeepers.get(&sk_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"couldn't find entry for safekeeper with id {sk_id}"
|
||||
)))?;
|
||||
};
|
||||
members.push(SafekeeperId {
|
||||
id: sk_id,
|
||||
host: safekeeper.skp.host.clone(),
|
||||
pg_port: safekeeper.skp.port as u16,
|
||||
});
|
||||
}
|
||||
let mset = MemberSet::new(members).map_err(ApiError::InternalServerError)?;
|
||||
let mconf = safekeeper_api::membership::Configuration::new(mset);
|
||||
|
||||
let req = safekeeper_api::models::TimelineCreateRequest {
|
||||
commit_lsn: None,
|
||||
mconf,
|
||||
pg_version,
|
||||
start_lsn: timeline_persistence.start_lsn.0,
|
||||
system_id: None,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
wal_seg_size: None,
|
||||
};
|
||||
const SK_CREATE_TIMELINE_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
for sk in timeline_persistence.sk_set.iter() {
|
||||
let sk_id = NodeId(*sk as u64);
|
||||
let safekeepers = safekeepers.clone();
|
||||
let jwt = jwt.clone();
|
||||
let ssl_ca_cert = self.config.ssl_ca_cert.clone();
|
||||
let req = req.clone();
|
||||
joinset.spawn(async move {
|
||||
// Unwrap is fine as we already would have returned error above
|
||||
let sk_p = safekeepers.get(&sk_id).unwrap();
|
||||
let res = sk_p
|
||||
.with_client_retries(
|
||||
|client| {
|
||||
let req = req.clone();
|
||||
async move { client.create_timeline(&req).await }
|
||||
},
|
||||
&jwt,
|
||||
&ssl_ca_cert,
|
||||
3,
|
||||
3,
|
||||
SK_CREATE_TIMELINE_RECONCILE_TIMEOUT,
|
||||
&CancellationToken::new(),
|
||||
)
|
||||
.await;
|
||||
(sk_id, sk_p.skp.host.clone(), res)
|
||||
});
|
||||
}
|
||||
// After we have built the joinset, we now wait for the tasks to complete,
|
||||
// but with a specified timeout to make sure we return swiftly, either with
|
||||
// a failure or success.
|
||||
let reconcile_deadline = tokio::time::Instant::now() + SK_CREATE_TIMELINE_RECONCILE_TIMEOUT;
|
||||
|
||||
// Wait until all tasks finish or timeout is hit, whichever occurs
|
||||
// first.
|
||||
let mut reconcile_results = Vec::new();
|
||||
loop {
|
||||
if let Ok(res) = tokio::time::timeout_at(reconcile_deadline, joinset.join_next()).await
|
||||
{
|
||||
let Some(res) = res else { break };
|
||||
match res {
|
||||
Ok(res) => {
|
||||
tracing::info!(
|
||||
"response from safekeeper id:{} at {}: {:?}",
|
||||
res.0,
|
||||
res.1,
|
||||
res.2
|
||||
);
|
||||
reconcile_results.push(res);
|
||||
}
|
||||
Err(join_err) => {
|
||||
tracing::info!("join_err for task in joinset: {join_err}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::info!(
|
||||
"timeout for creation call after {} responses",
|
||||
reconcile_results.len()
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now check now if quorum was reached in reconcile_results.
|
||||
let total_result_count = reconcile_results.len();
|
||||
let remaining = reconcile_results
|
||||
.into_iter()
|
||||
.filter_map(|res| res.2.is_err().then_some(res.0))
|
||||
.collect::<Vec<_>>();
|
||||
tracing::info!(
|
||||
"Got {} non-successful responses from initial creation request of total {total_result_count} responses",
|
||||
remaining.len()
|
||||
);
|
||||
if remaining.len() >= 2 {
|
||||
// Failure
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"not enough successful reconciliations to reach quorum, please retry: {} errored",
|
||||
remaining.len()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(remaining)
|
||||
}
|
||||
|
||||
/// Create timeline in controller database and on safekeepers.
|
||||
/// `timeline_info` is result of timeline creation on pageserver.
|
||||
///
|
||||
/// All actions must be idempotent as the call is retried until success. It
|
||||
/// tries to create timeline in the db and on at least majority of
|
||||
/// safekeepers + queue creation for safekeepers which missed it in the db
|
||||
/// for infinite retries; after that, call returns Ok.
|
||||
///
|
||||
/// The idea is that once this is reached as long as we have alive majority
|
||||
/// of safekeepers it is expected to get eventually operational as storcon
|
||||
/// will be able to seed timeline on nodes which missed creation by making
|
||||
/// pull_timeline from peers. On the other hand we don't want to fail
|
||||
/// timeline creation if one safekeeper is down.
|
||||
pub(super) async fn tenant_timeline_create_safekeepers(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
timeline_info: &TimelineInfo,
|
||||
create_mode: models::TimelineCreateRequestMode,
|
||||
) -> Result<SafekeepersInfo, ApiError> {
|
||||
let timeline_id = timeline_info.timeline_id;
|
||||
let pg_version = timeline_info.pg_version * 10000;
|
||||
// Initially start_lsn is determined by last_record_lsn in pageserver
|
||||
// response as it does initdb. However, later we persist it and in sk
|
||||
// creation calls replace with the value from the timeline row if it
|
||||
// previously existed as on retries in theory endpoint might have
|
||||
// already written some data and advanced last_record_lsn, while we want
|
||||
// safekeepers to have consistent start_lsn.
|
||||
let start_lsn = match create_mode {
|
||||
models::TimelineCreateRequestMode::Bootstrap { .. } => timeline_info.last_record_lsn,
|
||||
models::TimelineCreateRequestMode::Branch { .. } => timeline_info.last_record_lsn,
|
||||
models::TimelineCreateRequestMode::ImportPgdata { .. } => {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"import pgdata doesn't specify the start lsn, aborting creation on safekeepers"
|
||||
)))?;
|
||||
}
|
||||
};
|
||||
// Choose initial set of safekeepers respecting affinity
|
||||
let sks = self.safekeepers_for_new_timeline().await?;
|
||||
let sks_persistence = sks.iter().map(|sk| sk.id.0 as i64).collect::<Vec<_>>();
|
||||
// Add timeline to db
|
||||
let mut timeline_persist = TimelinePersistence {
|
||||
tenant_id: tenant_id.to_string(),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
start_lsn: start_lsn.into(),
|
||||
generation: 0,
|
||||
sk_set: sks_persistence.clone(),
|
||||
new_sk_set: None,
|
||||
cplane_notified_generation: 0,
|
||||
deleted_at: None,
|
||||
};
|
||||
let inserted = self
|
||||
.persistence
|
||||
.insert_timeline(timeline_persist.clone())
|
||||
.await?;
|
||||
if !inserted {
|
||||
if let Some(existent_persist) = self
|
||||
.persistence
|
||||
.get_timeline(tenant_id, timeline_id)
|
||||
.await?
|
||||
{
|
||||
// Replace with what we have in the db, to get stuff like the generation right.
|
||||
// We do still repeat the http calls to the safekeepers. After all, we could have
|
||||
// crashed right after the wrote to the DB.
|
||||
timeline_persist = existent_persist;
|
||||
} else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"insertion said timeline already in db, but looking it up, it was gone"
|
||||
)));
|
||||
}
|
||||
}
|
||||
// Create the timeline on a quorum of safekeepers
|
||||
let remaining = self
|
||||
.tenant_timeline_create_safekeepers_quorum(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
pg_version,
|
||||
&timeline_persist,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// For the remaining safekeepers, take care of their reconciliation asynchronously
|
||||
for &remaining_id in remaining.iter() {
|
||||
let pending_op = TimelinePendingOpPersistence {
|
||||
tenant_id: tenant_id.to_string(),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
generation: timeline_persist.generation,
|
||||
op_kind: crate::persistence::SafekeeperTimelineOpKind::Pull,
|
||||
sk_id: remaining_id.0 as i64,
|
||||
};
|
||||
tracing::info!("writing pending op for sk id {remaining_id}");
|
||||
self.persistence.insert_pending_op(pending_op).await?;
|
||||
}
|
||||
if !remaining.is_empty() {
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
for remaining_id in remaining {
|
||||
let Some(sk) = locked.safekeepers.get(&remaining_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {remaining_id}"
|
||||
)));
|
||||
};
|
||||
let Ok(host_list) = sks
|
||||
.iter()
|
||||
.map(|sk| {
|
||||
Ok((
|
||||
sk.id,
|
||||
locked
|
||||
.safekeepers
|
||||
.get(&sk.id)
|
||||
.ok_or_else(|| {
|
||||
ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {remaining_id} to pull from"
|
||||
))
|
||||
})?
|
||||
.base_url(),
|
||||
))
|
||||
})
|
||||
.collect::<Result<_, ApiError>>()
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let req = ScheduleRequest {
|
||||
safekeeper: Box::new(sk.clone()),
|
||||
host_list,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
generation: timeline_persist.generation as u32,
|
||||
kind: crate::persistence::SafekeeperTimelineOpKind::Pull,
|
||||
};
|
||||
locked.safekeeper_reconcilers.schedule_request(self, req);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SafekeepersInfo {
|
||||
generation: timeline_persist.generation as u32,
|
||||
safekeepers: sks,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
}
|
||||
/// Perform timeline deletion on safekeepers. Will return success: we persist the deletion into the reconciler.
|
||||
pub(super) async fn tenant_timeline_delete_safekeepers(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<(), ApiError> {
|
||||
let tl = self
|
||||
.persistence
|
||||
.get_timeline(tenant_id, timeline_id)
|
||||
.await?;
|
||||
let Some(tl) = tl else {
|
||||
tracing::info!(
|
||||
"timeline {tenant_id}/{timeline_id} doesn't exist in timelines table, no deletions on safekeepers needed"
|
||||
);
|
||||
return Ok(());
|
||||
};
|
||||
let all_sks = tl
|
||||
.new_sk_set
|
||||
.iter()
|
||||
.flat_map(|sks| {
|
||||
sks.iter()
|
||||
.map(|sk| (*sk, SafekeeperTimelineOpKind::Exclude))
|
||||
})
|
||||
.chain(
|
||||
tl.sk_set
|
||||
.iter()
|
||||
.map(|v| (*v, SafekeeperTimelineOpKind::Delete)),
|
||||
)
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
// Schedule reconciliations
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
for (sk_id, kind) in all_sks {
|
||||
let sk_id = NodeId(sk_id as u64);
|
||||
let Some(sk) = locked.safekeepers.get(&sk_id) else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"Couldn't find safekeeper with id {sk_id}"
|
||||
)));
|
||||
};
|
||||
|
||||
let req = ScheduleRequest {
|
||||
safekeeper: Box::new(sk.clone()),
|
||||
// we don't use this for this kind, put a dummy value
|
||||
host_list: Vec::new(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
generation: tl.generation as u32,
|
||||
kind,
|
||||
};
|
||||
locked.safekeeper_reconcilers.schedule_request(self, req);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Choose safekeepers for the new timeline: 3 in different azs.
|
||||
pub(crate) async fn safekeepers_for_new_timeline(
|
||||
&self,
|
||||
) -> Result<Vec<SafekeeperInfo>, ApiError> {
|
||||
// Number of safekeepers in different AZs we are looking for
|
||||
let wanted_count = 3;
|
||||
let mut all_safekeepers = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
locked
|
||||
.safekeepers
|
||||
.iter()
|
||||
.filter_map(|sk| {
|
||||
if sk.1.scheduling_policy() != SkSchedulingPolicy::Active {
|
||||
// If we don't want to schedule stuff onto the safekeeper, respect that.
|
||||
return None;
|
||||
}
|
||||
let utilization_opt = if let SafekeeperState::Available {
|
||||
last_seen_at: _,
|
||||
utilization,
|
||||
} = sk.1.availability()
|
||||
{
|
||||
Some(utilization)
|
||||
} else {
|
||||
// non-available safekeepers still get a chance for new timelines,
|
||||
// but put them last in the list.
|
||||
None
|
||||
};
|
||||
let info = SafekeeperInfo {
|
||||
hostname: sk.1.skp.host.clone(),
|
||||
id: NodeId(sk.1.skp.id as u64),
|
||||
};
|
||||
Some((utilization_opt, info, sk.1.skp.availability_zone_id.clone()))
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
all_safekeepers.sort_by_key(|sk| {
|
||||
(
|
||||
sk.0.as_ref()
|
||||
.map(|ut| ut.timeline_count)
|
||||
.unwrap_or(u64::MAX),
|
||||
// Use the id to decide on equal scores for reliability
|
||||
sk.1.id.0,
|
||||
)
|
||||
});
|
||||
let mut sks = Vec::new();
|
||||
let mut azs = HashSet::new();
|
||||
for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
|
||||
if !azs.insert(az_id) {
|
||||
continue;
|
||||
}
|
||||
sks.push(sk_info.clone());
|
||||
if sks.len() == wanted_count {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if sks.len() == wanted_count {
|
||||
Ok(sks)
|
||||
} else {
|
||||
Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"couldn't find {wanted_count} safekeepers in different AZs for new timeline (found: {}, total active: {})",
|
||||
sks.len(),
|
||||
all_safekeepers.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn safekeepers_list(
|
||||
&self,
|
||||
) -> Result<Vec<SafekeeperDescribeResponse>, DatabaseError> {
|
||||
let locked = self.inner.read().unwrap();
|
||||
let mut list = locked
|
||||
.safekeepers
|
||||
.iter()
|
||||
.map(|sk| sk.1.describe_response())
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
list.sort_by_key(|v| v.id);
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_safekeeper(
|
||||
&self,
|
||||
id: i64,
|
||||
) -> Result<SafekeeperDescribeResponse, DatabaseError> {
|
||||
let locked = self.inner.read().unwrap();
|
||||
let sk = locked
|
||||
.safekeepers
|
||||
.get(&NodeId(id as u64))
|
||||
.ok_or(diesel::result::Error::NotFound)?;
|
||||
sk.describe_response()
|
||||
}
|
||||
|
||||
pub(crate) async fn upsert_safekeeper(
|
||||
&self,
|
||||
record: crate::persistence::SafekeeperUpsert,
|
||||
) -> Result<(), ApiError> {
|
||||
let node_id = NodeId(record.id as u64);
|
||||
let use_https = self.config.use_https_safekeeper_api;
|
||||
|
||||
if use_https && record.https_port.is_none() {
|
||||
return Err(ApiError::PreconditionFailed(
|
||||
format!(
|
||||
"cannot upsert safekeeper {node_id}: \
|
||||
https is enabled, but https port is not specified"
|
||||
)
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
|
||||
self.persistence.safekeeper_upsert(record.clone()).await?;
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
match safekeepers.entry(node_id) {
|
||||
std::collections::hash_map::Entry::Occupied(mut entry) => entry
|
||||
.get_mut()
|
||||
.update_from_record(record)
|
||||
.expect("all preconditions should be checked before upsert to database"),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(
|
||||
Safekeeper::from_persistence(
|
||||
crate::persistence::SafekeeperPersistence::from_upsert(
|
||||
record,
|
||||
SkSchedulingPolicy::Pause,
|
||||
),
|
||||
CancellationToken::new(),
|
||||
use_https,
|
||||
)
|
||||
.expect("all preconditions should be checked before upsert to database"),
|
||||
);
|
||||
}
|
||||
}
|
||||
locked.safekeepers = Arc::new(safekeepers);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn set_safekeeper_scheduling_policy(
|
||||
&self,
|
||||
id: i64,
|
||||
scheduling_policy: SkSchedulingPolicy,
|
||||
) -> Result<(), DatabaseError> {
|
||||
self.persistence
|
||||
.set_safekeeper_scheduling_policy(id, scheduling_policy)
|
||||
.await?;
|
||||
let node_id = NodeId(id as u64);
|
||||
// After the change has been persisted successfully, update the in-memory state
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
let sk = safekeepers
|
||||
.get_mut(&node_id)
|
||||
.ok_or(DatabaseError::Logical("Not found".to_string()))?;
|
||||
sk.set_scheduling_policy(scheduling_policy);
|
||||
|
||||
match scheduling_policy {
|
||||
SkSchedulingPolicy::Active => (),
|
||||
SkSchedulingPolicy::Decomissioned | SkSchedulingPolicy::Pause => {
|
||||
locked.safekeeper_reconcilers.cancel_safekeeper(node_id);
|
||||
}
|
||||
}
|
||||
|
||||
locked.safekeepers = Arc::new(safekeepers);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -246,6 +246,859 @@ class NeonAPI:
|
||||
has_running = True
|
||||
time.sleep(0.5)
|
||||
|
||||
def get_operation(self, project_id: str, operation_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves a specific operation.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
operation_id: The operation ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/operations/{operation_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def list_projects(self) -> dict[str, Any]:
|
||||
"""
|
||||
Lists all projects.
|
||||
|
||||
Returns:
|
||||
The API response containing the list of projects
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
"/projects",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def update_project(
|
||||
self,
|
||||
project_id: str,
|
||||
name: str | None = None,
|
||||
settings: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Updates a project.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
name: New name for the project (optional)
|
||||
settings: Project settings to update (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the updated project details
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"project": {},
|
||||
}
|
||||
|
||||
if name is not None:
|
||||
data["project"]["name"] = name
|
||||
|
||||
if settings is not None:
|
||||
data["project"]["settings"] = settings
|
||||
|
||||
resp = self.__request(
|
||||
"PATCH",
|
||||
f"/projects/{project_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_project_consumption(
|
||||
self,
|
||||
project_id: str,
|
||||
from_time: str | None = None,
|
||||
to_time: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves project consumption data.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
from_time: Start time for consumption data (optional)
|
||||
to_time: End time for consumption data (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the project consumption data
|
||||
"""
|
||||
params = {}
|
||||
if from_time is not None:
|
||||
params["from"] = from_time
|
||||
if to_time is not None:
|
||||
params["to"] = to_time
|
||||
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/consumption",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
params=params,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_project_vpc_endpoints(self, project_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves project VPC endpoints.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
|
||||
Returns:
|
||||
The API response containing the project VPC endpoints
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/vpc_endpoints",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def create_branch(
|
||||
self,
|
||||
project_id: str,
|
||||
parent_id: str | None = None,
|
||||
name: str | None = None,
|
||||
endpoints: list[dict[str, Any]] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Creates a new branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
parent_id: The parent branch ID (optional)
|
||||
name: Name for the branch (optional)
|
||||
endpoints: List of endpoints to create with the branch (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the created branch details and operations
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"branch": {},
|
||||
}
|
||||
|
||||
if parent_id is not None:
|
||||
data["branch"]["parent_id"] = parent_id
|
||||
|
||||
if name is not None:
|
||||
data["branch"]["name"] = name
|
||||
|
||||
if endpoints is not None:
|
||||
data["endpoints"] = endpoints
|
||||
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_details(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves details of a specific branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the branch details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def update_branch(
|
||||
self,
|
||||
project_id: str,
|
||||
branch_id: str,
|
||||
name: str | None = None,
|
||||
default: bool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Updates the specified branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
name: New name for the branch (optional)
|
||||
default: Whether to set this branch as the default branch (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the updated branch details and operations
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"branch": {},
|
||||
}
|
||||
|
||||
if name is not None:
|
||||
data["branch"]["name"] = name
|
||||
|
||||
if default is not None:
|
||||
data["branch"]["default"] = default
|
||||
|
||||
resp = self.__request(
|
||||
"PATCH",
|
||||
f"/projects/{project_id}/branches/{branch_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def delete_branch(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Deletes a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"DELETE",
|
||||
f"/projects/{project_id}/branches/{branch_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def reset_branch(
|
||||
self,
|
||||
project_id: str,
|
||||
branch_id: str,
|
||||
lsn: str | None = None,
|
||||
timestamp: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Resets a branch to a specified LSN or timestamp.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
lsn: The Log Sequence Number (LSN) to reset to (optional)
|
||||
timestamp: The timestamp to reset to (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
params = {}
|
||||
if lsn is not None:
|
||||
params["lsn"] = lsn
|
||||
if timestamp is not None:
|
||||
params["timestamp"] = timestamp
|
||||
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/reset",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
params=params,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def reset_branch_to_parent(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Resets a branch to the current state of the parent branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/reset_to_parent",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def restore_branch(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Restores a branch that was previously deleted.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/restore",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_schema(
|
||||
self,
|
||||
project_id: str,
|
||||
branch_id: str,
|
||||
db_name: str,
|
||||
lsn: str | None = None,
|
||||
timestamp: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves the schema from the specified database.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
db_name: Name of the database for which the schema is retrieved
|
||||
lsn: The Log Sequence Number (LSN) for which the schema is retrieved (optional)
|
||||
timestamp: The point in time for which the schema is retrieved (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the database schema
|
||||
"""
|
||||
params = {
|
||||
"db_name": db_name,
|
||||
}
|
||||
|
||||
if lsn is not None:
|
||||
params["lsn"] = lsn
|
||||
|
||||
if timestamp is not None:
|
||||
params["timestamp"] = timestamp
|
||||
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/schema",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
params=params,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def compare_branch_schema(
|
||||
self,
|
||||
project_id: str,
|
||||
branch_id: str,
|
||||
target_branch_id: str,
|
||||
db_name: str,
|
||||
target_db_name: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Compares schemas between two branches.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The source branch ID
|
||||
target_branch_id: The target branch ID to compare with
|
||||
db_name: Name of the database in the source branch
|
||||
target_db_name: Name of the database in the target branch (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the schema comparison
|
||||
"""
|
||||
params = {
|
||||
"target_branch_id": target_branch_id,
|
||||
"db_name": db_name,
|
||||
}
|
||||
|
||||
if target_db_name is not None:
|
||||
params["target_db_name"] = target_db_name
|
||||
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/compare_schema",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
params=params,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def set_branch_as_default(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Sets a branch as the default branch for the project.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/set_as_default",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_endpoints(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves endpoints associated with a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the branch endpoints
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/endpoints",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_databases(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves databases associated with a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the branch databases
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/databases",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def create_branch_database(
|
||||
self, project_id: str, branch_id: str, database_name: str, owner_name: str | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Creates a new database in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
database_name: Name of the database to create
|
||||
owner_name: Name of the role that will own the database (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the created database details
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"database": {
|
||||
"name": database_name,
|
||||
},
|
||||
}
|
||||
|
||||
if owner_name is not None:
|
||||
data["database"]["owner_name"] = owner_name
|
||||
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/databases",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_database(
|
||||
self, project_id: str, branch_id: str, database_name: str
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves details of a specific database in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
database_name: Name of the database to retrieve
|
||||
|
||||
Returns:
|
||||
The API response containing the database details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/databases/{database_name}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def delete_branch_database(
|
||||
self, project_id: str, branch_id: str, database_name: str
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Deletes a database from a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
database_name: Name of the database to delete
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"DELETE",
|
||||
f"/projects/{project_id}/branches/{branch_id}/databases/{database_name}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_roles(self, project_id: str, branch_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves roles associated with a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
|
||||
Returns:
|
||||
The API response containing the branch roles
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def create_branch_role(self, project_id: str, branch_id: str, role_name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Creates a new role in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
role_name: Name of the role to create
|
||||
|
||||
Returns:
|
||||
The API response containing the created role details
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"role": {
|
||||
"name": role_name,
|
||||
},
|
||||
}
|
||||
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_role(self, project_id: str, branch_id: str, role_name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves details of a specific role in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
role_name: Name of the role to retrieve
|
||||
|
||||
Returns:
|
||||
The API response containing the role details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles/{role_name}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def delete_branch_role(self, project_id: str, branch_id: str, role_name: str) -> dict[str, Any]:
|
||||
"""
|
||||
Deletes a role from a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
role_name: Name of the role to delete
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"DELETE",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles/{role_name}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_branch_role_password(
|
||||
self, project_id: str, branch_id: str, role_name: str
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves the password for a specific role in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
role_name: Name of the role
|
||||
|
||||
Returns:
|
||||
The API response containing the role password
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles/{role_name}/reveal_password",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def reset_branch_role_password(
|
||||
self, project_id: str, branch_id: str, role_name: str
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Resets the password for a specific role in a branch.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
branch_id: The branch ID
|
||||
role_name: Name of the role
|
||||
|
||||
Returns:
|
||||
The API response containing the new role password
|
||||
"""
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/branches/{branch_id}/roles/{role_name}/reset_password",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_endpoint(self, project_id: str, endpoint_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves details of a specific endpoint.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
endpoint_id: The endpoint ID
|
||||
|
||||
Returns:
|
||||
The API response containing the endpoint details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"GET",
|
||||
f"/projects/{project_id}/endpoints/{endpoint_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def update_endpoint(
|
||||
self,
|
||||
project_id: str,
|
||||
endpoint_id: str,
|
||||
branch_id: str | None = None,
|
||||
settings: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Updates an endpoint.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
endpoint_id: The endpoint ID
|
||||
branch_id: The branch ID to connect the endpoint to (optional)
|
||||
settings: Endpoint settings to update (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the updated endpoint details
|
||||
"""
|
||||
data: dict[str, Any] = {
|
||||
"endpoint": {},
|
||||
}
|
||||
|
||||
if branch_id is not None:
|
||||
data["endpoint"]["branch_id"] = branch_id
|
||||
|
||||
if settings is not None:
|
||||
data["endpoint"]["settings"] = settings
|
||||
|
||||
resp = self.__request(
|
||||
"PATCH",
|
||||
f"/projects/{project_id}/endpoints/{endpoint_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def delete_endpoint(self, project_id: str, endpoint_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Deletes an endpoint.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
endpoint_id: The endpoint ID
|
||||
|
||||
Returns:
|
||||
The API response containing the operation details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"DELETE",
|
||||
f"/projects/{project_id}/endpoints/{endpoint_id}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_endpoint_stats(
|
||||
self,
|
||||
project_id: str,
|
||||
endpoint_id: str,
|
||||
from_time: str | None = None,
|
||||
to_time: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieves statistics for a specific endpoint.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
endpoint_id: The endpoint ID
|
||||
from_time: Start time for statistics data (optional)
|
||||
to_time: End time for statistics data (optional)
|
||||
|
||||
Returns:
|
||||
The API response containing the endpoint statistics
|
||||
"""
|
||||
data: dict[str, Any] = {}
|
||||
|
||||
if from_time is not None:
|
||||
data["from"] = from_time
|
||||
|
||||
if to_time is not None:
|
||||
data["to"] = to_time
|
||||
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/endpoints/{endpoint_id}/stats",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=data,
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def authenticate_passwordless_session(
|
||||
self, project_id: str, endpoint_id: str
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Authenticates a passwordless session for an endpoint.
|
||||
|
||||
Args:
|
||||
project_id: The Neon project ID
|
||||
endpoint_id: The endpoint ID
|
||||
|
||||
Returns:
|
||||
The API response containing the authentication details
|
||||
"""
|
||||
resp = self.__request(
|
||||
"POST",
|
||||
f"/projects/{project_id}/endpoints/{endpoint_id}/passwordless_auth",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
|
||||
@final
|
||||
class NeonApiEndpoint:
|
||||
|
||||
@@ -1321,6 +1321,28 @@ class NeonEnv:
|
||||
for f in futs:
|
||||
f.result()
|
||||
|
||||
# Last step: register safekeepers at the storage controller
|
||||
if (
|
||||
self.storage_controller_config is not None
|
||||
and self.storage_controller_config.get("timelines_onto_safekeepers") is True
|
||||
):
|
||||
for sk_id, sk in enumerate(self.safekeepers):
|
||||
body = {
|
||||
"id": sk_id,
|
||||
"created_at": "2023-10-25T09:11:25Z",
|
||||
"updated_at": "2024-08-28T11:32:43Z",
|
||||
"region_id": "aws-us-east-2",
|
||||
"host": "127.0.0.1",
|
||||
"port": sk.port.pg,
|
||||
"http_port": sk.port.http,
|
||||
"https_port": None,
|
||||
"version": 5957,
|
||||
"availability_zone_id": f"us-east-2b-{sk_id}",
|
||||
}
|
||||
|
||||
self.storage_controller.on_safekeeper_deploy(sk_id, body)
|
||||
self.storage_controller.safekeeper_scheduling_policy(sk_id, "Active")
|
||||
|
||||
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
|
||||
"""
|
||||
After this method returns, there should be no child processes running.
|
||||
|
||||
@@ -812,11 +812,13 @@ def test_timeline_ancestor_detach_errors(neon_env_builder: NeonEnvBuilder, shard
|
||||
|
||||
for ps in pageservers.values():
|
||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
# We make /detach_ancestor requests that are intended to fail.
|
||||
# It's expected that storcon drops requests to other pageservers after
|
||||
# it gets the first error (https://github.com/neondatabase/neon/issues/11177)
|
||||
ps.allowed_errors.extend(
|
||||
[
|
||||
".* WARN .* path=/v1/tenant/.*/timeline/.*/detach_ancestor request_id=.*: request was dropped before completing",
|
||||
# rare error logging, which is hard to reproduce without instrumenting responding with random sleep
|
||||
'.* ERROR .* path=/v1/tenant/.*/timeline/.*/detach_ancestor request_id=.*: Cancelled request finished with an error: Conflict\\("no ancestors"\\)',
|
||||
".* ERROR .* path=/v1/tenant/.*/timeline/.*/detach_ancestor request_id=.*: Cancelled request finished with an error.*",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -2039,6 +2039,29 @@ def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder):
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
|
||||
def test_explicit_timeline_creation_storcon(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that having neon.safekeepers starting with g#n: with non zero n enables
|
||||
generations, which as a side effect disables automatic timeline creation.
|
||||
Like test_explicit_timeline_creation, but asks the storcon to
|
||||
create membership conf & timeline.
|
||||
"""
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
config_lines = [
|
||||
"neon.safekeeper_proto_version = 3",
|
||||
]
|
||||
ep = env.endpoints.create("main", config_lines=config_lines)
|
||||
|
||||
# endpoint should start.
|
||||
ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
|
||||
# In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries
|
||||
# when compute is active, but there are no writes to the timeline. In that case
|
||||
# pageserver should maintain a single connection to safekeeper and don't attempt
|
||||
|
||||
@@ -61,7 +61,7 @@ memchr = { version = "2" }
|
||||
nix = { version = "0.26" }
|
||||
nom = { version = "7" }
|
||||
num = { version = "0.4" }
|
||||
num-bigint = { version = "0.4" }
|
||||
num-bigint = { version = "0.4", default-features = false, features = ["std"] }
|
||||
num-complex = { version = "0.4", default-features = false, features = ["std"] }
|
||||
num-integer = { version = "0.1", features = ["i128"] }
|
||||
num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
|
||||
@@ -115,7 +115,6 @@ anyhow = { version = "1", features = ["backtrace"] }
|
||||
bytes = { version = "1", features = ["serde"] }
|
||||
cc = { version = "1", default-features = false, features = ["parallel"] }
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] }
|
||||
displaydoc = { version = "0.2" }
|
||||
either = { version = "1" }
|
||||
getrandom = { version = "0.2", default-features = false, features = ["std"] }
|
||||
half = { version = "2", default-features = false, features = ["num-traits"] }
|
||||
@@ -128,7 +127,7 @@ log = { version = "0.4", default-features = false, features = ["std"] }
|
||||
memchr = { version = "2" }
|
||||
nom = { version = "7" }
|
||||
num = { version = "0.4" }
|
||||
num-bigint = { version = "0.4" }
|
||||
num-bigint = { version = "0.4", default-features = false, features = ["std"] }
|
||||
num-complex = { version = "0.4", default-features = false, features = ["std"] }
|
||||
num-integer = { version = "0.1", features = ["i128"] }
|
||||
num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
|
||||
|
||||
Reference in New Issue
Block a user