Merge WITH CONFLICTS 2025-03-11 main commit '7c462b3417ecd3ae3907f3480f3b8a8c99fc6d7b' into yuchen/direct-io-delta-image-layer-write

Conflicts: pageserver/src/tenant/blob_io.rs
2026-01-15 17:32:56 +00:00 · 2025-04-09 19:39:12 +02:00
parent 537eb334f2 7c462b3417
commit f078d7e1a9
380 changed files with 13238 additions and 5308 deletions
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -32,3 +32,6 @@ config-variables:
  - NEON_DEV_AWS_ACCOUNT_ID
  - NEON_PROD_AWS_ACCOUNT_ID
  - AWS_ECR_REGION
+  - BENCHMARK_LARGE_OLTP_PROJECTID
+  - SLACK_ON_CALL_DEVPROD_STREAM
+  - SLACK_RUST_CHANNEL_ID
--- a/.github/actions/neon-branch-create/action.yml
+++ b/.github/actions/neon-branch-create/action.yml
@@ -84,7 +84,13 @@ runs:
          --header "Authorization: Bearer ${API_KEY}"
          )

-        role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
+        role_name=$(echo "$roles" | jq --raw-output '
+          (.roles | map(select(.protected == false))) as $roles |
+          if any($roles[]; .name == "neondb_owner")
+          then "neondb_owner"
+          else $roles[0].name
+          end
+        ')
        echo "role_name=${role_name}" >> $GITHUB_OUTPUT
      env:
        API_HOST: ${{ inputs.api_host }}
@@ -107,13 +113,13 @@ runs:
            )

          if [ -z "${reset_password}" ]; then
-            sleep 1
+            sleep $i
            continue
          fi

          password=$(echo $reset_password | jq --raw-output '.role.password')
          if [ "${password}" == "null" ]; then
-            sleep 1
+            sleep $i # increasing backoff
            continue
          fi

--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -44,6 +44,11 @@ inputs:
    description: 'Postgres version to use for tests'
    required: false
    default: 'v16'
+  sanitizers:
+    # When set to 'enabled', the '-sanitized' build artifact is downloaded and
+    # the value is exported to the test run as SANITIZERS.
+    # Action inputs are always strings; action metadata has no `type` key.
+    description: 'enabled or disabled'
+    required: false
+    default: 'disabled'
  benchmark_durations:
    description: 'benchmark durations JSON'
    required: false
@@ -59,7 +64,7 @@ runs:
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
+        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
        path: /tmp/neon
        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

@@ -112,6 +117,7 @@ runs:
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
        RERUN_FAILED: ${{ inputs.rerun_failed }}
        PG_VERSION: ${{ inputs.pg_version }}
+        SANITIZERS: ${{ inputs.sanitizers }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
--- a/.github/scripts/generate_image_maps.py
+++ b/.github/scripts/generate_image_maps.py
--- a/.github/scripts/previous-releases.jq
+++ b/.github/scripts/previous-releases.jq
@@ -0,0 +1,25 @@
+# Expects response from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases as input,
+# with tag names of the form `release-<N>` for storage, `release-compute-<N>` for compute and `release-proxy-<N>` for proxy releases.
+# Extract only the `tag_name` field from each release object
+[ .[].tag_name ]
+
+# Transform each tag name into a structured object using regex capture.
+# Tag names that do not match the pattern produce no capture and are dropped.
+| reduce map(
+    capture("^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\d+))$")
+    | {
+        component: (.component // "storage"),  # Default to "storage" if no component is specified
+        version: (.version | tonumber),        # Convert the version number to an integer
+        full: .full                            # Store the full tag name for final output
+      }
+  )[] as $entry  # Loop over the transformed list
+
+# Accumulate the latest (highest-numbered) version for each component
+({};
+ .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))
+
+# Convert the resulting object into an array of `<component>=<full tag name>` strings
+| to_entries
+| map("\(.key)=\(.value.full)")
+
+# Output each string separately, one `<component>=<full tag name>` per line
+| .[]
--- a/.github/scripts/push_with_image_map.py
+++ b/.github/scripts/push_with_image_map.py
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -280,7 +280,7 @@ jobs:
      - name: Upload Neon artifact
        uses: ./.github/actions/upload
        with:
-          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
+          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
          path: /tmp/neon
          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

@@ -347,6 +347,7 @@ jobs:
          real_s3_region: eu-central-1
          rerun_failed: true
          pg_version: ${{ matrix.pg_version }}
+          sanitizers: ${{ inputs.sanitizers }}
          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
          # Attempt to stop tests gracefully to generate test reports
@@ -359,7 +360,6 @@ jobs:
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
-          SANITIZERS: ${{ inputs.sanitizers }}

      # Temporary disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
--- a/.github/workflows/_meta.yml
+++ b/.github/workflows/_meta.yml
@@ -0,0 +1,107 @@
+name: Generate run metadata
+on:
+  workflow_call:
+    inputs:
+      github-event-name:
+        type: string
+        required: true
+    outputs:
+      build-tag:
+        description: "Tag for the current workflow run"
+        value: ${{ jobs.tags.outputs.build-tag }}
+      previous-storage-release:
+        description: "Tag of the last storage release"
+        value: ${{ jobs.tags.outputs.storage }}
+      previous-proxy-release:
+        description: "Tag of the last proxy release"
+        value: ${{ jobs.tags.outputs.proxy }}
+      previous-compute-release:
+        description: "Tag of the last compute release"
+        value: ${{ jobs.tags.outputs.compute }}
+      run-kind:
+        description: "The kind of run we're currently in. Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`,  `proxy-rc-pr`, `pr`, or `workflow-dispatch`"
+        value: ${{ jobs.tags.outputs.run-kind }}
+
+permissions: {}
+
+jobs:
+  tags:
+    runs-on: ubuntu-22.04
+    outputs:
+      build-tag: ${{ steps.build-tag.outputs.tag }}
+      compute: ${{ steps.previous-releases.outputs.compute }}
+      proxy: ${{ steps.previous-releases.outputs.proxy }}
+      storage: ${{ steps.previous-releases.outputs.storage }}
+      run-kind: ${{ steps.run-kind.outputs.run-kind }}
+    permissions:
+      contents: read
+    steps:
+      # Need `fetch-depth: 0` to count the number of commits in the branch
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Classify the triggering event into a single run-kind string.
+      # The expression is a chain of `condition && 'value'` alternatives joined
+      # by `||`, so the first matching condition wins; the leading `false` only
+      # lets every alternative start with `||`. Anything unmatched is 'unknown'.
+      - name: Get run kind
+        id: run-kind
+        env:
+          RUN_KIND: >-
+            ${{
+              false
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'main')            && 'push-main'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release')         && 'storage-release'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-compute') && 'compute-release'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-proxy')   && 'proxy-release'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release')         && 'storage-rc-pr'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy')   && 'proxy-rc-pr'
+              || (inputs.github-event-name == 'pull_request')                                         && 'pr'
+              || (inputs.github-event-name == 'workflow_dispatch')                                    && 'workflow-dispatch'
+              || 'unknown'
+            }}
+        run: |
+          echo "run-kind=$RUN_KIND" | tee -a "$GITHUB_OUTPUT"
+
+      # Derive the build tag from the run kind computed by the `run-kind` step:
+      #   - pushes to main / release branches: commit count of HEAD, with a
+      #     `release-`, `release-proxy-` or `release-compute-` prefix on release branches
+      #   - pull requests: id of the 'Build and Test' run for this branch and commit
+      #   - manual dispatch: id of the current run
+      # Any other run kind fails the step.
+      - name: Get build tag
+        id: build-tag
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
+          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+          RUN_KIND: ${{ steps.run-kind.outputs.run-kind }}
+        run: |
+          case $RUN_KIND in
+          push-main)
+            echo "tag=$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          storage-release)
+            echo "tag=release-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          proxy-release)
+            echo "tag=release-proxy-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          compute-release)
+            echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)
+            # Variables are quoted so that an empty value cannot shift the following flags.
+            # NOTE(review): if `gh run list` returns no run, the tag is empty - confirm callers tolerate that.
+            BUILD_AND_TEST_RUN_ID=$(gh run list -b "$CURRENT_BRANCH" -c "$CURRENT_SHA" -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
+            echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          workflow-dispatch)
+            echo "tag=$GITHUB_RUN_ID" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          *)
+            echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!"
+            exit 1
+          esac
+
+      # Fetch the repository's releases and pipe them through
+      # .github/scripts/previous-releases.jq, which prints `<component>=<tag>`
+      # lines (components: storage, proxy, compute). Each line is appended to
+      # GITHUB_OUTPUT and so becomes a step output read by this job's `outputs`.
+      - name: Get the previous release-tags
+        id: previous-releases
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh api --paginate \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "/repos/${GITHUB_REPOSITORY}/releases" \
+          | jq -f .github/scripts/previous-releases.jq -r \
+          | tee -a "${GITHUB_OUTPUT}"
--- a/.github/workflows/_push-to-container-registry.yml
+++ b/.github/workflows/_push-to-container-registry.yml
@@ -51,7 +51,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          sparse-checkout: scripts/push_with_image_map.py
+          sparse-checkout: .github/scripts/push_with_image_map.py
          sparse-checkout-cone-mode: false

      - name: Print image-map
@@ -99,6 +99,6 @@ jobs:
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

      - name: Copy docker images to target registries
-        run: python scripts/push_with_image_map.py
+        run: python3 .github/scripts/push_with_image_map.py
        env:
          IMAGE_MAP: ${{ inputs.image-map }}
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -140,6 +140,9 @@ jobs:
          --ignore test_runner/performance/test_logical_replication.py
          --ignore test_runner/performance/test_physical_replication.py
          --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
+          --ignore test_runner/performance/test_cumulative_statistics_persistence.py
+          --ignore test_runner/performance/test_perf_many_relations.py
+          --ignore test_runner/performance/test_perf_oltp_large_tenant.py
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -171,6 +174,61 @@ jobs:
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

+  # Runs performance/test_cumulative_statistics_persistence.py against a remote
+  # cluster (`-m remote_cluster`) using the latest release artifact.
+  # Skipped when `run_only_pgvector_tests` is set to anything other than 'false'.
+  cumstats-test:
+    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      DEFAULT_PG_VERSION: 17
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
+      PLATFORM: "neon-staging"
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Verify that cumulative statistics are preserved
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance/test_cumulative_statistics_persistence.py
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 3600
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+
+
  replication-tests:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    permissions:
@@ -398,6 +456,9 @@ jobs:
    runs-on: ${{ matrix.runner }}
    container:
      image: ${{ matrix.image }}
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

    # Increase timeout to 8h, default timeout is 6h
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -65,38 +65,11 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          filters: .github/file-filters.yaml

-  tag:
+  meta:
    needs: [ check-permissions ]
-    runs-on: [ self-hosted, small ]
-    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
-    outputs:
-      build-tag: ${{steps.build-tag.outputs.tag}}
-
-    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Get build tag
-        run: |
-          echo run:$GITHUB_RUN_ID
-          echo ref:$GITHUB_REF_NAME
-          echo rev:$(git rev-list --count HEAD)
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
-            echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
-            echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
-          fi
-        shell: bash
-        id: build-tag
+    uses: ./.github/workflows/_meta.yml
+    with:
+      github-event-name: ${{ github.event_name }}

  build-build-tools-image:
    needs: [ check-permissions ]
@@ -199,7 +172,7 @@ jobs:
    secrets: inherit

  build-and-test-locally:
-    needs: [ tag, build-build-tools-image ]
+    needs: [ meta, build-build-tools-image ]
    strategy:
      fail-fast: false
      matrix:
@@ -213,7 +186,7 @@ jobs:
    with:
      arch: ${{ matrix.arch }}
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
-      build-tag: ${{ needs.tag.outputs.build-tag }}
+      build-tag: ${{ needs.meta.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
@@ -497,13 +470,24 @@ jobs:
            })

  trigger-e2e-tests:
-    if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
-    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ]
+    # Depends on jobs that can get skipped
+    if: >-
+      ${{
+        (
+          !github.event.pull_request.draft
+          || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
+          || contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind)
+        ) && !failure() && !cancelled()
+      }}
+    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
    uses: ./.github/workflows/trigger-e2e-tests.yml
+    with:
+      github-event-name: ${{ github.event_name }}
    secrets: inherit

  neon-image-arch:
-    needs: [ check-permissions, build-build-tools-image, tag ]
+    needs: [ check-permissions, build-build-tools-image, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    strategy:
      matrix:
        arch: [ x64, arm64 ]
@@ -539,7 +523,7 @@ jobs:
          build-args: |
            ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }}
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm
            DEBIAN_VERSION=bookworm
          provenance: false
@@ -549,10 +533,11 @@ jobs:
          cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}
          tags: |
-            neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }}
+            neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}

  neon-image:
-    needs: [ neon-image-arch, tag ]
+    needs: [ neon-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
@@ -567,13 +552,14 @@ jobs:

      - name: Create multi-arch image
        run: |
-          docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \
-                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
-                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64
+          docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \
+                                             neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \
+                                             neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64

  compute-node-image-arch:
-    needs: [ check-permissions, build-build-tools-image, tag ]
+    needs: [ check-permissions, build-build-tools-image, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -631,7 +617,7 @@ jobs:
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
            PG_VERSION=${{ matrix.version.pg }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
            DEBIAN_VERSION=${{ matrix.version.debian }}
          provenance: false
@@ -641,7 +627,7 @@ jobs:
          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
          tags: |
-            neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
+            neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}

      - name: Build neon extensions test image
        if: matrix.version.pg >= 'v16'
@@ -651,7 +637,7 @@ jobs:
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
            PG_VERSION=${{ matrix.version.pg }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
            DEBIAN_VERSION=${{ matrix.version.debian }}
          provenance: false
@@ -661,10 +647,11 @@ jobs:
          target: extension-tests
          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
          tags: |
-            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
+            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}

  compute-node-image:
-    needs: [ compute-node-image-arch, tag ]
+    needs: [ compute-node-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -692,27 +679,28 @@ jobs:

      - name: Create multi-arch compute-node image
        run: |
-          docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
-                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
-                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
+          docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
+                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
+                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

      - name: Create multi-arch neon-test-extensions image
        if: matrix.version.pg >= 'v16'
        run: |
-          docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
-                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
-                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
+          docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
+                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
+                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

-  vm-compute-node-image:
-    needs: [ check-permissions, tag, compute-node-image ]
-    runs-on: [ self-hosted, large ]
+  vm-compute-node-image-arch:
+    needs: [ check-permissions, meta, compute-node-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
    strategy:
      fail-fast: false
      matrix:
+        arch: [ amd64, arm64 ]
        version:
-          # see the comment for `compute-node-image-arch` job
          - pg: v14
            debian: bullseye
          - pg: v15
@@ -722,14 +710,14 @@ jobs:
          - pg: v17
            debian: bookworm
    env:
-      VM_BUILDER_VERSION: v0.37.1
+      VM_BUILDER_VERSION: v0.42.2

    steps:
      - uses: actions/checkout@v4

      - name: Downloading vm-builder
        run: |
-          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-${{ matrix.arch }} -o vm-builder
          chmod +x vm-builder

      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
@@ -742,22 +730,50 @@ jobs:
      # it won't have the proper authentication (written at v0.6.0)
      - name: Pulling compute-node image
        run: |
-          docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+          docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}

      - name: Build vm image
        run: |
          ./vm-builder \
            -size=2G \
            -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
-            -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-            -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+            -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+            -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \
+            -target-arch=linux/${{ matrix.arch }}

      - name: Pushing vm-compute-node image
        run: |
-          docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+          docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}
+
+  # Combines the per-architecture vm-compute-node images pushed by
+  # `vm-compute-node-image-arch` (tags suffixed `-amd64` / `-arm64`) into one
+  # multi-arch tag per Postgres version.
+  vm-compute-node-image:
+    needs: [ vm-compute-node-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        version:
+          # see the comment for `compute-node-image-arch` job
+          - pg: v14
+          - pg: v15
+          - pg: v16
+          - pg: v17
+    steps:
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+
+      - name: Create multi-arch vm-compute-node image
+        run: |
+          docker buildx imagetools create -t neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                             neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \
+                                             neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64

  test-images:
-    needs: [ check-permissions, tag, neon-image, compute-node-image ]
+    needs: [ check-permissions, meta, neon-image, compute-node-image ]
+    # Depends on jobs that can get skipped
+    if: "!failure() && !cancelled()"
    strategy:
      fail-fast: false
      matrix:
@@ -775,17 +791,6 @@ jobs:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - name: Get the last compute release tag
-        id: get-last-compute-release-tag
-        env:
-          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-        run: |
-          tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            "/repos/${{ github.repository }}/releases")
-          echo tag=${tag} >> ${GITHUB_OUTPUT}
-
      # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
      # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
      # Regular pageserver version string looks like
@@ -795,8 +800,9 @@ jobs:
      # Ensure that we don't have bad versions.
      - name: Verify image versions
        shell: bash # ensure no set -e for better error messages
+        if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
        run: |
-          pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
+          pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")

          echo "Pageserver version string: $pageserver_version"

@@ -813,7 +819,24 @@ jobs:
      - name: Verify docker-compose example and test extensions
        timeout-minutes: 20
        env:
-          TAG: ${{needs.tag.outputs.build-tag}}
+          TAG: >-
+            ${{
+              contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
+              && needs.meta.outputs.previous-storage-release
+              || needs.meta.outputs.build-tag
+            }}
+          COMPUTE_TAG: >-
+            ${{
+              contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+              && needs.meta.outputs.previous-compute-release
+              || needs.meta.outputs.build-tag
+            }}
+          TEST_EXTENSIONS_TAG: >-
+            ${{
+              contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+              && 'latest'
+              || needs.meta.outputs.build-tag
+            }}
          TEST_VERSION_ONLY: ${{ matrix.pg_version }}
        run: ./docker-compose/docker_compose_test.sh

@@ -825,10 +848,17 @@ jobs:

      - name: Test extension upgrade
        timeout-minutes: 20
-        if: ${{ needs.tag.outputs.build-tag == github.run_id }}
+        if: ${{ contains(fromJSON('["pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
        env:
-          NEWTAG: ${{ needs.tag.outputs.build-tag }}
-          OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          TAG: >-
+            ${{
+              false
+              || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag
+              || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release
+            }}
+          TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }}
+          NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }}
+          OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }}
        run: ./docker-compose/test_extensions_upgrade.sh

      - name: Print logs and clean up
@@ -838,7 +868,7 @@ jobs:
          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down

  generate-image-maps:
-    needs: [ tag ]
+    needs: [ meta ]
    runs-on: ubuntu-22.04
    outputs:
      neon-dev: ${{ steps.generate.outputs.neon-dev }}
@@ -848,14 +878,14 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          sparse-checkout: scripts/generate_image_maps.py
+          sparse-checkout: .github/scripts/generate_image_maps.py
          sparse-checkout-cone-mode: false

      - name: Generate Image Maps
        id: generate
-        run: python scripts/generate_image_maps.py
+        run: python3 .github/scripts/generate_image_maps.py
        env:
-          BUILD_TAG: "${{ needs.tag.outputs.build-tag }}"
+          BUILD_TAG: "${{ needs.meta.outputs.build-tag }}"
          BRANCH: "${{ github.ref_name }}"
          DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
          PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
@@ -864,7 +894,8 @@ jobs:
          AWS_REGION: "${{ vars.AWS_ECR_REGION }}"

  push-neon-image-dev:
-    needs: [ generate-image-maps, neon-image ]
+    needs: [ meta, generate-image-maps, neon-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
    permissions:
      id-token: write  # Required for aws/azure login
@@ -881,7 +912,8 @@ jobs:
    secrets: inherit

  push-compute-image-dev:
-    needs: [ generate-image-maps, vm-compute-node-image ]
+    needs: [ meta, generate-image-maps, vm-compute-node-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
    permissions:
      id-token: write  # Required for aws/azure login
@@ -898,8 +930,9 @@ jobs:
    secrets: inherit

  push-neon-image-prod:
-    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ generate-image-maps, neon-image, test-images ]
+    needs: [ meta, generate-image-maps, neon-image, test-images ]
+    # Depends on jobs that can get skipped
+    if: ${{ !failure() && !cancelled() && contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
    permissions:
      id-token: write  # Required for aws/azure login
@@ -916,8 +949,9 @@ jobs:
    secrets: inherit

  push-compute-image-prod:
-    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ generate-image-maps, vm-compute-node-image, test-images ]
+    needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ]
+    # Depends on jobs that can get skipped
+    if: ${{ !failure() && !cancelled() && needs.meta.outputs.run-kind == 'compute-release' }}
    uses: ./.github/workflows/_push-to-container-registry.yml
    permissions:
      id-token: write  # Required for aws/azure login
@@ -936,18 +970,19 @@ jobs:
  # This is a bit of a special case so we're not using a generated image map.
  add-latest-tag-to-neon-extensions-test-image:
    if: github.ref_name == 'main'
-    needs: [ tag, compute-node-image ]
+    needs: [ meta, compute-node-image ]
    uses: ./.github/workflows/_push-to-container-registry.yml
    with:
      image-map: |
        {
-          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
-          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
+          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
+          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
        }
    secrets: inherit

  trigger-custom-extensions-build-and-wait:
-    needs: [ check-permissions, tag ]
+    needs: [ check-permissions, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
@@ -982,7 +1017,7 @@ jobs:
                \"ci_job_name\": \"build-and-upload-extensions\",
                \"commit_hash\": \"$COMMIT_SHA\",
                \"remote_repo\": \"${{ github.repository }}\",
-                \"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
+                \"compute_image_tag\": \"${{ needs.meta.outputs.build-tag }}\",
                \"remote_branch_name\": \"${{ github.ref_name }}\"
              }
            }"
@@ -1026,9 +1061,9 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
-    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
-    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
+    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
+    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
+    if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -1039,108 +1074,103 @@ jobs:
      - uses: actions/checkout@v4

      - name: Create git tag and GitHub release
-        if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
+        if: ${{ contains(fromJSON('["storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) }}
        uses: actions/github-script@v7
+        env:
+          TAG: "${{ needs.meta.outputs.build-tag }}"
+          BRANCH: "${{ github.ref_name }}"
+          PREVIOUS_RELEASE: >-
+            ${{
+              false
+              || needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release
+              || needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release
+              || needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release
+              || 'unknown'
+            }}
        with:
          retries: 5
          script: |
-            const tag = "${{ needs.tag.outputs.build-tag }}";
-            const branch = "${{ github.ref_name }}";
+            const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env

            try {
              const existingRef = await github.rest.git.getRef({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                ref: `tags/${tag}`,
+                ref: `tags/${TAG}`,
              });

              if (existingRef.data.object.sha !== context.sha) {
-                throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
+                throw new Error(`Tag ${TAG} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
              }

-              console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`);
+              console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`);
            } catch (error) {
              if (error.status !== 404) {
                throw error;
              }

-              console.log(`Tag ${tag} does not exist. Creating it...`);
+              console.log(`Tag ${TAG} does not exist. Creating it...`);
              await github.rest.git.createRef({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                ref: `refs/tags/${tag}`,
+                ref: `refs/tags/${TAG}`,
                sha: context.sha,
              });
-              console.log(`Tag ${tag} created successfully.`);
+              console.log(`Tag ${TAG} created successfully.`);
            }

            try {
              const existingRelease = await github.rest.repos.getReleaseByTag({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                tag: tag,
+                tag: TAG,
              });

-              console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`);
+              console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`);
            } catch (error) {
              if (error.status !== 404) {
                throw error;
              }

-              console.log(`Release for tag ${tag} does not exist. Creating it...`);
+              console.log(`Release for tag ${TAG} does not exist. Creating it...`);

              // Find the PR number using the commit SHA
              const pullRequests = await github.rest.pulls.list({
                owner: context.repo.owner,
                repo: context.repo.repo,
                state: 'closed',
-                base: branch,
+                base: BRANCH,
              });

              const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
              const prNumber = pr ? pr.number : null;

-              // Find the previous release on the branch
-              const releases = await github.rest.repos.listReleases({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                per_page: 100,
-              });
-
-              const branchReleases = releases.data
-                .filter((release) => {
-                  const regex = new RegExp(`^${branch}-\\d+$`);
-                  return regex.test(release.tag_name) && !release.draft && !release.prerelease;
-                })
-                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
-
-              const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;
-
              const releaseNotes = [
                prNumber
                  ? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
                  : 'Release PR not found.',
-                previousTag
-                  ? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
-                  : `No previous release found on branch ${branch}.`,
+                `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.`
              ].join('\n\n');

              await github.rest.repos.createRelease({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                tag_name: tag,
+                tag_name: TAG,
                body: releaseNotes,
              });
-              console.log(`Release for tag ${tag} created successfully.`);
+              console.log(`Release for tag ${TAG} created successfully.`);
            }

      - name: Trigger deploy workflow
        env:
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
+          RUN_KIND: ${{ needs.meta.outputs.run-kind }}
        run: |
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+          case ${RUN_KIND} in
+          push-main)
+            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false
+            ;;
+          storage-release)
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
              -f deployPgSniRouter=false \
              -f deployProxy=false \
@@ -1148,7 +1178,7 @@ jobs:
              -f deployStorageBroker=true \
              -f deployStorageController=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}} \
+              -f dockerTag=${{needs.meta.outputs.build-tag}} \
              -f deployPreprodRegion=true

            gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \
@@ -1156,8 +1186,9 @@ jobs:
              -f deployStorageBroker=true \
              -f deployStorageController=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}}
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
+              -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          proxy-release)
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
              -f deployPgSniRouter=true \
              -f deployProxy=true \
@@ -1165,7 +1196,7 @@ jobs:
              -f deployStorageBroker=false \
              -f deployStorageController=false \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}} \
+              -f dockerTag=${{needs.meta.outputs.build-tag}} \
              -f deployPreprodRegion=true

            gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \
@@ -1175,13 +1206,16 @@ jobs:
              -f deployProxyScram=true \
              -f deployProxyAuthBroker=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}}
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}}
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'"
+              -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          compute-release)
+            gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          *)
+            echo "RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'"
            exit 1
-          fi
+            ;;
+          esac

  notify-storage-release-deploy-failure:
    needs: [ deploy ]
@@ -1197,7 +1231,7 @@ jobs:
          payload: |
            channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
            text: |
-              🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
+              🔴 <!subteam^S06CJ87UMNY|@oncall-storage>: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.

  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
  promote-compatibility-data:
@@ -1206,7 +1240,7 @@ jobs:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
      contents: read
-    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
+    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
    if: github.ref_name == 'release' && !failure() && !cancelled()

    runs-on: ubuntu-22.04
@@ -1296,7 +1330,8 @@ jobs:

  pin-build-tools-image:
    needs: [ build-build-tools-image, test-images, build-and-test-locally ]
-    if: github.ref_name == 'main'
+    # `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped
+    if: github.ref_name == 'main' && !failure() && !cancelled()
    uses: ./.github/workflows/pin-build-tools-image.yml
    with:
      from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }}
@@ -1315,6 +1350,7 @@ jobs:
    # Format `needs` differently to make the list more readable.
    # Usually we do `needs: [...]`
    needs:
+      - meta
      - build-and-test-locally
      - check-codestyle-python
      - check-codestyle-rust
@@ -1338,7 +1374,7 @@ jobs:
          || needs.check-codestyle-python.result == 'skipped'
          || needs.check-codestyle-rust.result == 'skipped'
          || needs.files-changed.result == 'skipped'
-          || needs.push-compute-image-dev.result == 'skipped'
-          || needs.push-neon-image-dev.result == 'skipped'
+          || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
+          || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
          || needs.test-images.result == 'skipped'
-          || needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
+          || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
--- a/.github/workflows/cargo-deny.yml
+++ b/.github/workflows/cargo-deny.yml
@@ -7,7 +7,7 @@ on:
        required: false
        type: string
  schedule:
-    - cron: '0 0 * * *'
+    - cron: '0 10 * * *'

 jobs:
  cargo-deny:
@@ -50,8 +50,9 @@ jobs:
          method: chat.postMessage
          token: ${{ secrets.SLACK_BOT_TOKEN }}
          payload: |
-            channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
+            channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }}
            text: |
              Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
              <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
-              Pinging @oncall-devprod.
+              Fixing the problem should be fairly straight forward from the logs. If not, <#${{ vars.SLACK_RUST_CHANNEL_ID }}> is there to help.
+              Pinging <!subteam^S0838JPSH32|@oncall-devprod>.
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -38,6 +38,9 @@ jobs:
    runs-on: us-east-2
    container:
      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

    steps:
--- a/.github/workflows/force-test-extensions-upgrade.yml
+++ b/.github/workflows/force-test-extensions-upgrade.yml
@@ -52,8 +52,9 @@ jobs:
      - name: Test extension upgrade
        timeout-minutes: 20
        env:
-          NEWTAG: latest
-          OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          NEW_COMPUTE_TAG: latest
+          OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          TEST_EXTENSIONS_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
          PG_VERSION: ${{ matrix.pg-version }}
          FORCE_ALL_UPGRADE_TESTS: true
        run: ./docker-compose/test_extensions_upgrade.sh
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -0,0 +1,147 @@
+name: large oltp benchmark
+
+on:
+  # Uncomment to run on push for debugging your PR (use your own branch name).
+  # push:
+  #   branches: [ bodobolero/synthetic_oltp_workload ]
+
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    #          ┌───────────── minute (0 - 59)
+    #          │ ┌───────────── hour (0 - 23)
+    #          │ │  ┌───────────── day of the month (1 - 31)
+    #          │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #          │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron:   '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
+  workflow_dispatch: # adds ability to run this manually
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0} # abort on errors, unset variables and failed pipes; trace every command
+
+concurrency:
+  # Allow only one workflow globally because we need dedicated resources which only exist once
+  group: large-oltp-bench-workflow
+  cancel-in-progress: true # a newly triggered run cancels the one that is still in progress
+
+jobs:
+  oltp:
+    strategy:
+      fail-fast: false # allow other variants to continue even if one fails
+      matrix:
+        include:
+          - target: new_branch
+            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+          - target: reuse_branch
+            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+      max-parallel: 1 # run the targets sequentially to be able to compare the results
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # TODO: increase to more than 1h
+      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      PG_VERSION: "16" # dictated by the pre-existing benchmark project
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
+      PLATFORM: ${{ matrix.target }}
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    # Increase timeout to 8h, default timeout is 6h
+    timeout-minutes: 480
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Configure AWS credentials # necessary to download artefacts
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
+
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Create Neon Branch for large tenant
+      if: ${{ matrix.target == 'new_branch' }} # the reuse_branch variant connects to an existing branch instead
+      id: create-neon-branch-oltp-target
+      uses: ./.github/actions/neon-branch-create
+      with:
+        project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
+        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+    - name: Set up Connection String
+      id: set-up-connstr
+      env:
+        # Hand the values to the script via the environment rather than templating them
+        # into the shell source, so characters such as `&` or spaces cannot break the script.
+        TARGET: ${{ matrix.target }}
+        NEW_BRANCH_CONNSTR: ${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
+        REUSE_BRANCH_CONNSTR: ${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
+      run: |
+        case "${TARGET}" in
+          new_branch)   CONNSTR="${NEW_BRANCH_CONNSTR:-}" ;;
+          reuse_branch) CONNSTR="${REUSE_BRANCH_CONNSTR:-}" ;;
+          *)            echo >&2 "Unknown target=${TARGET}"; exit 1 ;;
+        esac
+
+        # Expose the selected connection string to later steps as `steps.set-up-connstr.outputs.connstr`.
+        echo "connstr=${CONNSTR}" >> "$GITHUB_OUTPUT"
+
+    - name: Benchmark pgbench with custom-scripts
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
+        pg_version: ${{ env.PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+
+    - name: Delete Neon Branch for large tenant
+      if: ${{ always() && matrix.target == 'new_branch' }} # clean up even if earlier steps failed; only the new_branch variant creates a branch
+      uses: ./.github/actions/neon-branch-delete
+      with:
+        project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
+        branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
+        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+    - name: Create Allure report
+      id: create-allure-report
+      if: ${{ !cancelled() }} # also runs after failed steps, so failing benchmarks still get a report
+      uses: ./.github/actions/allure-report-generate
+      with:
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Post to a Slack channel
+      if: ${{ github.event.schedule && failure() }} # only for scheduled runs in which a previous step failed
+      uses: slackapi/slack-github-action@v1
+      with:
+        channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
+        slack-message: |
+          Periodic large oltp perf testing: ${{ job.status }}
+          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
+          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
+      env:
+        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -71,7 +71,7 @@ jobs:
    uses: ./.github/workflows/build-macos.yml
    with:
      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
-      rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
+      rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }}
      rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}

  gather-rust-build-stats:
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -3,12 +3,12 @@ name: Periodic pagebench performance test on dedicated EC2 machine in eu-central
 on:
  schedule:
    # * is a special character in YAML so you have to quote this string
-    #          ┌───────────── minute (0 - 59)
-    #          │ ┌───────────── hour (0 - 23)
-    #          │ │ ┌───────────── day of the month (1 - 31)
-    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
-    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:  '0 18 * * *' # Runs at 6 PM UTC every day
+    #        ┌───────────── minute (0 - 59)
+    #        │   ┌───────────── hour (0 - 23)
+    #        │   │ ┌───────────── day of the month (1 - 31)
+    #        │   │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #        │   │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron: '0 */3 * * *' # Runs every 3 hours
  workflow_dispatch: # Allows manual triggering of the workflow
    inputs:
      commit_hash:
@@ -78,8 +78,10 @@ jobs:
      run: |
        if [ -z "$INPUT_COMMIT_HASH" ]; then
          echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
+          echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV
        else
          echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
+          echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV
        fi

    - name: Start Bench with run_id
@@ -89,7 +91,7 @@ jobs:
        -H 'accept: application/json' \
        -H 'Content-Type: application/json' \
        -H "Authorization: Bearer $API_KEY" \
-        -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
+        -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\", \"neonRepoCommitHashType\": \"${COMMIT_HASH_TYPE}\"}"

    - name: Poll Test Status
      id: poll_step
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -5,6 +5,10 @@ on:
    types:
      - ready_for_review
  workflow_call:
+    inputs:
+      github-event-name:
+        type: string
+        required: true

 defaults:
  run:
@@ -19,7 +23,7 @@ jobs:
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
    uses: ./.github/workflows/check-permissions.yml
    with:
-      github-event-name: ${{ github.event_name }}
+      github-event-name: ${{ inputs.github-event-name || github.event_name }}

  cancel-previous-e2e-tests:
    needs: [ check-permissions ]
@@ -35,46 +39,29 @@ jobs:
            run cancel-previous-in-concurrency-group.yml \
              --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

-  tag:
-    needs: [ check-permissions ]
-    runs-on: ubuntu-22.04
-    outputs:
-      build-tag: ${{ steps.build-tag.outputs.tag }}
-
-    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Get build tag
-        env:
-          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-          CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
-          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-        run: |
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
-            echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
-            BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
-            echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
-          fi
-        id: build-tag
+  meta:
+    uses: ./.github/workflows/_meta.yml
+    with:
+      github-event-name: ${{ inputs.github-event-name || github.event_name }}

  trigger-e2e-tests:
-    needs: [ tag ]
+    needs: [ meta ]
    runs-on: ubuntu-22.04
    env:
      EVENT_ACTION: ${{ github.event.action }}
      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-      TAG: ${{ needs.tag.outputs.build-tag }}
+      TAG: >-
+        ${{
+          contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
+          && needs.meta.outputs.previous-storage-release
+          || needs.meta.outputs.build-tag
+        }}
+      COMPUTE_TAG: >-
+        ${{
+          contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+          && needs.meta.outputs.previous-compute-release
+          || needs.meta.outputs.build-tag
+        }}
    steps:
      - name: Wait for `push-{neon,compute}-image-dev` job to finish
        # It's important to have a timeout here, the script in the step can run infinitely
@@ -157,6 +144,6 @@ jobs:
              --raw-field "commit_hash=$COMMIT_SHA" \
              --raw-field "remote_repo=${GITHUB_REPOSITORY}" \
              --raw-field "storage_image_tag=${TAG}" \
-              --raw-field "compute_image_tag=${TAG}" \
+              --raw-field "compute_image_tag=${COMPUTE_TAG}" \
              --raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
              --raw-field "e2e-platforms=${E2E_PLATFORMS}"
--- a/4
+++ b/4
@@ -1,8 +1,8 @@
 # Autoscaling
 /libs/vm_monitor/ @neondatabase/autoscaling

-# DevProd
-/.github/ @neondatabase/developer-productivity
+# DevProd & PerfCorr
+/.github/ @neondatabase/developer-productivity @neondatabase/performance-correctness

 # Compute
 /pgxn/ @neondatabase/compute
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -783,6 +783,28 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "axum-extra"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
+dependencies = [
+ "axum",
+ "axum-core",
+ "bytes",
+ "futures-util",
+ "headers",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "serde",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+]
+
 [[package]]
 name = "azure_core"
 version = "0.21.0"
@@ -925,9 +947,9 @@ checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5"

 [[package]]
 name = "base64"
-version = "0.21.1"
+version = "0.21.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"

 [[package]]
 name = "base64"
@@ -984,9 +1006,9 @@ dependencies = [

 [[package]]
 name = "bindgen"
-version = "0.70.1"
+version = "0.71.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
+checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
 dependencies = [
 "bitflags 2.8.0",
 "cexpr",
@@ -997,7 +1019,7 @@ dependencies = [
 "proc-macro2",
 "quote",
 "regex",
- "rustc-hash",
+ "rustc-hash 2.1.1",
 "shlex",
 "syn 2.0.90",
 ]
@@ -1105,9 +1127,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

 [[package]]
 name = "cc"
-version = "1.1.30"
+version = "1.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945"
+checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
 dependencies = [
 "jobserver",
 "libc",
@@ -1305,6 +1327,7 @@ dependencies = [
 "aws-sdk-s3",
 "aws-smithy-types",
 "axum",
+ "axum-extra",
 "base64 0.13.1",
 "bytes",
 "camino",
@@ -1316,6 +1339,7 @@ dependencies = [
 "flate2",
 "futures",
 "http 1.1.0",
+ "jsonwebtoken",
 "metrics",
 "nix 0.27.1",
 "notify",
@@ -1342,7 +1366,9 @@ dependencies = [
 "tokio-util",
 "tower 0.5.2",
 "tower-http",
+ "tower-otel",
 "tracing",
+ "tracing-opentelemetry",
 "tracing-subscriber",
 "tracing-utils",
 "url",
@@ -2295,7 +2321,7 @@ name = "framed-websockets"
 version = "0.1.0"
 source = "git+https://github.com/neondatabase/framed-websockets#34eff3d6f8cfccbc5f35e4f65314ff7328621127"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 "bytemuck",
 "bytes",
 "futures-core",
@@ -2408,9 +2434,9 @@ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"

 [[package]]
 name = "futures-timer"
-version = "3.0.2"
+version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"

 [[package]]
 name = "futures-util"
@@ -2513,6 +2539,27 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

+[[package]]
+name = "governor"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "842dc78579ce01e6a1576ad896edc92fca002dd60c9c3746b7fc2bec6fb429d0"
+dependencies = [
+ "cfg-if",
+ "dashmap 6.1.0",
+ "futures-sink",
+ "futures-timer",
+ "futures-util",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot 0.12.1",
+ "portable-atomic",
+ "quanta",
+ "rand 0.8.5",
+ "smallvec",
+ "spinning_top",
+]
+
 [[package]]
 name = "group"
 version = "0.12.1"
@@ -2630,7 +2677,7 @@ version = "7.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 "byteorder",
 "crossbeam-channel",
 "flate2",
@@ -2638,6 +2685,30 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "headers"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "322106e6bd0cba2d5ead589ddb8150a13d7c4217cf80d7c4f682ca994ccc6aa9"
+dependencies = [
+ "base64 0.21.7",
+ "bytes",
+ "headers-core",
+ "http 1.1.0",
+ "httpdate",
+ "mime",
+ "sha1",
+]
+
+[[package]]
+name = "headers-core"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4"
+dependencies = [
+ "http 1.1.0",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -2775,12 +2846,10 @@ name = "http-utils"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "backtrace",
 "bytes",
 "fail",
- "flate2",
+ "futures",
 "hyper 0.14.30",
- "inferno 0.12.0",
 "itertools 0.10.5",
 "jemalloc_pprof",
 "metrics",
@@ -2793,6 +2862,7 @@ dependencies = [
 "serde_path_to_error",
 "thiserror 1.0.69",
 "tokio",
+ "tokio-rustls 0.26.0",
 "tokio-stream",
 "tokio-util",
 "tracing",
@@ -3279,9 +3349,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"

 [[package]]
 name = "jemalloc_pprof"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb"
+checksum = "5622af6d21ff86ed7797ef98e11b8f302da25ec69a7db9f6cde8e2e1c8df9992"
 dependencies = [
 "anyhow",
 "libc",
@@ -3365,7 +3435,7 @@ version = "9.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 "js-sys",
 "pem",
 "ring",
@@ -3480,9 +3550,9 @@ dependencies = [

 [[package]]
 name = "mappings"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e"
+checksum = "e434981a332777c2b3062652d16a55f8e74fa78e6b1882633f0d77399c84fc2a"
 dependencies = [
 "anyhow",
 "libc",
@@ -3535,7 +3605,7 @@ dependencies = [
 "measured-derive",
 "memchr",
 "parking_lot 0.12.1",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 "ryu",
 ]

@@ -3723,6 +3793,12 @@ dependencies = [
 "memoffset 0.9.0",
 ]

+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -3733,6 +3809,12 @@ dependencies = [
 "minimal-lexical",
 ]

+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "notify"
 version = "8.0.0"
@@ -3982,7 +4064,7 @@ dependencies = [
 "opentelemetry-http",
 "opentelemetry-proto",
 "opentelemetry_sdk",
- "prost",
+ "prost 0.13.3",
 "reqwest",
 "thiserror 1.0.69",
 ]
@@ -3995,7 +4077,7 @@ checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
 dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
- "prost",
+ "prost 0.13.3",
 "tonic",
 ]

@@ -4109,6 +4191,7 @@ dependencies = [
 "pageserver_api",
 "pageserver_client",
 "rand 0.8.5",
+ "reqwest",
 "serde",
 "serde_json",
 "tokio",
@@ -4198,6 +4281,9 @@ dependencies = [
 "remote_storage",
 "reqwest",
 "rpds",
+ "rustls 0.23.18",
+ "rustls-pemfile 2.1.1",
+ "rustls-pki-types",
 "scopeguard",
 "send-future",
 "serde",
@@ -4216,6 +4302,7 @@ dependencies = [
 "tokio-epoll-uring",
 "tokio-io-timeout",
 "tokio-postgres",
+ "tokio-rustls 0.26.0",
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
@@ -4223,6 +4310,7 @@ dependencies = [
 "tracing",
 "url",
 "utils",
+ "uuid",
 "wal_decoder",
 "walkdir",
 "workspace_hack",
@@ -4305,9 +4393,9 @@ dependencies = [

 [[package]]
 name = "papaya"
-version = "0.1.8"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc7c76487f7eaa00a0fc1d7f88dc6b295aec478d11b0fc79f857b62c2874124c"
+checksum = "aab21828b6b5952fdadd6c377728ffae53ec3a21b2febc47319ab65741f7e2fd"
 dependencies = [
 "equivalent",
 "seize",
@@ -4435,7 +4523,7 @@ version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 "serde",
 ]

@@ -4484,18 +4572,18 @@ dependencies = [

 [[package]]
 name = "pin-project"
-version = "1.1.0"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead"
+checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d"
 dependencies = [
 "pin-project-internal",
 ]

 [[package]]
 name = "pin-project-internal"
-version = "1.1.0"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
+checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -4589,6 +4677,12 @@ dependencies = [
 "never-say-never",
 ]

+[[package]]
+name = "portable-atomic"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
+
 [[package]]
 name = "postgres"
 version = "0.19.7"
@@ -4743,8 +4837,10 @@ dependencies = [
 "nix 0.26.4",
 "once_cell",
 "parking_lot 0.12.1",
- "protobuf",
- "protobuf-codegen-pure",
+ "prost 0.12.6",
+ "prost-build 0.12.6",
+ "prost-derive 0.12.6",
+ "sha2",
 "smallvec",
 "symbolic-demangle",
 "tempfile",
@@ -4753,15 +4849,17 @@ dependencies = [

 [[package]]
 name = "pprof_util"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781"
+checksum = "9fa015c78eed2130951e22c58d2095849391e73817ab2e74f71b0b9f63dd8416"
 dependencies = [
 "anyhow",
+ "backtrace",
 "flate2",
+ "inferno 0.12.0",
 "num",
 "paste",
- "prost",
+ "prost 0.13.3",
 ]

 [[package]]
@@ -4854,6 +4952,16 @@ dependencies = [
 "thiserror 1.0.69",
 ]

+[[package]]
+name = "prost"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29"
+dependencies = [
+ "bytes",
+ "prost-derive 0.12.6",
+]
+
 [[package]]
 name = "prost"
 version = "0.13.3"
@@ -4861,7 +4969,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f"
 dependencies = [
 "bytes",
- "prost-derive",
+ "prost-derive 0.13.3",
+]
+
+[[package]]
+name = "prost-build"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
+dependencies = [
+ "bytes",
+ "heck",
+ "itertools 0.10.5",
+ "log",
+ "multimap",
+ "once_cell",
+ "petgraph",
+ "prettyplease",
+ "prost 0.12.6",
+ "prost-types 0.12.6",
+ "regex",
+ "syn 2.0.90",
+ "tempfile",
 ]

 [[package]]
@@ -4878,13 +5007,26 @@ dependencies = [
 "once_cell",
 "petgraph",
 "prettyplease",
- "prost",
- "prost-types",
+ "prost 0.13.3",
+ "prost-types 0.13.3",
 "regex",
 "syn 2.0.90",
 "tempfile",
 ]

+[[package]]
+name = "prost-derive"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
+dependencies = [
+ "anyhow",
+ "itertools 0.10.5",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.90",
+]
+
 [[package]]
 name = "prost-derive"
 version = "0.13.3"
@@ -4898,38 +5040,22 @@ dependencies = [
 "syn 2.0.90",
 ]

+[[package]]
+name = "prost-types"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0"
+dependencies = [
+ "prost 0.12.6",
+]
+
 [[package]]
 name = "prost-types"
 version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
 dependencies = [
- "prost",
-]
-
-[[package]]
-name = "protobuf"
-version = "2.28.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
-
-[[package]]
-name = "protobuf-codegen"
-version = "2.28.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
-dependencies = [
- "protobuf",
-]
-
-[[package]]
-name = "protobuf-codegen-pure"
-version = "2.28.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
-dependencies = [
- "protobuf",
- "protobuf-codegen",
+ "prost 0.13.3",
 ]

 [[package]]
@@ -5010,7 +5136,7 @@ dependencies = [
 "reqwest-tracing",
 "rsa",
 "rstest",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 "rustls 0.23.18",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
@@ -5050,6 +5176,21 @@ dependencies = [
 "zerocopy",
 ]

+[[package]]
+name = "quanta"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
+dependencies = [
+ "crossbeam-utils",
+ "libc",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi",
+]
+
 [[package]]
 name = "quick-xml"
 version = "0.26.0"
@@ -5180,6 +5321,15 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "raw-cpuid"
+version = "11.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e"
+dependencies = [
+ "bitflags 2.8.0",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -5514,16 +5664,16 @@ dependencies = [

 [[package]]
 name = "ring"
-version = "0.17.6"
+version = "0.17.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "684d5e6e18f669ccebf64a92236bb7db9a34f07be010e3627368182027180866"
+checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
 dependencies = [
 "cc",
+ "cfg-if",
 "getrandom 0.2.11",
 "libc",
- "spin",
 "untrusted",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -5628,6 +5778,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -5744,7 +5900,7 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 ]

 [[package]]
@@ -5753,15 +5909,15 @@ version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f48172685e6ff52a556baa527774f61fcaa884f59daf3375c62a3f1cd2549dab"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.21.7",
 "rustls-pki-types",
 ]

 [[package]]
 name = "rustls-pki-types"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b"
+checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"

 [[package]]
 name = "rustls-webpki"
@@ -5992,9 +6148,9 @@ dependencies = [

 [[package]]
 name = "seize"
-version = "0.4.9"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d84b0c858bdd30cb56f5597f8b3bf702ec23829e652cc636a1e5a7b9de46ae93"
+checksum = "e4b8d813387d566f627f3ea1b914c068aac94c40ae27ec43f5f33bde65abefe7"
 dependencies = [
 "libc",
 "windows-sys 0.52.0",
@@ -6387,6 +6543,15 @@ version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"

+[[package]]
+name = "spinning_top"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300"
+dependencies = [
+ "lock_api",
+]
+
 [[package]]
 name = "spki"
 version = "0.6.0"
@@ -6438,7 +6603,7 @@ dependencies = [
 "metrics",
 "once_cell",
 "parking_lot 0.12.1",
- "prost",
+ "prost 0.13.3",
 "rustls 0.23.18",
 "tokio",
 "tonic",
@@ -6456,6 +6621,7 @@ dependencies = [
 "bytes",
 "chrono",
 "clap",
+ "clashmap",
 "control_plane",
 "cron",
 "diesel",
@@ -6463,6 +6629,7 @@ dependencies = [
 "diesel_migrations",
 "fail",
 "futures",
+ "governor",
 "hex",
 "http-utils",
 "humantime",
@@ -7209,7 +7376,7 @@ dependencies = [
 "hyper-util",
 "percent-encoding",
 "pin-project",
- "prost",
+ "prost 0.13.3",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
 "tokio",
@@ -7229,8 +7396,8 @@ checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
 dependencies = [
 "prettyplease",
 "proc-macro2",
- "prost-build",
- "prost-types",
+ "prost-build 0.13.3",
+ "prost-types 0.13.3",
 "quote",
 "syn 2.0.90",
 ]
@@ -7277,10 +7444,12 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
 dependencies = [
+ "base64 0.22.1",
 "bitflags 2.8.0",
 "bytes",
 "http 1.1.0",
 "http-body 1.0.0",
+ "mime",
 "pin-project-lite",
 "tower-layer",
 "tower-service",
@@ -7294,6 +7463,20 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"

+[[package]]
+name = "tower-otel"
+version = "0.2.0"
+source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd"
+dependencies = [
+ "http 1.1.0",
+ "opentelemetry",
+ "pin-project",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "tracing-opentelemetry",
+]
+
 [[package]]
 name = "tower-service"
 version = "0.3.3"
@@ -7620,7 +7803,6 @@ dependencies = [
 "anyhow",
 "arc-swap",
 "async-compression",
- "backtrace",
 "bincode",
 "byteorder",
 "bytes",
@@ -7748,7 +7930,7 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "pprof",
- "prost",
+ "prost 0.13.3",
 "remote_storage",
 "serde",
 "serde_json",
@@ -8174,7 +8356,7 @@ dependencies = [
 "ahash",
 "anyhow",
 "base64 0.13.1",
- "base64 0.21.1",
+ "base64 0.21.7",
 "base64ct",
 "bytes",
 "camino",
@@ -8205,6 +8387,7 @@ dependencies = [
 "hyper-util",
 "indexmap 1.9.3",
 "indexmap 2.0.1",
+ "itertools 0.10.5",
 "itertools 0.12.1",
 "lazy_static",
 "libc",
@@ -8223,7 +8406,7 @@ dependencies = [
 "parquet",
 "prettyplease",
 "proc-macro2",
- "prost",
+ "prost 0.13.3",
 "quote",
 "rand 0.8.5",
 "regex",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,7 +43,7 @@ members = [
 ]

 [workspace.package]
-edition = "2021"
+edition = "2024"
 license = "Apache-2.0"

 ## All dependency versions, used in the project
@@ -53,7 +53,6 @@ anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
 atomic-take = "1.1.0"
-backtrace = "0.3.74"
 flate2 = "1.0.26"
 assert-json-diff = "2"
 async-stream = "0.3"
@@ -68,9 +67,10 @@ aws-credential-types = "1.2.0"
 aws-sigv4 = { version = "1.2", features = ["sign-http"] }
 aws-types = "1.3"
 axum = { version = "0.8.1", features = ["ws"] }
+axum-extra = { version = "0.10.0", features = ["typed-header"] }
 base64 = "0.13.0"
 bincode = "1.3"
-bindgen = "0.70"
+bindgen = "0.71"
 bit_field = "0.10.2"
 bstr = "1.0"
 byteorder = "1.4"
@@ -95,6 +95,7 @@ futures = "0.3"
 futures-core = "0.3"
 futures-util = "0.3"
 git-version = "0.3"
+governor = "0.8"
 hashbrown = "0.14"
 hashlink = "0.9.1"
 hdrhistogram = "7.5.2"
@@ -113,11 +114,10 @@ hyper-util = "0.1"
 tokio-tungstenite = "0.21.0"
 indexmap = "2"
 indoc = "2"
-inferno = "0.12.0"
 ipnet = "2.10.0"
 itertools = "0.10"
 itoa = "1.0.11"
-jemalloc_pprof = "0.6"
+jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
 jsonwebtoken = "9"
 lasso = "0.7"
 libc = "0.2"
@@ -139,7 +139,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
-pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
+pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13"
@@ -155,6 +155,7 @@ rpds = "0.13"
 rustc-hash = "1.1.0"
 rustls = { version = "0.23.16", default-features = false }
 rustls-pemfile = "2"
+rustls-pki-types = "1.11"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
@@ -192,7 +193,11 @@ toml = "0.8"
 toml_edit = "0.22"
 tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
 tower = { version = "0.5.2", default-features = false }
-tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
+tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
+
+# This revision uses opentelemetry 0.27. There's no tag for it.
+tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" }
+
 tower-service = "0.3.3"
 tracing = "0.1"
 tracing-error = "0.2"
--- a/7
+++ b/7
@@ -11,15 +11,16 @@ ICU_PREFIX_DIR := /usr/local/icu
 #
 BUILD_TYPE ?= debug
 WITH_SANITIZERS ?= no
+PG_CFLAGS = -fsigned-char
 ifeq ($(BUILD_TYPE),release)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl
-	PG_CFLAGS = -O2 -g3 $(CFLAGS)
+	PG_CFLAGS += -O2 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	# Unfortunately, `--profile=...` is a nightly feature
 	CARGO_BUILD_FLAGS += --release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
-	PG_CFLAGS = -O0 -g3 $(CFLAGS)
+	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
@@ -159,6 +160,8 @@ postgres-%: postgres-configure-% \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
 	+@echo "Compiling pageinspect $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+	+@echo "Compiling pg_trgm $*"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
 	+@echo "Compiling amcheck $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
 	+@echo "Compiling test_decoding $*"
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -162,7 +162,7 @@ FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION:?} postgres
 RUN cd postgres && \
-    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
+    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
    --with-icu --with-libxml --with-libxslt --with-lz4" && \
    if [ "${PG_VERSION:?}" != "v14" ]; then \
        # zstd is available only from PG15
@@ -1484,7 +1484,7 @@ WORKDIR /ext-src
 COPY compute/patches/pg_duckdb_v031.patch .
 COPY compute/patches/duckdb_v120.patch .
 # pg_duckdb build requires source dir to be a git repo to get submodules
-# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: 
+# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
 # - extension management function duckdb.install_extension()
 # - access to duckdb.extensions table and its sequence
 RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
@@ -1499,8 +1499,8 @@ ARG PG_VERSION
 COPY --from=pg_duckdb-src /ext-src/ /ext-src/
 WORKDIR /ext-src/pg_duckdb-src
 RUN make install -j $(getconf _NPROCESSORS_ONLN) && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control 
-        
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control
+
 #########################################################################################
 #
 # Layer "pg_repack"
@@ -1758,15 +1758,15 @@ ARG TARGETARCH
 # test_runner/regress/test_compute_metrics.py
 # See comment on the top of the file regarding `echo`, `-e` and `\n`
 RUN if [ "$TARGETARCH" = "amd64" ]; then\
-        postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\
+        postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\
        pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
        sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
    else\
-        postgres_exporter_sha256='131a376d25778ff9701a4c81f703f179e0b58db5c2c496e66fa43f8179484786';\
+        postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\
        pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
        sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
    fi\
-    && curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.16.0/postgres_exporter-0.16.0.linux-${TARGETARCH}.tar.gz\
+    && curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\
     | tar xzf - --strip-components=1 -C.\
    && curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
     | tar xzf - --strip-components=1 -C.\
@@ -1933,6 +1933,7 @@ RUN apt update && \
        locales \
        procps \
        ca-certificates \
+        rsyslog \
        $VERSION_INSTALLS && \
    apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
@@ -1978,6 +1979,13 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neo
 # Make the libraries we built available
 RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

+# rsyslogd runs as the postgres user: create a directory for its pid file and
+# make postgres the owner of that directory and of /etc/rsyslog.d/
+RUN mkdir /var/run/rsyslogd && \
+    chown -R postgres:postgres /var/run/rsyslogd && \
+    chown -R postgres:postgres /etc/rsyslog.d/
+
+
 ENV LANG=en_US.utf8
 USER postgres
 ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/compute/etc/neon_collector.jsonnet
+++ b/compute/etc/neon_collector.jsonnet
@@ -29,6 +29,7 @@
    import 'sql_exporter/lfc_approximate_working_set_size.libsonnet',
    import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet',
    import 'sql_exporter/lfc_cache_size_limit.libsonnet',
+    import 'sql_exporter/lfc_chunk_size.libsonnet',
    import 'sql_exporter/lfc_hits.libsonnet',
    import 'sql_exporter/lfc_misses.libsonnet',
    import 'sql_exporter/lfc_used.libsonnet',
--- a/compute/etc/sql_exporter/db_total_size.sql
+++ b/compute/etc/sql_exporter/db_total_size.sql
@@ -1 +1,5 @@
-SELECT sum(pg_database_size(datname)) AS total FROM pg_database;
+SELECT sum(pg_database_size(datname)) AS total
+FROM pg_database
+-- Ignore invalid databases, as we will likely have problems with
+-- getting their size from the Pageserver.
+WHERE datconnlimit != -2;
--- a/compute/etc/sql_exporter/lfc_chunk_size.libsonnet
+++ b/compute/etc/sql_exporter/lfc_chunk_size.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_chunk_size',
+  type: 'gauge',
+  help: 'LFC chunk size, measured in 8KiB pages',
+  key_labels: null,
+  values: [
+    'lfc_chunk_size_pages',
+  ],
+  query: importstr 'sql_exporter/lfc_chunk_size.sql',
+}
--- a/compute/etc/sql_exporter/lfc_chunk_size.sql
+++ b/compute/etc/sql_exporter/lfc_chunk_size.sql
@@ -0,0 +1 @@
+SELECT lfc_value AS lfc_chunk_size_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_chunk_size_pages';
--- a/compute/etc/sql_exporter/pg_stats_userdb.sql
+++ b/compute/etc/sql_exporter/pg_stats_userdb.sql
@@ -1,10 +1,20 @@
 -- We export stats for 10 non-system databases. Without this limit it is too
 -- easy to abuse the system by creating lots of databases.

-SELECT pg_database_size(datname) AS db_size, deadlocks, tup_inserted AS inserted,
-  tup_updated AS updated, tup_deleted AS deleted, datname
+SELECT pg_database_size(datname) AS db_size,
+  deadlocks,
+  tup_inserted AS inserted,
+  tup_updated AS updated,
+  tup_deleted AS deleted,
+  datname
 FROM pg_stat_database
 WHERE datname IN (
  SELECT datname FROM pg_database
-  WHERE datname <> 'postgres' AND NOT datistemplate ORDER BY oid LIMIT 10
+  -- Ignore invalid databases, as we will likely have problems with
+  -- getting their size from the Pageserver.
+  WHERE datconnlimit != -2
+    AND datname <> 'postgres'
+    AND NOT datistemplate
+  ORDER BY oid
+  LIMIT 10
 );
--- a/compute/vm-image-spec-bookworm.yaml
+++ b/compute/vm-image-spec-bookworm.yaml
@@ -39,17 +39,26 @@ commands:
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
+  - name: rsyslogd
+    user: postgres
+    sysvInitAction: respawn
+    shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'
 shutdownHook: |
  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
 files:
  - filename: compute_ctl-sudoers
    content: |
+      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
+      # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
+      # resolve host" log messages that they generate.
+      Defaults !fqdn
+      
      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
      # regardless of hostname (ALL)
      #
      # Also allow it to shut down the VM. The fast_import job does that when it's finished.
-      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
+      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd
  - filename: cgconfig.conf
    content: |
      # Configuration for cgroups in VM compute nodes
@@ -64,6 +73,12 @@ files:
          }
          memory {}
      }
+  # Create a dummy rsyslog config, because rsyslogd refuses to start without at least one action configured.
+  # compute_ctl will rewrite this file with the actual configuration, if needed.
+  - filename: compute_rsyslog.conf
+    content: |
+      *.*    /dev/null
+      $IncludeConfig /etc/rsyslog.d/*.conf
 build: |
  # Build cgroup-tools
  #
@@ -127,6 +142,12 @@ merge: |
  RUN set -e \
      && chmod 0644 /etc/cgconfig.conf

+
+  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
+  RUN chmod 0666 /etc/compute_rsyslog.conf
+  # A directory needs the execute (search) bit, otherwise non-root users cannot enter or write to it.
+  RUN chmod 0777 /var/log/
+
  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -39,17 +39,26 @@ commands:
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
+  - name: rsyslogd
+    user: postgres
+    sysvInitAction: respawn
+    shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'
 shutdownHook: |
  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
 files:
  - filename: compute_ctl-sudoers
    content: |
+      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
+      # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
+      # resolve host" log messages that they generate.
+      Defaults !fqdn
+      
      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
      # regardless of hostname (ALL)
      #
      # Also allow it to shut down the VM. The fast_import job does that when it's finished.
-      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
+      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd
  - filename: cgconfig.conf
    content: |
      # Configuration for cgroups in VM compute nodes
@@ -64,6 +73,12 @@ files:
          }
          memory {}
      }
+  # Create a dummy rsyslog config, because rsyslogd refuses to start without at least one action configured.
+  # compute_ctl will rewrite this file with the actual configuration, if needed.
+  - filename: compute_rsyslog.conf
+    content: |
+      *.*    /dev/null
+      $IncludeConfig /etc/rsyslog.d/*.conf
 build: |
  # Build cgroup-tools
  #
@@ -123,6 +138,11 @@ merge: |
  RUN set -e \
      && chmod 0644 /etc/cgconfig.conf

+  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
+  RUN chmod 0666 /etc/compute_rsyslog.conf
+  # A directory needs the execute (search) bit, otherwise non-root users cannot enter or write to it.
+  RUN chmod 0777 /var/log/
+
  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "compute_tools"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true

 [features]
@@ -17,6 +17,7 @@ aws-sdk-kms.workspace = true
 aws-smithy-types.workspace = true
 anyhow.workspace = true
 axum = { workspace = true, features = [] }
+axum-extra.workspace = true
 camino.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
@@ -25,6 +26,7 @@ fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
 http.workspace = true
+jsonwebtoken.workspace = true
 metrics.workspace = true
 nix.workspace = true
 notify.workspace = true
@@ -46,7 +48,9 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
+tower-otel.workspace = true
 tracing.workspace = true
+tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -33,41 +33,28 @@
 //!             -b /usr/local/bin/postgres \
 //!             -r http://pg-ext-s3-gateway \
 //! ```
-use std::collections::HashMap;
 use std::ffi::OsString;
 use std::fs::File;
 use std::path::Path;
 use std::process::exit;
-use std::str::FromStr;
-use std::sync::atomic::Ordering;
-use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
-use std::{thread, time::Duration};
+use std::sync::mpsc;
+use std::thread;
+use std::time::Duration;

 use anyhow::{Context, Result};
-use chrono::Utc;
 use clap::Parser;
-use compute_tools::disk_quota::set_disk_quota;
-use compute_tools::http::server::Server;
-use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
-use signal_hook::consts::{SIGQUIT, SIGTERM};
-use signal_hook::{consts::SIGINT, iterator::Signals};
-use tracing::{error, info, warn};
-use url::Url;
-
-use compute_api::responses::{ComputeCtlConfig, ComputeStatus};
+use compute_api::responses::ComputeCtlConfig;
 use compute_api::spec::ComputeSpec;
-
-use compute_tools::compute::{
-    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
-};
-use compute_tools::configurator::launch_configurator;
+use compute_tools::compute::{ComputeNode, ComputeNodeParams, forward_termination_signal};
 use compute_tools::extension_server::get_pg_version_string;
 use compute_tools::logger::*;
-use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
-use compute_tools::swap::resize_swap;
-use rlimit::{setrlimit, Resource};
+use rlimit::{Resource, setrlimit};
+use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
+use signal_hook::iterator::Signals;
+use tracing::{error, info};
+use url::Url;
 use utils::failpoint_support;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
@@ -149,6 +136,8 @@ struct Cli {
 fn main() -> Result<()> {
    let cli = Cli::parse();

+    let scenario = failpoint_support::init();
+
    // For historical reasons, the main thread that processes the spec and launches postgres
    // is synchronous, but we always have this tokio runtime available and we "enter" it so
    // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...)
@@ -160,34 +149,44 @@ fn main() -> Result<()> {

    let build_tag = runtime.block_on(init())?;

-    let scenario = failpoint_support::init();
-
    // enable core dumping for all child processes
    setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;

-    let (pg_handle, start_pg_result) = {
-        // Enter startup tracing context
-        let _startup_context_guard = startup_context_from_env();
+    let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;

-        let cli_spec = try_spec_from_cli(&cli)?;
+    let cli_spec = try_spec_from_cli(&cli)?;

-        let compute = wait_spec(build_tag, &cli, cli_spec)?;
+    let compute_node = ComputeNode::new(
+        ComputeNodeParams {
+            compute_id: cli.compute_id,
+            connstr,
+            pgdata: cli.pgdata.clone(),
+            pgbin: cli.pgbin.clone(),
+            pgversion: get_pg_version_string(&cli.pgbin),
+            external_http_port: cli.external_http_port,
+            internal_http_port: cli.internal_http_port,
+            ext_remote_storage: cli.remote_ext_config.clone(),
+            resize_swap_on_bind: cli.resize_swap_on_bind,
+            set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
+            #[cfg(target_os = "linux")]
+            filecache_connstr: cli.filecache_connstr,
+            #[cfg(target_os = "linux")]
+            cgroup: cli.cgroup,
+            #[cfg(target_os = "linux")]
+            vm_monitor_addr: cli.vm_monitor_addr,
+            build_tag,

-        start_postgres(&cli, compute)?
+            live_config_allowed: cli_spec.live_config_allowed,
+        },
+        cli_spec.spec,
+        cli_spec.compute_ctl_config,
+    )?;

-        // Startup is finished, exit the startup tracing span
-    };
-
-    // PostgreSQL is now running, if startup was successful. Wait until it exits.
-    let wait_pg_result = wait_postgres(pg_handle)?;
-
-    let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
-
-    maybe_delay_exit(delay_exit);
+    let exit_code = compute_node.run()?;

    scenario.teardown();

-    deinit_and_exit(wait_pg_result);
+    deinit_and_exit(exit_code);
 }

 async fn init() -> Result<String> {
@@ -208,56 +207,6 @@ async fn init() -> Result<String> {
    Ok(build_tag)
 }

-fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
-    // Extract OpenTelemetry context for the startup actions from the
-    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
-    // tracing context.
-    //
-    // This is used to propagate the context for the 'start_compute' operation
-    // from the neon control plane. This allows linking together the wider
-    // 'start_compute' operation that creates the compute container, with the
-    // startup actions here within the container.
-    //
-    // There is no standard for passing context in env variables, but a lot of
-    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
-    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
-    //
-    // Switch to the startup context here, and exit it once the startup has
-    // completed and Postgres is up and running.
-    //
-    // If this pod is pre-created without binding it to any particular endpoint
-    // yet, this isn't the right place to enter the startup context. In that
-    // case, the control plane should pass the tracing context as part of the
-    // /configure API call.
-    //
-    // NOTE: This is supposed to only cover the *startup* actions. Once
-    // postgres is configured and up-and-running, we exit this span. Any other
-    // actions that are performed on incoming HTTP requests, for example, are
-    // performed in separate spans.
-    //
-    // XXX: If the pod is restarted, we perform the startup actions in the same
-    // context as the original startup actions, which probably doesn't make
-    // sense.
-    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
-    if let Ok(val) = std::env::var("TRACEPARENT") {
-        startup_tracing_carrier.insert("traceparent".to_string(), val);
-    }
-    if let Ok(val) = std::env::var("TRACESTATE") {
-        startup_tracing_carrier.insert("tracestate".to_string(), val);
-    }
-    if !startup_tracing_carrier.is_empty() {
-        use opentelemetry::propagation::TextMapPropagator;
-        use opentelemetry_sdk::propagation::TraceContextPropagator;
-        let guard = TraceContextPropagator::new()
-            .extract(&startup_tracing_carrier)
-            .attach();
-        info!("startup tracing context attached");
-        Some(guard)
-    } else {
-        None
-    }
-}
-
 fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
    // First, try to get cluster spec from the cli argument
    if let Some(ref spec_json) = cli.spec_json {
@@ -308,342 +257,7 @@ struct CliSpecParams {
    live_config_allowed: bool,
 }

-fn wait_spec(
-    build_tag: String,
-    cli: &Cli,
-    CliSpecParams {
-        spec,
-        live_config_allowed,
-        compute_ctl_config: _,
-    }: CliSpecParams,
-) -> Result<Arc<ComputeNode>> {
-    let mut new_state = ComputeState::new();
-    let spec_set;
-
-    if let Some(spec) = spec {
-        let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
-        info!("new pspec.spec: {:?}", pspec.spec);
-        new_state.pspec = Some(pspec);
-        spec_set = true;
-    } else {
-        spec_set = false;
-    }
-    let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
-    let conn_conf = postgres::config::Config::from_str(connstr.as_str())
-        .context("cannot build postgres config from connstr")?;
-    let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
-        .context("cannot build tokio postgres config from connstr")?;
-    let compute_node = ComputeNode {
-        compute_id: cli.compute_id.clone(),
-        connstr,
-        conn_conf,
-        tokio_conn_conf,
-        pgdata: cli.pgdata.clone(),
-        pgbin: cli.pgbin.clone(),
-        pgversion: get_pg_version_string(&cli.pgbin),
-        external_http_port: cli.external_http_port,
-        internal_http_port: cli.internal_http_port,
-        live_config_allowed,
-        state: Mutex::new(new_state),
-        state_changed: Condvar::new(),
-        ext_remote_storage: cli.remote_ext_config.clone(),
-        ext_download_progress: RwLock::new(HashMap::new()),
-        build_tag,
-    };
-    let compute = Arc::new(compute_node);
-
-    // If this is a pooled VM, prewarm before starting HTTP server and becoming
-    // available for binding. Prewarming helps Postgres start quicker later,
-    // because QEMU will already have its memory allocated from the host, and
-    // the necessary binaries will already be cached.
-    if !spec_set {
-        compute.prewarm_postgres()?;
-    }
-
-    // Launch the external HTTP server first, so that we can serve control plane
-    // requests while configuration is still in progress.
-    Server::External(cli.external_http_port).launch(&compute);
-
-    // The internal HTTP server could be launched later, but there isn't much
-    // sense in waiting.
-    Server::Internal(cli.internal_http_port).launch(&compute);
-
-    if !spec_set {
-        // No spec provided, hang waiting for it.
-        info!("no compute spec provided, waiting");
-
-        let mut state = compute.state.lock().unwrap();
-        while state.status != ComputeStatus::ConfigurationPending {
-            state = compute.state_changed.wait(state).unwrap();
-
-            if state.status == ComputeStatus::ConfigurationPending {
-                info!("got spec, continue configuration");
-                // Spec is already set by the http server handler.
-                break;
-            }
-        }
-
-        // Record for how long we slept waiting for the spec.
-        let now = Utc::now();
-        state.metrics.wait_for_spec_ms = now
-            .signed_duration_since(state.start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
-
-        // Reset start time, so that the total startup time that is calculated later will
-        // not include the time that we waited for the spec.
-        state.start_time = now;
-    }
-
-    launch_lsn_lease_bg_task_for_static(&compute);
-
-    Ok(compute)
-}
-
-fn start_postgres(
-    cli: &Cli,
-    compute: Arc<ComputeNode>,
-) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
-    // We got all we need, update the state.
-    let mut state = compute.state.lock().unwrap();
-    state.set_status(ComputeStatus::Init, &compute.state_changed);
-
-    info!(
-        "running compute with features: {:?}",
-        state.pspec.as_ref().unwrap().spec.features
-    );
-    // before we release the mutex, fetch some parameters for later.
-    let &ComputeSpec {
-        swap_size_bytes,
-        disk_quota_bytes,
-        #[cfg(target_os = "linux")]
-        disable_lfc_resizing,
-        ..
-    } = &state.pspec.as_ref().unwrap().spec;
-    drop(state);
-
-    // Launch remaining service threads
-    let _monitor_handle = launch_monitor(&compute);
-    let _configurator_handle = launch_configurator(&compute);
-
-    let mut prestartup_failed = false;
-    let mut delay_exit = false;
-
-    // Resize swap to the desired size if the compute spec says so
-    if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
-        // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
-        // *before* starting postgres.
-        //
-        // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
-        // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
-        // OOM-killed during startup because swap wasn't available yet.
-        match resize_swap(size_bytes) {
-            Ok(()) => {
-                let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%size_bytes, %size_mib, "resized swap");
-            }
-            Err(err) => {
-                let err = err.context("failed to resize swap");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                compute.set_failed_status(err);
-                delay_exit = true;
-            }
-        }
-    }
-
-    // Set disk quota if the compute spec says so
-    if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
-        (disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
-    {
-        match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
-            Ok(()) => {
-                let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%disk_quota_bytes, %size_mib, "set disk quota");
-            }
-            Err(err) => {
-                let err = err.context("failed to set disk quota");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                compute.set_failed_status(err);
-                delay_exit = true;
-            }
-        }
-    }
-
-    // Start Postgres
-    let mut pg = None;
-    if !prestartup_failed {
-        pg = match compute.start_compute() {
-            Ok(pg) => {
-                info!(postmaster_pid = %pg.0.id(), "Postgres was started");
-                Some(pg)
-            }
-            Err(err) => {
-                error!("could not start the compute node: {:#}", err);
-                compute.set_failed_status(err);
-                delay_exit = true;
-                None
-            }
-        };
-    } else {
-        warn!("skipping postgres startup because pre-startup step failed");
-    }
-
-    // Start the vm-monitor if directed to. The vm-monitor only runs on linux
-    // because it requires cgroups.
-    cfg_if::cfg_if! {
-        if #[cfg(target_os = "linux")] {
-            use std::env;
-            use tokio_util::sync::CancellationToken;
-
-            // This token is used internally by the monitor to clean up all threads
-            let token = CancellationToken::new();
-
-            // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
-            let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
-                None
-            } else {
-                Some(cli.filecache_connstr.clone())
-            };
-
-            let vm_monitor = if env::var_os("AUTOSCALING").is_some() {
-                let vm_monitor = tokio::spawn(vm_monitor::start(
-                    Box::leak(Box::new(vm_monitor::Args {
-                        cgroup: Some(cli.cgroup.clone()),
-                        pgconnstr,
-                        addr: cli.vm_monitor_addr.clone(),
-                    })),
-                    token.clone(),
-                ));
-                Some(vm_monitor)
-            } else {
-                None
-            };
-        }
-    }
-
-    Ok((
-        pg,
-        StartPostgresResult {
-            delay_exit,
-            compute,
-            #[cfg(target_os = "linux")]
-            token,
-            #[cfg(target_os = "linux")]
-            vm_monitor,
-        },
-    ))
-}
-
-type PostgresHandle = (std::process::Child, tokio::task::JoinHandle<Result<()>>);
-
-struct StartPostgresResult {
-    delay_exit: bool,
-    // passed through from WaitSpecResult
-    compute: Arc<ComputeNode>,
-
-    #[cfg(target_os = "linux")]
-    token: tokio_util::sync::CancellationToken,
-    #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
-}
-
-fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
-    // Wait for the child Postgres process forever. In this state Ctrl+C will
-    // propagate to Postgres and it will be shut down as well.
-    let mut exit_code = None;
-    if let Some((mut pg, logs_handle)) = pg {
-        info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
-
-        let ecode = pg
-            .wait()
-            .expect("failed to start waiting on Postgres process");
-        PG_PID.store(0, Ordering::SeqCst);
-
-        // Process has exited. Wait for the log collecting task to finish.
-        let _ = tokio::runtime::Handle::current()
-            .block_on(logs_handle)
-            .map_err(|e| tracing::error!("log task panicked: {:?}", e));
-
-        info!("Postgres exited with code {}, shutting down", ecode);
-        exit_code = ecode.code()
-    }
-
-    Ok(WaitPostgresResult { exit_code })
-}
-
-struct WaitPostgresResult {
-    exit_code: Option<i32>,
-}
-
-fn cleanup_after_postgres_exit(
-    StartPostgresResult {
-        mut delay_exit,
-        compute,
-        #[cfg(target_os = "linux")]
-        vm_monitor,
-        #[cfg(target_os = "linux")]
-        token,
-    }: StartPostgresResult,
-) -> Result<bool> {
-    // Terminate the vm_monitor so it releases the file watcher on
-    // /sys/fs/cgroup/neon-postgres.
-    // Note: the vm-monitor only runs on linux because it requires cgroups.
-    cfg_if::cfg_if! {
-        if #[cfg(target_os = "linux")] {
-            if let Some(handle) = vm_monitor {
-                // Kills all threads spawned by the monitor
-                token.cancel();
-                // Kills the actual task running the monitor
-                handle.abort();
-            }
-        }
-    }
-
-    // Maybe sync safekeepers again, to speed up next startup
-    let compute_state = compute.state.lock().unwrap().clone();
-    let pspec = compute_state.pspec.as_ref().expect("spec must be set");
-    if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
-        info!("syncing safekeepers on shutdown");
-        let storage_auth_token = pspec.storage_auth_token.clone();
-        let lsn = compute.sync_safekeepers(storage_auth_token)?;
-        info!("synced safekeepers at lsn {lsn}");
-    }
-
-    let mut state = compute.state.lock().unwrap();
-    if state.status == ComputeStatus::TerminationPending {
-        state.status = ComputeStatus::Terminated;
-        compute.state_changed.notify_all();
-        // we were asked to terminate gracefully, don't exit to avoid restart
-        delay_exit = true
-    }
-    drop(state);
-
-    if let Err(err) = compute.check_for_core_dumps() {
-        error!("error while checking for core dumps: {err:?}");
-    }
-
-    Ok(delay_exit)
-}
-
-fn maybe_delay_exit(delay_exit: bool) {
-    // If launch failed, keep serving HTTP requests for a while, so the cloud
-    // control plane can get the actual error.
-    if delay_exit {
-        info!("giving control plane 30s to collect the error before shutdown");
-        thread::sleep(Duration::from_secs(30));
-    }
-}
-
-fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
+fn deinit_and_exit(exit_code: Option<i32>) -> ! {
    // Shutdown trace pipeline gracefully, so that it has a chance to send any
    // pending traces before we exit. Shutting down OTEL tracing provider may
    // hang for quite some time, see, for example:
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -25,13 +25,13 @@
 //! docker push localhost:3030/localregistry/compute-node-v14:latest
 //! ```

-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use aws_config::BehaviorVersion;
 use camino::{Utf8Path, Utf8PathBuf};
 use clap::{Parser, Subcommand};
-use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
+use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
 use nix::unistd::Pid;
-use tracing::{error, info, info_span, warn, Instrument};
+use tracing::{Instrument, error, info, info_span, warn};
 use utils::fs_ext::is_directory_empty;

 #[path = "fast_import/aws_s3_sync.rs"]
@@ -558,7 +558,9 @@ async fn cmd_dumprestore(
                    decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
                        .await?
                } else {
-                    bail!("destination connection string must be provided in spec for dump_restore command");
+                    bail!(
+                        "destination connection string must be provided in spec for dump_restore command"
+                    );
                };

                (source, dest)
--- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -1,11 +1,10 @@
 use camino::{Utf8Path, Utf8PathBuf};
 use tokio::task::JoinSet;
+use tracing::{info, warn};
 use walkdir::WalkDir;

 use super::s3_uri::S3Uri;

-use tracing::{info, warn};
-
 const MAX_PARALLEL_UPLOADS: usize = 10;

 /// Upload all files from 'local' to 'remote'
--- a/compute_tools/src/bin/fast_import/s3_uri.rs
+++ b/compute_tools/src/bin/fast_import/s3_uri.rs
@@ -1,6 +1,7 @@
-use anyhow::Result;
 use std::str::FromStr;

+use anyhow::Result;
+
 /// Struct to hold parsed S3 components
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct S3Uri {
--- a/compute_tools/src/catalog.rs
+++ b/compute_tools/src/catalog.rs
@@ -1,18 +1,20 @@
+use std::path::Path;
+use std::process::Stdio;
+use std::result::Result;
+use std::sync::Arc;
+
+use compute_api::responses::CatalogObjects;
 use futures::Stream;
 use postgres::NoTls;
-use std::{path::Path, process::Stdio, result::Result, sync::Arc};
-use tokio::{
-    io::{AsyncBufReadExt, BufReader},
-    process::Command,
-    spawn,
-};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::spawn;
 use tokio_stream::{self as stream, StreamExt};
 use tokio_util::codec::{BytesCodec, FramedRead};
 use tracing::warn;

 use crate::compute::ComputeNode;
 use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db};
-use compute_api::responses::CatalogObjects;

 pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
    let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles"));
@@ -55,15 +57,15 @@ pub enum SchemaDumpError {
 pub async fn get_database_schema(
    compute: &Arc<ComputeNode>,
    dbname: &str,
-) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
-    let pgbin = &compute.pgbin;
+) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>> + use<>, SchemaDumpError> {
+    let pgbin = &compute.params.pgbin;
    let basepath = Path::new(pgbin).parent().unwrap();
    let pgdump = basepath.join("pg_dump");

    // Replace the DB in the connection string and disable it to parts.
    // This is the only option to handle DBs with special characters.
-    let conf =
-        postgres_conf_for_db(&compute.connstr, dbname).map_err(|_| SchemaDumpError::Unexpected)?;
+    let conf = postgres_conf_for_db(&compute.params.connstr, dbname)
+        .map_err(|_| SchemaDumpError::Unexpected)?;
    let host = conf
        .get_hosts()
        .first()
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,4 +1,4 @@
-use anyhow::{anyhow, Ok, Result};
+use anyhow::{Ok, Result, anyhow};
 use tokio_postgres::NoTls;
 use tracing::{error, instrument, warn};

--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -1,13 +1,16 @@
+use anyhow::Result;
+use std::fmt::Write as FmtWrite;
 use std::fs::{File, OpenOptions};
 use std::io;
+use std::io::Write;
 use std::io::prelude::*;
 use std::path::Path;

-use anyhow::Result;
+use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};

-use crate::pg_helpers::escape_conf_value;
-use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
-use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
+use crate::pg_helpers::{
+    GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
+};

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -56,10 +59,20 @@ pub fn write_postgres_conf(
        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
    if !spec.safekeeper_connstrings.is_empty() {
+        let mut neon_safekeepers_value = String::new();
+        tracing::info!(
+            "safekeepers_connstrings is not zero, gen: {:?}",
+            spec.safekeepers_generation
+        );
+        // If generation is given, prepend sk list with g#number:
+        if let Some(generation) = spec.safekeepers_generation {
+            write!(neon_safekeepers_value, "g#{}:", generation)?;
+        }
+        neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(","));
        writeln!(
            file,
            "neon.safekeepers={}",
-            escape_conf_value(&spec.safekeeper_connstrings.join(","))
+            escape_conf_value(&neon_safekeepers_value)
        )?;
    }
    if let Some(s) = &spec.tenant_id {
@@ -127,6 +140,54 @@ pub fn write_postgres_conf(
        writeln!(file, "# Managed by compute_ctl: end")?;
    }

+    // If audit logging is enabled, configure pgaudit.
+    //
+    // Note, that this is called after the settings from spec are written.
+    // This way we always override the settings from the spec
+    // and don't allow the user or the control plane admin to change them.
+    if let ComputeAudit::Hipaa = spec.audit_log_level {
+        writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
+        // This log level is very verbose
+        // but this is necessary for HIPAA compliance.
+        writeln!(file, "pgaudit.log='all'")?;
+        writeln!(file, "pgaudit.log_parameter=on")?;
+        // Disable logging of catalog queries
+        // The catalog doesn't contain sensitive data, so we don't need to audit it.
+        writeln!(file, "pgaudit.log_catalog=off")?;
+        // Set log rotation to 5 minutes
+        // TODO: tune this after performance testing
+        writeln!(file, "pgaudit.log_rotation_age=5")?;
+
+        // Add audit shared_preload_libraries, if they are not present.
+        //
+        // The caller who sets the flag is responsible for ensuring that the necessary
+        // shared_preload_libraries are present in the compute image,
+        // otherwise the compute start will fail.
+        if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
+            // Match whole comma-separated entries: a substring test would take
+            // "pgauditlogtofile" as proof that "pgaudit" is already listed.
+            let mut extra_shared_preload_libraries = String::new();
+            for lib in ["pgaudit", "pgauditlogtofile"] {
+                if !libs.split(',').any(|entry| entry.trim() == lib) {
+                    extra_shared_preload_libraries.push_str(&format!(",{lib}"));
+                }
+            }
+            writeln!(
+                file,
+                "shared_preload_libraries='{}{}'",
+                libs, extra_shared_preload_libraries
+            )?;
+        } else {
+            // Typically unreachable: the spec always sets at least some
+            // shared_preload_libraries, but handle it explicitly anyway.
+            writeln!(
+                file,
+                "shared_preload_libraries='neon,pgaudit,pgauditlogtofile'"
+            )?;
+        }
+        writeln!(file, "# Managed by compute_ctl audit settings: end")?;
+    }
+
    writeln!(file, "neon.extension_server_port={}", extension_server_port)?;

    if spec.drop_subscriptions_before_start {
--- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
+++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
@@ -0,0 +1,10 @@
+# Load imfile module to read log files
+module(load="imfile")
+
+# Input configuration for log files in the specified directory
+# Replace {log_directory} with the directory containing the log files
+input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
+global(workDirectory="/var/log")
+
+# Forward logs to remote syslog server
+*.* @@{remote_endpoint}
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -1,9 +1,8 @@
 use std::sync::Arc;
 use std::thread;

-use tracing::{error, info, instrument};
-
 use compute_api::responses::ComputeStatus;
+use tracing::{error, info, instrument};

 use crate::compute::ComputeNode;

--- a/compute_tools/src/disk_quota.rs
+++ b/compute_tools/src/disk_quota.rs
@@ -1,9 +1,11 @@
 use anyhow::Context;
+use tracing::instrument;

 pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";

 /// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
 /// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
+#[instrument]
 pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
    let size_kb = size_bytes / 1024;
    // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -71,15 +71,15 @@ More specifically, here is an example ext_index.json
    }
 }
 */
-use anyhow::Result;
-use anyhow::{bail, Context};
+use std::path::Path;
+use std::str;
+
+use anyhow::{Context, Result, bail};
 use bytes::Bytes;
 use compute_api::spec::RemoteExtSpec;
 use regex::Regex;
 use remote_storage::*;
 use reqwest::StatusCode;
-use std::path::Path;
-use std::str;
 use tar::Archive;
 use tracing::info;
 use tracing::log::warn;
@@ -202,8 +202,24 @@ pub async fn download_extension(
    // move contents of the libdir / sharedir in unzipped archive to the correct local paths
    for paths in [sharedir_paths, libdir_paths] {
        let (zip_dir, real_dir) = paths;
+
+        let dir = match std::fs::read_dir(&zip_dir) {
+            Ok(dir) => dir,
+            Err(e) => match e.kind() {
+                // In the event of a SQL-only extension, there would be nothing
+                // to move from the lib/ directory, so note that in the log and
+                // move on.
+                std::io::ErrorKind::NotFound => {
+                    info!("nothing to move from {}", zip_dir);
+                    continue;
+                }
+                _ => return Err(anyhow::anyhow!(e)),
+            },
+        };
+
        info!("mv {zip_dir:?}/*  {real_dir:?}");
-        for file in std::fs::read_dir(zip_dir)? {
+
+        for file in dir {
            let old_file = file?.path();
            let new_file =
                Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?);
@@ -244,33 +260,40 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
                info!("writing file {:?}{:?}", control_path, control_content);
                std::fs::write(control_path, control_content).unwrap();
            } else {
-                warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path);
+                warn!(
+                    "control file {:?} exists both locally and remotely. ignoring the remote version.",
+                    control_path
+                );
            }
        }
    }
 }

-// Do request to extension storage proxy, i.e.
+// Do request to extension storage proxy, e.g.,
 // curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
-// using HHTP GET
-// and return the response body as bytes
-//
+// using HTTP GET and return the response body as bytes.
 async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
    let uri = format!("{}/{}", ext_remote_storage, ext_path);
+    let filename = Path::new(ext_path)
+        .file_name()
+        .unwrap_or_else(|| std::ffi::OsStr::new("unknown"))
+        .to_str()
+        .unwrap_or("unknown")
+        .to_string();

-    info!("Download extension {} from uri {}", ext_path, uri);
+    info!("Downloading extension file '{}' from uri {}", filename, uri);

    match do_extension_server_request(&uri).await {
        Ok(resp) => {
            info!("Successfully downloaded remote extension data {}", ext_path);
            REMOTE_EXT_REQUESTS_TOTAL
-                .with_label_values(&[&StatusCode::OK.to_string()])
+                .with_label_values(&[&StatusCode::OK.to_string(), &filename])
                .inc();
            Ok(resp)
        }
        Err((msg, status)) => {
            REMOTE_EXT_REQUESTS_TOTAL
-                .with_label_values(&[&status])
+                .with_label_values(&[&status, &filename])
                .inc();
            bail!(msg);
        }
--- a/compute_tools/src/http/extract/json.rs
+++ b/compute_tools/src/http/extract/json.rs
@@ -1,6 +1,7 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::JsonRejection, FromRequest, Request};
+use axum::extract::rejection::JsonRejection;
+use axum::extract::{FromRequest, Request};
 use compute_api::responses::GenericAPIError;
 use http::StatusCode;

--- a/compute_tools/src/http/extract/mod.rs
+++ b/compute_tools/src/http/extract/mod.rs
@@ -1,7 +1,9 @@
 pub(crate) mod json;
 pub(crate) mod path;
 pub(crate) mod query;
+pub(crate) mod request_id;

 pub(crate) use json::Json;
 pub(crate) use path::Path;
 pub(crate) use query::Query;
+pub(crate) use request_id::RequestId;
--- a/compute_tools/src/http/extract/path.rs
+++ b/compute_tools/src/http/extract/path.rs
@@ -1,8 +1,10 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::PathRejection, FromRequestParts};
+use axum::extract::FromRequestParts;
+use axum::extract::rejection::PathRejection;
 use compute_api::responses::GenericAPIError;
-use http::{request::Parts, StatusCode};
+use http::StatusCode;
+use http::request::Parts;

 /// Custom `Path` extractor, so that we can format errors into
 /// `JsonResponse<GenericAPIError>`.
--- a/compute_tools/src/http/extract/query.rs
+++ b/compute_tools/src/http/extract/query.rs
@@ -1,8 +1,10 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::QueryRejection, FromRequestParts};
+use axum::extract::FromRequestParts;
+use axum::extract::rejection::QueryRejection;
 use compute_api::responses::GenericAPIError;
-use http::{request::Parts, StatusCode};
+use http::StatusCode;
+use http::request::Parts;

 /// Custom `Query` extractor, so that we can format errors into
 /// `JsonResponse<GenericAPIError>`.
--- a/compute_tools/src/http/extract/request_id.rs
+++ b/compute_tools/src/http/extract/request_id.rs
@@ -0,0 +1,86 @@
+use std::{
+    fmt::Display,
+    ops::{Deref, DerefMut},
+};
+
+use axum::{extract::FromRequestParts, response::IntoResponse};
+use http::{StatusCode, request::Parts};
+
+use crate::http::{JsonResponse, headers::X_REQUEST_ID};
+
+/// Extract the request ID from the `X-Request-Id` header.
+#[derive(Debug, Clone, Default)]
+pub(crate) struct RequestId(pub String);
+
+#[derive(Debug)]
+/// Rejection used for [`RequestId`].
+///
+/// Contains one variant for each way the [`RequestId`] extractor can
+/// fail; `status()` and `message()` turn a variant into the error response.
+pub(crate) enum RequestIdRejection {
+    /// The request does not carry an `X-Request-Id` header at all.
+    MissingRequestId,
+
+    /// The header is present, but converting its value with `to_str()` failed.
+    InvalidUtf8,
+}
+
+impl RequestIdRejection {
+    pub fn status(&self) -> StatusCode {
+        match self {
+            RequestIdRejection::MissingRequestId => StatusCode::INTERNAL_SERVER_ERROR,
+            RequestIdRejection::InvalidUtf8 => StatusCode::BAD_REQUEST,
+        }
+    }
+
+    pub fn message(&self) -> String {
+        match self {
+            RequestIdRejection::MissingRequestId => "request ID is missing",
+            RequestIdRejection::InvalidUtf8 => "request ID is invalid UTF-8",
+        }
+        .to_string()
+    }
+}
+
+impl IntoResponse for RequestIdRejection {
+    fn into_response(self) -> axum::response::Response {
+        JsonResponse::error(self.status(), self.message())
+    }
+}
+
+impl<S> FromRequestParts<S> for RequestId
+where
+    S: Send + Sync,
+{
+    type Rejection = RequestIdRejection;
+
+    /// Reads `X-Request-Id` from the request headers, rejecting the request
+    /// when the header is absent or its value cannot be read as a string.
+    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
+        let Some(value) = parts.headers.get(X_REQUEST_ID) else {
+            return Err(RequestIdRejection::MissingRequestId);
+        };
+        let request_id = value.to_str().map_err(|_| RequestIdRejection::InvalidUtf8)?;
+        Ok(Self(request_id.to_string()))
+    }
+}
+
+impl Deref for RequestId {
+    type Target = String;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for RequestId {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl Display for RequestId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.0)
+    }
+}
--- a/compute_tools/src/http/headers.rs
+++ b/compute_tools/src/http/headers.rs
@@ -0,0 +1,2 @@
+/// Constant for `X-Request-Id` header.
+pub const X_REQUEST_ID: &str = "x-request-id";
--- a/compute_tools/src/http/middleware/authorize.rs
+++ b/compute_tools/src/http/middleware/authorize.rs
@@ -0,0 +1,145 @@
+use std::{collections::HashSet, net::SocketAddr};
+
+use anyhow::{Result, anyhow};
+use axum::{RequestExt, body::Body, extract::ConnectInfo};
+use axum_extra::{
+    TypedHeader,
+    headers::{Authorization, authorization::Bearer},
+};
+use futures::future::BoxFuture;
+use http::{Request, Response, StatusCode};
+use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};
+use serde::Deserialize;
+use tower_http::auth::AsyncAuthorizeRequest;
+use tracing::warn;
+
+use crate::http::{JsonResponse, extract::RequestId};
+
+#[derive(Clone, Debug, Deserialize)]
+pub(in crate::http) struct Claims {
+    compute_id: String,
+}
+
+#[derive(Clone, Debug)]
+pub(in crate::http) struct Authorize {
+    compute_id: String,
+    jwks: JwkSet,
+    validation: Validation,
+}
+
+impl Authorize {
+    pub fn new(compute_id: String, jwks: JwkSet) -> Self {
+        let mut validation = Validation::new(Algorithm::EdDSA);
+        // Nothing is currently required
+        validation.required_spec_claims = HashSet::new();
+        validation.validate_exp = true;
+        // Unused by the control plane
+        validation.validate_aud = false;
+        // Unused by the control plane
+        validation.validate_nbf = false;
+
+        Self {
+            compute_id,
+            jwks,
+            validation,
+        }
+    }
+}
+
+impl AsyncAuthorizeRequest<Body> for Authorize {
+    type RequestBody = Body;
+    type ResponseBody = Body;
+    type Future = BoxFuture<'static, Result<Request<Body>, Response<Self::ResponseBody>>>;
+
+    /// Authorizes `request`. It is let through unchanged when no keys are
+    /// configured or it comes from loopback; otherwise it must carry a bearer
+    /// token that verifies and whose `compute_id` claim matches this compute.
+    fn authorize(&mut self, mut request: Request<Body>) -> Self::Future {
+        let compute_id = self.compute_id.clone();
+        let jwks = self.jwks.clone();
+        let validation = self.validation.clone();
+
+        Box::pin(async move {
+            // Answer with the rejection instead of panicking on a bad request ID.
+            let request_id = request
+                .extract_parts::<RequestId>()
+                .await
+                .map_err(axum::response::IntoResponse::into_response)?;
+
+            // TODO: Remove this check after a successful rollout
+            if jwks.keys.is_empty() {
+                warn!(%request_id, "Authorization has not been configured");
+                return Ok(request);
+            }
+
+            let connect_info = request
+                .extract_parts::<ConnectInfo<SocketAddr>>()
+                .await
+                .unwrap();
+
+            // Requests coming from the loopback interface are always allowed.
+            if connect_info.ip().is_loopback() {
+                warn!(%request_id, "Bypassed authorization because request is coming from the loopback interface");
+                return Ok(request);
+            }
+
+            let TypedHeader(Authorization(bearer)) = request
+                .extract_parts::<TypedHeader<Authorization<Bearer>>>()
+                .await
+                .map_err(|_| {
+                    JsonResponse::error(StatusCode::BAD_REQUEST, "invalid authorization token")
+                })?;
+
+            let data = Self::verify(&jwks, bearer.token(), &validation)
+                .map_err(|e| JsonResponse::error(StatusCode::UNAUTHORIZED, e))?;
+
+            if data.claims.compute_id != compute_id {
+                return Err(JsonResponse::error(
+                    StatusCode::UNAUTHORIZED,
+                    "invalid claims in authorization token",
+                ));
+            }
+
+            // Make claims available to any later middleware or request handlers.
+            request.extensions_mut().insert(data.claims);
+            Ok(request)
+        })
+    }
+}
+
+impl Authorize {
+    /// Verify the token using the JSON Web Key set and return the token data.
+    ///
+    /// Keys are tried in order; the first that validates the token wins, and
+    /// keys that fail are logged and skipped. Errors if no key verifies it.
+    fn verify(jwks: &JwkSet, token: &str, validation: &Validation) -> Result<TokenData<Claims>> {
+        debug_assert!(!jwks.keys.is_empty());
+
+        for jwk in jwks.keys.iter() {
+            // `key_id` is optional on a JWK, so use a placeholder in log messages
+            // instead of unwrapping (and panicking) when it is absent.
+            let key_id = jwk.common.key_id.as_deref().unwrap_or("<unknown>");
+
+            let decoding_key = match DecodingKey::from_jwk(jwk) {
+                Ok(key) => key,
+                Err(e) => {
+                    warn!("Failed to construct decoding key from {}: {}", key_id, e);
+                    continue;
+                }
+            };
+
+            match jsonwebtoken::decode::<Claims>(token, &decoding_key, validation) {
+                Ok(data) => return Ok(data),
+                Err(e) => {
+                    warn!(
+                        "Failed to decode authorization token using {}: {}",
+                        key_id, e
+                    );
+                    continue;
+                }
+            }
+        }
+
+        Err(anyhow!("Failed to verify authorization token"))
+    }
+}
--- a/compute_tools/src/http/middleware/mod.rs
+++ b/compute_tools/src/http/middleware/mod.rs
@@ -0,0 +1 @@
+pub(in crate::http) mod authorize;
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -1,10 +1,14 @@
-use axum::{body::Body, response::Response};
+use axum::body::Body;
+use axum::response::Response;
 use compute_api::responses::{ComputeStatus, GenericAPIError};
-use http::{header::CONTENT_TYPE, StatusCode};
+use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use serde::Serialize;
 use tracing::error;

 mod extract;
+mod headers;
+mod middleware;
 mod routes;
 pub mod server;

--- a/compute_tools/src/http/routes/check_writability.rs
+++ b/compute_tools/src/http/routes/check_writability.rs
@@ -1,10 +1,13 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;

-use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};
+use crate::checker::check_writability;
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Check that the compute is currently running.
 pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -1,18 +1,16 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::ConfigurationRequest,
-    responses::{ComputeStatus, ComputeStatusResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::ConfigurationRequest;
+use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
 use http::StatusCode;
 use tokio::task;
 use tracing::info;

-use crate::{
-    compute::{ComputeNode, ParsedSpec},
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::{ComputeNode, ParsedSpec};
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 // Accept spec in JSON format and request compute configuration. If anything
 // goes wrong after we set the compute status to `ConfigurationPending` and
@@ -24,7 +22,7 @@ pub(in crate::http) async fn configure(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<ConfigurationRequest>,
 ) -> Response {
-    if !compute.live_config_allowed {
+    if !compute.params.live_config_allowed {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "live configuration is not allowed for this compute node".to_string(),
@@ -47,13 +45,18 @@ pub(in crate::http) async fn configure(
            return JsonResponse::invalid_status(state.status);
        }

+        // Pass the tracing span to the main thread that performs the startup,
+        // so that the start_compute operation is considered a child of this
+        // configure request for tracing purposes.
+        state.startup_span = Some(tracing::Span::current());
+
        state.pspec = Some(pspec);
        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
        drop(state);
    }

    // Spawn a blocking thread to wait for compute to become Running. This is
-    // needed to do not block the main pool of workers and be able to serve
+    // needed to not block the main pool of workers and to be able to serve
    // other requests while some particular request is waiting for compute to
    // finish configuration.
    let c = compute.clone();
--- a/compute_tools/src/http/routes/database_schema.rs
+++ b/compute_tools/src/http/routes/database_schema.rs
@@ -1,14 +1,16 @@
 use std::sync::Arc;

-use axum::{body::Body, extract::State, response::Response};
-use http::{header::CONTENT_TYPE, StatusCode};
+use axum::body::Body;
+use axum::extract::State;
+use axum::response::Response;
+use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use serde::Deserialize;

-use crate::{
-    catalog::{get_database_schema, SchemaDumpError},
-    compute::ComputeNode,
-    http::{extract::Query, JsonResponse},
-};
+use crate::catalog::{SchemaDumpError, get_database_schema};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Query;

 #[derive(Debug, Clone, Deserialize)]
 pub(in crate::http) struct DatabaseSchemaParams {
--- a/compute_tools/src/http/routes/dbs_and_roles.rs
+++ b/compute_tools/src/http/routes/dbs_and_roles.rs
@@ -1,9 +1,12 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use http::StatusCode;

-use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};
+use crate::catalog::get_dbs_and_roles;
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Get the databases and roles from the compute.
 pub(in crate::http) async fn get_catalog_objects(
--- a/compute_tools/src/http/routes/extension_server.rs
+++ b/compute_tools/src/http/routes/extension_server.rs
@@ -1,19 +1,13 @@
 use std::sync::Arc;

-use axum::{
-    extract::State,
-    response::{IntoResponse, Response},
-};
+use axum::extract::State;
+use axum::response::{IntoResponse, Response};
 use http::StatusCode;
 use serde::Deserialize;

-use crate::{
-    compute::ComputeNode,
-    http::{
-        extract::{Path, Query},
-        JsonResponse,
-    },
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::{Path, Query};

 #[derive(Debug, Clone, Deserialize)]
 pub(in crate::http) struct ExtensionServerParams {
@@ -24,11 +18,11 @@ pub(in crate::http) struct ExtensionServerParams {
 /// Download a remote extension.
 pub(in crate::http) async fn download_extension(
    Path(filename): Path<String>,
-    params: Query<ExtensionServerParams>,
+    ext_server_params: Query<ExtensionServerParams>,
    State(compute): State<Arc<ComputeNode>>,
 ) -> Response {
    // Don't even try to download extensions if no remote storage is configured
-    if compute.ext_remote_storage.is_none() {
+    if compute.params.ext_remote_storage.is_none() {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "remote storage is not configured",
@@ -52,9 +46,9 @@ pub(in crate::http) async fn download_extension(

        remote_extensions.get_ext(
            &filename,
-            params.is_library,
-            &compute.build_tag,
-            &compute.pgversion,
+            ext_server_params.is_library,
+            &compute.params.build_tag,
+            &compute.params.pgversion,
        )
    };

--- a/compute_tools/src/http/routes/extensions.rs
+++ b/compute_tools/src/http/routes/extensions.rs
@@ -1,16 +1,14 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::ExtensionInstallRequest,
-    responses::{ComputeStatus, ExtensionInstallResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::ExtensionInstallRequest;
+use compute_api::responses::{ComputeStatus, ExtensionInstallResponse};
 use http::StatusCode;

-use crate::{
-    compute::ComputeNode,
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Install a extension.
 pub(in crate::http) async fn install_extension(
--- a/compute_tools/src/http/routes/failpoints.rs
+++ b/compute_tools/src/http/routes/failpoints.rs
@@ -17,7 +17,8 @@ pub struct FailpointConfig {
    pub actions: String,
 }

-use crate::http::{extract::Json, JsonResponse};
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Configure failpoints for testing purposes.
 pub(in crate::http) async fn configure_failpoints(
--- a/compute_tools/src/http/routes/grants.rs
+++ b/compute_tools/src/http/routes/grants.rs
@@ -1,16 +1,14 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::SetRoleGrantsRequest,
-    responses::{ComputeStatus, SetRoleGrantsResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::SetRoleGrantsRequest;
+use compute_api::responses::{ComputeStatus, SetRoleGrantsResponse};
 use http::StatusCode;

-use crate::{
-    compute::ComputeNode,
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Add grants for a role.
 pub(in crate::http) async fn add_grant(
--- a/compute_tools/src/http/routes/insights.rs
+++ b/compute_tools/src/http/routes/insights.rs
@@ -1,10 +1,12 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Collect current Postgres usage insights.
 pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/metrics.rs
+++ b/compute_tools/src/http/routes/metrics.rs
@@ -1,10 +1,12 @@
-use axum::{body::Body, response::Response};
-use http::header::CONTENT_TYPE;
+use axum::body::Body;
+use axum::response::Response;
 use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use metrics::proto::MetricFamily;
 use metrics::{Encoder, TextEncoder};

-use crate::{http::JsonResponse, metrics::collect};
+use crate::http::JsonResponse;
+use crate::metrics::collect;

 /// Expose Prometheus metrics.
 pub(in crate::http) async fn get_metrics() -> Response {
--- a/compute_tools/src/http/routes/metrics_json.rs
+++ b/compute_tools/src/http/routes/metrics_json.rs
@@ -1,9 +1,11 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use http::StatusCode;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Get startup metrics.
 pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/status.rs
+++ b/compute_tools/src/http/routes/status.rs
@@ -1,9 +1,13 @@
-use std::{ops::Deref, sync::Arc};
+use std::ops::Deref;
+use std::sync::Arc;

-use axum::{extract::State, http::StatusCode, response::Response};
+use axum::extract::State;
+use axum::http::StatusCode;
+use axum::response::Response;
 use compute_api::responses::ComputeStatusResponse;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Retrieve the state of the comute.
 pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/terminate.rs
+++ b/compute_tools/src/http/routes/terminate.rs
@@ -1,18 +1,14 @@
 use std::sync::Arc;

-use axum::{
-    extract::State,
-    response::{IntoResponse, Response},
-};
+use axum::extract::State;
+use axum::response::{IntoResponse, Response};
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;
 use tokio::task;
 use tracing::info;

-use crate::{
-    compute::{forward_termination_signal, ComputeNode},
-    http::JsonResponse,
-};
+use crate::compute::{ComputeNode, forward_termination_signal};
+use crate::http::JsonResponse;

 /// Terminate the compute.
 pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -1,60 +1,67 @@
-use std::{
-    fmt::Display,
-    net::{IpAddr, Ipv6Addr, SocketAddr},
-    sync::Arc,
-    time::Duration,
-};
+use std::fmt::Display;
+use std::net::{IpAddr, Ipv6Addr, SocketAddr};
+use std::sync::Arc;
+use std::time::Duration;

 use anyhow::Result;
-use axum::{
-    extract::Request,
-    middleware::{self, Next},
-    response::{IntoResponse, Response},
-    routing::{get, post},
-    Router,
-};
+use axum::Router;
+use axum::extract::Request;
+use axum::middleware::{self, Next};
+use axum::response::{IntoResponse, Response};
+use axum::routing::{get, post};
 use http::StatusCode;
+use jsonwebtoken::jwk::JwkSet;
 use tokio::net::TcpListener;
 use tower::ServiceBuilder;
-use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
-use tracing::{debug, error, info, Span};
+use tower_http::{
+    auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer,
+};
+use tracing::{Span, error, info};
 use uuid::Uuid;

-use super::routes::{
-    check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
-    grants, insights, metrics, metrics_json, status, terminate,
+use super::{
+    headers::X_REQUEST_ID,
+    middleware::authorize::Authorize,
+    routes::{
+        check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
+        grants, insights, metrics, metrics_json, status, terminate,
+    },
 };
 use crate::compute::ComputeNode;

-const X_REQUEST_ID: &str = "x-request-id";
-
 /// `compute_ctl` has two servers: internal and external. The internal server
 /// binds to the loopback interface and handles communication from clients on
 /// the compute. The external server is what receives communication from the
 /// control plane, the metrics scraper, etc. We make the distinction because
 /// certain routes in `compute_ctl` only need to be exposed to local processes
 /// like Postgres via the neon extension and local_proxy.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Debug)]
 pub enum Server {
-    Internal(u16),
-    External(u16),
+    Internal {
+        port: u16,
+    },
+    External {
+        port: u16,
+        jwks: JwkSet,
+        compute_id: String,
+    },
 }

 impl Display for Server {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
-            Server::Internal(_) => f.write_str("internal"),
-            Server::External(_) => f.write_str("external"),
+            Server::Internal { .. } => f.write_str("internal"),
+            Server::External { .. } => f.write_str("external"),
        }
    }
 }

-impl From<Server> for Router<Arc<ComputeNode>> {
-    fn from(server: Server) -> Self {
+impl From<&Server> for Router<Arc<ComputeNode>> {
+    fn from(server: &Server) -> Self {
        let mut router = Router::<Arc<ComputeNode>>::new();

        router = match server {
-            Server::Internal(_) => {
+            Server::Internal { .. } => {
                router = router
                    .route(
                        "/extension_server/{*filename}",
@@ -72,58 +79,71 @@ impl From<Server> for Router<Arc<ComputeNode>> {

                router
            }
-            Server::External(_) => router
-                .route("/check_writability", post(check_writability::is_writable))
-                .route("/configure", post(configure::configure))
-                .route("/database_schema", get(database_schema::get_schema_dump))
-                .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
-                .route("/insights", get(insights::get_insights))
-                .route("/metrics", get(metrics::get_metrics))
-                .route("/metrics.json", get(metrics_json::get_metrics))
-                .route("/status", get(status::get_status))
-                .route("/terminate", post(terminate::terminate)),
+            Server::External {
+                jwks, compute_id, ..
+            } => {
+                let unauthenticated_router =
+                    Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
+
+                let authenticated_router = Router::<Arc<ComputeNode>>::new()
+                    .route("/check_writability", post(check_writability::is_writable))
+                    .route("/configure", post(configure::configure))
+                    .route("/database_schema", get(database_schema::get_schema_dump))
+                    .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
+                    .route("/insights", get(insights::get_insights))
+                    .route("/metrics.json", get(metrics_json::get_metrics))
+                    .route("/status", get(status::get_status))
+                    .route("/terminate", post(terminate::terminate))
+                    .layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
+                        compute_id.clone(),
+                        jwks.clone(),
+                    )));
+
+                router
+                    .merge(unauthenticated_router)
+                    .merge(authenticated_router)
+            }
        };

-        router.fallback(Server::handle_404).method_not_allowed_fallback(Server::handle_405).layer(
-            ServiceBuilder::new()
-                // Add this middleware since we assume the request ID exists
-                .layer(middleware::from_fn(maybe_add_request_id_header))
-                .layer(
-                    TraceLayer::new_for_http()
-                        .on_request(|request: &http::Request<_>, _span: &Span| {
-                            let request_id = request
-                                .headers()
-                                .get(X_REQUEST_ID)
-                                .unwrap()
-                                .to_str()
-                                .unwrap();
-
-                            match request.uri().path() {
-                                "/metrics" => {
-                                    debug!(%request_id, "{} {}", request.method(), request.uri())
-                                }
-                                _ => info!(%request_id, "{} {}", request.method(), request.uri()),
-                            };
-                        })
-                        .on_response(
-                            |response: &http::Response<_>, latency: Duration, _span: &Span| {
-                                let request_id = response
+        router
+            .fallback(Server::handle_404)
+            .method_not_allowed_fallback(Server::handle_405)
+            .layer(
+                ServiceBuilder::new()
+                    .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO))
+                    // Add this middleware since we assume the request ID exists
+                    .layer(middleware::from_fn(maybe_add_request_id_header))
+                    .layer(
+                        TraceLayer::new_for_http()
+                            .on_request(|request: &http::Request<_>, _span: &Span| {
+                                let request_id = request
                                    .headers()
                                    .get(X_REQUEST_ID)
                                    .unwrap()
                                    .to_str()
                                    .unwrap();

-                                info!(
-                                    %request_id,
-                                    code = response.status().as_u16(),
-                                    latency = latency.as_millis()
-                                )
-                            },
-                        ),
-                )
-                .layer(PropagateRequestIdLayer::x_request_id()),
-        )
+                                info!(%request_id, "{} {}", request.method(), request.uri());
+                            })
+                            .on_response(
+                                |response: &http::Response<_>, latency: Duration, _span: &Span| {
+                                    let request_id = response
+                                        .headers()
+                                        .get(X_REQUEST_ID)
+                                        .unwrap()
+                                        .to_str()
+                                        .unwrap();
+
+                                    info!(
+                                        %request_id,
+                                        code = response.status().as_u16(),
+                                        latency = latency.as_millis()
+                                    );
+                                },
+                            ),
+                    )
+                    .layer(PropagateRequestIdLayer::x_request_id()),
+            )
    }
 }

@@ -147,15 +167,15 @@ impl Server {
        match self {
            // TODO: Change this to Ipv6Addr::LOCALHOST when the GitHub runners
            // allow binding to localhost
-            Server::Internal(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED),
-            Server::External(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED),
+            Server::Internal { .. } => IpAddr::from(Ipv6Addr::UNSPECIFIED),
+            Server::External { .. } => IpAddr::from(Ipv6Addr::UNSPECIFIED),
        }
    }

-    fn port(self) -> u16 {
+    fn port(&self) -> u16 {
        match self {
-            Server::Internal(port) => port,
-            Server::External(port) => port,
+            Server::Internal { port, .. } => *port,
+            Server::External { port, .. } => *port,
        }
    }

@@ -182,7 +202,9 @@ impl Server {
            );
        }

-        let router = Router::from(self).with_state(compute);
+        let router = Router::from(&self)
+            .with_state(compute)
+            .into_make_service_with_connect_info::<SocketAddr>();

        if let Err(e) = axum::serve(listener, router).await {
            error!("compute_ctl {} HTTP server error: {}", self, e);
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -1,7 +1,7 @@
-use compute_api::responses::{InstalledExtension, InstalledExtensions};
 use std::collections::HashMap;

 use anyhow::Result;
+use compute_api::responses::{InstalledExtension, InstalledExtensions};
 use postgres::{Client, NoTls};

 use crate::metrics::INSTALLED_EXTENSIONS;
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -21,6 +21,7 @@ mod migration;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
+pub mod rsyslog;
 pub mod spec;
 mod spec_apply;
 pub mod swap;
--- a/compute_tools/src/logger.rs
+++ b/compute_tools/src/logger.rs
@@ -1,3 +1,5 @@
+use std::collections::HashMap;
+use tracing::info;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::prelude::*;

@@ -42,3 +44,50 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result
 pub fn inlinify(s: &str) -> String {
    s.replace('\n', "\u{200B}")
 }
+
+pub fn startup_context_from_env() -> Option<opentelemetry::Context> {
+    // Extract OpenTelemetry context for the startup actions from the
+    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
+    // tracing context.
+    //
+    // This is used to propagate the context for the 'start_compute' operation
+    // from the neon control plane. This allows linking together the wider
+    // 'start_compute' operation that creates the compute container, with the
+    // startup actions here within the container.
+    //
+    // There is no standard for passing context in env variables, but a lot of
+    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
+    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
+    //
+    // Switch to the startup context here, and exit it once the startup has
+    // completed and Postgres is up and running.
+    //
+    // If this pod is pre-created without binding it to any particular endpoint
+    // yet, this isn't the right place to enter the startup context. In that
+    // case, the control plane should pass the tracing context as part of the
+    // /configure API call.
+    //
+    // NOTE: This is supposed to only cover the *startup* actions. Once
+    // postgres is configured and up-and-running, we exit this span. Any other
+    // actions that are performed on incoming HTTP requests, for example, are
+    // performed in separate spans.
+    //
+    // XXX: If the pod is restarted, we perform the startup actions in the same
+    // context as the original startup actions, which probably doesn't make
+    // sense.
+    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
+    if let Ok(val) = std::env::var("TRACEPARENT") {
+        startup_tracing_carrier.insert("traceparent".to_string(), val);
+    }
+    if let Ok(val) = std::env::var("TRACESTATE") {
+        startup_tracing_carrier.insert("tracestate".to_string(), val);
+    }
+    if !startup_tracing_carrier.is_empty() {
+        use opentelemetry::propagation::TextMapPropagator;
+        use opentelemetry_sdk::propagation::TraceContextPropagator;
+        info!("got startup tracing context from env variables");
+        Some(TraceContextPropagator::new().extract(&startup_tracing_carrier))
+    } else {
+        None
+    }
+}
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -1,17 +1,15 @@
-use anyhow::bail;
-use anyhow::Result;
-use postgres::{NoTls, SimpleQueryMessage};
-use std::time::SystemTime;
-use std::{str::FromStr, sync::Arc, thread, time::Duration};
-use utils::id::TenantId;
-use utils::id::TimelineId;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::thread;
+use std::time::{Duration, SystemTime};

+use anyhow::{Result, bail};
 use compute_api::spec::ComputeMode;
+use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
-use utils::{
-    lsn::Lsn,
-    shard::{ShardCount, ShardNumber, TenantShardId},
-};
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;
+use utils::shard::{ShardCount, ShardNumber, TenantShardId};

 use crate::compute::ComputeNode;

--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -1,6 +1,6 @@
 use metrics::core::Collector;
 use metrics::proto::MetricFamily;
-use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
+use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
 use once_cell::sync::Lazy;

 pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
@@ -54,9 +54,7 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(||
    register_int_counter_vec!(
        "compute_ctl_remote_ext_requests_total",
        "Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
-        // Do not use any labels like extension name yet.
-        // We can add them later if needed.
-        &["http_status"]
+        &["http_status", "filename"]
    )
    .expect("failed to define a metric")
 });
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,13 +1,14 @@
 use std::sync::Arc;
-use std::{thread, time::Duration};
+use std::thread;
+use std::time::Duration;

 use chrono::{DateTime, Utc};
+use compute_api::responses::ComputeStatus;
+use compute_api::spec::ComputeFeature;
 use postgres::{Client, NoTls};
 use tracing::{debug, error, info, warn};

 use crate::compute::ComputeNode;
-use compute_api::responses::ComputeStatus;
-use compute_api::spec::ComputeFeature;

 const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);

@@ -17,7 +18,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
 // should be handled gracefully.
 fn watch_compute_activity(compute: &ComputeNode) {
    // Suppose that `connstr` doesn't change
-    let connstr = compute.connstr.clone();
+    let connstr = compute.params.connstr.clone();
    let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));

    // During startup and configuration we connect to every Postgres database,
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -9,7 +9,8 @@ use std::process::Child;
 use std::str::FromStr;
 use std::time::{Duration, Instant};

-use anyhow::{bail, Result};
+use anyhow::{Result, bail};
+use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 use futures::StreamExt;
 use ini::Ini;
 use notify::{RecursiveMode, Watcher};
@@ -21,8 +22,6 @@ use tokio_postgres;
 use tokio_postgres::NoTls;
 use tracing::{debug, error, info, instrument};

-use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
-
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

 /// Escape a string for including it in a SQL literal.
@@ -187,15 +186,40 @@ impl DatabaseExt for Database {
 /// Postgres SQL queries and DATABASE_URL.
 pub trait Escaping {
    fn pg_quote(&self) -> String;
+    fn pg_quote_dollar(&self) -> (String, String);
 }

 impl Escaping for PgIdent {
    /// This is intended to mimic Postgres quote_ident(), but for simplicity it
    /// always quotes provided string with `""` and escapes every `"`.
    /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
+    /// N.B. it's not suitable for escaping values that are used inside a
+    /// WHERE clause; use `escape_literal()` instead.
    fn pg_quote(&self) -> String {
-        let result = format!("\"{}\"", self.replace('"', "\"\""));
-        result
+        format!("\"{}\"", self.replace('"', "\"\""))
+    }
+
+    /// This helper is intended to be used for dollar-escaping strings for usage
+    /// inside PL/pgSQL procedures. In addition to dollar-escaping the string,
+    /// it also returns a tag that is intended to be used inside the outer
+    /// PL/pgSQL procedure. If you do not need an outer tag, just discard it.
+    /// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,
+    /// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>
+    fn pg_quote_dollar(&self) -> (String, String) {
+        let mut tag: String = "".to_string();
+        let mut outer_tag = "x".to_string();
+
+        // Find the first suitable tag that is not present in the string.
+        // Postgres' max role/DB name length is 63 bytes, so even in the
+        // worst case it won't take long.
+        while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) {
+            tag += "x";
+            outer_tag = tag.clone() + "x";
+        }
+
+        let escaped = format!("${tag}${self}${tag}$");
+
+        (escaped, outer_tag)
    }
 }

@@ -227,10 +251,13 @@ pub async fn get_existing_dbs_async(
    // invalid state. See:
    //   https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
    let rowstream = client
+        // We use a subquery instead of a fancy `datdba::regrole::text AS owner`,
+        // because the latter automatically wraps the result in double quotes,
+        // if the role name contains special characters.
        .query_raw::<str, &String, &[String; 0]>(
            "SELECT
                datname AS name,
-                datdba::regrole::text AS owner,
+                (SELECT rolname FROM pg_roles WHERE oid = datdba) AS owner,
                NOT datallowconn AS restrict_conn,
                datconnlimit = - 2 AS invalid
            FROM
--- a/compute_tools/src/rsyslog.rs
+++ b/compute_tools/src/rsyslog.rs
@@ -0,0 +1,77 @@
+use std::process::Command;
+use std::{fs::OpenOptions, io::Write};
+
+use anyhow::{Context, Result};
+use tracing::info;
+
+fn get_rsyslog_pid() -> Option<String> {
+    let output = Command::new("pgrep")
+        .arg("rsyslogd")
+        .output()
+        .expect("Failed to execute pgrep");
+
+    if !output.stdout.is_empty() {
+        let pid = std::str::from_utf8(&output.stdout)
+            .expect("Invalid UTF-8 in process output")
+            .trim()
+            .to_string();
+        Some(pid)
+    } else {
+        None
+    }
+}
+
+// Restart rsyslogd to apply the new configuration.
+// This is necessary because there is no other way to reload the rsyslog configuration.
+//
+// Rsyslogd shouldn't lose any messages across the restart,
+// because it tracks the last read position in the log files
+// and will continue reading from that position.
+// TODO: test it properly
+//
+fn restart_rsyslog() -> Result<()> {
+    let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?;
+    info!("rsyslogd is running with pid: {}, restart it", old_pid);
+
+    // kill it to restart
+    let _ = Command::new("pkill")
+        .arg("rsyslogd")
+        .output()
+        .context("Failed to stop rsyslogd")?;
+
+    Ok(())
+}
+
+pub fn configure_audit_rsyslog(
+    log_directory: &str,
+    tag: &str,
+    remote_endpoint: &str,
+) -> Result<()> {
+    let config_content: String = format!(
+        include_str!("config_template/compute_audit_rsyslog_template.conf"),
+        log_directory = log_directory,
+        tag = tag,
+        remote_endpoint = remote_endpoint
+    );
+
+    info!("rsyslog config_content: {}", config_content);
+
+    let rsyslog_conf_path = "/etc/rsyslog.d/compute_audit_rsyslog.conf";
+    let mut file = OpenOptions::new()
+        .create(true)
+        .write(true)
+        .truncate(true)
+        .open(rsyslog_conf_path)?;
+
+    file.write_all(config_content.as_bytes())?;
+
+    info!(
+        "rsyslog configuration file {} added successfully. Starting rsyslogd",
+        rsyslog_conf_path
+    );
+
+    // start the service, using the configuration
+    restart_rsyslog()?;
+
+    Ok(())
+}
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,20 +1,20 @@
-use anyhow::{anyhow, bail, Result};
-use reqwest::StatusCode;
 use std::fs::File;
 use std::path::Path;
-use tokio_postgres::Client;
-use tracing::{error, info, instrument, warn};
-
-use crate::config;
-use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
-use crate::migration::MigrationRunner;
-use crate::params::PG_HBA_ALL_MD5;
-use crate::pg_helpers::*;

+use anyhow::{Result, anyhow, bail};
 use compute_api::responses::{
    ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
 };
 use compute_api::spec::ComputeSpec;
+use reqwest::StatusCode;
+use tokio_postgres::Client;
+use tracing::{error, info, instrument, warn};
+
+use crate::config;
+use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
+use crate::migration::MigrationRunner;
+use crate::params::PG_HBA_ALL_MD5;
+use crate::pg_helpers::*;

 // Do control plane request and return response if any. In case of error it
 // returns a bool flag indicating whether it makes sense to retry the request
@@ -141,7 +141,6 @@ pub fn get_spec_from_control_plane(
 /// Check `pg_hba.conf` and update if needed to allow external connections.
 pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
-    info!("checking pg_hba.conf");
    let pghba_path = pgdata_path.join("pg_hba.conf");

    if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
@@ -156,12 +155,11 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
 /// Create a standby.signal file
 pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
-    info!("adding standby.signal");
    let signalfile = pgdata_path.join("standby.signal");

    if !signalfile.exists() {
-        info!("created standby.signal");
        File::create(signalfile)?;
+        info!("created standby.signal");
    } else {
        info!("reused pre-existing standby.signal");
    }
@@ -170,7 +168,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {

 #[instrument(skip_all)]
 pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
-    info!("handle neon extension upgrade");
    let query = "ALTER EXTENSION neon UPDATE";
    info!("update neon extension version with query: {}", query);
    client.simple_query(query).await?;
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -1,18 +1,430 @@
 use std::collections::{HashMap, HashSet};
 use std::fmt::{Debug, Formatter};
 use std::future::Future;
-use std::iter::empty;
-use std::iter::once;
+use std::iter::{empty, once};
 use std::sync::Arc;

-use crate::compute::construct_superuser_query;
-use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
-use anyhow::Result;
-use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
+use anyhow::{Context, Result};
+use compute_api::responses::ComputeStatus;
+use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role};
 use futures::future::join_all;
 use tokio::sync::RwLock;
 use tokio_postgres::Client;
-use tracing::{debug, info_span, warn, Instrument};
+use tokio_postgres::error::SqlState;
+use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
+
+use crate::compute::{ComputeNode, ComputeState};
+use crate::pg_helpers::{
+    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
+    get_existing_roles_async,
+};
+use crate::spec_apply::ApplySpecPhase::{
+    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
+    CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
+    DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
+    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
+    RunInEachDatabase,
+};
+use crate::spec_apply::PerDatabasePhase::{
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
+};
+
+impl ComputeNode {
+    /// Apply the spec to the running PostgreSQL instance.
+    /// The caller can decide to run with multiple clients in parallel, or
+    /// single mode.  Either way, the commands executed will be the same, and
+    /// only commands run in different databases are parallelized.
+    #[instrument(skip_all)]
+    pub fn apply_spec_sql(
+        &self,
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        concurrency: usize,
+    ) -> Result<()> {
+        info!("Applying config with max {} concurrency", concurrency);
+        debug!("Config: {:?}", spec);
+
+        let rt = tokio::runtime::Handle::current();
+        rt.block_on(async {
+            // Proceed with post-startup configuration. Note that the order of operations is important.
+            let client = Self::get_maintenance_client(&conf).await?;
+            let spec = spec.clone();
+
+            let databases = get_existing_dbs_async(&client).await?;
+            let roles = get_existing_roles_async(&client)
+                .await?
+                .into_iter()
+                .map(|role| (role.name.clone(), role))
+                .collect::<HashMap<String, Role>>();
+
+            // Check if we need to drop subscriptions before starting the endpoint.
+            //
+            // It is important to do this operation exactly once when endpoint starts on a new branch.
+            // Otherwise, we may drop subscriptions that were newly created rather than inherited.
+            //
+            // We cannot rely only on spec.drop_subscriptions_before_start flag,
+            // because if for some reason compute restarts inside VM,
+            // it will start again with the same spec and flag value.
+            //
+            // To handle this, we save the fact of the operation in the database
+            // in the neon.drop_subscriptions_done table.
+            // If the table does not exist, we assume that the operation was never performed, so we must do it.
+            // If the table exists, we check if the operation was performed on the current timeline.
+            //
+            let mut drop_subscriptions_done = false;
+
+            if spec.drop_subscriptions_before_start {
+                let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
+                let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
+
+                info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
+
+                drop_subscriptions_done =  match
+                    client.simple_query(&query).await {
+                    Ok(result) => {
+                        matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
+                    },
+                    Err(e) =>
+                    {
+                        match e.code() {
+                            Some(&SqlState::UNDEFINED_TABLE) => false,
+                            _ => {
+                                // We don't expect any other error here, except for the schema/table not existing
+                                error!("Error checking if drop subscription operation was already performed: {}", e);
+                                return Err(e.into());
+                            }
+                        }
+                    }
+                }
+            };
+
+
+            let jwks_roles = Arc::new(
+                spec.as_ref()
+                    .local_proxy_config
+                    .iter()
+                    .flat_map(|it| &it.jwks)
+                    .flatten()
+                    .flat_map(|setting| &setting.role_names)
+                    .cloned()
+                    .collect::<HashSet<_>>(),
+            );
+
+            let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
+                roles,
+                dbs: databases,
+            }));
+
+            // Apply the special pre-drop-database phase.
+            // NOTE: we use the code of RunInEachDatabase phase for parallelism
+            // and connection management, but we don't really run it in *each* database,
+            // only in the databases we're about to drop.
+            info!("Applying PerDatabase (pre-dropdb) phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            // Run the phase for each database that we're about to drop.
+            let db_processes = spec
+                .delta_operations
+                .iter()
+                .flatten()
+                .filter_map(move |op| {
+                    if op.action.as_str() == "delete_db" {
+                        Some(op.name.clone())
+                    } else {
+                        None
+                    }
+                })
+                .map(|dbname| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    // We only need dbname field for this phase, so set other fields to dummy values
+                    let db = DB::UserDB(Database {
+                        name: dbname.clone(),
+                        owner: "cloud_admin".to_string(),
+                        options: None,
+                        restrict_conn: false,
+                        invalid: false,
+                    });
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        [DropLogicalSubscriptions].to_vec(),
+                    );
+
+                    Ok(tokio::spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                if let Err(e) = handle.await? {
+                    // Handle the error case where the database does not exist.
+                    // We do not check whether the DB exists or not in the deletion phase,
+                    // so we shouldn't be strict about it in pre-deletion cleanup either.
+                    if e.to_string().contains("does not exist") {
+                        warn!("Error dropping subscription: {}", e);
+                    } else {
+                        return Err(e);
+                    }
+                };
+            }
+
+            for phase in [
+                CreateNeonSuperuser,
+                DropInvalidDatabases,
+                RenameRoles,
+                CreateAndAlterRoles,
+                RenameAndDeleteDatabases,
+                CreateAndAlterDatabases,
+                CreateSchemaNeon,
+            ] {
+                info!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            info!("Applying RunInEachDatabase2 phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            let db_processes = spec
+                .cluster
+                .databases
+                .iter()
+                .map(|db| DB::new(db.clone()))
+                // also include the system database
+                .chain(once(DB::SystemDB))
+                .map(|db| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    let db = db.clone();
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let mut phases = vec![
+                        DeleteDBRoleReferences,
+                        ChangeSchemaPerms,
+                        HandleAnonExtension,
+                    ];
+
+                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                        info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                        phases.push(DropLogicalSubscriptions);
+                    }
+
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        phases,
+                    );
+
+                    Ok(tokio::spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                handle.await??;
+            }
+
+            let mut phases = vec![
+                HandleOtherExtensions,
+                HandleNeonExtension, // This step depends on CreateSchemaNeon
+                CreateAvailabilityCheck,
+                DropRoles,
+            ];
+
+            // This step depends on CreateSchemaNeon
+            if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                phases.push(FinalizeDropLogicalSubscriptions);
+            }
+
+            // Keep DisablePostgresDBPgAudit phase at the end,
+            // so that all config operations are audit logged.
+            match spec.audit_log_level
+            {
+                ComputeAudit::Hipaa => {
+                    phases.push(CreatePgauditExtension);
+                    phases.push(CreatePgauditlogtofileExtension);
+                    phases.push(DisablePostgresDBPgAudit);
+                }
+                ComputeAudit::Log => { /* not implemented yet */ }
+                ComputeAudit::Disabled => {}
+            }
+
+            for phase in phases {
+                debug!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            Ok::<(), anyhow::Error>(())
+        })?;
+
+        Ok(())
+    }
+
+    /// Apply SQL migrations of the RunInEachDatabase phase.
+    ///
+    /// May opt to not connect to databases that don't have any scheduled
+    /// operations.  The function is concurrency-controlled with the provided
+    /// semaphore.  The caller has to make sure the semaphore isn't exhausted.
+    ///
+    /// Each entry of `subphases` is applied in order, wrapped as
+    /// `RunInEachDatabase { db, subphase }` and handed to `apply_operations`.
+    /// All subphases share a single maintenance client built from `conf`,
+    /// which is only created the first time `apply_operations` asks for one.
+    ///
+    /// Returns an error as soon as acquiring the semaphore permit, creating
+    /// the client, or applying a subphase fails; later subphases are not run.
+    async fn apply_spec_sql_db(
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
+        jwks_roles: Arc<HashSet<String>>,
+        concurrency_token: Arc<tokio::sync::Semaphore>,
+        db: DB,
+        subphases: Vec<PerDatabasePhase>,
+    ) -> Result<()> {
+        // Wait for a concurrency slot; the permit is held in `_permit` until
+        // this function returns.
+        let _permit = concurrency_token.acquire().await?;
+
+        // Lazily initialised client, filled in by the closure below on first use
+        // and reused by every following subphase.
+        let mut client_conn = None;
+
+        for subphase in subphases {
+            apply_operations(
+                spec.clone(),
+                ctx.clone(),
+                jwks_roles.clone(),
+                RunInEachDatabase {
+                    db: db.clone(),
+                    subphase,
+                },
+                // Only connect if apply_operation actually wants a connection.
+                // It's quite possible this database doesn't need any queries,
+                // so by not connecting we save time and effort connecting to
+                // that database.
+                || async {
+                    if client_conn.is_none() {
+                        let db_client = Self::get_maintenance_client(&conf).await?;
+                        client_conn.replace(db_client);
+                    }
+                    // Safe to unwrap: the branch above guarantees `Some`.
+                    let client = client_conn.as_ref().unwrap();
+                    Ok(client)
+                },
+            )
+            .await?;
+        }
+
+        // Explicitly drop the client (if one was ever created) before returning.
+        drop(client_conn);
+
+        Ok::<(), anyhow::Error>(())
+    }
+
+    /// Choose how many concurrent connections to use for applying the spec changes.
+    ///
+    /// * `ComputeStatus::Init`: the limit is derived from `max_connections`,
+    ///   looked up first in `spec.cluster.settings` and, if absent there, in the
+    ///   raw `spec.cluster.postgresql_conf` text. Values below 10 are used as-is
+    ///   (but never less than 1), values in `10..30` give 10, and larger values
+    ///   give one third of `max_connections`. Without a usable setting the
+    ///   result is 10.
+    /// * any other status: one less than `superuser_reserved_connections`
+    ///   (at least 1), or 3 if that setting is missing or not a number.
+    ///
+    /// The returned value is always at least 1.
+    pub fn max_service_connections(
+        &self,
+        compute_state: &ComputeState,
+        spec: &ComputeSpec,
+    ) -> usize {
+        // If the cluster is in Init state we don't have to deal with user connections,
+        // and can thus use all `max_connections` connection slots. However, that's generally not
+        // very efficient, so we generally still limit it to a smaller number.
+        if compute_state.status == ComputeStatus::Init {
+            // If the settings contain 'max_connections', use that as template
+            let from_settings = spec.cluster.settings.find("max_connections");
+            let max_connections = if let Some(config) = from_settings {
+                config.parse::<usize>().ok()
+            } else {
+                // Otherwise, try to find the setting in the postgresql_conf string
+                spec.cluster
+                    .postgresql_conf
+                    .iter()
+                    .flat_map(|conf| conf.split('\n'))
+                    .filter_map(|line| {
+                        // Lines without `=` and lines for other keys (including
+                        // commented-out `#max_connections`) are skipped.
+                        let (key, value) = line.split_once('=')?;
+                        if key.trim() != "max_connections" {
+                            return None;
+                        }
+
+                        value.trim().parse::<usize>().ok()
+                    })
+                    // PostgreSQL ignores all but the last entry when a parameter
+                    // appears several times in a configuration file, so the last
+                    // parseable occurrence is the one to follow.
+                    .last()
+            };
+
+            max_connections
+                // If max_connections is present, use at most 1/3rd of that.
+                // When max_connections is lower than 30, try to use at least 10 connections, but
+                // never more than max_connections.
+                .map(|limit| match limit {
+                    // Never return 0: this value presumably sizes the semaphore
+                    // that gates the per-database tasks, and a semaphore with no
+                    // permits would make every task wait forever.
+                    0 => 1,
+                    1..10 => limit,
+                    10..30 => 10,
+                    30.. => limit / 3,
+                })
+                // If we didn't find max_connections, default to 10 concurrent connections.
+                .unwrap_or(10)
+        } else {
+            // state == Running
+            // Because the cluster is already in the Running state, we should assume users are
+            // already connected to the cluster, and high concurrency could negatively
+            // impact user connectivity. Therefore, we can limit concurrency to the number of
+            // reserved superuser connections, which users wouldn't be able to use anyway.
+            spec.cluster
+                .settings
+                .find("superuser_reserved_connections")
+                .and_then(|val| val.parse::<usize>().ok())
+                // Leave one reserved slot free, but always allow at least one connection.
+                .map(|val| if val > 1 { val - 1 } else { 1 })
+                .unwrap_or(3)
+        }
+    }
+}

 #[derive(Clone)]
 pub enum DB {
@@ -57,7 +469,7 @@ pub enum PerDatabasePhase {

 #[derive(Clone, Debug)]
 pub enum ApplySpecPhase {
-    CreateSuperUser,
+    CreateNeonSuperuser,
    DropInvalidDatabases,
    RenameRoles,
    CreateAndAlterRoles,
@@ -65,6 +477,9 @@ pub enum ApplySpecPhase {
    CreateAndAlterDatabases,
    CreateSchemaNeon,
    RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
+    CreatePgauditExtension,
+    CreatePgauditlogtofileExtension,
+    DisablePostgresDBPgAudit,
    HandleOtherExtensions,
    HandleNeonExtension,
    CreateAvailabilityCheck,
@@ -181,14 +596,10 @@ async fn get_operations<'a>(
    apply_spec_phase: &'a ApplySpecPhase,
 ) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
    match apply_spec_phase {
-        ApplySpecPhase::CreateSuperUser => {
-            let query = construct_superuser_query(spec);
-
-            Ok(Box::new(once(Operation {
-                query,
-                comment: None,
-            })))
-        }
+        ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
+            query: include_str!("sql/create_neon_superuser.sql").to_string(),
+            comment: None,
+        }))),
        ApplySpecPhase::DropInvalidDatabases => {
            let mut ctx = ctx.write().await;
            let databases = &mut ctx.dbs;
@@ -322,14 +733,15 @@ async fn get_operations<'a>(
                        // We do not check whether the DB exists or not,
                        // Postgres will take care of it for us
                        "delete_db" => {
+                            let (db_name, outer_tag) = op.name.pg_quote_dollar();
                            // In Postgres we can't drop a database if it is a template.
                            // So we need to unset the template flag first, but it could
                            // be a retry, so we could've already dropped the database.
                            // Check that database exists first to make it idempotent.
                            let unset_template_query: String = format!(
                                include_str!("sql/unset_template_for_drop_dbs.sql"),
-                                datname_str = escape_literal(&op.name),
-                                datname = &op.name.pg_quote()
+                                datname = db_name,
+                                outer_tag = outer_tag,
                            );

                            // Use FORCE to drop database even if there are active connections.
@@ -436,6 +848,8 @@ async fn get_operations<'a>(
                                comment: None,
                            },
                            Operation {
+                                // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
+                                // (see https://www.postgresql.org/docs/current/ddl-priv.html)
                                query: format!(
                                    "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
                                    db.name.pg_quote()
@@ -474,7 +888,10 @@ async fn get_operations<'a>(
                let edb = match databases.get(&db.name) {
                    Some(edb) => edb,
                    None => {
-                        warn!("skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL", subphase, db.name);
+                        warn!(
+                            "skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL",
+                            subphase, db.name
+                        );
                        return Ok(Box::new(empty()));
                    }
                };
@@ -492,9 +909,11 @@ async fn get_operations<'a>(
                PerDatabasePhase::DropLogicalSubscriptions => {
                    match &db {
                        DB::UserDB(db) => {
+                            let (db_name, outer_tag) = db.name.pg_quote_dollar();
                            let drop_subscription_query: String = format!(
                                include_str!("sql/drop_subscriptions.sql"),
-                                datname_str = escape_literal(&db.name),
+                                datname_str = db_name,
+                                outer_tag = outer_tag,
                            );

                            let operations = vec![Operation {
@@ -533,6 +952,7 @@ async fn get_operations<'a>(
                                    DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
                                    DB::UserDB(db) => db.owner.pg_quote(),
                                };
+                                let (escaped_role, outer_tag) = op.name.pg_quote_dollar();

                                Some(vec![
                                    // This will reassign all dependent objects to the db owner
@@ -547,7 +967,9 @@ async fn get_operations<'a>(
                                    Operation {
                                        query: format!(
                                            include_str!("sql/pre_drop_role_revoke_privileges.sql"),
-                                            role_name = quoted,
+                                            // N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
+                                            role_name = escaped_role,
+                                            outer_tag = outer_tag,
                                        ),
                                        comment: None,
                                    },
@@ -572,12 +994,14 @@ async fn get_operations<'a>(
                        DB::SystemDB => return Ok(Box::new(empty())),
                        DB::UserDB(db) => db,
                    };
+                    let (db_owner, outer_tag) = db.owner.pg_quote_dollar();

                    let operations = vec![
                        Operation {
                            query: format!(
                                include_str!("sql/set_public_schema_owner.sql"),
-                                db_owner = db.owner.pg_quote()
+                                db_owner = db_owner,
+                                outer_tag = outer_tag,
                            ),
                            comment: None,
                        },
@@ -697,6 +1121,25 @@ async fn get_operations<'a>(
            }
            Ok(Box::new(empty()))
        }
+        ApplySpecPhase::CreatePgauditExtension => Ok(Box::new(once(Operation {
+            query: String::from("CREATE EXTENSION IF NOT EXISTS pgaudit"),
+            comment: Some(String::from("create pgaudit extensions")),
+        }))),
+        ApplySpecPhase::CreatePgauditlogtofileExtension => Ok(Box::new(once(Operation {
+            query: String::from("CREATE EXTENSION IF NOT EXISTS pgauditlogtofile"),
+            comment: Some(String::from("create pgauditlogtofile extensions")),
+        }))),
+        // Disable pgaudit logging for postgres database.
+        // Postgres is neon system database used by monitors
+        // and compute_ctl tuning functions and thus generates a lot of noise.
+        // We do not consider data stored in this database as sensitive.
+        ApplySpecPhase::DisablePostgresDBPgAudit => {
+            let query = "ALTER DATABASE postgres SET pgaudit.log to 'none'";
+            Ok(Box::new(once(Operation {
+                query: query.to_string(),
+                comment: Some(query.to_string()),
+            })))
+        }
        ApplySpecPhase::HandleNeonExtension => {
            let operations = vec![
                Operation {
--- a/compute_tools/src/sql/create_neon_superuser.sql
+++ b/compute_tools/src/sql/create_neon_superuser.sql
@@ -0,0 +1,8 @@
+-- Create the neon_superuser role unless a role with that name already exists.
+-- The existence check lets this script be run repeatedly without failing on
+-- an already-present role.
+DO $$
+    BEGIN
+        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
+        THEN
+            -- NOLOGIN: the role is created without the ability to log in directly.
+            CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
+        END IF;
+    END
+$$;
--- a/compute_tools/src/sql/drop_subscriptions.sql
+++ b/compute_tools/src/sql/drop_subscriptions.sql
@@ -1,4 +1,4 @@
-DO $$
+DO ${outer_tag}$
 DECLARE
    subname TEXT;
 BEGIN
@@ -9,4 +9,4 @@ BEGIN
        EXECUTE format('DROP SUBSCRIPTION %I;', subname);
    END LOOP;
 END;
-$$;
+${outer_tag}$;
--- a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
+++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
@@ -1,8 +1,7 @@
-SET SESSION ROLE neon_superuser;
-
-DO $$
+DO ${outer_tag}$
 DECLARE
    schema TEXT;
+    grantor TEXT;
    revoke_query TEXT;
 BEGIN
    FOR schema IN
@@ -15,14 +14,25 @@ BEGIN
        -- ii) it's easy to add more schemas to the list if needed.
        WHERE schema_name IN ('public')
    LOOP
-        revoke_query := format(
-            'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
-            schema
-        );
+        FOR grantor IN EXECUTE
+            format(
+                'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee = %s',
+                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
+                quote_literal({role_name})
+            )
+        LOOP
+            EXECUTE format('SET LOCAL ROLE %I', grantor);

-        EXECUTE revoke_query;
+            revoke_query := format(
+                'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY %I',
+                schema,
+                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
+                {role_name},
+                grantor
+            );
+
+            EXECUTE revoke_query;
+        END LOOP;
    END LOOP;
 END;
-$$;
-
-RESET ROLE;
+${outer_tag}$;
--- a/compute_tools/src/sql/set_public_schema_owner.sql
+++ b/compute_tools/src/sql/set_public_schema_owner.sql
@@ -1,5 +1,4 @@
-DO
-$$
+DO ${outer_tag}$
    DECLARE
        schema_owner TEXT;
    BEGIN
@@ -16,8 +15,8 @@ $$

            IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'
            THEN
-                ALTER SCHEMA public OWNER TO {db_owner};
+                EXECUTE format('ALTER SCHEMA public OWNER TO %I', {db_owner});
            END IF;
        END IF;
    END
-$$;
+${outer_tag}$;
--- a/compute_tools/src/sql/unset_template_for_drop_dbs.sql
+++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql
@@ -1,12 +1,12 @@
-DO $$
+DO ${outer_tag}$
    BEGIN
        IF EXISTS(
            SELECT 1
            FROM pg_catalog.pg_database
-            WHERE datname = {datname_str}
+            WHERE datname = {datname}
        )
        THEN
-            ALTER DATABASE {datname} is_template false;
+            EXECUTE format('ALTER DATABASE %I is_template false', {datname});
        END IF;
    END
-$$;
+${outer_tag}$;
--- a/compute_tools/src/swap.rs
+++ b/compute_tools/src/swap.rs
@@ -1,10 +1,11 @@
 use std::path::Path;

-use anyhow::{anyhow, Context};
-use tracing::warn;
+use anyhow::{Context, anyhow};
+use tracing::{instrument, warn};

 pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";

+#[instrument]
 pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
    // run `/neonvm/bin/resize-swap --once {size_bytes}`
    //
--- a/compute_tools/tests/config_test.rs
+++ b/compute_tools/tests/config_test.rs
@@ -1,7 +1,7 @@
 #[cfg(test)]
 mod config_tests {

-    use std::fs::{remove_file, File};
+    use std::fs::{File, remove_file};
    use std::io::{Read, Write};
    use std::path::Path;

--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -61,6 +61,23 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
        assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
    }

+    /// Checks the dollar-quoted string and the outer tag returned by
+    /// `pg_quote_dollar()` for identifiers with and without `$` sequences.
+    #[test]
+    fn ident_pg_quote_dollar() {
+        // (identifier, expected quoted form, expected outer tag)
+        let cases = [
+            ("name", "$$name$$", "x"),
+            ("name$$", "$x$name$$$x$", "xx"),
+            ("name$$$", "$x$name$$$$x$", "xx"),
+            ("name$$$$", "$x$name$$$$$x$", "xx"),
+            ("name$x$", "$xx$name$x$$xx$", "xxx"),
+        ];
+
+        for (ident, want_quoted, want_tag) in cases {
+            let (quoted, outer_tag) = PgIdent::from(ident).pg_quote_dollar();
+            assert_eq!(quoted, want_quoted);
+            assert_eq!(outer_tag, want_tag);
+        }
+    }
+
    #[test]
    fn generic_options_search() {
        let generic_options: GenericOptions = Some(vec![
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -25,7 +25,7 @@ use anyhow::Context;
 use camino::{Utf8Path, Utf8PathBuf};
 use nix::errno::Errno;
 use nix::fcntl::{FcntlArg, FdFlag};
-use nix::sys::signal::{kill, Signal};
+use nix::sys::signal::{Signal, kill};
 use nix::unistd::Pid;
 use utils::pid_file::{self, PidFileRead};

--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -5,7 +5,16 @@
 //! easier to work with locally. The python tests in `test_runner`
 //! rely on `neon_local` to set up the environment for each test.
 //!
-use anyhow::{anyhow, bail, Context, Result};
+use std::borrow::Cow;
+use std::collections::{BTreeSet, HashMap};
+use std::fs::File;
+use std::os::fd::AsRawFd;
+use std::path::PathBuf;
+use std::process::exit;
+use std::str::FromStr;
+use std::time::Duration;
+
+use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
@@ -19,7 +28,7 @@ use control_plane::storage_controller::{
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
-use nix::fcntl::{flock, FlockArg};
+use nix::fcntl::{FlockArg, flock};
 use pageserver_api::config::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -31,27 +40,18 @@ use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInf
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
 use postgres_connection::parse_host_port;
+use safekeeper_api::membership::SafekeeperGeneration;
 use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
-use std::borrow::Cow;
-use std::collections::{BTreeSet, HashMap};
-use std::fs::File;
-use std::os::fd::AsRawFd;
-use std::path::PathBuf;
-use std::process::exit;
-use std::str::FromStr;
-use std::time::Duration;
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
 use tokio::task::JoinSet;
 use url::Host;
-use utils::{
-    auth::{Claims, Scope},
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-    lsn::Lsn,
-    project_git_version,
-};
+use utils::auth::{Claims, Scope};
+use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
+use utils::lsn::Lsn;
+use utils::project_git_version;

 // Default id of a safekeeper node, if not specified on the command line.
 const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
@@ -597,7 +597,15 @@ struct EndpointStartCmdArgs {
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,

-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
+    )]
+    safekeepers_generation: Option<u32>,
+    #[clap(
+        long,
+        help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
+    )]
    safekeepers: Option<String>,

    #[clap(
@@ -618,9 +626,9 @@ struct EndpointStartCmdArgs {
    )]
    allow_multiple: bool,

-    #[clap(short = 't', long, help = "timeout until we fail the command")]
-    #[arg(default_value = "10s")]
-    start_timeout: humantime::Duration,
+    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
+    #[arg(default_value = "90s")]
+    start_timeout: Duration,
 }

 #[derive(clap::Args)]
@@ -921,7 +929,9 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
    let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config {
        // User (likely the Python test suite) provided a description of the environment.
        if args.num_pageservers.is_some() {
-            bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
+            bail!(
+                "Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead"
+            );
        }
        // load and parse the file
        let contents = std::fs::read_to_string(config_path).with_context(|| {
@@ -953,6 +963,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
                        id: pageserver_id,
                        listen_pg_addr: format!("127.0.0.1:{pg_port}"),
                        listen_http_addr: format!("127.0.0.1:{http_port}"),
+                        listen_https_addr: None,
                        pg_auth_type: AuthType::Trust,
                        http_auth_type: AuthType::Trust,
                        other: Default::default(),
@@ -967,6 +978,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
            storage_controller: None,
            control_plane_compute_hook_api: None,
+            generate_local_ssl_certs: false,
        }
    };

@@ -1315,10 +1327,14 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res

            match (mode, args.hot_standby) {
                (ComputeMode::Static(_), true) => {
-                    bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
+                    bail!(
+                        "Cannot start a node in hot standby mode when it is already configured as a static replica"
+                    )
                }
                (ComputeMode::Primary, true) => {
-                    bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
+                    bail!(
+                        "Cannot start a node as a hot standby replica, it is already configured as primary node"
+                    )
                }
                _ => {}
            }
@@ -1345,6 +1361,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            let pageserver_id = args.endpoint_pageserver_id;
            let remote_ext_config = &args.remote_ext_config;

+            let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new);
            // If --safekeepers argument is given, use only the listed
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
@@ -1420,11 +1437,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            endpoint
                .start(
                    &auth_token,
+                    safekeepers_generation,
                    safekeepers,
                    pageservers,
                    remote_ext_config.as_ref(),
                    stripe_size.0 as usize,
                    args.create_test_user,
+                    args.start_timeout,
                )
                .await?;
        }
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -8,7 +8,6 @@
 use std::time::Duration;

 use anyhow::Context;
-
 use camino::Utf8PathBuf;

 use crate::{background_process, local_env};
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -37,29 +37,24 @@
 //! ```
 //!
 use std::collections::BTreeMap;
-use std::net::IpAddr;
-use std::net::Ipv4Addr;
-use std::net::SocketAddr;
-use std::net::TcpStream;
+use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
 use std::process::Command;
 use std::str::FromStr;
 use std::sync::Arc;
-use std::time::Duration;
-use std::time::SystemTime;
-use std::time::UNIX_EPOCH;
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{Context, Result, anyhow, bail};
 use compute_api::requests::ConfigurationRequest;
-use compute_api::responses::ComputeCtlConfig;
-use compute_api::spec::Database;
-use compute_api::spec::PgIdent;
-use compute_api::spec::RemoteExtSpec;
-use compute_api::spec::Role;
-use nix::sys::signal::kill;
-use nix::sys::signal::Signal;
+use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse};
+use compute_api::spec::{
+    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
+    RemoteExtSpec, Role,
+};
+use nix::sys::signal::{Signal, kill};
 use pageserver_api::shard::ShardStripeSize;
 use reqwest::header::CONTENT_TYPE;
+use safekeeper_api::membership::SafekeeperGeneration;
 use serde::{Deserialize, Serialize};
 use tracing::debug;
 use url::Host;
@@ -69,9 +64,6 @@ use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
 use crate::storage_controller::StorageController;

-use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
-use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
-
 // contents of a endpoint.json file
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct EndpointConf {
@@ -237,7 +229,9 @@ impl ComputeControlPlane {
            });

            if let Some((key, _)) = duplicates.next() {
-                bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.");
+                bail!(
+                    "attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported."
+                );
            }
        }
        Ok(())
@@ -584,14 +578,17 @@ impl Endpoint {
        Ok(safekeeper_connstrings)
    }

+    #[allow(clippy::too_many_arguments)]
    pub async fn start(
        &self,
        auth_token: &Option<String>,
+        safekeepers_generation: Option<SafekeeperGeneration>,
        safekeepers: Vec<NodeId>,
        pageservers: Vec<(Host, u16)>,
        remote_ext_config: Option<&String>,
        shard_stripe_size: usize,
        create_test_user: bool,
+        start_timeout: Duration,
    ) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
@@ -663,6 +660,7 @@ impl Endpoint {
            timeline_id: Some(self.timeline_id),
            mode: self.mode,
            pageserver_connstring: Some(pageserver_connstring),
+            safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
            safekeeper_connstrings,
            storage_auth_token: auth_token.clone(),
            remote_extensions,
@@ -671,6 +669,7 @@ impl Endpoint {
            local_proxy_config: None,
            reconfigure_concurrency: self.reconfigure_concurrency,
            drop_subscriptions_before_start: self.drop_subscriptions_before_start,
+            audit_log_level: ComputeAudit::Disabled,
        };

        // this strange code is needed to support respec() in tests
@@ -778,17 +777,18 @@ impl Endpoint {
        std::fs::write(pidfile_path, pid.to_string())?;

        // Wait for it to start
-        let mut attempt = 0;
        const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
-        const MAX_ATTEMPTS: u32 = 10 * 90; // Wait up to 1.5 min
+        let start_at = Instant::now();
        loop {
-            attempt += 1;
            match self.get_status().await {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
-                            if attempt == MAX_ATTEMPTS {
-                                bail!("compute startup timed out; still in Init state");
+                            if Instant::now().duration_since(start_at) > start_timeout {
+                                bail!(
+                                    "compute startup timed out {:?}; still in Init state",
+                                    start_timeout
+                                );
                            }
                            // keep retrying
                        }
@@ -815,8 +815,11 @@ impl Endpoint {
                    }
                }
                Err(e) => {
-                    if attempt == MAX_ATTEMPTS {
-                        return Err(e).context("timed out waiting to connect to compute_ctl HTTP");
+                    if Instant::now().duration_since(start_at) > start_timeout {
+                        return Err(e).context(format!(
+                            "timed out {:?} waiting to connect to compute_ctl HTTP",
+                            start_timeout,
+                        ));
                    }
                }
            }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -3,28 +3,22 @@
 //! Now it also provides init method which acts like a stub for proper installation
 //! script which will use local paths.

-use anyhow::{bail, Context};
+use std::collections::HashMap;
+use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::time::Duration;
+use std::{env, fs};

+use anyhow::{Context, bail};
 use clap::ValueEnum;
 use postgres_backend::AuthType;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::env;
-use std::fs;
-use std::net::IpAddr;
-use std::net::Ipv4Addr;
-use std::net::SocketAddr;
-use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
-use std::time::Duration;
-use utils::{
-    auth::{encode_from_key_file, Claims},
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-};
+use utils::auth::{Claims, encode_from_key_file};
+use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};

-use crate::pageserver::PageServerNode;
-use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
+use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
 use crate::safekeeper::SafekeeperNode;

 pub const DEFAULT_PG_VERSION: u32 = 16;
@@ -87,6 +81,10 @@ pub struct LocalEnv {
    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
    // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
    pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
+
+    /// Flag to generate SSL certificates for components that need it.
+    /// Also generates root CA certificate that is used to sign all other certificates.
+    pub generate_local_ssl_certs: bool,
 }

 /// On-disk state stored in `.neon/config`.
@@ -108,6 +106,10 @@ pub struct OnDiskConfig {
    pub control_plane_api: Option<Url>,
    pub control_plane_compute_hook_api: Option<Url>,
    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
+    // Note: skip serializing because in compat tests old storage controller fails
+    // to load new config file. May be removed after this field is in release branch.
+    #[serde(skip_serializing_if = "std::ops::Not::not")]
+    pub generate_local_ssl_certs: bool,
 }

 fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
@@ -135,6 +137,7 @@ pub struct NeonLocalInitConf {
    pub safekeepers: Vec<SafekeeperConf>,
    pub control_plane_api: Option<Url>,
    pub control_plane_compute_hook_api: Option<Option<Url>>,
+    pub generate_local_ssl_certs: bool,
 }

 /// Broker config for cluster internal communication.
@@ -171,6 +174,11 @@ pub struct NeonStorageControllerConf {

    #[serde(with = "humantime_serde")]
    pub long_reconcile_threshold: Option<Duration>,
+
+    #[serde(default)]
+    pub use_https_pageserver_api: bool,
+
+    pub timelines_onto_safekeepers: bool,
 }

 impl NeonStorageControllerConf {
@@ -194,6 +202,8 @@ impl Default for NeonStorageControllerConf {
            max_secondary_lag_bytes: None,
            heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
            long_reconcile_threshold: None,
+            use_https_pageserver_api: false,
+            timelines_onto_safekeepers: false,
        }
    }
 }
@@ -223,6 +233,7 @@ pub struct PageServerConf {
    pub id: NodeId,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
+    pub listen_https_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
    pub no_sync: bool,
@@ -234,6 +245,7 @@ impl Default for PageServerConf {
            id: NodeId(0),
            listen_pg_addr: String::new(),
            listen_http_addr: String::new(),
+            listen_https_addr: None,
            pg_auth_type: AuthType::Trust,
            http_auth_type: AuthType::Trust,
            no_sync: false,
@@ -249,6 +261,7 @@ pub struct NeonLocalInitPageserverConf {
    pub id: NodeId,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
+    pub listen_https_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
@@ -263,6 +276,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
            id,
            listen_pg_addr,
            listen_http_addr,
+            listen_https_addr,
            pg_auth_type,
            http_auth_type,
            no_sync,
@@ -272,6 +286,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
            id: *id,
            listen_pg_addr: listen_pg_addr.clone(),
            listen_http_addr: listen_http_addr.clone(),
+            listen_https_addr: listen_https_addr.clone(),
            pg_auth_type: *pg_auth_type,
            http_auth_type: *http_auth_type,
            no_sync: *no_sync,
@@ -416,6 +431,41 @@ impl LocalEnv {
        }
    }

+    /// Location of the local root CA certificate, `rootCA.crt` directly under
+    /// the base data directory. Returns `None` when `generate_local_ssl_certs`
+    /// is false.
+    pub fn ssl_ca_cert_path(&self) -> Option<PathBuf> {
+        self.generate_local_ssl_certs
+            .then(|| self.base_data_dir.join("rootCA.crt"))
+    }
+
+    /// Location of the local root CA private key, `rootCA.key` directly under
+    /// the base data directory. Returns `None` when `generate_local_ssl_certs`
+    /// is false.
+    pub fn ssl_ca_key_path(&self) -> Option<PathBuf> {
+        self.generate_local_ssl_certs
+            .then(|| self.base_data_dir.join("rootCA.key"))
+    }
+
+    /// Creates the root CA cert and key if the cert file is missing; panics if certs are disabled.
+    pub fn generate_ssl_ca_cert(&self) -> anyhow::Result<()> {
+        let ca_cert = self.ssl_ca_cert_path().unwrap();
+        if fs::exists(&ca_cert)? {
+            return Ok(());
+        }
+        generate_ssl_ca_cert(&ca_cert, &self.ssl_ca_key_path().unwrap())
+    }
+
+    /// Writes a new certificate to `cert_path` and its key to `key_path`, signed by the
+    /// local root CA, generating the CA first if it does not exist yet.
+    /// Panics if `generate_local_ssl_certs` is false.
+    pub fn generate_ssl_cert(&self, cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
+        self.generate_ssl_ca_cert()?;
+        let ca_cert = self.ssl_ca_cert_path().unwrap();
+        let ca_key = self.ssl_ca_key_path().unwrap();
+        generate_ssl_cert(cert_path, key_path, &ca_cert, &ca_key)
+    }
+
    /// Inspect the base data directory and extract the instance id and instance directory path
    /// for all storage controller instances
    pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
@@ -465,7 +515,9 @@ impl LocalEnv {
            if old_timeline_id == &timeline_id {
                Ok(())
            } else {
-                bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
+                bail!(
+                    "branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}"
+                );
            }
        } else {
            existing_values.push((tenant_id, timeline_id));
@@ -523,6 +575,7 @@ impl LocalEnv {
                control_plane_api,
                control_plane_compute_hook_api,
                branch_name_mappings,
+                generate_local_ssl_certs,
            } = on_disk_config;
            LocalEnv {
                base_data_dir: repopath.to_owned(),
@@ -537,6 +590,7 @@ impl LocalEnv {
                control_plane_api: control_plane_api.unwrap(),
                control_plane_compute_hook_api,
                branch_name_mappings,
+                generate_local_ssl_certs,
            }
        };

@@ -572,6 +626,7 @@ impl LocalEnv {
                struct PageserverConfigTomlSubset {
                    listen_pg_addr: String,
                    listen_http_addr: String,
+                    listen_https_addr: Option<String>,
                    pg_auth_type: AuthType,
                    http_auth_type: AuthType,
                    #[serde(default)]
@@ -596,6 +651,7 @@ impl LocalEnv {
                let PageserverConfigTomlSubset {
                    listen_pg_addr,
                    listen_http_addr,
+                    listen_https_addr,
                    pg_auth_type,
                    http_auth_type,
                    no_sync,
@@ -613,6 +669,7 @@ impl LocalEnv {
                    },
                    listen_pg_addr,
                    listen_http_addr,
+                    listen_https_addr,
                    pg_auth_type,
                    http_auth_type,
                    no_sync,
@@ -640,6 +697,7 @@ impl LocalEnv {
                control_plane_api: Some(self.control_plane_api.clone()),
                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
                branch_name_mappings: self.branch_name_mappings.clone(),
+                generate_local_ssl_certs: self.generate_local_ssl_certs,
            },
        )
    }
@@ -722,6 +780,7 @@ impl LocalEnv {
            safekeepers,
            control_plane_api,
            control_plane_compute_hook_api,
+            generate_local_ssl_certs,
        } = conf;

        // Find postgres binaries.
@@ -770,8 +829,13 @@ impl LocalEnv {
            control_plane_api: control_plane_api.unwrap(),
            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
            branch_name_mappings: Default::default(),
+            generate_local_ssl_certs,
        };

+        if generate_local_ssl_certs {
+            env.generate_ssl_ca_cert()?;
+        }
+
        // create endpoints dir
        fs::create_dir_all(env.endpoints_path())?;

@@ -855,3 +919,80 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
    }
    Ok(())
 }
+
+/// Creates a self-signed root CA certificate and unencrypted RSA key via `openssl`.
+fn generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
+    // openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=Neon Local CA" -days 36500 \
+    // -out rootCA.crt -keyout rootCA.key
+    let keygen_output = Command::new("openssl")
+        .args(["req", "-x509", "-newkey", "rsa:2048", "-nodes"])
+        .args(["-days", "36500", "-subj", "/CN=Neon Local CA"])
+        // Paths are passed as OS strings, so non-UTF-8 paths need no fallible conversion.
+        .arg("-out")
+        .arg(cert_path)
+        .arg("-keyout")
+        .arg(key_path)
+        .output()
+        .context("failed to generate CA certificate")?;
+    if !keygen_output.status.success() {
+        let stderr = String::from_utf8_lossy(&keygen_output.stderr);
+        bail!("openssl failed: '{stderr}'");
+    }
+    Ok(())
+}
+
+/// Issues a certificate for `localhost`/`127.0.0.1` signed by the given CA, using `openssl`.
+/// Writes the private key to `key_path` and the certificate to `cert_path`.
+fn generate_ssl_cert(
+    cert_path: &Path,
+    key_path: &Path,
+    ca_cert_path: &Path,
+    ca_key_path: &Path,
+) -> anyhow::Result<()> {
+    // Generate Certificate Signing Request (CSR).
+    // `with_extension` takes the extension without a leading dot ("server.crt" -> "server.csr").
+    let csr_path = cert_path.with_extension("csr");
+
+    // openssl req -new -nodes -newkey rsa:2048 -keyout server.key -out server.csr \
+    // -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
+    let keygen_output = Command::new("openssl")
+        .args(["req", "-new", "-nodes", "-newkey", "rsa:2048"])
+        .args(["-subj", "/CN=localhost"])
+        .args(["-addext", "subjectAltName=DNS:localhost,IP:127.0.0.1"])
+        .arg("-keyout")
+        .arg(key_path)
+        .arg("-out")
+        .arg(&csr_path)
+        .output()
+        .context("failed to generate CSR")?;
+    if !keygen_output.status.success() {
+        let stderr = String::from_utf8_lossy(&keygen_output.stderr);
+        bail!("openssl failed: '{stderr}'");
+    }
+
+    // Sign CSR with CA key.
+    //
+    // openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial \
+    // -out server.crt -days 36500 -copy_extensions copyall
+    let keygen_output = Command::new("openssl")
+        .args(["x509", "-req", "-in"])
+        .arg(&csr_path)
+        .arg("-CA")
+        .arg(ca_cert_path)
+        .arg("-CAkey")
+        .arg(ca_key_path)
+        .args(["-CAcreateserial", "-out"])
+        .arg(cert_path)
+        .args(["-days", "36500", "-copy_extensions", "copyall"])
+        .output()
+        .context("failed to sign CSR")?;
+    if !keygen_output.status.success() {
+        let stderr = String::from_utf8_lossy(&keygen_output.stderr);
+        bail!("openssl failed: '{stderr}'");
+    }
+
+    // Remove CSR file as it's not needed anymore.
+    fs::remove_file(csr_path)?;
+
+    Ok(())
+}
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -7,7 +7,6 @@
 //! ```
 //!
 use std::collections::HashMap;
-
 use std::io;
 use std::io::Write;
 use std::num::NonZeroU64;
@@ -15,22 +14,20 @@ use std::path::PathBuf;
 use std::str::FromStr;
 use std::time::Duration;

-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use camino::Utf8PathBuf;
 use pageserver_api::models::{self, TenantInfo, TimelineInfo};
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
-use postgres_connection::{parse_host_port, PgConnectionConfig};
+use postgres_connection::{PgConnectionConfig, parse_host_port};
+use reqwest::Certificate;
 use utils::auth::{Claims, Scope};
-use utils::id::NodeId;
-use utils::{
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
+use utils::id::{NodeId, TenantId, TimelineId};
+use utils::lsn::Lsn;

-use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
-use crate::{background_process, local_env::LocalEnv};
+use crate::background_process;
+use crate::local_env::{LocalEnv, NeonLocalInitPageserverConf, PageServerConf};

 /// Directory within .neon which will be used by default for LocalFs remote storage.
 pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver";
@@ -53,12 +50,29 @@ impl PageServerNode {
        let (host, port) =
            parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
        let port = port.unwrap_or(5432);
+
+        let ssl_ca_cert = env.ssl_ca_cert_path().map(|ssl_ca_file| {
+            let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist");
+            Certificate::from_pem(&buf).expect("CA certificate should be valid")
+        });
+
+        let endpoint = if env.storage_controller.use_https_pageserver_api {
+            format!(
+                "https://{}",
+                conf.listen_https_addr.as_ref().expect(
+                    "listen https address should be specified if use_https_pageserver_api is on"
+                )
+            )
+        } else {
+            format!("http://{}", conf.listen_http_addr)
+        };
+
        Self {
            pg_connection_config: PgConnectionConfig::new_host_port(host, port),
            conf: conf.clone(),
            env: env.clone(),
            http_client: mgmt_api::Client::new(
-                format!("http://{}", conf.listen_http_addr),
+                endpoint,
                {
                    match conf.http_auth_type {
                        AuthType::Trust => None,
@@ -69,7 +83,9 @@ impl PageServerNode {
                    }
                }
                .as_deref(),
-            ),
+                ssl_ca_cert,
+            )
+            .expect("Client constructs with no errors"),
        }
    }

@@ -81,7 +97,11 @@ impl PageServerNode {
        &self,
        conf: NeonLocalInitPageserverConf,
    ) -> anyhow::Result<toml_edit::DocumentMut> {
-        assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
+        assert_eq!(
+            &PageServerConf::from(&conf),
+            &self.conf,
+            "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully"
+        );

        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)

@@ -220,6 +240,13 @@ impl PageServerNode {
            .context("write identity toml")?;
        drop(identity_toml);

+        if self.env.generate_local_ssl_certs {
+            self.env.generate_ssl_cert(
+                datadir.join("server.crt").as_path(),
+                datadir.join("server.key").as_path(),
+            )?;
+        }
+
        // TODO: invoke a TBD config-check command to validate that pageserver will start with the written config

        // Write metadata file, used by pageserver on startup to register itself with
@@ -230,6 +257,15 @@ impl PageServerNode {
            parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
        let http_port = http_port.unwrap_or(9898);

+        let https_port = match self.conf.listen_https_addr.as_ref() {
+            Some(https_addr) => {
+                let (_https_host, https_port) =
+                    parse_host_port(https_addr).expect("Unable to parse listen_https_addr");
+                Some(https_port.unwrap_or(9899))
+            }
+            None => None,
+        };
+
        // Intentionally hand-craft JSON: this acts as an implicit format compat test
        // in case the pageserver-side structure is edited, and reflects the real life
        // situation: the metadata is written by some other script.
@@ -240,6 +276,7 @@ impl PageServerNode {
                postgres_port: self.pg_connection_config.port(),
                http_host: "localhost".to_string(),
                http_port,
+                https_port,
                other: HashMap::from([(
                    "availability_zone_id".to_string(),
                    serde_json::json!(az_id),
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -1,3 +1,6 @@
+use std::collections::HashMap;
+use std::fmt;
+
 ///
 /// Module for parsing postgresql.conf file.
 ///
@@ -6,8 +9,6 @@
 /// funny stuff like include-directives or funny escaping.
 use once_cell::sync::Lazy;
 use regex::Regex;
-use std::collections::HashMap;
-use std::fmt;

 /// In-memory representation of a postgresql.conf file
 #[derive(Default, Debug)]
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -14,18 +14,15 @@ use std::{io, result};

 use anyhow::Context;
 use camino::Utf8PathBuf;
+use http_utils::error::HttpErrorBody;
 use postgres_connection::PgConnectionConfig;
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-
-use http_utils::error::HttpErrorBody;
 use utils::auth::{Claims, Scope};
 use utils::id::NodeId;

-use crate::{
-    background_process,
-    local_env::{LocalEnv, SafekeeperConf},
-};
+use crate::background_process;
+use crate::local_env::{LocalEnv, SafekeeperConf};

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -1,44 +1,36 @@
-use crate::{
-    background_process,
-    local_env::{LocalEnv, NeonStorageControllerConf},
-};
+use std::ffi::OsStr;
+use std::fs;
+use std::net::SocketAddr;
+use std::path::PathBuf;
+use std::process::ExitStatus;
+use std::str::FromStr;
+use std::sync::OnceLock;
+use std::time::{Duration, Instant};
+
 use camino::{Utf8Path, Utf8PathBuf};
 use hyper0::Uri;
 use nix::unistd::Pid;
-use pageserver_api::{
-    controller_api::{
-        NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
-        TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
-        TenantShardMigrateResponse,
-    },
-    models::{
-        TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
-    },
-    shard::{ShardStripeSize, TenantShardId},
+use pageserver_api::controller_api::{
+    NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
+    TenantCreateResponse, TenantLocateResponse,
 };
+use pageserver_api::models::{TimelineCreateRequest, TimelineInfo};
+use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
 use postgres_backend::AuthType;
 use reqwest::Method;
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use std::{
-    ffi::OsStr,
-    fs,
-    net::SocketAddr,
-    path::PathBuf,
-    process::ExitStatus,
-    str::FromStr,
-    sync::OnceLock,
-    time::{Duration, Instant},
-};
+use serde::de::DeserializeOwned;
+use serde::{Deserialize, Serialize};
 use tokio::process::Command;
 use tracing::instrument;
 use url::Url;
-use utils::{
-    auth::{encode_from_key_file, Claims, Scope},
-    id::{NodeId, TenantId},
-};
+use utils::auth::{Claims, Scope, encode_from_key_file};
+use utils::id::{NodeId, TenantId};
 use whoami::username;

+use crate::background_process;
+use crate::local_env::{LocalEnv, NeonStorageControllerConf};
+
 pub struct StorageController {
    env: LocalEnv,
    private_key: Option<Vec<u8>>,
@@ -96,7 +88,8 @@ pub struct AttachHookRequest {

 #[derive(Serialize, Deserialize)]
 pub struct AttachHookResponse {
-    pub gen: Option<u32>,
+    #[serde(rename = "gen")]
+    pub generation: Option<u32>,
 }

 #[derive(Serialize, Deserialize)]
@@ -541,6 +534,14 @@ impl StorageController {
            args.push("--start-as-candidate".to_string());
        }

+        if self.config.use_https_pageserver_api {
+            args.push("--use-https-pageserver-api".to_string());
+        }
+
+        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
+            args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
+        }
+
        if let Some(private_key) = &self.private_key {
            let claims = Claims::new(None, Scope::PageServerApi);
            let jwt_token =
@@ -583,6 +584,10 @@ impl StorageController {
            self.env.base_data_dir.display()
        ));

+        if self.config.timelines_onto_safekeepers {
+            args.push("--timelines-onto-safekeepers".to_string());
+        }
+
        background_process::start_process(
            COMMAND,
            &instance_dir,
@@ -779,7 +784,7 @@ impl StorageController {
            )
            .await?;

-        Ok(response.gen)
+        Ok(response.generation)
    }

    #[instrument(skip(self))]
@@ -829,41 +834,6 @@ impl StorageController {
        .await
    }

-    #[instrument(skip(self))]
-    pub async fn tenant_migrate(
-        &self,
-        tenant_shard_id: TenantShardId,
-        node_id: NodeId,
-    ) -> anyhow::Result<TenantShardMigrateResponse> {
-        self.dispatch(
-            Method::PUT,
-            format!("control/v1/tenant/{tenant_shard_id}/migrate"),
-            Some(TenantShardMigrateRequest {
-                node_id,
-                migration_config: None,
-            }),
-        )
-        .await
-    }
-
-    #[instrument(skip(self), fields(%tenant_id, %new_shard_count))]
-    pub async fn tenant_split(
-        &self,
-        tenant_id: TenantId,
-        new_shard_count: u8,
-        new_stripe_size: Option<ShardStripeSize>,
-    ) -> anyhow::Result<TenantShardSplitResponse> {
-        self.dispatch(
-            Method::PUT,
-            format!("control/v1/tenant/{tenant_id}/shard_split"),
-            Some(TenantShardSplitRequest {
-                new_shard_count,
-                new_stripe_size,
-            }),
-        )
-        .await
-    }
-
    #[instrument(skip_all, fields(node_id=%req.node_id))]
    pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {
        self.dispatch::<_, ()>(Method::POST, "control/v1/node".to_string(), Some(req))
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,34 +1,28 @@
-use futures::StreamExt;
-use std::{
-    collections::{HashMap, HashSet},
-    str::FromStr,
-    time::Duration,
-};
+use std::collections::{HashMap, HashSet};
+use std::path::PathBuf;
+use std::str::FromStr;
+use std::time::Duration;

 use clap::{Parser, Subcommand};
-use pageserver_api::{
-    controller_api::{
-        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
-        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
-        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
-    },
-    models::{
-        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
-        ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest,
-        TenantShardSplitRequest, TenantShardSplitResponse,
-    },
-    shard::{ShardStripeSize, TenantShardId},
-};
-use pageserver_client::mgmt_api::{self};
-use reqwest::{Method, StatusCode, Url};
-use utils::id::{NodeId, TenantId, TimelineId};
-
+use futures::StreamExt;
 use pageserver_api::controller_api::{
-    NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
+    AvailabilityZone, MigrationConfig, NodeAvailabilityWrapper, NodeConfigureRequest,
+    NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse,
+    PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
+    ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
+    SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
    TenantShardMigrateRequest, TenantShardMigrateResponse,
 };
+use pageserver_api::models::{
+    EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters,
+    TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
+    TenantShardSplitResponse,
+};
+use pageserver_api::shard::{ShardStripeSize, TenantShardId};
+use pageserver_client::mgmt_api::{self};
+use reqwest::{Method, StatusCode, Url};
 use storage_controller_client::control_api::Client;
+use utils::id::{NodeId, TenantId, TimelineId};

 #[derive(Subcommand, Debug)]
 enum Command {
@@ -119,6 +113,15 @@ enum Command {
        tenant_shard_id: TenantShardId,
        #[arg(long)]
        node: NodeId,
+        #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
+        prewarm: bool,
+        #[arg(long, default_value_t = false, action = clap::ArgAction::Set)]
+        override_scheduler: bool,
+    },
+    /// Watch the location of a tenant shard evolve, e.g. while expecting it to migrate
+    TenantShardWatch {
+        #[arg(long)]
+        tenant_shard_id: TenantShardId,
    },
    /// Migrate the secondary location for a tenant shard to a specific pageserver.
    TenantShardMigrateSecondary {
@@ -276,6 +279,10 @@ struct Cli {
    /// a token with both scopes to use with this tool.
    jwt: Option<String>,

+    #[arg(long)]
+    /// Trusted root CA certificate to use in https APIs.
+    ssl_ca_file: Option<PathBuf>,
+
    #[command(subcommand)]
    command: Command,
 }
@@ -386,9 +393,17 @@ async fn main() -> anyhow::Result<()> {

    let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone());

+    let ssl_ca_cert = match &cli.ssl_ca_file {
+        Some(ssl_ca_file) => {
+            let buf = tokio::fs::read(ssl_ca_file).await?;
+            Some(reqwest::Certificate::from_pem(&buf)?)
+        }
+        None => None,
+    };
+
    let mut trimmed = cli.api.to_string();
    trimmed.pop();
-    let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref());
+    let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref(), ssl_ca_cert)?;

    match cli.command {
        Command::NodeRegister {
@@ -626,19 +641,43 @@ async fn main() -> anyhow::Result<()> {
        Command::TenantShardMigrate {
            tenant_shard_id,
            node,
+            prewarm,
+            override_scheduler,
        } => {
-            let req = TenantShardMigrateRequest {
-                node_id: node,
-                migration_config: None,
+            let migration_config = MigrationConfig {
+                prewarm,
+                override_scheduler,
+                ..Default::default()
            };

-            storcon_client
+            let req = TenantShardMigrateRequest {
+                node_id: node,
+                origin_node_id: None,
+                migration_config,
+            };
+
+            match storcon_client
                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                    Method::PUT,
                    format!("control/v1/tenant/{tenant_shard_id}/migrate"),
                    Some(req),
                )
-                .await?;
+                .await
+            {
+                Err(mgmt_api::Error::ApiError(StatusCode::PRECONDITION_FAILED, msg)) => {
+                    anyhow::bail!(
+                        "Migration to {node} rejected, may require `--force` ({}) ",
+                        msg
+                    );
+                }
+                Err(e) => return Err(e.into()),
+                Ok(_) => {}
+            }
+
+            watch_tenant_shard(storcon_client, tenant_shard_id, Some(node)).await?;
+        }
+        Command::TenantShardWatch { tenant_shard_id } => {
+            watch_tenant_shard(storcon_client, tenant_shard_id, None).await?;
        }
        Command::TenantShardMigrateSecondary {
            tenant_shard_id,
@@ -646,7 +685,8 @@ async fn main() -> anyhow::Result<()> {
        } => {
            let req = TenantShardMigrateRequest {
                node_id: node,
-                migration_config: None,
+                origin_node_id: None,
+                migration_config: MigrationConfig::default(),
            };

            storcon_client
@@ -921,7 +961,9 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::TenantDrop { tenant_id, unclean } => {
            if !unclean {
-                anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant.  If you know what you're doing, add `--unclean` to proceed.")
+                anyhow::bail!(
+                    "This command is not a tenant deletion, and uncleanly drops all controller state for the tenant.  If you know what you're doing, add `--unclean` to proceed."
+                )
            }
            storcon_client
                .dispatch::<(), ()>(
@@ -933,7 +975,9 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::NodeDrop { node_id, unclean } => {
            if !unclean {
-                anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it.  If you know what you're doing, add `--unclean` to proceed.")
+                anyhow::bail!(
+                    "This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it.  If you know what you're doing, add `--unclean` to proceed."
+                )
            }
            storcon_client
                .dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
@@ -1108,7 +1152,8 @@ async fn main() -> anyhow::Result<()> {
                                format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
                                Some(TenantShardMigrateRequest {
                                    node_id: mv.to,
-                                    migration_config: None,
+                                    origin_node_id: Some(mv.from),
+                                    migration_config: MigrationConfig::default(),
                                }),
                            )
                            .await
@@ -1287,3 +1332,68 @@ async fn main() -> anyhow::Result<()> {

    Ok(())
 }
+
+static WATCH_INTERVAL: Duration = Duration::from_secs(5);
+
+async fn watch_tenant_shard( // Repeatedly prints one shard's placement; optionally returns once it has migrated
+    storcon_client: Client, // client used to send the describe request on every iteration
+    tenant_shard_id: TenantShardId, // shard to report on; its `tenant_id` selects the tenant that is described
+    until_migrated_to: Option<NodeId>, // if `Some`, return once the shard is attached to this node and idle
+) -> anyhow::Result<()> { // `Err` if a describe request fails or the shard is missing from the response
+    if let Some(until_migrated_to) = until_migrated_to {
+        println!(
+            "Waiting for tenant shard {} to be migrated to node {}",
+            tenant_shard_id, until_migrated_to
+        );
+    }
+
+    loop { // one describe request and one printed status line per iteration, WATCH_INTERVAL apart
+        let desc = storcon_client
+            .dispatch::<(), TenantDescribeResponse>(
+                Method::GET,
+                format!("control/v1/tenant/{}", tenant_shard_id.tenant_id),
+                None,
+            )
+            .await?; // a failed request ends the watch with that error
+
+        // Pick the requested shard out of the tenant-wide response and print its current placement
+        let shard = desc
+            .shards
+            .iter()
+            .find(|s| s.tenant_shard_id == tenant_shard_id)
+            .ok_or(anyhow::anyhow!("Tenant shard not found"))?;
+        let summary = format!(
+            "attached: {} secondary: {} {}",
+            shard
+                .node_attached
+                .map(|n| format!("{}", n))
+                .unwrap_or("none".to_string()), // no attached node is shown as "none"
+            shard
+                .node_secondary
+                .iter()
+                .map(|n| n.to_string())
+                .collect::<Vec<_>>()
+                .join(","), // secondaries as a comma-separated list (empty string if there are none)
+            if shard.is_reconciling {
+                "(reconciler active)"
+            } else {
+                "(reconciler idle)"
+            }
+        );
+        println!("{}", summary);
+
+        // With a target node: stop once the shard is attached there and is no longer reconciling
+        if let Some(until_migrated_to) = until_migrated_to {
+            if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling {
+                println!(
+                    "Tenant shard {} is now on node {}",
+                    tenant_shard_id, until_migrated_to
+                );
+                break;
+            }
+        }
+
+        tokio::time::sleep(WATCH_INTERVAL).await; // without a target node this loop only ends on error
+    }
+    Ok(())
+}
--- a/deny.toml
+++ b/deny.toml
@@ -27,6 +27,10 @@ yanked = "warn"
 id = "RUSTSEC-2023-0071"
 reason = "the marvin attack only affects private key decryption, not public key signature verification"

+[[advisories.ignore]]
+id = "RUSTSEC-2024-0436"
+reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact."
+
 # This section is considered when running `cargo deny check licenses`
 # More documentation for the licenses section can be found here:
 # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -186,7 +186,7 @@ services:

  neon-test-extensions:
    profiles: ["test-extensions"]
-    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
+    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
    environment:
      - PGPASSWORD=cloud_admin
    entrypoint:
--- a/docker-compose/ext-src/pgtap-src/test-upgrade.patch
+++ b/docker-compose/ext-src/pgtap-src/test-upgrade.patch
@@ -7,7 +7,7 @@ index f255fe6..0a0fa65 100644
 GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests
 REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe
 -REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
-+REGRESS_OPTS = --use-existing --dbname=pgtap_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
+REGRESS_OPTS = --use-existing --dbname=contrib_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
 SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!
 IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))
 PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -6,12 +6,16 @@ generate_id() {
    local -n resvar=$1
    printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
 }
-if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
-  echo OLDTAG and NEWTAG must be defined
+echo "${OLD_COMPUTE_TAG}"
+echo "${NEW_COMPUTE_TAG}"
+echo "${TEST_EXTENSIONS_TAG}"
+if [ -z "${OLD_COMPUTE_TAG:-}" ] || [ -z "${NEW_COMPUTE_TAG:-}" ] || [ -z "${TEST_EXTENSIONS_TAG:-}" ]; then
+  echo OLD_COMPUTE_TAG, NEW_COMPUTE_TAG and TEST_EXTENSIONS_TAG must be set
  exit 1
 fi
 export PG_VERSION=${PG_VERSION:-16}
 export PG_TEST_VERSION=${PG_VERSION}
+# Waits until the compute node is ready
 function wait_for_ready {
  TIME=0
  while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
@@ -23,11 +27,45 @@ function wait_for_ready {
    exit 2
  fi
 }
+# Creates extensions. Gets a string with space-separated extensions as a parameter
 function create_extensions() {
  for ext in ${1}; do
    docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
  done
 }
+# Creates a new timeline branching from an ancestor. $1 is the ancestor timeline ID; $2 is the key
+# (an extension name or a label) under which the new timeline ID is saved in the EXT_TIMELINE array.
+function create_timeline() {
+  generate_id new_timeline_id  # fills new_timeline_id with 32 random hex digits
+
+  PARAMS=(
+      -sbf  # NOTE(review): curl's -b takes an argument, so this may parse as "-s -b f", not "-s -f"; confirm
+      -X POST
+      -H "Content-Type: application/json"
+      -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${1}\"}"
+      "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
+  )
+  result=$(curl "${PARAMS[@]}")  # relies on the global tenant_id having been set by the caller
+  echo $result | jq .  # print the response body through jq
+  EXT_TIMELINE[${2}]=${new_timeline_id}  # record the new timeline ID under the caller-supplied key
+}
+# Verifies that the compute node reports timeline ID $1; otherwise prints "Timeline mismatch" and exits 1.
+function check_timeline() {
+    TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")  # -Aqt: bare value only
+    if [ "${TID}" != "${1}" ]; then
+      echo Timeline mismatch
+      exit 1  # terminates the whole script, not just this function
+    fi
+}
+# Restarts the compute node with the given compute tag, attached to the given timeline.
+# $1 is the compute tag and $2 is the timeline ID; the tenant is taken from the global tenant_id.
+function restart_compute() {
+  docker compose down compute compute_is_ready  # take down only these two services
+  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+  wait_for_ready  # waits for compute_is_ready to log "accepting connections"
+  check_timeline ${2}  # exits with status 1 if the compute reports a different timeline
+}
+declare -A EXT_TIMELINE
 EXTENSIONS='[
 {"extname": "plv8", "extdir": "plv8-src"},
 {"extname": "vector", "extdir": "pgvector-src"},
@@ -47,7 +85,7 @@ EXTENSIONS='[
 {"extname": "pg_repack", "extdir": "pg_repack-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
-TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
+COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
 wait_for_ready
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
@@ -55,12 +93,14 @@ create_extensions "${EXTNAMES}"
 query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
 new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
 docker compose --profile test-extensions down
-TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
+COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
 wait_for_ready
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
-docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression"
-docker compose exec neon-test-extensions psql -d pgtap_regression -c "CREATE EXTENSION pgtap"
+tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
+EXT_TIMELINE["main"]=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
+create_timeline "${EXT_TIMELINE["main"]}" init
+restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE["init"]}"
 create_extensions "${EXTNAMES}"
 if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then
  exts="${EXTNAMES}"
@@ -71,29 +111,13 @@ fi
 if [ -z "${exts}" ]; then
  echo "No extensions were upgraded"
 else
-  tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
-  timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
  for ext in ${exts}; do
    echo Testing ${ext}...
+    create_timeline "${EXT_TIMELINE["main"]}" ${ext}
    EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
-    generate_id new_timeline_id
-    PARAMS=(
-        -sbf
-        -X POST
-        -H "Content-Type: application/json"
-        -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
-        "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
-    )
-    result=$(curl "${PARAMS[@]}")
-    echo $result | jq .
-    TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
-    COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
-    wait_for_ready
-    TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
-    if [ ${TID} != ${new_timeline_id} ]; then
-      echo Timeline mismatch
-      exit 1
-    fi
+    restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
+    docker compose exec neon-test-extensions psql -d contrib_regression -c "CREATE EXTENSION ${ext} CASCADE"
+    restart_compute "${NEW_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
    docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
    if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then
      docker  compose exec neon-test-extensions  cat /ext-src/${EXTDIR}/regression.diffs
--- a/docs/rfcs/041-rel-sparse-keyspace.md
+++ b/docs/rfcs/041-rel-sparse-keyspace.md
@@ -0,0 +1,201 @@
+# Sparse Keyspace for Relation Directories
+
+## Summary
+
+This is an RFC describing a new storage strategy for storing relation directories.
+
+## Motivation
+
+Postgres maintains a directory structure for databases and relations. In Neon, we store this information
+by serializing the directory data in a single key (see `pgdatadir_mapping.rs`).
+
+```rust
+// DbDir:
+// 00 00000000 00000000 00000000 00   00000000
+
+// RelDir:
+// 00 SPCNODE  DBNODE   00000000 00   00000001 (Postgres never uses relfilenode 0)
+```
+
+We have a dedicated structure on the ingestion path to serialize the relation directory into this single key.
+
+```rust
+#[derive(Debug, Serialize, Deserialize, Default)]
+pub(crate) struct RelDirectory {
+    // Set of relations that exist. (relfilenode, forknum)
+    //
+    // TODO: Store it as a btree or radix tree or something else that spans multiple
+    // key-value pairs, if you have a lot of relations
+    pub(crate) rels: HashSet<(Oid, u8)>,
+}
+```
+
+The current codebase has the following three access patterns for the relation directory.
+
+1. Check if a relation exists.
+2. List all relations.
+3. Create/drop a relation.
+
+For (1), we currently have to get the reldir key, deserialize it, and check whether the relation exists in the
+hash set. For (2), we get the reldir key and the hash set. For (3), we need first to get
+and deserialize the key, add the new relation record to the hash set, and then serialize it and write it back.
+
+If we have 100k relations in a database, we would have a 100k-large hash set. Then, every
+relation create or drop would have to deserialize and serialize this 100k-large hash set. This makes the
+relation create/drop process quadratic. When we check if a relation exists in the ingestion path,
+we would have to deserialize this super big 100k-large key before checking if a single relation exists.
+
+In this RFC, we will propose a new way to store the reldir data in the sparse keyspace and propose how
+to seamlessly migrate users to use the new keyspace.
+
+The PoC patch is implemented in [PR10316](https://github.com/neondatabase/neon/pull/10316).
+
+## Key Mapping
+
+We will use the recently introduced sparse keyspace to store actual data. Sparse keyspace was proposed in
+[038-aux-file-v2.md](038-aux-file-v2.md). The original reldir has one single value of `HashSet<(Oid, u8)>`
+for each of the databases (identified as `spcnode, dbnode`). We encode the `Oid` (`relnode, forknum`),
+into the key.
+
+```plain
+(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> deleted
+(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> exists
+```
+
+Assume all reldir data are stored in this new keyspace; the 3 reldir operations we mentioned before can be
+implemented as follows.
+
+1. Check if a relation exists: check if the key maps to "exists".
+2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix`. Extract relnode and forknum from the key.
+3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation. The delete tombstone will
+   be removed during image layer generation upon compaction.
+
+Note that "exists" and "deleted" will be encoded as a single byte as two variants of an enum.
+The mapping is implemented as `rel_tag_sparse_key` in the PoC patch.
+
+## Changes to Sparse Keyspace
+
+Previously, we only used sparse keyspaces for the aux files, which did not carry over when branching. The reldir
+information needs to be preserved from the parent branch to the child branch. Therefore, the read path needs
+to be updated accordingly to accommodate such "inherited sparse keys". This is done in
+[PR#10313](https://github.com/neondatabase/neon/pull/10313).
+
+## Coexistence of the Old and New Keyspaces
+
+Migrating to the new keyspace will be done gradually: when we flip a config item to enable the new reldir keyspace, the
+ingestion path will start to write to the new keyspace and the old reldir data will be kept in the old one. The read
+path needs to combine the data from both keyspaces.
+
+Theoretically, we could do a rewrite at the startup time that scans all relation directories and copies that data into the
+new keyspace. However, this could take a long time, especially if we have thousands of tenants doing the migration
+process simultaneously after the pageserver restarts. Therefore, we propose the coexistence strategy so that the
+migration can happen seamlessly and imposes no potential downtime for the user.
+
+With the coexistence assumption, the 3 reldir operations will be implemented as follows:
+
+1. Check if a relation exists
+   - Check the new keyspace if the key maps to any value. If it maps to "exists" or "deleted", directly
+    return it to the user.
+   - Otherwise, deserialize the old reldir key and get the result.
+2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix` and deserialize the old reldir key.
+   Combine them to obtain the final result.
+3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation into the new keyspace.
+   - We assume no overwrite of relations will happen (i.e., the user won't create a relation at the same Oid). This will be implemented as a runtime check.
+   - For relation creation, we add `sparse_reldir_tableX -> exists` to the keyspace.
+   - For relation drop, we first check if the relation is recorded in the old keyspace. If yes, we deserialize the old reldir key,
+    remove the relation, and then write it back. Otherwise, we put `sparse_reldir_tableX -> deleted` to the keyspace.
+   - The delete tombstone will be removed during image layer generation upon compaction.
+
+This process ensures that the transition will not introduce any downtime and all new updates are written to the new keyspace. The total
+amount of data in the storage would be `O(relations_modifications)` and we can guarantee `O(current_relations)` after compaction.
+There could be some relations that exist in the old reldir key for a long time. Refer to the "Full Migration" section on how to deal
+with them. Plus, for relation modifications, it will have `O(old_relations)` complexity until we do the full migration, which gives
+us `O(1)` complexity after fully opting in to the sparse keyspace.
+
+The process also implies that a relation will only exist either in the old reldir key or in the new sparse keyspace. It is not possible
+for a table to be recorded in the old reldir key while later having a delete tombstone for it in the sparse keyspace at any LSN.
+
+We will introduce a config item and an index_part record to record the current status of the migration process.
+
+- Config item `enable_reldir_v2`: controls whether the ingestion path writes the reldir info into the new keyspace.
+- `index_part.json` field `reldir_v2_status`: whether the timeline has written any key into the new reldir keyspace.
+
+If `enable_reldir_v2` is set to `true` and the timeline ingests the first key into the new reldir keyspace, it will update
+`index_part.json` to set `reldir_v2_status` to `Status::Migrating`. Even if `enable_reldir_v2` gets flipped back to
+`false` (i.e., when the pageserver restarts and such config isn't persisted), the read/write path will still
+read/write to the new keyspace to avoid data inconsistency. This also indicates that the migration is one-way only:
+once v2 is enabled, the user cannot go back to v1.
+
+## Next Steps
+
+### Full Migration
+
+This won't be implemented in the project's first phase but might be implemented in the future. Having both v1 and
+v2 existing in the system would force us to keep the code to deserialize the old reldir key forever. To entirely deprecate this
+code path, we must ensure the timeline has no old reldir data.
+
+We can trigger a special image layer generation process at the gc-horizon. The generated image layers will cover several keyspaces:
+the old reldir key in each of the databases, and the new reldir sparse keyspace. It will remove the old reldir key while
+copying them into the corresponding keys in the sparse keyspace in the resulting image. This special process happens in
+the background during compaction. For example, assume this special process is triggered at LSN 0/180. The `create_image_layers`
+process discovers the following keys at this LSN.
+
+```plain
+db1/reldir_key -> (table 1, table 2, table 3)
+...db1 rel keys
+db2/reldir_key -> (table 4, table 5, table 6)
+...db2 rel keys
+sparse_reldir_db2_table7 -> exists
+sparse_reldir_db1_table8 -> deleted
+```
+
+It will generate the following keys:
+
+```plain
+db1/reldir_key -> () # we have to keep the key because it is part of `collect_keyspace`.
+...db1 rel keys
+db2/reldir_key -> ()
+...db2 rel keys
+
+-- start image layer for the sparse keyspace at sparse_reldir_prefix at LSN 0/180
+sparse_reldir_db1_table1 -> exists
+sparse_reldir_db1_table2 -> exists
+sparse_reldir_db1_table3 -> exists
+sparse_reldir_db2_table4 -> exists
+sparse_reldir_db2_table5 -> exists
+sparse_reldir_db2_table6 -> exists
+sparse_reldir_db2_table7 -> exists
+-- end image layer for the sparse keyspace at sparse_reldir_prefix+1
+
+# The `sparse_reldir_db1_table8` key gets dropped as part of the image layer generation code for the sparse keyspace.
+# Note that the read path will stop reading if a key is not found in the image layer covering the key range so there
+# are no correctness issues.
+```
+
+We must verify that no pending modifications to the old reldir exist in the delta/image layers above the gc-horizon before
+we start this process (We can do a vectored read to get the full key history of the old reldir key and ensure there are no more images
+above the gc-horizon). Otherwise, it will violate the property that "a relation will only exist either in the old reldir key or
+in the new sparse keyspace". After we run this migration process, we can mark `reldir_v2_status` in the `index_part.json` to
+`Status::Migrated`, and the read path won't need to read from the old reldir anymore. Once the status is set to `Migrated`, we
+don't need to add the key into `collect_keyspace` and therefore all of them will be removed from all future image layers.
+
+The migration process can be proactively triggered across all attached/detached tenants to help us fully remove the old reldir code.
+
+### Consolidate Relation Size Keys
+
+We have relsize at the end of all relation nodes.
+
+```plain
+// RelSize:
+// 00 SPCNODE  DBNODE   RELNODE  FORK FFFFFFFF
+```
+
+This means that computing logical size requires us to do several single-key gets across the keyspace,
+potentially requiring downloading many layer files. We could consolidate them into a single
+keyspace, improving logical size calculation performance.
+
+### Migrate DBDir Keys
+
+We assume the number of databases created by the users will be small, and therefore, the current way
+of storing the database directory would be acceptable. In the future, we could also migrate DBDir keys into
+the sparse keyspace to support a large number of databases.
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "compute_api"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true

 [dependencies]
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`SELECT lfc_value AS lfc_chunk_size_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_chunk_size_pages';`