Prevent LFC overflow by skipping write when not unpinned pages are available

impr(timeline handles): add more tests covering reference cyle (#10446 )
The other test focus on the external interface usage while the tests added in this PR add some testing around HandleInner's lifecycle, ensuring we don't leak it once either connection gets dropped or per-timeline-state is shut down explicitly.
2026-01-22 12:52:55 +00:00 · 2025-01-20 18:19:06 +02:00 · 2025-01-20 14:37:24 +00:00 · 2025-01-20 12:50:44 +00:00 · 2025-01-20 09:47:23 +00:00 · 2025-01-20 09:20:31 +00:00
428 changed files with 22356 additions and 8743 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -3,6 +3,16 @@
 # by the RUSTDOCFLAGS env var in CI.
 rustdocflags = ["-Arustdoc::private_intra_doc_links"]

+# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
+# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
+# we've seen with libunwind-based profiling. See also:
+#
+# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
+# * <https://github.com/rust-lang/rust/pull/122646>
+#
+# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.
+rustflags = ["-Cforce-frame-pointers=yes"]
+
 [alias]
 build_testing = ["build", "--features", "testing"]
 neon = ["run", "--bin", "neon_local"]
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -23,3 +23,6 @@ config-variables:
  - BENCHMARK_INGEST_TARGET_PROJECTID
  - PGREGRESS_PG16_PROJECT_ID
  - PGREGRESS_PG17_PROJECT_ID
+  - SLACK_ON_CALL_QA_STAGING_STREAM
+  - DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
+  - SLACK_ON_CALL_STORAGE_STAGING_STREAM
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -7,10 +7,9 @@ inputs:
    type: boolean
    required: false
    default: false
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interract with S3'
+    required: true

 outputs:
  base-url:
@@ -84,12 +83,11 @@ runs:
        ALLURE_VERSION: 2.27.0
        ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777

-    - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
-      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
+      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -8,10 +8,9 @@ inputs:
  unique-key:
    description: 'string to distinguish different results in the same run'
    required: true
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interract with S3'
+    required: true

 runs:
  using: "composite"
@@ -36,12 +35,11 @@ runs:
      env:
        REPORT_DIR: ${{ inputs.report-dir }}

-    - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
-      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
+      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

    - name: Upload test results
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -15,10 +15,19 @@ inputs:
  prefix:
    description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interract with S3'
+    required: true

 runs:
  using: "composite"
  steps:
+    - uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-duration-seconds: 3600
+
    - name: Download artifact
      id: download-artifact
      shell: bash -euxo pipefail {0}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -48,10 +48,9 @@ inputs:
    description: 'benchmark durations JSON'
    required: false
    default: '{}'
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interract with S3'
+    required: true

 runs:
  using: "composite"
@@ -62,6 +61,7 @@ runs:
      with:
        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
        path: /tmp/neon
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

    - name: Download Neon binaries for the previous release
      if: inputs.build_type != 'remote'
@@ -70,6 +70,7 @@ runs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
        path: /tmp/neon-previous
        prefix: latest
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

    - name: Download compatibility snapshot
      if: inputs.build_type != 'remote'
@@ -81,6 +82,7 @@ runs:
        # The lack of compatibility snapshot (for example, for the new Postgres version)
        # shouldn't fail the whole job. Only relevant test should fail.
        skip-if-does-not-exist: true
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

    - name: Checkout
      if: inputs.needs_postgres_source == 'true'
@@ -218,17 +220,19 @@ runs:
        # The lack of compatibility snapshot shouldn't fail the job
        # (for example if we didn't run the test for non build-and-test workflow)
        skip-if-does-not-exist: true
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

-    - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
-      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
+      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
+
    - name: Upload test results
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-store
      with:
        report-dir: /tmp/test_output/allure/results
        unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -14,9 +14,11 @@ runs:
        name: coverage-data-artifact
        path: /tmp/coverage
        skip-if-does-not-exist: true # skip if there's no previous coverage to download
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

    - name: Upload coverage data
      uses: ./.github/actions/upload
      with:
        name: coverage-data-artifact
        path: /tmp/coverage
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -14,6 +14,10 @@ inputs:
  prefix:
    description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
+  aws-oicd-role-arn:
+    description: "the OIDC role arn for aws auth"
+    required: false
+    default: ""

 runs:
  using: "composite"
@@ -53,6 +57,13 @@ runs:

        echo 'SKIPPED=false' >> $GITHUB_OUTPUT

+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-duration-seconds: 3600
+
    - name: Upload artifact
      if: ${{ steps.prepare-artifact.outputs.SKIPPED == 'false' }}
      shell: bash -euxo pipefail {0}
--- a/.github/file-filters.yaml
+++ b/.github/file-filters.yaml
@@ -0,0 +1,12 @@
+rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
+
+v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
+v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
+v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']
+v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']
+
+rebuild_neon_extra:
+    - .github/workflows/neon_extra_builds.yml
+
+rebuild_macos:
+    - .github/workflows/build-macos.yml
--- a/.github/workflows/_benchmarking_preparation.yml
+++ b/.github/workflows/_benchmarking_preparation.yml
@@ -70,6 +70,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    # we create a table that has one row for each database that we want to restore with the status whether the restore is done
    - name: Create benchmark_restore_status table if it does not exist
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -31,12 +31,13 @@ defaults:
 env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}

 jobs:
  build-neon:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      contents: read
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
@@ -205,6 +206,13 @@ jobs:
            done
          fi

+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 18000 # 5 hours
+
      - name: Run rust tests
        env:
          NEXTEST_RETRIES: 3
@@ -256,6 +264,7 @@ jobs:
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
          path: /tmp/neon
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
@@ -265,6 +274,10 @@ jobs:
  regress-tests:
    # Don't run regression tests on debug arm64 builds
    if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      contents: read
+      statuses: write
    needs: [ build-neon ]
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
@@ -283,7 +296,7 @@ jobs:
          submodules: true

      - name: Pytest regression tests
-        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
+        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
        uses: ./.github/actions/run-python-test-set
        timeout-minutes: 60
        with:
@@ -295,6 +308,7 @@ jobs:
          real_s3_region: eu-central-1
          rerun_failed: true
          pg_version: ${{ matrix.pg_version }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
--- a/.github/workflows/_check-codestyle-rust.yml
+++ b/.github/workflows/_check-codestyle-rust.yml
@@ -0,0 +1,91 @@
+name: Check Codestyle Rust
+
+on:
+  workflow_call:
+    inputs:
+      build-tools-image:
+        description: "build-tools image"
+        required: true
+        type: string
+      archs:
+        description: "Json array of architectures to run on"
+        type: string
+
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+jobs:
+  check-codestyle-rust:
+    strategy:
+      matrix:
+        arch: ${{ fromJson(inputs.archs) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
+
+    container:
+      image: ${{ inputs.build-tools-image }}
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Cache cargo deps
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            !~/.cargo/registry/src
+            ~/.cargo/git
+            target
+          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
+
+      # Some of our rust modules use FFI and need those to be checked
+      - name: Get postgres headers
+        run: make postgres-headers -j$(nproc)
+
+      # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
+      # This will catch compiler & clippy warnings in all feature combinations.
+      # TODO: use cargo hack for build and test as well, but, that's quite expensive.
+      # NB: keep clippy args in sync with ./run_clippy.sh
+      #
+      # The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
+      # #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
+      # time just for that, so skip "clippy --release".
+      - run: |
+          CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
+          if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
+            echo "No clippy args found in .neon_clippy_args"
+            exit 1
+          fi
+          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
+      - name: Run cargo clippy (debug)
+        run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
+
+      - name: Check documentation generation
+        run: cargo doc --workspace --no-deps --document-private-items
+        env:
+          RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
+
+      # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
+      - name: Check formatting
+        if: ${{ !cancelled() }}
+        run: cargo fmt --all -- --check
+
+      # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
+      - name: Check rust dependencies
+        if: ${{ !cancelled() }}
+        run: |
+          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date
+          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack
+
+      # https://github.com/EmbarkStudios/cargo-deny
+      - name: Check rust licenses/bans/advisories/sources
+        if: ${{ !cancelled() }}
+        run: cargo deny check --hide-inclusion-graph
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -33,7 +33,7 @@ jobs:
          # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086
          SHELLCHECK_OPTS: --exclude=SC2046,SC2086
        with:
-          fail_on_error: true
+          fail_level: error
          filter_mode: nofilter
          level: error

--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -105,6 +105,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      id: create-neon-project
@@ -122,7 +123,7 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        # Set --sparse-ordering option of pytest-order plugin
        # to ensure tests are running in order of appears in the file.
        # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
@@ -152,7 +153,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -204,6 +205,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Run Logical Replication benchmarks
      uses: ./.github/actions/run-python-test-set
@@ -214,7 +216,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 5400
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -231,7 +233,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 5400
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -243,7 +245,7 @@ jobs:
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -306,6 +308,7 @@ jobs:
          "image": [ "'"$image_default"'" ],
          "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier",       "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-new",      "db_size": "10gb","runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
@@ -405,9 +408,10 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
-      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
+      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
@@ -426,7 +430,7 @@ jobs:
          neonvm-captest-sharding-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
            ;;
-          neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
+          neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
            ;;
          rds-aurora)
@@ -443,6 +447,26 @@ jobs:

        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

+    # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB 
+    # without (neonvm-captest-new)
+    # and with (neonvm-captest-new-many-tables) many relations in the database
+    - name: Create many relations before the run
+      if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        TEST_NUM_RELATIONS: 10000
+
    - name: Benchmark init
      uses: ./.github/actions/run-python-test-set
      with:
@@ -452,7 +476,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -467,7 +491,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -482,7 +506,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -500,7 +524,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -611,7 +635,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -626,7 +650,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -637,7 +661,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -708,6 +732,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -739,7 +764,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -753,7 +778,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -818,6 +843,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Get Connstring Secret Name
      run: |
@@ -856,7 +882,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_tpch
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -868,7 +894,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -926,6 +952,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -957,7 +984,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -968,7 +995,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -0,0 +1,241 @@
+name: Check neon with MacOS builds
+
+on:
+  workflow_call:
+    inputs:
+      pg_versions:
+        description: "Array of the pg versions to build for, for example: ['v14', 'v17']"
+        type: string
+        default: '[]'
+        required: false
+      rebuild_rust_code:
+        description: "Rebuild Rust code"
+        type: boolean
+        default: false
+        required: false
+      rebuild_everything:
+        description: "If true, rebuild for all versions"
+        type: boolean
+        default: false
+        required: false
+
+env:
+  RUST_BACKTRACE: 1
+  COPT: '-Werror'
+
+# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow
+# We should care about that as Github has limitations:
+# - You can connect up to four levels of workflows
+# - You can call a maximum of 20 unique reusable workflows from a single workflow file.
+# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations
+jobs:
+  build-pgxn:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    strategy:
+      matrix:
+        postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+
+      - name: Set pg ${{ matrix.postgres-version }} for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres ${{ matrix.postgres-version }} build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/${{ matrix.postgres-version }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-${{ matrix.postgres-version }}
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build Postgres ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
+
+      - name: Get postgres headers ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+  build-walproposer-lib:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+
+      - name: Set pg v17 for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres v17 build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@v4
+        with:
+          path: pg_install/build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-v17
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-v17
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build walproposer-lib (only for v17)
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run:
+          make walproposer-lib -j$(sysctl -n hw.ncpu)
+
+  cargo-build:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn, build-walproposer-lib]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Set pg v14 for caching
+        id: pg_rev_v14
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v15 for caching
+        id: pg_rev_v15
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v16 for caching
+        id: pg_rev_v16
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v17 for caching
+        id: pg_rev_v17
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres v14 build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v14
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v15 build
+        id: cache_pg_v15
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v15
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v16 build
+        id: cache_pg_v16
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v16
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v17 build
+        id: cache_pg_v17
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Cache cargo deps (only for v17)
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            !~/.cargo/registry/src
+            ~/.cargo/git
+            target
+          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@v4
+        with:
+          path: pg_install/build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Install build dependencies
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Run cargo build (only for v17)
+        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
+
+      - name: Check that no warnings are produced (only for v17)
+        run: ./run_clippy.sh
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -21,8 +21,6 @@ concurrency:
 env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}

@@ -166,77 +164,11 @@ jobs:

  check-codestyle-rust:
    needs: [ check-permissions, build-build-tools-image ]
-    strategy:
-      matrix:
-        arch: [ x64, arm64 ]
-    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
-
-    container:
-      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
-      credentials:
-        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
-      options: --init
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-
-      - name: Cache cargo deps
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cargo/registry
-            !~/.cargo/registry/src
-            ~/.cargo/git
-            target
-          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
-
-      # Some of our rust modules use FFI and need those to be checked
-      - name: Get postgres headers
-        run: make postgres-headers -j$(nproc)
-
-      # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
-      # This will catch compiler & clippy warnings in all feature combinations.
-      # TODO: use cargo hack for build and test as well, but, that's quite expensive.
-      # NB: keep clippy args in sync with ./run_clippy.sh
-      #
-      # The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
-      # #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
-      # time just for that, so skip "clippy --release".
-      - run: |
-          CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
-          if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
-            echo "No clippy args found in .neon_clippy_args"
-            exit 1
-          fi
-          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
-      - name: Run cargo clippy (debug)
-        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
-
-      - name: Check documentation generation
-        run: cargo doc --workspace --no-deps --document-private-items
-        env:
-            RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
-
-      # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
-      - name: Check formatting
-        if: ${{ !cancelled() }}
-        run: cargo fmt --all -- --check
-
-      # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
-      - name: Check rust dependencies
-        if: ${{ !cancelled() }}
-        run: |
-          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date
-          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack
-
-      # https://github.com/EmbarkStudios/cargo-deny
-      - name: Check rust licenses/bans/advisories/sources
-        if: ${{ !cancelled() }}
-        run: cargo deny check --hide-inclusion-graph
+    uses: ./.github/workflows/_check-codestyle-rust.yml
+    with:
+      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
+      archs: '["x64", "arm64"]'
+    secrets: inherit

  build-and-test-locally:
    needs: [ tag, build-build-tools-image ]
@@ -255,15 +187,15 @@ jobs:
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
      build-tag: ${{ needs.tag.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
-      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
-      # run without LFC on v17 release only
+      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
+      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
      test-cfg: |
-        ${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
-                                                {"pg_version":"v15", "lfc_state": "without-lfc"},
-                                                {"pg_version":"v16", "lfc_state": "without-lfc"},
-                                                {"pg_version":"v17", "lfc_state": "without-lfc"},
-                                                {"pg_version":"v17", "lfc_state": "with-lfc"}]'
-                                           || '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
+        ${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "with-lfc"},
+                                                {"pg_version":"v15", "lfc_state": "with-lfc"},
+                                                {"pg_version":"v16", "lfc_state": "with-lfc"},
+                                                {"pg_version":"v17", "lfc_state": "with-lfc"},
+                                                {"pg_version":"v17", "lfc_state": "without-lfc"}]'
+                                           || '[{"pg_version":"v17", "lfc_state": "without-lfc" }]' }}
    secrets: inherit

  # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
@@ -305,6 +237,11 @@ jobs:
  benchmarks:
    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+      pull-requests: write
    runs-on: [ self-hosted, small ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
@@ -333,6 +270,7 @@ jobs:
          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
          benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
          pg_version: v16
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -342,24 +280,31 @@ jobs:
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

-  report-benchmarks-failures:
+  report-benchmarks-results-to-slack:
    needs: [ benchmarks, create-test-report ]
-    if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
+    if: github.ref_name == 'main' && !cancelled() && contains(fromJSON('["success", "failure"]'), needs.benchmarks.result)
    runs-on: ubuntu-22.04

    steps:
-    - uses: slackapi/slack-github-action@v1
+    - uses: slackapi/slack-github-action@v2
      with:
-        channel-id: C060CNA47S9 # on-call-staging-storage-stream
-        slack-message: |
-          Benchmarks failed on main <${{ github.event.head_commit.url }}|${{ github.sha }}>
-          <${{ needs.create-test-report.outputs.report-url }}|Allure report>
-      env:
-        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+        method: chat.postMessage
+        token: ${{ secrets.SLACK_BOT_TOKEN }}
+        payload: |
+          channel: "${{ vars.SLACK_ON_CALL_STORAGE_STAGING_STREAM }}"
+          text: |
+            Benchmarks on main: *${{ needs.benchmarks.result }}*
+            - <${{ needs.create-test-report.outputs.report-url }}|Allure report>
+            - <${{ github.event.head_commit.url }}|${{ github.sha }}>

  create-test-report:
    needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]
    if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+      pull-requests: write
    outputs:
      report-url: ${{ steps.create-allure-report.outputs.report-url }}

@@ -380,6 +325,7 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -411,6 +357,10 @@ jobs:
  coverage-report:
    if: ${{ !startsWith(github.ref_name, 'release') }}
    needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
    runs-on: [ self-hosted, small ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
@@ -437,12 +387,14 @@ jobs:
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
          path: /tmp/neon
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Get coverage artifact
        uses: ./.github/actions/download
        with:
          name: coverage-data-artifact
          path: /tmp/coverage
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
@@ -517,7 +469,7 @@ jobs:

  trigger-e2e-tests:
    if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
-    needs: [ check-permissions, promote-images, tag ]
+    needs: [ check-permissions, promote-images-dev, tag ]
    uses: ./.github/workflows/trigger-e2e-tests.yml
    secrets: inherit

@@ -573,6 +525,10 @@ jobs:
  neon-image:
    needs: [ neon-image-arch, tag ]
    runs-on: ubuntu-22.04
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read

    steps:
      - uses: docker/login-action@v3
@@ -587,11 +543,15 @@ jobs:
                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64

-      - uses: docker/login-action@v3
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
-          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2

      - name: Push multi-arch image to ECR
        run: |
@@ -600,6 +560,10 @@ jobs:

  compute-node-image-arch:
    needs: [ check-permissions, build-build-tools-image, tag ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read
    strategy:
      fail-fast: false
      matrix:
@@ -640,11 +604,15 @@ jobs:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - uses: docker/login-action@v3
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
-          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2

      - uses: docker/login-action@v3
        with:
@@ -691,32 +659,12 @@ jobs:
          tags: |
            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}

-      - name: Build compute-tools image
-        # compute-tools are Postgres independent, so build it only once
-        # We pick 16, because that builds on debian 11 with older glibc (and is
-        # thus compatible with newer glibc), rather than 17 on Debian 12, as
-        # that isn't guaranteed to be compatible with Debian 11
-        if: matrix.version.pg == 'v16'
-        uses: docker/build-push-action@v6
-        with:
-          target: compute-tools-image
-          context: .
-          build-args: |
-            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
-            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
-            DEBIAN_VERSION=${{ matrix.version.debian }}
-          provenance: false
-          push: true
-          pull: true
-          file: compute/compute-node.Dockerfile
-          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
-          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
-          tags: |
-            neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
-
  compute-node-image:
    needs: [ compute-node-image-arch, tag ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read
    runs-on: ubuntu-22.04

    strategy:
@@ -753,31 +701,21 @@ jobs:
                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

-      - name: Create multi-arch compute-tools image
-        if: matrix.version.pg == 'v16'
-        run: |
-          docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
-                                             neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
-                                             neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
-
-      - uses: docker/login-action@v3
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
-          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2

      - name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR
        run: |
          docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
                                                                                neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}

-      - name: Push multi-arch compute-tools image to ECR
-        if: matrix.version.pg == 'v16'
-        run: |
-          docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
-                                                                                neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
-
  vm-compute-node-image:
    needs: [ check-permissions, tag, compute-node-image ]
    runs-on: [ self-hosted, large ]
@@ -795,7 +733,7 @@ jobs:
          - pg: v17
            debian: bookworm
    env:
-      VM_BUILDER_VERSION: v0.35.0
+      VM_BUILDER_VERSION: v0.37.1

    steps:
      - uses: actions/checkout@v4
@@ -885,12 +823,14 @@ jobs:
          docker compose -f ./docker-compose/docker-compose.yml logs || 0
          docker compose -f ./docker-compose/docker-compose.yml down

-  promote-images:
-    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
+  promote-images-dev:
+    needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
    runs-on: ubuntu-22.04

    permissions:
-      id-token: write # for `aws-actions/configure-aws-credentials`
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read

    env:
      VERSIONS: v14 v15 v16 v17
@@ -901,12 +841,15 @@ jobs:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - name: Login to dev ECR
-        uses: docker/login-action@v3
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
-          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2

      - name: Copy vm-compute-node images to ECR
        run: |
@@ -915,6 +858,35 @@ jobs:
                                               neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
          done

+  promote-images-prod:
+    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
+    runs-on: ubuntu-22.04
+    if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
+
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read
+
+    env:
+      VERSIONS: v14 v15 v16 v17
+
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+
      - name: Add latest tag to images
        if: github.ref_name == 'main'
        run: |
@@ -922,9 +894,6 @@ jobs:
            docker buildx imagetools create -t $repo/neon:latest \
                                               $repo/neon:${{ needs.tag.outputs.build-tag }}

-            docker buildx imagetools create -t $repo/compute-tools:latest \
-                                               $repo/compute-tools:${{ needs.tag.outputs.build-tag }}
-
            for version in ${VERSIONS}; do
              docker buildx imagetools create -t $repo/compute-node-${version}:latest \
                                                 $repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }}
@@ -953,31 +922,31 @@ jobs:
      - name: Copy all images to prod ECR
        if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
        run: |
-          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
+          for image in neon {vm-,}compute-node-{v14,v15,v16,v17}; do
            docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
                                               369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
          done

  push-to-acr-dev:
    if: github.ref_name == 'main'
-    needs: [ tag, promote-images ]
+    needs: [ tag, promote-images-dev ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+      images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
      registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}

  push-to-acr-prod:
    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ tag, promote-images ]
+    needs: [ tag, promote-images-prod ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+      images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
      registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -985,6 +954,11 @@ jobs:
  trigger-custom-extensions-build-and-wait:
    needs: [ check-permissions, tag ]
    runs-on: ubuntu-22.04
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+      pull-requests: write
    steps:
      - name: Set PR's status to pending and request a remote CI test
        run: |
@@ -1057,15 +1031,82 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
+    needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
-
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
    runs-on: [ self-hosted, small ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    steps:
      - uses: actions/checkout@v4

+      - name: Create git tag and GitHub release
+        if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
+        uses: actions/github-script@v7
+        with:
+          retries: 5
+          script: |
+            const tag = "${{ needs.tag.outputs.build-tag }}";
+
+            try {
+              const existingRef = await github.rest.git.getRef({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                ref: `tags/${tag}`,
+              });
+
+              if (existingRef.data.object.sha !== context.sha) {
+                throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
+              }
+
+              console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`);
+            } catch (error) {
+              if (error.status !== 404) {
+                throw error;
+              }
+
+              console.log(`Tag ${tag} does not exist. Creating it...`);
+              await github.rest.git.createRef({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                ref: `refs/tags/${tag}`,
+                sha: context.sha,
+              });
+              console.log(`Tag ${tag} created successfully.`);
+            }
+
+            // TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok
+            if (context.ref !== 'refs/heads/release') {
+              console.log(`GitHub release skipped for ${context.ref}.`);
+              return;
+            }
+
+            try {
+              const existingRelease = await github.rest.repos.getReleaseByTag({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                tag: tag,
+              });
+
+              console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`);
+            } catch (error) {
+              if (error.status !== 404) {
+                throw error;
+              }
+
+              console.log(`Release for tag ${tag} does not exist. Creating it...`);
+              await github.rest.repos.createRelease({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                tag_name: tag,
+                generate_release_notes: true,
+              });
+              console.log(`Release for tag ${tag} created successfully.`);
+            }
+
      - name: Trigger deploy workflow
        env:
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
@@ -1115,38 +1156,13 @@ jobs:
            exit 1
          fi

-      - name: Create git tag
-        if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-        uses: actions/github-script@v7
-        with:
-          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
-          retries: 5
-          script: |
-            await github.rest.git.createRef({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
-              sha: context.sha,
-            })
-
-      # TODO: check how GitHub releases looks for proxy releases and enable it if it's ok
-      - name: Create GitHub release
-        if: github.ref_name == 'release'
-        uses: actions/github-script@v7
-        with:
-          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
-          retries: 5
-          script: |
-            await github.rest.repos.createRelease({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              tag_name: "${{ needs.tag.outputs.build-tag }}",
-              generate_release_notes: true,
-            })
-
  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
  promote-compatibility-data:
    needs: [ deploy ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read
    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
    if: github.ref_name == 'release' && !failure() && !cancelled()

@@ -1183,6 +1199,12 @@ jobs:
          echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT}
          echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT}

+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
      - name: Promote compatibility snapshot and Neon artifact
        env:
          BUCKET: neon-github-public-dev
@@ -1230,7 +1252,7 @@ jobs:
          done

  pin-build-tools-image:
-    needs: [ build-build-tools-image, promote-images, build-and-test-locally ]
+    needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ]
    if: github.ref_name == 'main'
    uses: ./.github/workflows/pin-build-tools-image.yml
    with:
@@ -1253,7 +1275,7 @@ jobs:
      - build-and-test-locally
      - check-codestyle-python
      - check-codestyle-rust
-      - promote-images
+      - promote-images-dev
      - test-images
      - trigger-custom-extensions-build-and-wait
    runs-on: ubuntu-22.04
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -19,14 +19,17 @@ concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: true

+permissions:
+  id-token: write # aws-actions/configure-aws-credentials
+  statuses: write
+  contents: write
+
 jobs:
  regress:
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
    strategy:
      fail-fast: false
      matrix:
@@ -78,6 +81,7 @@ jobs:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Create a new branch
        id: create-branch
@@ -93,10 +97,12 @@ jobs:
          test_selection: cloud_regress
          pg_version: ${{matrix.pg-version}}
          extra_params: -m remote_cluster
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}

      - name: Delete branch
+        if: always()
        uses: ./.github/actions/neon-branch-delete
        with:
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
@@ -107,12 +113,14 @@ jobs:
        id: create-allure-report
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
+        with:
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Post to a Slack channel
        if: ${{ github.event.schedule && failure() }}
        uses: slackapi/slack-github-action@v1
        with:
-          channel-id: "C033QLM5P7D" # on-call-staging-stream
+          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
          slack-message: |
            Periodic pg_regress on staging: ${{ job.status }}
            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
--- a/.github/workflows/ingest_benchmark.yml
+++ b/.github/workflows/ingest_benchmark.yml
@@ -13,7 +13,7 @@ on:
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:   '0 9 * * *' # run once a day, timezone is utc
  workflow_dispatch: # adds ability to run this manually
-    
+
 defaults:
  run:
    shell: bash -euxo pipefail {0}
@@ -28,7 +28,7 @@ jobs:
    strategy:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
-        target_project: [new_empty_project, large_existing_project]  
+        target_project: [new_empty_project, large_existing_project]
    permissions:
      contents: write
      statuses: write
@@ -56,7 +56,7 @@ jobs:
      with:
        aws-region: eu-central-1
        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role 
+        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role

    - name: Download Neon artifact
      uses: ./.github/actions/download
@@ -64,6 +64,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      if: ${{ matrix.target_project == 'new_empty_project' }}
@@ -94,7 +95,7 @@ jobs:
        project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}

-    - name: Initialize Neon project 
+    - name: Initialize Neon project
      if: ${{ matrix.target_project == 'large_existing_project' }}
      env:
          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
@@ -122,7 +123,7 @@ jobs:
        ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
        echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV

-    - name: Invoke pgcopydb  
+    - name: Invoke pgcopydb
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: remote
@@ -131,7 +132,7 @@ jobs:
        extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
        pg_version: v16
        save_perf_report: true
-        aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
        TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
@@ -143,7 +144,7 @@ jobs:
      run: |
        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
        ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
-      
+
    - name: Delete Neon Project
      if: ${{ always() && matrix.target_project == 'new_empty_project' }}
      uses: ./.github/actions/neon-project-delete
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -31,19 +31,15 @@ jobs:
    uses: ./.github/workflows/build-build-tools-image.yml
    secrets: inherit

-  check-macos-build:
-    needs: [ check-permissions ]
-    if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
-    timeout-minutes: 90
-    runs-on: macos-15
-
-    env:
-      # Use release build only, to have less debug info around
-      # Hence keeping target/ (and general cache size) smaller
-      BUILD_TYPE: release
+  files-changed:
+    name: Detect what files changed
+    runs-on: ubuntu-22.04
+    timeout-minutes: 3
+    outputs:
+      v17: ${{ steps.files_changed.outputs.v17 }}
+      postgres_changes: ${{ steps.postgres_changes.outputs.changes }}
+      rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }}
+      rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }}

    steps:
      - name: Checkout
@@ -51,102 +47,45 @@ jobs:
        with:
          submodules: true

-      - name: Install macOS postgres dependencies
-        run: brew install flex bison openssl protobuf icu4c
-
-      - name: Set pg 14 revision for caching
-        id: pg_v14_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
-
-      - name: Set pg 15 revision for caching
-        id: pg_v15_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
-
-      - name: Set pg 16 revision for caching
-        id: pg_v16_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-
-      - name: Set pg 17 revision for caching
-        id: pg_v17_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
-
-      - name: Cache postgres v14 build
-        id: cache_pg_14
-        uses: actions/cache@v4
+      - name: Check for Postgres changes
+        uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242  #v3
+        id: files_changed
        with:
-          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+          token: ${{ github.token }}
+          filters: .github/file-filters.yaml
+          base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}
+          ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }}

-      - name: Cache postgres v15 build
-        id: cache_pg_15
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v16 build
-        id: cache_pg_16
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v16
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v17 build
-        id: cache_pg_17
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Set extra env for macOS
+      - name: Filter out only v-string for build matrix
+        id: postgres_changes
        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+          v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.chnages }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
+          echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"

-      - name: Cache cargo deps
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cargo/registry
-            !~/.cargo/registry/src
-            ~/.cargo/git
-            target
-          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
-
-      - name: Build postgres v14
-        if: steps.cache_pg_14.outputs.cache-hit != 'true'
-        run: make postgres-v14 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v15
-        if: steps.cache_pg_15.outputs.cache-hit != 'true'
-        run: make postgres-v15 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v16
-        if: steps.cache_pg_16.outputs.cache-hit != 'true'
-        run: make postgres-v16 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v17
-        if: steps.cache_pg_17.outputs.cache-hit != 'true'
-        run: make postgres-v17 -j$(sysctl -n hw.ncpu)
-
-      - name: Build neon extensions
-        run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
-
-      - name: Build walproposer-lib
-        run: make walproposer-lib -j$(sysctl -n hw.ncpu)
-
-      - name: Run cargo build
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release
-
-      - name: Check that no warnings are produced
-        run: ./run_clippy.sh
-
-  gather-rust-build-stats:
-    needs: [ check-permissions, build-build-tools-image ]
+  check-macos-build:
+    needs: [ check-permissions, files-changed ]
    if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
+    uses: ./.github/workflows/build-macos.yml
+    with:
+      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
+      rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
+      rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}
+
+  gather-rust-build-stats:
+    needs: [ check-permissions, build-build-tools-image, files-changed ]
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+    if: |
+      (needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
    runs-on: [ self-hosted, large ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
@@ -177,13 +116,18 @@ jobs:
      - name: Produce the build stats
        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)

+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
      - name: Upload the build stats
        id: upload-stats
        env:
          BUCKET: neon-github-public-dev
          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
        run: |
          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
          aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -27,6 +27,11 @@ concurrency:

 jobs:
  trigger_bench_on_ec2_machine_in_eu_central_1:
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+      pull-requests: write
    runs-on: [ self-hosted, small ]
    container:
      image: neondatabase/build-tools:pinned-bookworm
@@ -38,8 +43,6 @@ jobs:
    env:
      API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
      RUN_ID: ${{ github.run_id }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
      AWS_DEFAULT_REGION : "eu-central-1"
      AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
    steps:
@@ -50,6 +53,13 @@ jobs:
    - name: Show my own (github runner) external IP address - usefull for IP allowlisting
      run: curl https://ifconfig.me

+    - name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+        role-duration-seconds: 3600
+
    - name: Start EC2 instance and wait for the instance to boot up
      run: |
        aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
@@ -124,11 +134,10 @@ jobs:
        cat "test_log_${GITHUB_RUN_ID}"

    - name: Create Allure report
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
+      with:
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -148,6 +157,14 @@ jobs:
        -H "Authorization: Bearer $API_KEY" \
        -d ''

+    - name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+        role-duration-seconds: 3600
+
    - name: Stop EC2 instance and wait for the instance to be stopped
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -25,11 +25,13 @@ defaults:
  run:
    shell: bash -euxo pipefail {0}

+permissions:
+  id-token: write # aws-actions/configure-aws-credentials
+  statuses: write # require for posting a status update
+
 env:
  DEFAULT_PG_VERSION: 16
  PLATFORM: neon-captest-new
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
  AWS_DEFAULT_REGION: eu-central-1

 jobs:
@@ -94,6 +96,7 @@ jobs:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Create Neon Project
        id: create-neon-project
@@ -110,6 +113,7 @@ jobs:
          run_in_parallel: false
          extra_params: -m remote_cluster
          pg_version: ${{ env.DEFAULT_PG_VERSION }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -126,6 +130,7 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -159,6 +164,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      id: create-neon-project
@@ -175,6 +181,7 @@ jobs:
        run_in_parallel: false
        extra_params: -m remote_cluster
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -191,6 +198,7 @@ jobs:
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -67,7 +67,7 @@ jobs:
    runs-on: ubuntu-22.04

    permissions:
-      id-token: write # for `azure/login`
+      id-token: write # for `azure/login` and aws auth

    steps:
      - uses: docker/login-action@v3
@@ -75,11 +75,15 @@ jobs:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - uses: docker/login-action@v3
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
-          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2

      - name: Azure login
        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
--- a/.github/workflows/pre-merge-checks.yml
+++ b/.github/workflows/pre-merge-checks.yml
@@ -1,6 +1,12 @@
 name: Pre-merge checks

 on:
+  pull_request:
+    paths:
+      - .github/workflows/_check-codestyle-python.yml
+      - .github/workflows/_check-codestyle-rust.yml
+      - .github/workflows/build-build-tools-image.yml
+      - .github/workflows/pre-merge-checks.yml
  merge_group:
    branches:
      - main
@@ -17,8 +23,10 @@ jobs:
    runs-on: ubuntu-22.04
    outputs:
      python-changed: ${{ steps.python-src.outputs.any_changed }}
+      rust-changed: ${{ steps.rust-src.outputs.any_changed }}
    steps:
      - uses: actions/checkout@v4
+
      - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
        id: python-src
        with:
@@ -30,11 +38,25 @@ jobs:
            poetry.lock
            pyproject.toml

+      - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
+        id: rust-src
+        with:
+          files: |
+            .github/workflows/_check-codestyle-rust.yml
+            .github/workflows/build-build-tools-image.yml
+            .github/workflows/pre-merge-checks.yml
+            **/**.rs
+            **/Cargo.toml
+            Cargo.toml
+            Cargo.lock
+
      - name: PRINT ALL CHANGED FILES FOR DEBUG PURPOSES
        env:
          PYTHON_CHANGED_FILES: ${{ steps.python-src.outputs.all_changed_files }}
+          RUST_CHANGED_FILES: ${{ steps.rust-src.outputs.all_changed_files }}
        run: |
          echo "${PYTHON_CHANGED_FILES}"
+          echo "${RUST_CHANGED_FILES}"

  build-build-tools-image:
    if: needs.get-changed-files.outputs.python-changed == 'true'
@@ -55,6 +77,16 @@ jobs:
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
    secrets: inherit

+  check-codestyle-rust:
+    if: needs.get-changed-files.outputs.rust-changed == 'true'
+    needs: [ get-changed-files, build-build-tools-image ]
+    uses: ./.github/workflows/_check-codestyle-rust.yml
+    with:
+      # `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
+      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
+      archs: '["x64"]'
+    secrets: inherit
+
  # To get items from the merge queue merged into main we need to satisfy "Status checks that are required".
  # Currently we require 2 jobs (checks with exact name):
  # - conclusion
@@ -63,9 +95,11 @@ jobs:
    if: always()
    permissions:
      statuses: write # for `github.repos.createCommitStatus(...)`
+      contents: write
    needs:
      - get-changed-files
      - check-codestyle-python
+      - check-codestyle-rust
    runs-on: ubuntu-22.04
    steps:
      - name: Create fake `neon-cloud-e2e` check
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,7 +3,7 @@ name: Create Release Branch
 on:
  schedule:
    # It should be kept in sync with if-condition in jobs
-    - cron: '0 6 * * MON' # Storage release
+    - cron: '0 6 * * FRI' # Storage release
    - cron: '0 6 * * THU' # Proxy release
  workflow_dispatch:
    inputs:
@@ -29,7 +29,7 @@ defaults:

 jobs:
  create-storage-release-branch:
-    if: ${{ github.event.schedule == '0 6 * * MON' || inputs.create-storage-release-branch }}
+    if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }}

    permissions:
      contents: write
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -68,7 +68,7 @@ jobs:
      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      TAG: ${{ needs.tag.outputs.build-tag }}
    steps:
-      - name: Wait for `promote-images` job to finish
+      - name: Wait for `promote-images-dev` job to finish
        # It's important to have a timeout here, the script in the step can run infinitely
        timeout-minutes: 60
        run: |
@@ -79,17 +79,17 @@ jobs:
          # For PRs we use the run id as the tag
          BUILD_AND_TEST_RUN_ID=${TAG}
          while true; do
-            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
+            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion')
            case "$conclusion" in
              success)
                break
                ;;
              failure | cancelled | skipped)
-                echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
+                echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..."
                exit 1
                ;;
              *)
-                echo "The 'promote-images' hasn't succeed yet. Waiting..."
+                echo "The 'promote-images-dev' hasn't succeed yet. Waiting..."
                sleep 60
                ;;
            esac
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10,9 +10,9 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5"

 [[package]]
 name = "addr2line"
-version = "0.21.0"
+version = "0.24.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
 dependencies = [
 "gimli",
 ]
@@ -23,6 +23,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

+[[package]]
+name = "adler2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+
 [[package]]
 name = "ahash"
 version = "0.8.11"
@@ -712,13 +718,13 @@ dependencies = [

 [[package]]
 name = "axum"
-version = "0.7.5"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
 dependencies = [
 "async-trait",
 "axum-core",
- "base64 0.21.1",
+ "base64 0.22.1",
 "bytes",
 "futures-util",
 "http 1.1.0",
@@ -740,8 +746,8 @@ dependencies = [
 "sha1",
 "sync_wrapper 1.0.1",
 "tokio",
- "tokio-tungstenite",
- "tower",
+ "tokio-tungstenite 0.24.0",
+ "tower 0.5.2",
 "tower-layer",
 "tower-service",
 "tracing",
@@ -871,17 +877,17 @@ dependencies = [

 [[package]]
 name = "backtrace"
-version = "0.3.69"
+version = "0.3.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
+checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
 dependencies = [
 "addr2line",
- "cc",
 "cfg-if",
 "libc",
- "miniz_oxide",
+ "miniz_oxide 0.8.0",
 "object",
 "rustc-demangle",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -1127,7 +1133,7 @@ dependencies = [
 "num-traits",
 "serde",
 "wasm-bindgen",
- "windows-targets 0.52.4",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -1261,6 +1267,7 @@ dependencies = [
 "aws-config",
 "aws-sdk-kms",
 "aws-sdk-s3",
+ "axum",
 "base64 0.13.1",
 "bytes",
 "camino",
@@ -1268,9 +1275,10 @@ dependencies = [
 "chrono",
 "clap",
 "compute_api",
+ "fail",
 "flate2",
 "futures",
- "hyper 0.14.30",
+ "http 1.1.0",
 "metrics",
 "nix 0.27.1",
 "notify",
@@ -1296,12 +1304,15 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
+ "tower 0.5.2",
+ "tower-http",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
 "tracing-utils",
 "url",
 "utils",
+ "uuid",
 "vm_monitor",
 "workspace_hack",
 "zstd",
@@ -1595,6 +1606,32 @@ dependencies = [
 "typenum",
 ]

+[[package]]
+name = "curve25519-dalek"
+version = "4.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "curve25519-dalek-derive",
+ "digest",
+ "fiat-crypto",
+ "rustc_version",
+ "subtle",
+]
+
+[[package]]
+name = "curve25519-dalek-derive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.90",
+]
+
 [[package]]
 name = "darling"
 version = "0.20.1"
@@ -1643,6 +1680,20 @@ dependencies = [
 "parking_lot_core 0.9.8",
 ]

+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core 0.9.8",
+]
+
 [[package]]
 name = "data-encoding"
 version = "2.4.0"
@@ -1726,9 +1777,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"

 [[package]]
 name = "diesel"
-version = "2.2.3"
+version = "2.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71"
+checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
 dependencies = [
 "bitflags 2.4.1",
 "byteorder",
@@ -1851,6 +1902,28 @@ dependencies = [
 "spki 0.7.3",
 ]

+[[package]]
+name = "ed25519"
+version = "2.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
+dependencies = [
+ "signature 2.2.0",
+]
+
+[[package]]
+name = "ed25519-dalek"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871"
+dependencies = [
+ "curve25519-dalek",
+ "ed25519",
+ "rand_core 0.6.4",
+ "sha2",
+ "subtle",
+]
+
 [[package]]
 name = "either"
 version = "1.8.1"
@@ -1942,6 +2015,15 @@ dependencies = [
 "syn 2.0.90",
 ]

+[[package]]
+name = "env_filter"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
+dependencies = [
+ "log",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.10.2"
@@ -1955,6 +2037,16 @@ dependencies = [
 "termcolor",
 ]

+[[package]]
+name = "env_logger"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
+dependencies = [
+ "env_filter",
+ "log",
+]
+
 [[package]]
 name = "equator"
 version = "0.2.2"
@@ -2070,6 +2162,12 @@ dependencies = [
 "subtle",
 ]

+[[package]]
+name = "fiat-crypto"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
+
 [[package]]
 name = "filetime"
 version = "0.2.22"
@@ -2107,7 +2205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
 dependencies = [
 "crc32fast",
- "miniz_oxide",
+ "miniz_oxide 0.7.1",
 ]

 [[package]]
@@ -2308,9 +2406,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.28.1"
+version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"

 [[package]]
 name = "git-version"
@@ -2713,7 +2811,7 @@ dependencies = [
 "pin-project-lite",
 "socket2",
 "tokio",
- "tower",
+ "tower 0.4.13",
 "tower-service",
 "tracing",
 ]
@@ -2938,6 +3036,28 @@ dependencies = [
 "str_stack",
 ]

+[[package]]
+name = "inferno"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75a5d75fee4d36809e6b021e4b96b686e763d365ffdb03af2bd00786353f84fe"
+dependencies = [
+ "ahash",
+ "clap",
+ "crossbeam-channel",
+ "crossbeam-utils",
+ "dashmap 6.1.0",
+ "env_logger 0.11.2",
+ "indexmap 2.0.1",
+ "itoa",
+ "log",
+ "num-format",
+ "once_cell",
+ "quick-xml 0.37.1",
+ "rgb",
+ "str_stack",
+]
+
 [[package]]
 name = "inotify"
 version = "0.9.6"
@@ -3145,7 +3265,7 @@ version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2"
 dependencies = [
- "dashmap",
+ "dashmap 5.5.0",
 "hashbrown 0.13.2",
 ]

@@ -3253,9 +3373,9 @@ checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"

 [[package]]
 name = "matchit"
-version = "0.8.2"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"

 [[package]]
 name = "md-5"
@@ -3404,6 +3524,15 @@ dependencies = [
 "adler",
 ]

+[[package]]
+name = "miniz_oxide"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
+dependencies = [
+ "adler2",
+]
+
 [[package]]
 name = "mio"
 version = "0.8.11"
@@ -3638,9 +3767,9 @@ dependencies = [

 [[package]]
 name = "object"
-version = "0.32.2"
+version = "0.36.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
 dependencies = [
 "memchr",
 ]
@@ -3674,23 +3803,23 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

 [[package]]
 name = "opentelemetry"
-version = "0.26.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17"
+checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7"
 dependencies = [
 "futures-core",
 "futures-sink",
 "js-sys",
- "once_cell",
 "pin-project-lite",
 "thiserror",
+ "tracing",
 ]

 [[package]]
 name = "opentelemetry-http"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6351496aeaa49d7c267fb480678d85d1cd30c5edb20b497c48c56f62a8c14b99"
+checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80"
 dependencies = [
 "async-trait",
 "bytes",
@@ -3701,9 +3830,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry-otlp"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd"
+checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76"
 dependencies = [
 "async-trait",
 "futures-core",
@@ -3719,9 +3848,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry-proto"
-version = "0.26.1"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9d3968ce3aefdcca5c27e3c4ea4391b37547726a70893aab52d3de95d5f8b34"
+checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
 dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
@@ -3731,22 +3860,21 @@ dependencies = [

 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db945c1eaea8ac6a9677185357480d215bb6999faa9f691d0c4d4d641eab7a09"
+checksum = "bc1b6902ff63b32ef6c489e8048c5e253e2e4a803ea3ea7e783914536eb15c52"

 [[package]]
 name = "opentelemetry_sdk"
-version = "0.26.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3"
+checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8"
 dependencies = [
 "async-trait",
 "futures-channel",
 "futures-executor",
 "futures-util",
 "glob",
- "once_cell",
 "opentelemetry",
 "percent-encoding",
 "rand 0.8.5",
@@ -3754,6 +3882,7 @@ dependencies = [
 "thiserror",
 "tokio",
 "tokio-stream",
+ "tracing",
 ]

 [[package]]
@@ -3853,9 +3982,11 @@ name = "pagectl"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "bincode",
 "camino",
 "clap",
 "humantime",
+ "itertools 0.10.5",
 "pageserver",
 "pageserver_api",
 "postgres_ffi",
@@ -3877,6 +4008,7 @@ dependencies = [
 "arc-swap",
 "async-compression",
 "async-stream",
+ "bincode",
 "bit_field",
 "byteorder",
 "bytes",
@@ -3916,6 +4048,7 @@ dependencies = [
 "postgres_connection",
 "postgres_ffi",
 "postgres_initdb",
+ "pprof",
 "pq_proto",
 "procfs",
 "rand 0.8.5",
@@ -4401,11 +4534,13 @@ dependencies = [
 "bindgen",
 "bytes",
 "crc32c",
- "env_logger",
+ "criterion",
+ "env_logger 0.10.2",
 "log",
 "memoffset 0.9.0",
 "once_cell",
 "postgres",
+ "pprof",
 "regex",
 "serde",
 "thiserror",
@@ -4441,7 +4576,7 @@ dependencies = [
 "cfg-if",
 "criterion",
 "findshlibs",
- "inferno",
+ "inferno 0.11.21",
 "libc",
 "log",
 "nix 0.26.4",
@@ -4476,9 +4611,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"

 [[package]]
 name = "pq-sys"
-version = "0.4.8"
+version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
+checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
 dependencies = [
 "vcpkg",
 ]
@@ -4667,9 +4802,10 @@ dependencies = [
 "clap",
 "compute_api",
 "consumption_metrics",
- "dashmap",
+ "dashmap 5.5.0",
 "ecdsa 0.16.9",
- "env_logger",
+ "ed25519-dalek",
+ "env_logger 0.10.2",
 "fallible-iterator",
 "flate2",
 "framed-websockets",
@@ -4740,7 +4876,7 @@ dependencies = [
 "tokio-postgres",
 "tokio-postgres2",
 "tokio-rustls 0.26.0",
- "tokio-tungstenite",
+ "tokio-tungstenite 0.21.0",
 "tokio-util",
 "tracing",
 "tracing-subscriber",
@@ -4776,6 +4912,15 @@ dependencies = [
 "serde",
 ]

+[[package]]
+name = "quick-xml"
+version = "0.37.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.37"
@@ -5062,6 +5207,7 @@ dependencies = [
 "once_cell",
 "pin-project-lite",
 "rand 0.8.5",
+ "reqwest",
 "scopeguard",
 "serde",
 "serde_json",
@@ -5159,15 +5305,15 @@ dependencies = [

 [[package]]
 name = "reqwest-tracing"
-version = "0.5.4"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff82cf5730a1311fb9413b0bc2b8e743e0157cd73f010ab4ec374a923873b6a2"
+checksum = "73e6153390585f6961341b50e5a1931d6be6dee4292283635903c26ef9d980d2"
 dependencies = [
 "anyhow",
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit 0.8.2",
+ "matchit 0.8.4",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",
@@ -5320,9 +5466,9 @@ dependencies = [

 [[package]]
 name = "rustc-demangle"
-version = "0.1.23"
+version = "0.1.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"

 [[package]]
 name = "rustc-hash"
@@ -5513,6 +5659,7 @@ dependencies = [
 "crc32c",
 "criterion",
 "desim",
+ "env_logger 0.10.2",
 "fail",
 "futures",
 "hex",
@@ -5535,11 +5682,13 @@ dependencies = [
 "remote_storage",
 "reqwest",
 "safekeeper_api",
+ "safekeeper_client",
 "scopeguard",
 "sd-notify",
 "serde",
 "serde_json",
 "sha2",
+ "smallvec",
 "storage_broker",
 "strum",
 "strum_macros",
@@ -5564,11 +5713,29 @@ dependencies = [
 name = "safekeeper_api"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
 "const_format",
+ "pageserver_api",
+ "postgres_ffi",
+ "pq_proto",
 "serde",
+ "serde_json",
+ "tokio",
 "utils",
 ]

+[[package]]
+name = "safekeeper_client"
+version = "0.1.0"
+dependencies = [
+ "reqwest",
+ "safekeeper_api",
+ "serde",
+ "thiserror",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -6765,7 +6932,19 @@ dependencies = [
 "futures-util",
 "log",
 "tokio",
- "tungstenite",
+ "tungstenite 0.21.0",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.24.0",
 ]

 [[package]]
@@ -6846,7 +7025,7 @@ dependencies = [
 "tokio",
 "tokio-rustls 0.26.0",
 "tokio-stream",
- "tower",
+ "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",
@@ -6887,16 +7066,49 @@ dependencies = [
 ]

 [[package]]
-name = "tower-layer"
-version = "0.3.2"
+name = "tower"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
+checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper 1.0.1",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
+dependencies = [
+ "bitflags 2.4.1",
+ "bytes",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"

 [[package]]
 name = "tower-service"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"

 [[package]]
 name = "tracing"
@@ -6965,9 +7177,9 @@ dependencies = [

 [[package]]
 name = "tracing-opentelemetry"
-version = "0.27.0"
+version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b"
+checksum = "97a971f6058498b5c0f1affa23e7ea202057a7301dbff68e968b2d578bcbd053"
 dependencies = [
 "js-sys",
 "once_cell",
@@ -7051,6 +7263,24 @@ dependencies = [
 "utf-8",
 ]

+[[package]]
+name = "tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "data-encoding",
+ "http 1.1.0",
+ "httparse",
+ "log",
+ "rand 0.8.5",
+ "sha1",
+ "thiserror",
+ "utf-8",
+]
+
 [[package]]
 name = "twox-hash"
 version = "1.6.3"
@@ -7200,6 +7430,7 @@ dependencies = [
 "anyhow",
 "arc-swap",
 "async-compression",
+ "backtrace",
 "bincode",
 "byteorder",
 "bytes",
@@ -7210,12 +7441,15 @@ dependencies = [
 "criterion",
 "diatomic-waker",
 "fail",
+ "flate2",
 "futures",
 "git-version",
 "hex",
 "hex-literal",
 "humantime",
 "hyper 0.14.30",
+ "inferno 0.12.0",
+ "itertools 0.10.5",
 "jemalloc_pprof",
 "jsonwebtoken",
 "metrics",
@@ -7318,7 +7552,7 @@ dependencies = [
 "anyhow",
 "camino-tempfile",
 "clap",
- "env_logger",
+ "env_logger 0.10.2",
 "log",
 "postgres",
 "postgres_ffi",
@@ -7333,12 +7567,21 @@ dependencies = [
 "anyhow",
 "async-compression",
 "bytes",
+ "camino",
+ "camino-tempfile",
+ "criterion",
+ "futures",
 "pageserver_api",
 "postgres_ffi",
+ "pprof",
 "prost",
+ "remote_storage",
 "serde",
+ "serde_json",
 "thiserror",
+ "tikv-jemallocator",
 "tokio",
+ "tokio-util",
 "tonic",
 "tonic-build",
 "tracing",
@@ -7572,7 +7815,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
 dependencies = [
 "windows-core",
- "windows-targets 0.52.4",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -7581,7 +7824,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
 dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -7599,7 +7842,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
 dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -7619,17 +7862,18 @@ dependencies = [

 [[package]]
 name = "windows-targets"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.4",
- "windows_aarch64_msvc 0.52.4",
- "windows_i686_gnu 0.52.4",
- "windows_i686_msvc 0.52.4",
- "windows_x86_64_gnu 0.52.4",
- "windows_x86_64_gnullvm 0.52.4",
- "windows_x86_64_msvc 0.52.4",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
 ]

 [[package]]
@@ -7640,9 +7884,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"

 [[package]]
 name = "windows_aarch64_gnullvm"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"

 [[package]]
 name = "windows_aarch64_msvc"
@@ -7652,9 +7896,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"

 [[package]]
 name = "windows_aarch64_msvc"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"

 [[package]]
 name = "windows_i686_gnu"
@@ -7664,9 +7908,15 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"

 [[package]]
 name = "windows_i686_gnu"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"

 [[package]]
 name = "windows_i686_msvc"
@@ -7676,9 +7926,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"

 [[package]]
 name = "windows_i686_msvc"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"

 [[package]]
 name = "windows_x86_64_gnu"
@@ -7688,9 +7938,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"

 [[package]]
 name = "windows_x86_64_gnu"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"

 [[package]]
 name = "windows_x86_64_gnullvm"
@@ -7700,9 +7950,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"

 [[package]]
 name = "windows_x86_64_gnullvm"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"

 [[package]]
 name = "windows_x86_64_msvc"
@@ -7712,9 +7962,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

 [[package]]
 name = "windows_x86_64_msvc"
-version = "0.52.4"
+version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

 [[package]]
 name = "winnow"
@@ -7822,7 +8072,8 @@ dependencies = [
 "tokio-util",
 "toml_edit",
 "tonic",
- "tower",
+ "tower 0.4.13",
+ "tower 0.5.2",
 "tracing",
 "tracing-core",
 "url",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ members = [
    "pageserver/pagebench",
    "proxy",
    "safekeeper",
+    "safekeeper/client",
    "storage_broker",
    "storage_controller",
    "storage_controller/client",
@@ -51,6 +52,7 @@ anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
 atomic-take = "1.1.0"
+backtrace = "0.3.74"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
@@ -63,7 +65,7 @@ aws-smithy-types = "1.2"
 aws-credential-types = "1.2.0"
 aws-sigv4 = { version = "1.2", features = ["sign-http"] }
 aws-types = "1.3"
-axum = { version = "0.7.5", features = ["ws"] }
+axum = { version = "0.7.9", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
 bindgen = "0.70"
@@ -108,6 +110,7 @@ hyper-util = "0.1"
 tokio-tungstenite = "0.21.0"
 indexmap = "2"
 indoc = "2"
+inferno = "0.12.0"
 ipnet = "2.10.0"
 itertools = "0.10"
 itoa = "1.0.11"
@@ -124,16 +127,16 @@ notify = "6.0.0"
 num_cpus = "1.15"
 num-traits = "0.2.15"
 once_cell = "1.13"
-opentelemetry = "0.26"
-opentelemetry_sdk = "0.26"
-opentelemetry-otlp = { version = "0.26", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
-opentelemetry-semantic-conventions = "0.26"
+opentelemetry = "0.27"
+opentelemetry_sdk = "0.27"
+opentelemetry-otlp = { version = "0.27", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }
+opentelemetry-semantic-conventions = "0.27"
 parking_lot = "0.12"
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
-pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
+pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13"
@@ -141,7 +144,7 @@ rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
 reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_26"] }
+reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_27"] }
 reqwest-middleware = "0.4"
 reqwest-retry = "0.7"
 routerify = "3"
@@ -185,10 +188,12 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
 tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
-tower-service = "0.3.2"
+tower = { version = "0.5.2", default-features = false }
+tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
+tower-service = "0.3.3"
 tracing = "0.1"
 tracing-error = "0.2"
-tracing-opentelemetry = "0.27"
+tracing-opentelemetry = "0.28"
 tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
 try-lock = "0.2.5"
 twox-hash = { version = "1.6.3", default-features = false }
@@ -233,6 +238,7 @@ postgres_initdb = { path = "./libs/postgres_initdb" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
+safekeeper_client = { path = "./safekeeper/client" }
 desim = { version = "0.1", path = "./libs/desim" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 storage_controller_client = { path = "./storage_controller/client" }
@@ -263,6 +269,8 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
+#
+# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml.
 debug = true

 # disable debug symbols for all packages except this one to decrease binaries size
--- a/10
+++ b/10
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .

 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
@@ -69,6 +69,9 @@ RUN set -e \
        libreadline-dev \
        libseccomp-dev \
        ca-certificates \
+	# System postgres for use with client libraries (e.g. in storage controller)
+        postgresql-15 \
+        openssl \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
    && useradd -d /data neon \
    && chown -R neon:neon /data
@@ -101,11 +104,6 @@ RUN mkdir -p /data/.neon/ && \
  > /data/.neon/pageserver.toml && \
  chown -R neon:neon /data/.neon

-# When running a binary that links with libpq, default to using our most recent postgres version.  Binaries
-# that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
-
-
 VOLUME ["/data"]
 USER neon
 EXPOSE 6400
--- a/3
+++ b/3
@@ -3,7 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 # Where to install Postgres, default is ./pg_install, maybe useful for package managers
 POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

-OPENSSL_PREFIX_DIR := /usr/local/openssl
 ICU_PREFIX_DIR := /usr/local/icu

 #
@@ -26,11 +25,9 @@ endif
 ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
 	# Exclude static build openssl, icu for local build (MacOS, Linux)
 	# Only keep for build type release and debug
-	PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
 	PG_CONFIGURE_OPTS += --with-icu
 	PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
 	PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
-	PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
 endif

 UNAME_S := $(shell uname -s)
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -115,7 +115,7 @@ RUN set -e \

 # Keep the version the same as in compute/compute-node.Dockerfile and
 # test_runner/regress/test_compute_metrics.py.
-ENV SQL_EXPORTER_VERSION=0.16.0
+ENV SQL_EXPORTER_VERSION=0.17.0
 RUN curl -fsSL \
    "https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
    --output sql_exporter.tar.gz \
@@ -190,21 +190,6 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
    && make install \
    && rm -rf ../lcov.tar.gz

-# Compile and install the static OpenSSL library
-ENV OPENSSL_VERSION=1.1.1w
-ENV OPENSSL_PREFIX=/usr/local/openssl
-RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
-    echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
-    cd /tmp && \
-    tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
-    rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
-    cd /tmp/openssl-${OPENSSL_VERSION} && \
-    ./config --prefix=${OPENSSL_PREFIX}  -static --static no-shared -fPIC && \
-    make -j "$(nproc)" && \
-    make install && \
-    cd /tmp && \
-    rm -rf /tmp/openssl-${OPENSSL_VERSION}
-
 # Use the same version of libicu as the compute nodes so that
 # clusters created using inidb on pageserver can be used by computes.
 #
@@ -258,7 +243,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.83.0
+ENV RUSTC_VERSION=1.84.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
--- a/compute/etc/neon_collector.jsonnet
+++ b/compute/etc/neon_collector.jsonnet
@@ -3,7 +3,7 @@
  metrics: [
    import 'sql_exporter/checkpoints_req.libsonnet',
    import 'sql_exporter/checkpoints_timed.libsonnet',
-    import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet',
+    import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet',
    import 'sql_exporter/compute_current_lsn.libsonnet',
    import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
    import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',
--- a/compute/etc/pgbouncer.ini
+++ b/compute/etc/pgbouncer.ini
@@ -19,3 +19,10 @@ max_prepared_statements=0
 admin_users=postgres
 unix_socket_dir=/tmp/
 unix_socket_mode=0777
+
+;; Disable connection logging. It produces a lot of logs that no one looks at,
+;; and we can get similar log entries from the proxy too. We had incidents in
+;; the past where the logging significantly stressed the log device or pgbouncer
+;; itself.
+log_connections=0
+log_disconnections=0
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet
@@ -1,10 +1,10 @@
 {
-  metric_name: 'compute_backpressure_throttling_seconds',
-  type: 'gauge',
+  metric_name: 'compute_backpressure_throttling_seconds_total',
+  type: 'counter',
  help: 'Time compute has spent throttled',
  key_labels: null,
  values: [
    'throttled',
  ],
-  query: importstr 'sql_exporter/compute_backpressure_throttling_seconds.sql',
+  query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql',
 }
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
--- a/compute/patches/cloud_regress_pg16.patch
+++ b/compute/patches/cloud_regress_pg16.patch
@@ -981,7 +981,7 @@ index fc42d418bf..e38f517574 100644
 CREATE SCHEMA addr_nsp;
 SET search_path TO 'addr_nsp';
 diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out
-index 8475231735..1afae5395f 100644
+index 8475231735..0653946337 100644
 --- a/src/test/regress/expected/password.out
 +++ b/src/test/regress/expected/password.out
@@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok
@@ -1006,65 +1006,63 @@ index 8475231735..1afae5395f 100644
 -----------------+---------------------------------------------------
 - regress_passwd1 | md5783277baca28003b33453252be4dbb34
 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3
-+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
-+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 - regress_passwd4 | 
 + regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 (4 rows)
 
 -- Rename a role
-@@ -54,24 +54,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
+@@ -54,24 +54,16 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
 -- passwords.
 SET password_encryption = 'md5';
 -- encrypt with MD5
 -ALTER ROLE regress_passwd2 PASSWORD 'foo';
+--- already encrypted, use as they are
+-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
+-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted, use as they are
- ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
 SET password_encryption = 'scram-sha-256';
 -- create SCRAM secret
 -ALTER ROLE  regress_passwd4 PASSWORD 'foo';
+--- already encrypted with MD5, use as it is
+-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
+--- This looks like a valid SCRAM-SHA-256 secret, but it is not
+--- so it should be hashed with SCRAM-SHA-256.
+-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
+--- These may look like valid MD5 secrets, but they are not, so they
+--- should be hashed with SCRAM-SHA-256.
+--- trailing garbage at the end
+-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
+--- invalid length
+-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
 +ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted with MD5, use as it is
- CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- This looks like a valid SCRAM-SHA-256 secret, but it is not
- -- so it should be hashed with SCRAM-SHA-256.
- CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- These may look like valid MD5 secrets, but they are not, so they
- -- should be hashed with SCRAM-SHA-256.
- -- trailing garbage at the end
- CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- invalid length
- CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- Changing the SCRAM iteration count
 SET scram_iterations = 1024;
 CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount';
-@@ -81,63 +87,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
+@@ -81,11 +73,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
     ORDER BY rolname, rolpassword;
      rolname     |                rolpassword_masked                 
 -----------------+---------------------------------------------------
 - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70
 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb
-+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
-+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
  regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023
- regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
- regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
- regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
-  regress_passwd9 | SCRAM-SHA-256$1024:<salt>$<storedkey>:<serverkey>
-(9 rows)
-+(5 rows)
- 
+ regress_passwd5 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+@@ -95,23 +87,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
 -- An empty password is not allowed, in any form
 CREATE ROLE regress_passwd_empty PASSWORD '';
 NOTICE:  empty string is not a valid password, clearing password
@@ -1082,56 +1080,37 @@ index 8475231735..1afae5395f 100644
 -(1 row)
 +(0 rows)
 
- -- Test with invalid stored and server keys.
- --
- -- The first is valid, to act as a control. The others have too long
- -- stored/server keys. They will be re-hashed.
- CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
+--- Test with invalid stored and server keys.
+---
+--- The first is valid, to act as a control. The others have too long
+--- stored/server keys. They will be re-hashed.
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- Check that the invalid secrets were re-hashed. A re-hashed secret
 -- should not contain the original salt.
 SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed
-     FROM pg_authid
-     WHERE rolname LIKE 'regress_passwd_sha_len%'
+@@ -120,7 +109,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw
     ORDER BY rolname;
-         rolname         | is_rolpassword_rehashed 
--------------------------+-------------------------
+          rolname         | is_rolpassword_rehashed 
+ -------------------------+-------------------------
 - regress_passwd_sha_len0 | f
- regress_passwd_sha_len1 | t
- regress_passwd_sha_len2 | t
-(3 rows)
-+ rolname | is_rolpassword_rehashed 
-+---------+-------------------------
-+(0 rows)
- 
- DROP ROLE regress_passwd1;
- DROP ROLE regress_passwd2;
- DROP ROLE regress_passwd3;
- DROP ROLE regress_passwd4;
- DROP ROLE regress_passwd5;
-+ERROR:  role "regress_passwd5" does not exist
- DROP ROLE regress_passwd6;
-+ERROR:  role "regress_passwd6" does not exist
- DROP ROLE regress_passwd7;
-+ERROR:  role "regress_passwd7" does not exist
+ regress_passwd_sha_len0 | t
+  regress_passwd_sha_len1 | t
+  regress_passwd_sha_len2 | t
+ (3 rows)
+@@ -135,6 +124,7 @@ DROP ROLE regress_passwd7;
 DROP ROLE regress_passwd8;
-+ERROR:  role "regress_passwd8" does not exist
 DROP ROLE regress_passwd9;
 DROP ROLE regress_passwd_empty;
 +ERROR:  role "regress_passwd_empty" does not exist
 DROP ROLE regress_passwd_sha_len0;
-+ERROR:  role "regress_passwd_sha_len0" does not exist
 DROP ROLE regress_passwd_sha_len1;
-+ERROR:  role "regress_passwd_sha_len1" does not exist
 DROP ROLE regress_passwd_sha_len2;
-+ERROR:  role "regress_passwd_sha_len2" does not exist
- -- all entries should have been removed
- SELECT rolname, rolpassword
-     FROM pg_authid
 diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
 index 5b9dba7b32..cc408dad42 100644
 --- a/src/test/regress/expected/privileges.out
@@ -3194,7 +3173,7 @@ index 1a6c61f49d..1c31ac6a53 100644
 -- Test generic object addressing/identification functions
 CREATE SCHEMA addr_nsp;
 diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql
-index 53e86b0b6c..f07cf1ec54 100644
+index 53e86b0b6c..0303fdfe96 100644
 --- a/src/test/regress/sql/password.sql
 +++ b/src/test/regress/sql/password.sql
@@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok
@@ -3213,23 +3192,59 @@ index 53e86b0b6c..f07cf1ec54 100644
 
 -- check list of created entries
 --
-@@ -42,14 +42,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
+@@ -42,26 +42,18 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
 SET password_encryption = 'md5';
 
 -- encrypt with MD5
 -ALTER ROLE regress_passwd2 PASSWORD 'foo';
+--- already encrypted, use as they are
+-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
+-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted, use as they are
- ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
- ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 
 SET password_encryption = 'scram-sha-256';
 -- create SCRAM secret
 -ALTER ROLE  regress_passwd4 PASSWORD 'foo';
+--- already encrypted with MD5, use as it is
+-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
 +ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted with MD5, use as it is
- CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 
+--- This looks like a valid SCRAM-SHA-256 secret, but it is not
+--- so it should be hashed with SCRAM-SHA-256.
+-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
+--- These may look like valid MD5 secrets, but they are not, so they
+--- should be hashed with SCRAM-SHA-256.
+--- trailing garbage at the end
+-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
+--- invalid length
+-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+ 
+ -- Changing the SCRAM iteration count
+ SET scram_iterations = 1024;
+@@ -78,13 +70,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';
+ ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';
+ SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';
+ 
+--- Test with invalid stored and server keys.
+---
+--- The first is valid, to act as a control. The others have too long
+--- stored/server keys. They will be re-hashed.
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+ 
+ -- Check that the invalid secrets were re-hashed. A re-hashed secret
+ -- should not contain the original salt.
 diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
 index 249df17a58..b258e7f26a 100644
 --- a/src/test/regress/sql/privileges.sql
--- a/compute/patches/cloud_regress_pg17.patch
+++ b/compute/patches/cloud_regress_pg17.patch
@@ -1014,10 +1014,10 @@ index fc42d418bf..e38f517574 100644
 CREATE SCHEMA addr_nsp;
 SET search_path TO 'addr_nsp';
 diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out
-index 924d6e001d..5966531db6 100644
+index 924d6e001d..7fdda73439 100644
 --- a/src/test/regress/expected/password.out
 +++ b/src/test/regress/expected/password.out
-@@ -12,13 +12,13 @@ SET password_encryption = 'md5'; -- ok
+@@ -12,13 +12,11 @@ SET password_encryption = 'md5'; -- ok
 SET password_encryption = 'scram-sha-256'; -- ok
 -- consistency of password entries
 SET password_encryption = 'md5';
@@ -1026,9 +1026,7 @@ index 924d6e001d..5966531db6 100644
 -CREATE ROLE regress_passwd2;
 -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2';
 +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
-+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
-+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 SET password_encryption = 'scram-sha-256';
 -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';
 -CREATE ROLE regress_passwd4 PASSWORD NULL;
@@ -1037,71 +1035,69 @@ index 924d6e001d..5966531db6 100644
 -- check list of created entries
 --
 -- The scram secret will look something like:
-@@ -32,10 +32,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
+@@ -32,10 +30,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
     ORDER BY rolname, rolpassword;
      rolname     |                rolpassword_masked                 
 -----------------+---------------------------------------------------
 - regress_passwd1 | md5783277baca28003b33453252be4dbb34
 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3
-+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
-+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 - regress_passwd4 | 
 + regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 (4 rows)
 
 -- Rename a role
-@@ -56,24 +56,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
+@@ -56,24 +54,17 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
 -- passwords.
 SET password_encryption = 'md5';
 -- encrypt with MD5
 -ALTER ROLE regress_passwd2 PASSWORD 'foo';
+--- already encrypted, use as they are
+-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
+-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted, use as they are
- ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
 SET password_encryption = 'scram-sha-256';
 -- create SCRAM secret
 -ALTER ROLE  regress_passwd4 PASSWORD 'foo';
 +ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- already encrypted with MD5, use as it is
- CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- This looks like a valid SCRAM-SHA-256 secret, but it is not
- -- so it should be hashed with SCRAM-SHA-256.
- CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- These may look like valid MD5 secrets, but they are not, so they
- -- should be hashed with SCRAM-SHA-256.
- -- trailing garbage at the end
- CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- -- invalid length
- CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
+-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
+--- This looks like a valid SCRAM-SHA-256 secret, but it is not
+--- so it should be hashed with SCRAM-SHA-256.
+-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
+--- These may look like valid MD5 secrets, but they are not, so they
+--- should be hashed with SCRAM-SHA-256.
+--- trailing garbage at the end
+-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
+--- invalid length
+-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- Changing the SCRAM iteration count
 SET scram_iterations = 1024;
 CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount';
-@@ -83,63 +89,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
+@@ -83,11 +74,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
     ORDER BY rolname, rolpassword;
      rolname     |                rolpassword_masked                 
 -----------------+---------------------------------------------------
 - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70
 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb
-+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
-+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
  regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
  regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
 - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023
- regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
- regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
- regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
-  regress_passwd9 | SCRAM-SHA-256$1024:<salt>$<storedkey>:<serverkey>
-(9 rows)
-+(5 rows)
- 
+ regress_passwd5 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+  regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
+@@ -97,23 +88,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
 -- An empty password is not allowed, in any form
 CREATE ROLE regress_passwd_empty PASSWORD '';
 NOTICE:  empty string is not a valid password, clearing password
@@ -1119,56 +1115,37 @@ index 924d6e001d..5966531db6 100644
 -(1 row)
 +(0 rows)
 
- -- Test with invalid stored and server keys.
- --
- -- The first is valid, to act as a control. The others have too long
- -- stored/server keys. They will be re-hashed.
- CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
- CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
-+ERROR:  Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
+--- Test with invalid stored and server keys.
+---
+--- The first is valid, to act as a control. The others have too long
+--- stored/server keys. They will be re-hashed.
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- Check that the invalid secrets were re-hashed. A re-hashed secret
 -- should not contain the original salt.
 SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed
-     FROM pg_authid
-     WHERE rolname LIKE 'regress_passwd_sha_len%'
+@@ -122,7 +110,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw
     ORDER BY rolname;
-         rolname         | is_rolpassword_rehashed 
--------------------------+-------------------------
+          rolname         | is_rolpassword_rehashed 
+ -------------------------+-------------------------
 - regress_passwd_sha_len0 | f
- regress_passwd_sha_len1 | t
- regress_passwd_sha_len2 | t
-(3 rows)
-+ rolname | is_rolpassword_rehashed 
-+---------+-------------------------
-+(0 rows)
- 
- DROP ROLE regress_passwd1;
- DROP ROLE regress_passwd2;
- DROP ROLE regress_passwd3;
- DROP ROLE regress_passwd4;
- DROP ROLE regress_passwd5;
-+ERROR:  role "regress_passwd5" does not exist
- DROP ROLE regress_passwd6;
-+ERROR:  role "regress_passwd6" does not exist
- DROP ROLE regress_passwd7;
-+ERROR:  role "regress_passwd7" does not exist
+ regress_passwd_sha_len0 | t
+  regress_passwd_sha_len1 | t
+  regress_passwd_sha_len2 | t
+ (3 rows)
+@@ -137,6 +125,7 @@ DROP ROLE regress_passwd7;
 DROP ROLE regress_passwd8;
-+ERROR:  role "regress_passwd8" does not exist
 DROP ROLE regress_passwd9;
 DROP ROLE regress_passwd_empty;
 +ERROR:  role "regress_passwd_empty" does not exist
 DROP ROLE regress_passwd_sha_len0;
-+ERROR:  role "regress_passwd_sha_len0" does not exist
 DROP ROLE regress_passwd_sha_len1;
-+ERROR:  role "regress_passwd_sha_len1" does not exist
 DROP ROLE regress_passwd_sha_len2;
-+ERROR:  role "regress_passwd_sha_len2" does not exist
- -- all entries should have been removed
- SELECT rolname, rolpassword
-     FROM pg_authid
 diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
 index 1296da0d57..f43fffa44c 100644
 --- a/src/test/regress/expected/privileges.out
@@ -3249,10 +3226,10 @@ index 1a6c61f49d..1c31ac6a53 100644
 -- Test generic object addressing/identification functions
 CREATE SCHEMA addr_nsp;
 diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql
-index bb82aa4aa2..7424c91b10 100644
+index bb82aa4aa2..dd8a05e24d 100644
 --- a/src/test/regress/sql/password.sql
 +++ b/src/test/regress/sql/password.sql
-@@ -10,13 +10,13 @@ SET password_encryption = 'scram-sha-256'; -- ok
+@@ -10,13 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok
 
 -- consistency of password entries
 SET password_encryption = 'md5';
@@ -3261,9 +3238,7 @@ index bb82aa4aa2..7424c91b10 100644
 -CREATE ROLE regress_passwd2;
 -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2';
 +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
-+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
-+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 SET password_encryption = 'scram-sha-256';
 -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3';
 -CREATE ROLE regress_passwd4 PASSWORD NULL;
@@ -3272,23 +3247,59 @@ index bb82aa4aa2..7424c91b10 100644
 
 -- check list of created entries
 --
-@@ -44,14 +44,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
+@@ -44,26 +42,19 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
 SET password_encryption = 'md5';
 
 -- encrypt with MD5
 -ALTER ROLE regress_passwd2 PASSWORD 'foo';
+--- already encrypted, use as they are
+-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
+-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
- -- already encrypted, use as they are
- ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
- ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
 
 SET password_encryption = 'scram-sha-256';
 -- create SCRAM secret
 -ALTER ROLE  regress_passwd4 PASSWORD 'foo';
 +ALTER ROLE  regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 -- already encrypted with MD5, use as it is
- CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
+-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
 
+--- This looks like a valid SCRAM-SHA-256 secret, but it is not
+--- so it should be hashed with SCRAM-SHA-256.
+-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
+--- These may look like valid MD5 secrets, but they are not, so they
+--- should be hashed with SCRAM-SHA-256.
+--- trailing garbage at the end
+-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
+--- invalid length
+-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+ 
+ -- Changing the SCRAM iteration count
+ SET scram_iterations = 1024;
+@@ -80,13 +71,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';
+ ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';
+ SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';
+ 
+--- Test with invalid stored and server keys.
+---
+--- The first is valid, to act as a control. The others have too long
+--- stored/server keys. They will be re-hashed.
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
+-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
+-- Neon does not support encrypted passwords, use unencrypted instead
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
+ 
+ -- Check that the invalid secrets were re-hashed. A re-hashed secret
+ -- should not contain the original salt.
 diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
 index 5880bc018d..27aa952b18 100644
 --- a/src/test/regress/sql/privileges.sql
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 [features]
 default = []
 # Enables test specific features.
-testing = []
+testing = ["fail/failpoints"]

 [dependencies]
 base64.workspace = true
@@ -15,13 +15,15 @@ aws-config.workspace = true
 aws-sdk-s3.workspace = true
 aws-sdk-kms.workspace = true
 anyhow.workspace = true
+axum = { workspace = true, features = [] }
 camino.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
 clap.workspace = true
+fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
-hyper0 = { workspace = true, features = ["full"] }
+http.workspace = true
 metrics.workspace = true
 nix.workspace = true
 notify.workspace = true
@@ -36,6 +38,8 @@ serde_with.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
 tar.workspace = true
+tower.workspace = true
+tower-http.workspace = true
 reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
@@ -47,6 +51,7 @@ tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
 url.workspace = true
+uuid.workspace = true
 prometheus.workspace = true

 postgres_initdb.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -60,19 +60,22 @@ use compute_tools::compute::{
 };
 use compute_tools::configurator::launch_configurator;
 use compute_tools::extension_server::get_pg_version_string;
-use compute_tools::http::api::launch_http_server;
+use compute_tools::http::launch_http_server;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
 use compute_tools::swap::resize_swap;
 use rlimit::{setrlimit, Resource};
+use utils::failpoint_support;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
 const BUILD_TAG_DEFAULT: &str = "latest";

 fn main() -> Result<()> {
+    let scenario = failpoint_support::init();
+
    let (build_tag, clap_args) = init()?;

    // enable core dumping for all child processes
@@ -100,17 +103,14 @@ fn main() -> Result<()> {

    maybe_delay_exit(delay_exit);

+    scenario.teardown();
+
    deinit_and_exit(wait_pg_result);
 }

 fn init() -> Result<(String, clap::ArgMatches)> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

-    opentelemetry::global::set_error_handler(|err| {
-        tracing::info!("OpenTelemetry error: {err}");
-    })
-    .expect("global error handler lock poisoned");
-
    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
    thread::spawn(move || {
        for sig in signals.forever() {
@@ -246,47 +246,48 @@ fn try_spec_from_cli(
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");

-    let spec;
-    let mut live_config_allowed = false;
-    match spec_json {
-        // First, try to get cluster spec from the cli argument
-        Some(json) => {
-            info!("got spec from cli argument {}", json);
-            spec = Some(serde_json::from_str(json)?);
-        }
-        None => {
-            // Second, try to read it from the file if path is provided
-            if let Some(sp) = spec_path {
-                let path = Path::new(sp);
-                let file = File::open(path)?;
-                spec = Some(serde_json::from_reader(file)?);
-                live_config_allowed = true;
-            } else if let Some(id) = compute_id {
-                if let Some(cp_base) = control_plane_uri {
-                    live_config_allowed = true;
-                    spec = match get_spec_from_control_plane(cp_base, id) {
-                        Ok(s) => s,
-                        Err(e) => {
-                            error!("cannot get response from control plane: {}", e);
-                            panic!("neither spec nor confirmation that compute is in the Empty state was received");
-                        }
-                    };
-                } else {
-                    panic!("must specify both --control-plane-uri and --compute-id or none");
-                }
-            } else {
-                panic!(
-                    "compute spec should be provided by one of the following ways: \
-                    --spec OR --spec-path OR --control-plane-uri and --compute-id"
-                );
-            }
-        }
+    // First, try to get cluster spec from the cli argument
+    if let Some(spec_json) = spec_json {
+        info!("got spec from cli argument {}", spec_json);
+        return Ok(CliSpecParams {
+            spec: Some(serde_json::from_str(spec_json)?),
+            live_config_allowed: false,
+        });
+    }
+
+    // Second, try to read it from the file if path is provided
+    if let Some(spec_path) = spec_path {
+        let file = File::open(Path::new(spec_path))?;
+        return Ok(CliSpecParams {
+            spec: Some(serde_json::from_reader(file)?),
+            live_config_allowed: true,
+        });
+    }
+
+    let Some(compute_id) = compute_id else {
+        panic!(
+            "compute spec should be provided by one of the following ways: \
+                --spec OR --spec-path OR --control-plane-uri and --compute-id"
+        );
+    };
+    let Some(control_plane_uri) = control_plane_uri else {
+        panic!("must specify both --control-plane-uri and --compute-id or none");
    };

-    Ok(CliSpecParams {
-        spec,
-        live_config_allowed,
-    })
+    match get_spec_from_control_plane(control_plane_uri, compute_id) {
+        Ok(spec) => Ok(CliSpecParams {
+            spec,
+            live_config_allowed: true,
+        }),
+        Err(e) => {
+            error!(
+                "cannot get response from control plane: {}\n\
+                neither spec nor confirmation that compute is in the Empty state was received",
+                e
+            );
+            Err(e)
+        }
+    }
 }

 struct CliSpecParams {
@@ -418,9 +419,14 @@ fn start_postgres(
        "running compute with features: {:?}",
        state.pspec.as_ref().unwrap().spec.features
    );
-    // before we release the mutex, fetch the swap size (if any) for later.
-    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
-    let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
+    // before we release the mutex, fetch some parameters for later.
+    let &ComputeSpec {
+        swap_size_bytes,
+        disk_quota_bytes,
+        #[cfg(target_os = "linux")]
+        disable_lfc_resizing,
+        ..
+    } = &state.pspec.as_ref().unwrap().spec;
    drop(state);

    // Launch remaining service threads
@@ -482,7 +488,10 @@ fn start_postgres(
    let mut pg = None;
    if !prestartup_failed {
        pg = match compute.start_compute() {
-            Ok(pg) => Some(pg),
+            Ok(pg) => {
+                info!(postmaster_pid = %pg.0.id(), "Postgres was started");
+                Some(pg)
+            }
            Err(err) => {
                error!("could not start the compute node: {:#}", err);
                compute.set_failed_status(err);
@@ -525,11 +534,18 @@ fn start_postgres(
            // This token is used internally by the monitor to clean up all threads
            let token = CancellationToken::new();

+            // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
+            let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
+                None
+            } else {
+                file_cache_connstr.cloned()
+            };
+
            let vm_monitor = rt.as_ref().map(|rt| {
                rt.spawn(vm_monitor::start(
                    Box::leak(Box::new(vm_monitor::Args {
                        cgroup: cgroup.cloned(),
-                        pgconnstr: file_cache_connstr.cloned(),
+                        pgconnstr,
                        addr: vm_monitor_addr.clone(),
                    })),
                    token.clone(),
@@ -573,6 +589,8 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
    // propagate to Postgres and it will be shut down as well.
    let mut exit_code = None;
    if let Some((mut pg, logs_handle)) = pg {
+        info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
+
        let ecode = pg
            .wait()
            .expect("failed to start waiting on Postgres process");
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -17,7 +17,7 @@
 //!
 //! # Local Testing
 //!
-//! - Comment out most of the pgxns in The Dockerfile.compute-tools to speed up the build.
+//! - Comment out most of the pgxns in compute-node.Dockerfile to speed up the build.
 //! - Build the image with the following command:
 //!
 //! ```bash
@@ -31,22 +31,29 @@ use camino::{Utf8Path, Utf8PathBuf};
 use clap::Parser;
 use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
 use nix::unistd::Pid;
-use tracing::{info, info_span, warn, Instrument};
+use tracing::{error, info, info_span, warn, Instrument};
 use utils::fs_ext::is_directory_empty;

+#[path = "fast_import/aws_s3_sync.rs"]
+mod aws_s3_sync;
 #[path = "fast_import/child_stdio_to_log.rs"]
 mod child_stdio_to_log;
 #[path = "fast_import/s3_uri.rs"]
 mod s3_uri;
-#[path = "fast_import/s5cmd.rs"]
-mod s5cmd;
+
+const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
+const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);

 #[derive(clap::Parser)]
 struct Args {
    #[clap(long)]
    working_directory: Utf8PathBuf,
    #[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
-    s3_prefix: s3_uri::S3Uri,
+    s3_prefix: Option<s3_uri::S3Uri>,
+    #[clap(long)]
+    source_connection_string: Option<String>,
+    #[clap(short, long)]
+    interactive: bool,
    #[clap(long)]
    pg_bin_dir: Utf8PathBuf,
    #[clap(long)]
@@ -77,30 +84,70 @@ pub(crate) async fn main() -> anyhow::Result<()> {

    info!("starting");

-    let Args {
-        working_directory,
-        s3_prefix,
-        pg_bin_dir,
-        pg_lib_dir,
-    } = Args::parse();
+    let args = Args::parse();

-    let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
+    // Validate arguments
+    if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
+        anyhow::bail!("either s3_prefix or source_connection_string must be specified");
+    }
+    if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
+        anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
+    }

-    let spec: Spec = {
-        let spec_key = s3_prefix.append("/spec.json");
-        let s3_client = aws_sdk_s3::Client::new(&aws_config);
-        let object = s3_client
-            .get_object()
-            .bucket(&spec_key.bucket)
-            .key(spec_key.key)
-            .send()
-            .await
-            .context("get spec from s3")?
-            .body
-            .collect()
-            .await
-            .context("download spec body")?;
-        serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
+    let working_directory = args.working_directory;
+    let pg_bin_dir = args.pg_bin_dir;
+    let pg_lib_dir = args.pg_lib_dir;
+
+    // Initialize AWS clients only if s3_prefix is specified
+    let (aws_config, kms_client) = if args.s3_prefix.is_some() {
+        let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
+        let kms = aws_sdk_kms::Client::new(&config);
+        (Some(config), Some(kms))
+    } else {
+        (None, None)
+    };
+
+    // Get source connection string either from S3 spec or direct argument
+    let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
+        let spec: Spec = {
+            let spec_key = s3_prefix.append("/spec.json");
+            let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
+            let object = s3_client
+                .get_object()
+                .bucket(&spec_key.bucket)
+                .key(spec_key.key)
+                .send()
+                .await
+                .context("get spec from s3")?
+                .body
+                .collect()
+                .await
+                .context("download spec body")?;
+            serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
+        };
+
+        match spec.encryption_secret {
+            EncryptionSecret::KMS { key_id } => {
+                let mut output = kms_client
+                    .unwrap()
+                    .decrypt()
+                    .key_id(key_id)
+                    .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
+                        spec.source_connstring_ciphertext_base64,
+                    ))
+                    .send()
+                    .await
+                    .context("decrypt source connection string")?;
+                let plaintext = output
+                    .plaintext
+                    .take()
+                    .context("get plaintext source connection string")?;
+                String::from_utf8(plaintext.into_inner())
+                    .context("parse source connection string as utf8")?
+            }
+        }
+    } else {
+        args.source_connection_string.unwrap()
    };

    match tokio::fs::create_dir(&working_directory).await {
@@ -123,15 +170,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
        .await
        .context("create pgdata directory")?;

-    //
-    // Setup clients
-    //
-    let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
-    let kms_client = aws_sdk_kms::Client::new(&aws_config);
-
-    //
-    //  Initialize pgdata
-    //
    let pgbin = pg_bin_dir.join("postgres");
    let pg_version = match get_pg_version(pgbin.as_ref()) {
        PostgresMajorVersion::V14 => 14,
@@ -170,7 +208,13 @@ pub(crate) async fn main() -> anyhow::Result<()> {
        .args(["-c", &format!("max_parallel_workers={nproc}")])
        .args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
        .args(["-c", &format!("max_worker_processes={nproc}")])
-        .args(["-c", "effective_io_concurrency=100"])
+        .args([
+            "-c",
+            &format!(
+                "effective_io_concurrency={}",
+                if cfg!(target_os = "macos") { 0 } else { 100 }
+            ),
+        ])
        .env_clear()
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
@@ -185,44 +229,58 @@ pub(crate) async fn main() -> anyhow::Result<()> {
        )
        .instrument(info_span!("postgres")),
    );
+
+    // Create neondb database in the running postgres
    let restore_pg_connstring =
        format!("host=localhost port=5432 user={superuser} dbname=postgres");
+
+    let start_time = std::time::Instant::now();
+
    loop {
-        let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
-        if res.is_ok() {
-            info!("postgres is ready, could connect to it");
-            break;
+        if start_time.elapsed() > PG_WAIT_TIMEOUT {
+            error!(
+                "timeout exceeded: failed to poll postgres and create database within 10 minutes"
+            );
+            std::process::exit(1);
+        }
+
+        match tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await {
+            Ok((client, connection)) => {
+                // Spawn the connection handling task to maintain the connection
+                tokio::spawn(async move {
+                    if let Err(e) = connection.await {
+                        warn!("connection error: {}", e);
+                    }
+                });
+
+                match client.simple_query("CREATE DATABASE neondb;").await {
+                    Ok(_) => {
+                        info!("created neondb database");
+                        break;
+                    }
+                    Err(e) => {
+                        warn!(
+                            "failed to create database: {}, retying in {}s",
+                            e,
+                            PG_WAIT_RETRY_INTERVAL.as_secs_f32()
+                        );
+                        tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
+                        continue;
+                    }
+                }
+            }
+            Err(_) => {
+                info!(
+                    "postgres not ready yet, retrying in {}s",
+                    PG_WAIT_RETRY_INTERVAL.as_secs_f32()
+                );
+                tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
+                continue;
+            }
        }
    }

-    //
-    // Decrypt connection string
-    //
-    let source_connection_string = {
-        match spec.encryption_secret {
-            EncryptionSecret::KMS { key_id } => {
-                let mut output = kms_client
-                    .decrypt()
-                    .key_id(key_id)
-                    .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
-                        spec.source_connstring_ciphertext_base64,
-                    ))
-                    .send()
-                    .await
-                    .context("decrypt source connection string")?;
-                let plaintext = output
-                    .plaintext
-                    .take()
-                    .context("get plaintext source connection string")?;
-                String::from_utf8(plaintext.into_inner())
-                    .context("parse source connection string as utf8")?
-            }
-        }
-    };
-
-    //
-    // Start the work
-    //
+    let restore_pg_connstring = restore_pg_connstring.replace("dbname=postgres", "dbname=neondb");

    let dumpdir = working_directory.join("dumpdir");

@@ -310,6 +368,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
        }
    }

+    // If interactive mode, wait for Ctrl+C
+    if args.interactive {
+        info!("Running in interactive mode. Press Ctrl+C to shut down.");
+        tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
+    }
+
    info!("shutdown postgres");
    {
        nix::sys::signal::kill(
@@ -325,21 +389,24 @@ pub(crate) async fn main() -> anyhow::Result<()> {
            .context("wait for postgres to shut down")?;
    }

-    info!("upload pgdata");
-    s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
-        .await
-        .context("sync dump directory to destination")?;
-
-    info!("write status");
-    {
-        let status_dir = working_directory.join("status");
-        std::fs::create_dir(&status_dir).context("create status directory")?;
-        let status_file = status_dir.join("status");
-        std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
-            .context("write status file")?;
-        s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
+    // Only sync if s3_prefix was specified
+    if let Some(s3_prefix) = args.s3_prefix {
+        info!("upload pgdata");
+        aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
            .await
-            .context("sync status directory to destination")?;
+            .context("sync dump directory to destination")?;
+
+        info!("write status");
+        {
+            let status_dir = working_directory.join("status");
+            std::fs::create_dir(&status_dir).context("create status directory")?;
+            let status_file = status_dir.join("pgdata");
+            std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
+                .context("write status file")?;
+            aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
+                .await
+                .context("sync status directory to destination")?;
+        }
    }

    Ok(())
--- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -4,24 +4,21 @@ use camino::Utf8Path;
 use super::s3_uri::S3Uri;

 pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
-    let mut builder = tokio::process::Command::new("s5cmd");
-    // s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
-    if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
-        builder.arg("--endpoint-url").arg(val);
-    }
+    let mut builder = tokio::process::Command::new("aws");
    builder
+        .arg("s3")
        .arg("sync")
        .arg(local.as_str())
        .arg(remote.to_string());
    let st = builder
        .spawn()
-        .context("spawn s5cmd")?
+        .context("spawn aws s3 sync")?
        .wait()
        .await
-        .context("wait for s5cmd")?;
+        .context("wait for aws s3 sync")?;
    if st.success() {
        Ok(())
    } else {
-        Err(anyhow::anyhow!("s5cmd failed"))
+        Err(anyhow::anyhow!("aws s3 sync failed"))
    }
 }
--- a/compute_tools/src/catalog.rs
+++ b/compute_tools/src/catalog.rs
@@ -36,11 +36,11 @@ pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<Cat

 #[derive(Debug, thiserror::Error)]
 pub enum SchemaDumpError {
-    #[error("Database does not exist.")]
+    #[error("database does not exist")]
    DatabaseDoesNotExist,
-    #[error("Failed to execute pg_dump.")]
+    #[error("failed to execute pg_dump")]
    IO(#[from] std::io::Error),
-    #[error("Unexpected error.")]
+    #[error("unexpected I/O error")]
    Unexpected,
 }

--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -15,7 +15,7 @@ use std::time::Instant;

 use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
-use compute_api::spec::{PgIdent, Role};
+use compute_api::spec::{Database, PgIdent, Role};
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
@@ -45,8 +45,10 @@ use crate::spec_apply::ApplySpecPhase::{
    DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
    RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
 };
+use crate::spec_apply::PerDatabasePhase;
 use crate::spec_apply::PerDatabasePhase::{
-    ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
+    HandleAnonExtension,
 };
 use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
@@ -834,7 +836,7 @@ impl ComputeNode {
        conf
    }

-    async fn get_maintenance_client(
+    pub async fn get_maintenance_client(
        conf: &tokio_postgres::Config,
    ) -> Result<tokio_postgres::Client> {
        let mut conf = conf.clone();
@@ -943,6 +945,78 @@ impl ComputeNode {
                dbs: databases,
            }));

+            // Apply special pre drop database phase.
+            // NOTE: we use the code of RunInEachDatabase phase for parallelism
+            // and connection management, but we don't really run it in *each* database,
+            // only in databases, we're about to drop.
+            info!("Applying PerDatabase (pre-dropdb) phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            // Run the phase for each database that we're about to drop.
+            let db_processes = spec
+                .delta_operations
+                .iter()
+                .flatten()
+                .filter_map(move |op| {
+                    if op.action.as_str() == "delete_db" {
+                        Some(op.name.clone())
+                    } else {
+                        None
+                    }
+                })
+                .map(|dbname| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    // We only need dbname field for this phase, so set other fields to dummy values
+                    let db = DB::UserDB(Database {
+                        name: dbname.clone(),
+                        owner: "cloud_admin".to_string(),
+                        options: None,
+                        restrict_conn: false,
+                        invalid: false,
+                    });
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        [DropSubscriptionsForDeletedDatabases].to_vec(),
+                    );
+
+                    Ok(spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                if let Err(e) = handle.await? {
+                    // Handle the error case where the database does not exist
+                    // We do not check whether the DB exists or not in the deletion phase,
+                    // so we shouldn't be strict about it in pre-deletion cleanup as well.
+                    if e.to_string().contains("does not exist") {
+                        warn!("Error dropping subscription: {}", e);
+                    } else {
+                        return Err(e);
+                    }
+                };
+            }
+
            for phase in [
                CreateSuperUser,
                DropInvalidDatabases,
@@ -962,7 +1036,7 @@ impl ComputeNode {
                .await?;
            }

-            info!("Applying RunInEachDatabase phase");
+            info!("Applying RunInEachDatabase2 phase");
            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));

            let db_processes = spec
@@ -997,6 +1071,12 @@ impl ComputeNode {
                        jwks_roles.clone(),
                        concurrency_token.clone(),
                        db,
+                        [
+                            DeleteDBRoleReferences,
+                            ChangeSchemaPerms,
+                            HandleAnonExtension,
+                        ]
+                        .to_vec(),
                    );

                    Ok(spawn(fut))
@@ -1043,16 +1123,13 @@ impl ComputeNode {
        jwks_roles: Arc<HashSet<String>>,
        concurrency_token: Arc<tokio::sync::Semaphore>,
        db: DB,
+        subphases: Vec<PerDatabasePhase>,
    ) -> Result<()> {
        let _permit = concurrency_token.acquire().await?;

        let mut client_conn = None;

-        for subphase in [
-            DeleteDBRoleReferences,
-            ChangeSchemaPerms,
-            HandleAnonExtension,
-        ] {
+        for subphase in subphases {
            apply_operations(
                spec.clone(),
                ctx.clone(),
@@ -1181,8 +1258,19 @@ impl ComputeNode {
            let mut conf = postgres::config::Config::from(conf);
            conf.application_name("compute_ctl:migrations");

-            let mut client = conf.connect(NoTls)?;
-            handle_migrations(&mut client).context("apply_config handle_migrations")
+            match conf.connect(NoTls) {
+                Ok(mut client) => {
+                    if let Err(e) = handle_migrations(&mut client) {
+                        error!("Failed to run migrations: {}", e);
+                    }
+                }
+                Err(e) => {
+                    error!(
+                        "Failed to connect to the compute for running migrations: {}",
+                        e
+                    );
+                }
+            };
        });

        Ok::<(), anyhow::Error>(())
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -1,591 +0,0 @@
-use std::convert::Infallible;
-use std::net::IpAddr;
-use std::net::Ipv6Addr;
-use std::net::SocketAddr;
-use std::sync::Arc;
-use std::thread;
-
-use crate::catalog::SchemaDumpError;
-use crate::catalog::{get_database_schema, get_dbs_and_roles};
-use crate::compute::forward_termination_signal;
-use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
-use crate::installed_extensions;
-use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
-use compute_api::responses::{
-    ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
-    SetRoleGrantsResponse,
-};
-
-use anyhow::Result;
-use hyper::header::CONTENT_TYPE;
-use hyper::service::{make_service_fn, service_fn};
-use hyper::{Body, Method, Request, Response, Server, StatusCode};
-use metrics::proto::MetricFamily;
-use metrics::Encoder;
-use metrics::TextEncoder;
-use tokio::task;
-use tracing::{debug, error, info, warn};
-use tracing_utils::http::OtelName;
-use utils::http::request::must_get_query_param;
-
-fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
-    ComputeStatusResponse {
-        start_time: state.start_time,
-        tenant: state
-            .pspec
-            .as_ref()
-            .map(|pspec| pspec.tenant_id.to_string()),
-        timeline: state
-            .pspec
-            .as_ref()
-            .map(|pspec| pspec.timeline_id.to_string()),
-        status: state.status,
-        last_active: state.last_active,
-        error: state.error.clone(),
-    }
-}
-
-// Service function to handle all available routes.
-async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
-    //
-    // NOTE: The URI path is currently included in traces. That's OK because
-    // it doesn't contain any variable parts or sensitive information. But
-    // please keep that in mind if you change the routing here.
-    //
-    match (req.method(), req.uri().path()) {
-        // Serialized compute state.
-        (&Method::GET, "/status") => {
-            debug!("serving /status GET request");
-            let state = compute.state.lock().unwrap();
-            let status_response = status_response_from_state(&state);
-            Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
-        }
-
-        // Startup metrics in JSON format. Keep /metrics reserved for a possible
-        // future use for Prometheus metrics format.
-        (&Method::GET, "/metrics.json") => {
-            info!("serving /metrics.json GET request");
-            let metrics = compute.state.lock().unwrap().metrics.clone();
-            Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
-        }
-
-        // Prometheus metrics
-        (&Method::GET, "/metrics") => {
-            debug!("serving /metrics GET request");
-
-            // When we call TextEncoder::encode() below, it will immediately
-            // return an error if a metric family has no metrics, so we need to
-            // preemptively filter out metric families with no metrics.
-            let metrics = installed_extensions::collect()
-                .into_iter()
-                .filter(|m| !m.get_metric().is_empty())
-                .collect::<Vec<MetricFamily>>();
-
-            let encoder = TextEncoder::new();
-            let mut buffer = vec![];
-
-            if let Err(err) = encoder.encode(&metrics, &mut buffer) {
-                let msg = format!("error handling /metrics request: {err}");
-                error!(msg);
-                return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
-            }
-
-            match Response::builder()
-                .status(StatusCode::OK)
-                .header(CONTENT_TYPE, encoder.format_type())
-                .body(Body::from(buffer))
-            {
-                Ok(response) => response,
-                Err(err) => {
-                    let msg = format!("error handling /metrics request: {err}");
-                    error!(msg);
-                    render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
-        // Collect Postgres current usage insights
-        (&Method::GET, "/insights") => {
-            info!("serving /insights GET request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!("compute is not running, current status: {:?}", status);
-                error!(msg);
-                return Response::new(Body::from(msg));
-            }
-
-            let insights = compute.collect_insights().await;
-            Response::new(Body::from(insights))
-        }
-
-        (&Method::POST, "/check_writability") => {
-            info!("serving /check_writability POST request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for check_writability request: {:?}",
-                    status
-                );
-                error!(msg);
-                return Response::new(Body::from(msg));
-            }
-
-            let res = crate::checker::check_writability(compute).await;
-            match res {
-                Ok(_) => Response::new(Body::from("true")),
-                Err(e) => {
-                    error!("check_writability failed: {}", e);
-                    Response::new(Body::from(e.to_string()))
-                }
-            }
-        }
-
-        (&Method::POST, "/extensions") => {
-            info!("serving /extensions POST request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for extensions request: {:?}",
-                    status
-                );
-                error!(msg);
-                return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
-            }
-
-            let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
-            let request = serde_json::from_slice::<ExtensionInstallRequest>(&request).unwrap();
-            let res = compute
-                .install_extension(&request.extension, &request.database, request.version)
-                .await;
-            match res {
-                Ok(version) => render_json(Body::from(
-                    serde_json::to_string(&ExtensionInstallResult {
-                        extension: request.extension,
-                        version,
-                    })
-                    .unwrap(),
-                )),
-                Err(e) => {
-                    error!("install_extension failed: {}", e);
-                    render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
-
-        (&Method::GET, "/info") => {
-            let num_cpus = num_cpus::get_physical();
-            info!("serving /info GET request. num_cpus: {}", num_cpus);
-            Response::new(Body::from(
-                serde_json::json!({
-                    "num_cpus": num_cpus,
-                })
-                .to_string(),
-            ))
-        }
-
-        // Accept spec in JSON format and request compute configuration. If
-        // anything goes wrong after we set the compute status to `ConfigurationPending`
-        // and update compute state with new spec, we basically leave compute
-        // in the potentially wrong state. That said, it's control-plane's
-        // responsibility to watch compute state after reconfiguration request
-        // and to clean restart in case of errors.
-        (&Method::POST, "/configure") => {
-            info!("serving /configure POST request");
-            match handle_configure_request(req, compute).await {
-                Ok(msg) => Response::new(Body::from(msg)),
-                Err((msg, code)) => {
-                    error!("error handling /configure request: {msg}");
-                    render_json_error(&msg, code)
-                }
-            }
-        }
-
-        (&Method::POST, "/terminate") => {
-            info!("serving /terminate POST request");
-            match handle_terminate_request(compute).await {
-                Ok(()) => Response::new(Body::empty()),
-                Err((msg, code)) => {
-                    error!("error handling /terminate request: {msg}");
-                    render_json_error(&msg, code)
-                }
-            }
-        }
-
-        (&Method::GET, "/dbs_and_roles") => {
-            info!("serving /dbs_and_roles GET request",);
-            match get_dbs_and_roles(compute).await {
-                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
-                Err(_) => {
-                    render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
-
-        (&Method::GET, "/database_schema") => {
-            let database = match must_get_query_param(&req, "database") {
-                Err(e) => return e.into_response(),
-                Ok(database) => database,
-            };
-            info!("serving /database_schema GET request with database: {database}",);
-            match get_database_schema(compute, &database).await {
-                Ok(res) => render_plain(Body::wrap_stream(res)),
-                Err(SchemaDumpError::DatabaseDoesNotExist) => {
-                    render_json_error("database does not exist", StatusCode::NOT_FOUND)
-                }
-                Err(e) => {
-                    error!("can't get schema dump: {}", e);
-                    render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
-
-        (&Method::POST, "/grants") => {
-            info!("serving /grants POST request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for set_role_grants request: {:?}",
-                    status
-                );
-                error!(msg);
-                return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
-            }
-
-            let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
-            let request = serde_json::from_slice::<SetRoleGrantsRequest>(&request).unwrap();
-
-            let res = compute
-                .set_role_grants(
-                    &request.database,
-                    &request.schema,
-                    &request.privileges,
-                    &request.role,
-                )
-                .await;
-            match res {
-                Ok(()) => render_json(Body::from(
-                    serde_json::to_string(&SetRoleGrantsResponse {
-                        database: request.database,
-                        schema: request.schema,
-                        role: request.role,
-                        privileges: request.privileges,
-                    })
-                    .unwrap(),
-                )),
-                Err(e) => render_json_error(
-                    &format!("could not grant role privileges to the schema: {e}"),
-                    // TODO: can we filter on role/schema not found errors
-                    // and return appropriate error code?
-                    StatusCode::INTERNAL_SERVER_ERROR,
-                ),
-            }
-        }
-
-        // get the list of installed extensions
-        // currently only used in python tests
-        // TODO: call it from cplane
-        (&Method::GET, "/installed_extensions") => {
-            info!("serving /installed_extensions GET request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for extensions request: {:?}",
-                    status
-                );
-                error!(msg);
-                return Response::new(Body::from(msg));
-            }
-
-            let conf = compute.get_conn_conf(None);
-            let res =
-                task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
-                    .await
-                    .unwrap();
-
-            match res {
-                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
-                Err(e) => render_json_error(
-                    &format!("could not get list of installed extensions: {}", e),
-                    StatusCode::INTERNAL_SERVER_ERROR,
-                ),
-            }
-        }
-
-        // download extension files from remote extension storage on demand
-        (&Method::POST, route) if route.starts_with("/extension_server/") => {
-            info!("serving {:?} POST request", route);
-            info!("req.uri {:?}", req.uri());
-
-            // don't even try to download extensions
-            // if no remote storage is configured
-            if compute.ext_remote_storage.is_none() {
-                info!("no extensions remote storage configured");
-                let mut resp = Response::new(Body::from("no remote storage configured"));
-                *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                return resp;
-            }
-
-            let mut is_library = false;
-            if let Some(params) = req.uri().query() {
-                info!("serving {:?} POST request with params: {}", route, params);
-                if params == "is_library=true" {
-                    is_library = true;
-                } else {
-                    let mut resp = Response::new(Body::from("Wrong request parameters"));
-                    *resp.status_mut() = StatusCode::BAD_REQUEST;
-                    return resp;
-                }
-            }
-            let filename = route.split('/').last().unwrap().to_string();
-            info!("serving /extension_server POST request, filename: {filename:?} is_library: {is_library}");
-
-            // get ext_name and path from spec
-            // don't lock compute_state for too long
-            let ext = {
-                let compute_state = compute.state.lock().unwrap();
-                let pspec = compute_state.pspec.as_ref().expect("spec must be set");
-                let spec = &pspec.spec;
-
-                // debug only
-                info!("spec: {:?}", spec);
-
-                let remote_extensions = match spec.remote_extensions.as_ref() {
-                    Some(r) => r,
-                    None => {
-                        info!("no remote extensions spec was provided");
-                        let mut resp = Response::new(Body::from("no remote storage configured"));
-                        *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                        return resp;
-                    }
-                };
-
-                remote_extensions.get_ext(
-                    &filename,
-                    is_library,
-                    &compute.build_tag,
-                    &compute.pgversion,
-                )
-            };
-
-            match ext {
-                Ok((ext_name, ext_path)) => {
-                    match compute.download_extension(ext_name, ext_path).await {
-                        Ok(_) => Response::new(Body::from("OK")),
-                        Err(e) => {
-                            error!("extension download failed: {}", e);
-                            let mut resp = Response::new(Body::from(e.to_string()));
-                            *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                            resp
-                        }
-                    }
-                }
-                Err(e) => {
-                    warn!("extension download failed to find extension: {}", e);
-                    let mut resp = Response::new(Body::from("failed to find file"));
-                    *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                    resp
-                }
-            }
-        }
-
-        // Return the `404 Not Found` for any other routes.
-        _ => {
-            let mut not_found = Response::new(Body::from("404 Not Found"));
-            *not_found.status_mut() = StatusCode::NOT_FOUND;
-            not_found
-        }
-    }
-}
-
-async fn handle_configure_request(
-    req: Request<Body>,
-    compute: &Arc<ComputeNode>,
-) -> Result<String, (String, StatusCode)> {
-    if !compute.live_config_allowed {
-        return Err((
-            "live configuration is not allowed for this compute node".to_string(),
-            StatusCode::PRECONDITION_FAILED,
-        ));
-    }
-
-    let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
-    let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
-    if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
-        let spec = request.spec;
-
-        let parsed_spec = match ParsedSpec::try_from(spec) {
-            Ok(ps) => ps,
-            Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
-        };
-
-        // XXX: wrap state update under lock in code blocks. Otherwise,
-        // we will try to `Send` `mut state` into the spawned thread
-        // bellow, which will cause error:
-        // ```
-        // error: future cannot be sent between threads safely
-        // ```
-        {
-            let mut state = compute.state.lock().unwrap();
-            if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for configuration request: {:?}",
-                    state.status.clone()
-                );
-                return Err((msg, StatusCode::PRECONDITION_FAILED));
-            }
-            state.pspec = Some(parsed_spec);
-            state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
-            drop(state);
-            info!("set new spec and notified waiters");
-        }
-
-        // Spawn a blocking thread to wait for compute to become Running.
-        // This is needed to do not block the main pool of workers and
-        // be able to serve other requests while some particular request
-        // is waiting for compute to finish configuration.
-        let c = compute.clone();
-        task::spawn_blocking(move || {
-            let mut state = c.state.lock().unwrap();
-            while state.status != ComputeStatus::Running {
-                state = c.state_changed.wait(state).unwrap();
-                info!(
-                    "waiting for compute to become Running, current status: {:?}",
-                    state.status
-                );
-
-                if state.status == ComputeStatus::Failed {
-                    let err = state.error.as_ref().map_or("unknown error", |x| x);
-                    let msg = format!("compute configuration failed: {:?}", err);
-                    return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
-                }
-            }
-
-            Ok(())
-        })
-        .await
-        .unwrap()?;
-
-        // Return current compute state if everything went well.
-        let state = compute.state.lock().unwrap().clone();
-        let status_response = status_response_from_state(&state);
-        Ok(serde_json::to_string(&status_response).unwrap())
-    } else {
-        Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
-    }
-}
-
-fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
-    let error = GenericAPIError {
-        error: e.to_string(),
-    };
-    Response::builder()
-        .status(status)
-        .header(CONTENT_TYPE, "application/json")
-        .body(Body::from(serde_json::to_string(&error).unwrap()))
-        .unwrap()
-}
-
-fn render_json(body: Body) -> Response<Body> {
-    Response::builder()
-        .header(CONTENT_TYPE, "application/json")
-        .body(body)
-        .unwrap()
-}
-
-fn render_plain(body: Body) -> Response<Body> {
-    Response::builder()
-        .header(CONTENT_TYPE, "text/plain")
-        .body(body)
-        .unwrap()
-}
-
-async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
-    {
-        let mut state = compute.state.lock().unwrap();
-        if state.status == ComputeStatus::Terminated {
-            return Ok(());
-        }
-        if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
-            let msg = format!(
-                "invalid compute status for termination request: {}",
-                state.status
-            );
-            return Err((msg, StatusCode::PRECONDITION_FAILED));
-        }
-        state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
-        drop(state);
-    }
-
-    forward_termination_signal();
-    info!("sent signal and notified waiters");
-
-    // Spawn a blocking thread to wait for compute to become Terminated.
-    // This is needed to do not block the main pool of workers and
-    // be able to serve other requests while some particular request
-    // is waiting for compute to finish configuration.
-    let c = compute.clone();
-    task::spawn_blocking(move || {
-        let mut state = c.state.lock().unwrap();
-        while state.status != ComputeStatus::Terminated {
-            state = c.state_changed.wait(state).unwrap();
-            info!(
-                "waiting for compute to become {}, current status: {:?}",
-                ComputeStatus::Terminated,
-                state.status
-            );
-        }
-
-        Ok(())
-    })
-    .await
-    .unwrap()?;
-    info!("terminated Postgres");
-    Ok(())
-}
-
-// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
-#[tokio::main]
-async fn serve(port: u16, state: Arc<ComputeNode>) {
-    // this usually binds to both IPv4 and IPv6 on linux
-    // see e.g. https://github.com/rust-lang/rust/pull/34440
-    let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
-
-    let make_service = make_service_fn(move |_conn| {
-        let state = state.clone();
-        async move {
-            Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
-                let state = state.clone();
-                async move {
-                    Ok::<_, Infallible>(
-                        // NOTE: We include the URI path in the string. It
-                        // doesn't contain any variable parts or sensitive
-                        // information in this API.
-                        tracing_utils::http::tracing_handler(
-                            req,
-                            |req| routes(req, &state),
-                            OtelName::UriPath,
-                        )
-                        .await,
-                    )
-                }
-            }))
-        }
-    });
-
-    info!("starting HTTP server on {}", addr);
-
-    let server = Server::bind(&addr).serve(make_service);
-
-    // Run this server forever
-    if let Err(e) = server.await {
-        error!("server error: {}", e);
-    }
-}
-
-/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
-    let state = Arc::clone(state);
-
-    Ok(thread::Builder::new()
-        .name("http-endpoint".into())
-        .spawn(move || serve(port, state))?)
-}
--- a/compute_tools/src/http/extract/json.rs
+++ b/compute_tools/src/http/extract/json.rs
@@ -0,0 +1,48 @@
+use std::ops::{Deref, DerefMut};
+
+use axum::{
+    async_trait,
+    extract::{rejection::JsonRejection, FromRequest, Request},
+};
+use compute_api::responses::GenericAPIError;
+use http::StatusCode;
+
+/// Custom `Json` extractor, so that we can format errors into
+/// `JsonResponse<GenericAPIError>`.
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct Json<T>(pub T);
+
+#[async_trait]
+impl<S, T> FromRequest<S> for Json<T>
+where
+    axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
+    S: Send + Sync,
+{
+    type Rejection = (StatusCode, axum::Json<GenericAPIError>);
+
+    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
+        match axum::Json::<T>::from_request(req, state).await {
+            Ok(value) => Ok(Self(value.0)),
+            Err(rejection) => Err((
+                rejection.status(),
+                axum::Json(GenericAPIError {
+                    error: rejection.body_text().to_lowercase(),
+                }),
+            )),
+        }
+    }
+}
+
+impl<T> Deref for Json<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for Json<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
--- a/compute_tools/src/http/extract/mod.rs
+++ b/compute_tools/src/http/extract/mod.rs
@@ -0,0 +1,7 @@
+pub(crate) mod json;
+pub(crate) mod path;
+pub(crate) mod query;
+
+pub(crate) use json::Json;
+pub(crate) use path::Path;
+pub(crate) use query::Query;
--- a/compute_tools/src/http/extract/path.rs
+++ b/compute_tools/src/http/extract/path.rs
@@ -0,0 +1,48 @@
+use std::ops::{Deref, DerefMut};
+
+use axum::{
+    async_trait,
+    extract::{rejection::PathRejection, FromRequestParts},
+};
+use compute_api::responses::GenericAPIError;
+use http::{request::Parts, StatusCode};
+
+/// Custom `Path` extractor, so that we can format errors into
+/// `JsonResponse<GenericAPIError>`.
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct Path<T>(pub T);
+
+#[async_trait]
+impl<S, T> FromRequestParts<S> for Path<T>
+where
+    axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
+    S: Send + Sync,
+{
+    type Rejection = (StatusCode, axum::Json<GenericAPIError>);
+
+    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
+        match axum::extract::Path::<T>::from_request_parts(parts, state).await {
+            Ok(value) => Ok(Self(value.0)),
+            Err(rejection) => Err((
+                rejection.status(),
+                axum::Json(GenericAPIError {
+                    error: rejection.body_text().to_ascii_lowercase(),
+                }),
+            )),
+        }
+    }
+}
+
+impl<T> Deref for Path<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for Path<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
--- a/compute_tools/src/http/extract/query.rs
+++ b/compute_tools/src/http/extract/query.rs
@@ -0,0 +1,48 @@
+use std::ops::{Deref, DerefMut};
+
+use axum::{
+    async_trait,
+    extract::{rejection::QueryRejection, FromRequestParts},
+};
+use compute_api::responses::GenericAPIError;
+use http::{request::Parts, StatusCode};
+
+/// Custom `Query` extractor, so that we can format errors into
+/// `JsonResponse<GenericAPIError>`.
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct Query<T>(pub T);
+
+#[async_trait]
+impl<S, T> FromRequestParts<S> for Query<T>
+where
+    axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,
+    S: Send + Sync,
+{
+    type Rejection = (StatusCode, axum::Json<GenericAPIError>);
+
+    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
+        match axum::extract::Query::<T>::from_request_parts(parts, state).await {
+            Ok(value) => Ok(Self(value.0)),
+            Err(rejection) => Err((
+                rejection.status(),
+                axum::Json(GenericAPIError {
+                    error: rejection.body_text().to_ascii_lowercase(),
+                }),
+            )),
+        }
+    }
+}
+
+impl<T> Deref for Query<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for Query<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -1 +1,56 @@
-pub mod api;
+use axum::{body::Body, response::Response};
+use compute_api::responses::{ComputeStatus, GenericAPIError};
+use http::{header::CONTENT_TYPE, StatusCode};
+use serde::Serialize;
+use tracing::error;
+
+pub use server::launch_http_server;
+
+mod extract;
+mod routes;
+mod server;
+
+/// Convenience response builder for JSON responses
+struct JsonResponse;
+
+impl JsonResponse {
+    /// Helper for actually creating a response
+    fn create_response(code: StatusCode, body: impl Serialize) -> Response {
+        Response::builder()
+            .status(code)
+            .header(CONTENT_TYPE.as_str(), "application/json")
+            .body(Body::from(serde_json::to_string(&body).unwrap()))
+            .unwrap()
+    }
+
+    /// Create a successful error response
+    pub(self) fn success(code: StatusCode, body: impl Serialize) -> Response {
+        assert!({
+            let code = code.as_u16();
+
+            (200..300).contains(&code)
+        });
+
+        Self::create_response(code, body)
+    }
+
+    /// Create an error response
+    pub(self) fn error(code: StatusCode, error: impl ToString) -> Response {
+        assert!(code.as_u16() >= 400);
+
+        let message = error.to_string();
+        error!(message);
+
+        Self::create_response(code, &GenericAPIError { error: message })
+    }
+
+    /// Create an error response related to the compute being in an invalid state
+    pub(self) fn invalid_status(status: ComputeStatus) -> Response {
+        Self::create_response(
+            StatusCode::PRECONDITION_FAILED,
+            &GenericAPIError {
+                error: format!("invalid compute status: {status}"),
+            },
+        )
+    }
+}
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -37,7 +37,7 @@ paths:
              schema:
                $ref: "#/components/schemas/ComputeMetrics"

-  /metrics
+  /metrics:
    get:
      tags:
      - Info
@@ -537,12 +537,14 @@ components:
            properties:
              extname:
                type: string
-              versions:
-                type: array
+              version:
+                type: string
                items:
                  type: string
              n_databases:
                type: integer
+              owned_by_superuser:
+                type: integer

    SetRoleGrantsRequest:
      type: object
--- a/compute_tools/src/http/routes/check_writability.rs
+++ b/compute_tools/src/http/routes/check_writability.rs
@@ -0,0 +1,20 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::responses::ComputeStatus;
+use http::StatusCode;
+
+use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};
+
+/// Check that the compute is currently running.
+pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
+    let status = compute.get_status();
+    if status != ComputeStatus::Running {
+        return JsonResponse::invalid_status(status);
+    }
+
+    match check_writability(&compute).await {
+        Ok(_) => JsonResponse::success(StatusCode::OK, true),
+        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
+    }
+}
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -0,0 +1,91 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::{
+    requests::ConfigurationRequest,
+    responses::{ComputeStatus, ComputeStatusResponse},
+};
+use http::StatusCode;
+use tokio::task;
+use tracing::info;
+
+use crate::{
+    compute::{ComputeNode, ParsedSpec},
+    http::{extract::Json, JsonResponse},
+};
+
+// Accept spec in JSON format and request compute configuration. If anything
+// goes wrong after we set the compute status to `ConfigurationPending` and
+// update compute state with new spec, we basically leave compute in the
+// potentially wrong state. That said, it's control-plane's responsibility to
+// watch compute state after reconfiguration request and to clean restart in
+// case of errors.
+pub(in crate::http) async fn configure(
+    State(compute): State<Arc<ComputeNode>>,
+    request: Json<ConfigurationRequest>,
+) -> Response {
+    if !compute.live_config_allowed {
+        return JsonResponse::error(
+            StatusCode::PRECONDITION_FAILED,
+            "live configuration is not allowed for this compute node".to_string(),
+        );
+    }
+
+    let pspec = match ParsedSpec::try_from(request.spec.clone()) {
+        Ok(p) => p,
+        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
+    };
+
+    // XXX: wrap state update under lock in a code block. Otherwise, we will try
+    // to `Send` `mut state` into the spawned thread bellow, which will cause
+    // the following rustc error:
+    //
+    // error: future cannot be sent between threads safely
+    {
+        let mut state = compute.state.lock().unwrap();
+        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
+            return JsonResponse::invalid_status(state.status);
+        }
+
+        state.pspec = Some(pspec);
+        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
+        drop(state);
+    }
+
+    // Spawn a blocking thread to wait for compute to become Running. This is
+    // needed to do not block the main pool of workers and be able to serve
+    // other requests while some particular request is waiting for compute to
+    // finish configuration.
+    let c = compute.clone();
+    let completed = task::spawn_blocking(move || {
+        let mut state = c.state.lock().unwrap();
+        while state.status != ComputeStatus::Running {
+            state = c.state_changed.wait(state).unwrap();
+            info!(
+                "waiting for compute to become {}, current status: {}",
+                ComputeStatus::Running,
+                state.status
+            );
+
+            if state.status == ComputeStatus::Failed {
+                let err = state.error.as_ref().map_or("unknown error", |x| x);
+                let msg = format!("compute configuration failed: {:?}", err);
+                return Err(msg);
+            }
+        }
+
+        Ok(())
+    })
+    .await
+    .unwrap();
+
+    if let Err(e) = completed {
+        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
+    }
+
+    // Return current compute state if everything went well.
+    let state = compute.state.lock().unwrap().clone();
+    let body = ComputeStatusResponse::from(&state);
+
+    JsonResponse::success(StatusCode::OK, body)
+}
--- a/compute_tools/src/http/routes/database_schema.rs
+++ b/compute_tools/src/http/routes/database_schema.rs
@@ -0,0 +1,34 @@
+use std::sync::Arc;
+
+use axum::{body::Body, extract::State, response::Response};
+use http::{header::CONTENT_TYPE, StatusCode};
+use serde::Deserialize;
+
+use crate::{
+    catalog::{get_database_schema, SchemaDumpError},
+    compute::ComputeNode,
+    http::{extract::Query, JsonResponse},
+};
+
+#[derive(Debug, Clone, Deserialize)]
+pub(in crate::http) struct DatabaseSchemaParams {
+    database: String,
+}
+
+/// Get a schema dump of the requested database.
+pub(in crate::http) async fn get_schema_dump(
+    params: Query<DatabaseSchemaParams>,
+    State(compute): State<Arc<ComputeNode>>,
+) -> Response {
+    match get_database_schema(&compute, &params.database).await {
+        Ok(schema) => Response::builder()
+            .status(StatusCode::OK)
+            .header(CONTENT_TYPE.as_str(), "application/json")
+            .body(Body::from_stream(schema))
+            .unwrap(),
+        Err(SchemaDumpError::DatabaseDoesNotExist) => {
+            JsonResponse::error(StatusCode::NOT_FOUND, SchemaDumpError::DatabaseDoesNotExist)
+        }
+        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
+    }
+}
--- a/compute_tools/src/http/routes/dbs_and_roles.rs
+++ b/compute_tools/src/http/routes/dbs_and_roles.rs
@@ -0,0 +1,16 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use http::StatusCode;
+
+use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};
+
+/// Get the databases and roles from the compute.
+pub(in crate::http) async fn get_catalog_objects(
+    State(compute): State<Arc<ComputeNode>>,
+) -> Response {
+    match get_dbs_and_roles(&compute).await {
+        Ok(catalog_objects) => JsonResponse::success(StatusCode::OK, catalog_objects),
+        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
+    }
+}
--- a/compute_tools/src/http/routes/extension_server.rs
+++ b/compute_tools/src/http/routes/extension_server.rs
@@ -0,0 +1,68 @@
+use std::sync::Arc;
+
+use axum::{
+    extract::State,
+    response::{IntoResponse, Response},
+};
+use http::StatusCode;
+use serde::Deserialize;
+
+use crate::{
+    compute::ComputeNode,
+    http::{
+        extract::{Path, Query},
+        JsonResponse,
+    },
+};
+
+#[derive(Debug, Clone, Deserialize)]
+pub(in crate::http) struct ExtensionServerParams {
+    #[serde(default)]
+    is_library: bool,
+}
+
+/// Download a remote extension.
+pub(in crate::http) async fn download_extension(
+    Path(filename): Path<String>,
+    params: Query<ExtensionServerParams>,
+    State(compute): State<Arc<ComputeNode>>,
+) -> Response {
+    // Don't even try to download extensions if no remote storage is configured
+    if compute.ext_remote_storage.is_none() {
+        return JsonResponse::error(
+            StatusCode::PRECONDITION_FAILED,
+            "remote storage is not configured",
+        );
+    }
+
+    let ext = {
+        let state = compute.state.lock().unwrap();
+        let pspec = state.pspec.as_ref().unwrap();
+        let spec = &pspec.spec;
+
+        let remote_extensions = match spec.remote_extensions.as_ref() {
+            Some(r) => r,
+            None => {
+                return JsonResponse::error(
+                    StatusCode::CONFLICT,
+                    "information about remote extensions is unavailable",
+                );
+            }
+        };
+
+        remote_extensions.get_ext(
+            &filename,
+            params.is_library,
+            &compute.build_tag,
+            &compute.pgversion,
+        )
+    };
+
+    match ext {
+        Ok((ext_name, ext_path)) => match compute.download_extension(ext_name, ext_path).await {
+            Ok(_) => StatusCode::OK.into_response(),
+            Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
+        },
+        Err(e) => JsonResponse::error(StatusCode::NOT_FOUND, e),
+    }
+}
--- a/compute_tools/src/http/routes/extensions.rs
+++ b/compute_tools/src/http/routes/extensions.rs
@@ -0,0 +1,45 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::{
+    requests::ExtensionInstallRequest,
+    responses::{ComputeStatus, ExtensionInstallResponse},
+};
+use http::StatusCode;
+
+use crate::{
+    compute::ComputeNode,
+    http::{extract::Json, JsonResponse},
+};
+
+/// Install a extension.
+pub(in crate::http) async fn install_extension(
+    State(compute): State<Arc<ComputeNode>>,
+    request: Json<ExtensionInstallRequest>,
+) -> Response {
+    let status = compute.get_status();
+    if status != ComputeStatus::Running {
+        return JsonResponse::invalid_status(status);
+    }
+
+    match compute
+        .install_extension(
+            &request.extension,
+            &request.database,
+            request.version.to_string(),
+        )
+        .await
+    {
+        Ok(version) => JsonResponse::success(
+            StatusCode::CREATED,
+            Some(ExtensionInstallResponse {
+                extension: request.extension.clone(),
+                version,
+            }),
+        ),
+        Err(e) => JsonResponse::error(
+            StatusCode::INTERNAL_SERVER_ERROR,
+            format!("failed to install extension: {e}"),
+        ),
+    }
+}
--- a/compute_tools/src/http/routes/failpoints.rs
+++ b/compute_tools/src/http/routes/failpoints.rs
@@ -0,0 +1,35 @@
+use axum::response::{IntoResponse, Response};
+use http::StatusCode;
+use tracing::info;
+use utils::failpoint_support::{apply_failpoint, ConfigureFailpointsRequest};
+
+use crate::http::{extract::Json, JsonResponse};
+
+/// Configure failpoints for testing purposes.
+pub(in crate::http) async fn configure_failpoints(
+    failpoints: Json<ConfigureFailpointsRequest>,
+) -> Response {
+    if !fail::has_failpoints() {
+        return JsonResponse::error(
+            StatusCode::PRECONDITION_FAILED,
+            "Cannot manage failpoints because neon was compiled without failpoints support",
+        );
+    }
+
+    for fp in &*failpoints {
+        info!("cfg failpoint: {} {}", fp.name, fp.actions);
+
+        // We recognize one extra "action" that's not natively recognized
+        // by the failpoints crate: exit, to immediately kill the process
+        let cfg_result = apply_failpoint(&fp.name, &fp.actions);
+
+        if let Err(e) = cfg_result {
+            return JsonResponse::error(
+                StatusCode::BAD_REQUEST,
+                format!("failed to configure failpoints: {e}"),
+            );
+        }
+    }
+
+    StatusCode::OK.into_response()
+}
--- a/compute_tools/src/http/routes/grants.rs
+++ b/compute_tools/src/http/routes/grants.rs
@@ -0,0 +1,48 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::{
+    requests::SetRoleGrantsRequest,
+    responses::{ComputeStatus, SetRoleGrantsResponse},
+};
+use http::StatusCode;
+
+use crate::{
+    compute::ComputeNode,
+    http::{extract::Json, JsonResponse},
+};
+
+/// Add grants for a role.
+pub(in crate::http) async fn add_grant(
+    State(compute): State<Arc<ComputeNode>>,
+    request: Json<SetRoleGrantsRequest>,
+) -> Response {
+    let status = compute.get_status();
+    if status != ComputeStatus::Running {
+        return JsonResponse::invalid_status(status);
+    }
+
+    match compute
+        .set_role_grants(
+            &request.database,
+            &request.schema,
+            &request.privileges,
+            &request.role,
+        )
+        .await
+    {
+        Ok(()) => JsonResponse::success(
+            StatusCode::CREATED,
+            Some(SetRoleGrantsResponse {
+                database: request.database.clone(),
+                schema: request.schema.clone(),
+                role: request.role.clone(),
+                privileges: request.privileges.clone(),
+            }),
+        ),
+        Err(e) => JsonResponse::error(
+            StatusCode::INTERNAL_SERVER_ERROR,
+            format!("failed to grant role privileges to the schema: {e}"),
+        ),
+    }
+}
--- a/compute_tools/src/http/routes/info.rs
+++ b/compute_tools/src/http/routes/info.rs
@@ -0,0 +1,11 @@
+use axum::response::Response;
+use compute_api::responses::InfoResponse;
+use http::StatusCode;
+
+use crate::http::JsonResponse;
+
+/// Get information about the physical characteristics about the compute.
+pub(in crate::http) async fn get_info() -> Response {
+    let num_cpus = num_cpus::get_physical();
+    JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
+}
--- a/compute_tools/src/http/routes/insights.rs
+++ b/compute_tools/src/http/routes/insights.rs
@@ -0,0 +1,18 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::responses::ComputeStatus;
+use http::StatusCode;
+
+use crate::{compute::ComputeNode, http::JsonResponse};
+
+/// Collect current Postgres usage insights.
+pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
+    let status = compute.get_status();
+    if status != ComputeStatus::Running {
+        return JsonResponse::invalid_status(status);
+    }
+
+    let insights = compute.collect_insights().await;
+    JsonResponse::success(StatusCode::OK, insights)
+}
--- a/compute_tools/src/http/routes/installed_extensions.rs
+++ b/compute_tools/src/http/routes/installed_extensions.rs
@@ -0,0 +1,33 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use compute_api::responses::ComputeStatus;
+use http::StatusCode;
+use tokio::task;
+
+use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};
+
+/// Get a list of installed extensions.
+pub(in crate::http) async fn get_installed_extensions(
+    State(compute): State<Arc<ComputeNode>>,
+) -> Response {
+    let status = compute.get_status();
+    if status != ComputeStatus::Running {
+        return JsonResponse::invalid_status(status);
+    }
+
+    let conf = compute.get_conn_conf(None);
+    let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
+        .await
+        .unwrap();
+
+    match res {
+        Ok(installed_extensions) => {
+            JsonResponse::success(StatusCode::OK, Some(installed_extensions))
+        }
+        Err(e) => JsonResponse::error(
+            StatusCode::INTERNAL_SERVER_ERROR,
+            format!("failed to get list of installed extensions: {e}"),
+        ),
+    }
+}
--- a/compute_tools/src/http/routes/metrics.rs
+++ b/compute_tools/src/http/routes/metrics.rs
@@ -0,0 +1,32 @@
+use axum::{body::Body, response::Response};
+use http::header::CONTENT_TYPE;
+use http::StatusCode;
+use metrics::proto::MetricFamily;
+use metrics::Encoder;
+use metrics::TextEncoder;
+
+use crate::{http::JsonResponse, installed_extensions};
+
+/// Expose Prometheus metrics.
+pub(in crate::http) async fn get_metrics() -> Response {
+    // When we call TextEncoder::encode() below, it will immediately return an
+    // error if a metric family has no metrics, so we need to preemptively
+    // filter out metric families with no metrics.
+    let metrics = installed_extensions::collect()
+        .into_iter()
+        .filter(|m| !m.get_metric().is_empty())
+        .collect::<Vec<MetricFamily>>();
+
+    let encoder = TextEncoder::new();
+    let mut buffer = vec![];
+
+    if let Err(e) = encoder.encode(&metrics, &mut buffer) {
+        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
+    }
+
+    Response::builder()
+        .status(StatusCode::OK)
+        .header(CONTENT_TYPE, encoder.format_type())
+        .body(Body::from(buffer))
+        .unwrap()
+}
--- a/compute_tools/src/http/routes/metrics_json.rs
+++ b/compute_tools/src/http/routes/metrics_json.rs
@@ -0,0 +1,12 @@
+use std::sync::Arc;
+
+use axum::{extract::State, response::Response};
+use http::StatusCode;
+
+use crate::{compute::ComputeNode, http::JsonResponse};
+
+/// Get startup metrics.
+pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
+    let metrics = compute.state.lock().unwrap().metrics.clone();
+    JsonResponse::success(StatusCode::OK, metrics)
+}
--- a/compute_tools/src/http/routes/mod.rs
+++ b/compute_tools/src/http/routes/mod.rs
@@ -0,0 +1,38 @@
+use compute_api::responses::ComputeStatusResponse;
+
+use crate::compute::ComputeState;
+
+pub(in crate::http) mod check_writability;
+pub(in crate::http) mod configure;
+pub(in crate::http) mod database_schema;
+pub(in crate::http) mod dbs_and_roles;
+pub(in crate::http) mod extension_server;
+pub(in crate::http) mod extensions;
+pub(in crate::http) mod failpoints;
+pub(in crate::http) mod grants;
+pub(in crate::http) mod info;
+pub(in crate::http) mod insights;
+pub(in crate::http) mod installed_extensions;
+pub(in crate::http) mod metrics;
+pub(in crate::http) mod metrics_json;
+pub(in crate::http) mod status;
+pub(in crate::http) mod terminate;
+
+impl From<&ComputeState> for ComputeStatusResponse {
+    fn from(state: &ComputeState) -> Self {
+        ComputeStatusResponse {
+            start_time: state.start_time,
+            tenant: state
+                .pspec
+                .as_ref()
+                .map(|pspec| pspec.tenant_id.to_string()),
+            timeline: state
+                .pspec
+                .as_ref()
+                .map(|pspec| pspec.timeline_id.to_string()),
+            status: state.status,
+            last_active: state.last_active,
+            error: state.error.clone(),
+        }
+    }
+}
--- a/compute_tools/src/http/routes/status.rs
+++ b/compute_tools/src/http/routes/status.rs
@@ -0,0 +1,14 @@
+use std::{ops::Deref, sync::Arc};
+
+use axum::{extract::State, http::StatusCode, response::Response};
+use compute_api::responses::ComputeStatusResponse;
+
+use crate::{compute::ComputeNode, http::JsonResponse};
+
+/// Retrieve the state of the comute.
+pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
+    let state = compute.state.lock().unwrap();
+    let body = ComputeStatusResponse::from(state.deref());
+
+    JsonResponse::success(StatusCode::OK, body)
+}
--- a/compute_tools/src/http/routes/terminate.rs
+++ b/compute_tools/src/http/routes/terminate.rs
@@ -0,0 +1,58 @@
+use std::sync::Arc;
+
+use axum::{
+    extract::State,
+    response::{IntoResponse, Response},
+};
+use compute_api::responses::ComputeStatus;
+use http::StatusCode;
+use tokio::task;
+use tracing::info;
+
+use crate::{
+    compute::{forward_termination_signal, ComputeNode},
+    http::JsonResponse,
+};
+
+/// Terminate the compute.
+pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
+    {
+        let mut state = compute.state.lock().unwrap();
+        if state.status == ComputeStatus::Terminated {
+            return StatusCode::CREATED.into_response();
+        }
+
+        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
+            return JsonResponse::invalid_status(state.status);
+        }
+
+        state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
+        drop(state);
+    }
+
+    forward_termination_signal();
+    info!("sent signal and notified waiters");
+
+    // Spawn a blocking thread to wait for compute to become Terminated.
+    // This is needed to do not block the main pool of workers and
+    // be able to serve other requests while some particular request
+    // is waiting for compute to finish configuration.
+    let c = compute.clone();
+    task::spawn_blocking(move || {
+        let mut state = c.state.lock().unwrap();
+        while state.status != ComputeStatus::Terminated {
+            state = c.state_changed.wait(state).unwrap();
+            info!(
+                "waiting for compute to become {}, current status: {:?}",
+                ComputeStatus::Terminated,
+                state.status
+            );
+        }
+    })
+    .await
+    .unwrap();
+
+    info!("terminated Postgres");
+
+    StatusCode::OK.into_response()
+}
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -0,0 +1,155 @@
+use std::{
+    net::{IpAddr, Ipv6Addr, SocketAddr},
+    sync::Arc,
+    thread,
+    time::Duration,
+};
+
+use anyhow::Result;
+use axum::{
+    extract::Request,
+    middleware::{self, Next},
+    response::{IntoResponse, Response},
+    routing::{get, post},
+    Router,
+};
+use http::StatusCode;
+use tokio::net::TcpListener;
+use tower::ServiceBuilder;
+use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
+use tracing::{debug, error, info, Span};
+use uuid::Uuid;
+
+use super::routes::{
+    check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
+    grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
+    terminate,
+};
+use crate::compute::ComputeNode;
+
+async fn handle_404() -> Response {
+    StatusCode::NOT_FOUND.into_response()
+}
+
+const X_REQUEST_ID: &str = "x-request-id";
+
+/// This middleware function allows compute_ctl to generate its own request ID
+/// if one isn't supplied. The control plane will always send one as a UUID. The
+/// neon Postgres extension on the other hand does not send one.
+async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
+    let headers = request.headers_mut();
+
+    if headers.get(X_REQUEST_ID).is_none() {
+        headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
+    }
+
+    next.run(request).await
+}
+
+/// Run the HTTP server and wait on it forever.
+#[tokio::main]
+async fn serve(port: u16, compute: Arc<ComputeNode>) {
+    let mut app = Router::new()
+        .route("/check_writability", post(check_writability::is_writable))
+        .route("/configure", post(configure::configure))
+        .route("/database_schema", get(database_schema::get_schema_dump))
+        .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
+        .route(
+            "/extension_server/*filename",
+            post(extension_server::download_extension),
+        )
+        .route("/extensions", post(extensions::install_extension))
+        .route("/grants", post(grants::add_grant))
+        .route("/info", get(info_route::get_info))
+        .route("/insights", get(insights::get_insights))
+        .route(
+            "/installed_extensions",
+            get(installed_extensions::get_installed_extensions),
+        )
+        .route("/metrics", get(metrics::get_metrics))
+        .route("/metrics.json", get(metrics_json::get_metrics))
+        .route("/status", get(status::get_status))
+        .route("/terminate", post(terminate::terminate))
+        .fallback(handle_404)
+        .layer(
+            ServiceBuilder::new()
+                // Add this middleware since we assume the request ID exists
+                .layer(middleware::from_fn(maybe_add_request_id_header))
+                .layer(
+                    TraceLayer::new_for_http()
+                        .on_request(|request: &http::Request<_>, _span: &Span| {
+                            let request_id = request
+                                .headers()
+                                .get(X_REQUEST_ID)
+                                .unwrap()
+                                .to_str()
+                                .unwrap();
+
+                            match request.uri().path() {
+                                "/metrics" => {
+                                    debug!(%request_id, "{} {}", request.method(), request.uri())
+                                }
+                                _ => info!(%request_id, "{} {}", request.method(), request.uri()),
+                            };
+                        })
+                        .on_response(
+                            |response: &http::Response<_>, latency: Duration, _span: &Span| {
+                                let request_id = response
+                                    .headers()
+                                    .get(X_REQUEST_ID)
+                                    .unwrap()
+                                    .to_str()
+                                    .unwrap();
+
+                                info!(
+                                    %request_id,
+                                    code = response.status().as_u16(),
+                                    latency = latency.as_millis()
+                                )
+                            },
+                        ),
+                )
+                .layer(PropagateRequestIdLayer::x_request_id()),
+        )
+        .with_state(compute);
+
+    // Add in any testing support
+    if cfg!(feature = "testing") {
+        use super::routes::failpoints;
+
+        app = app.route("/failpoints", post(failpoints::configure_failpoints))
+    }
+
+    // This usually binds to both IPv4 and IPv6 on Linux, see
+    // https://github.com/rust-lang/rust/pull/34440 for more information
+    let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
+    let listener = match TcpListener::bind(&addr).await {
+        Ok(listener) => listener,
+        Err(e) => {
+            error!(
+                "failed to bind the compute_ctl HTTP server to port {}: {}",
+                port, e
+            );
+            return;
+        }
+    };
+
+    if let Ok(local_addr) = listener.local_addr() {
+        info!("compute_ctl HTTP server listening on {}", local_addr);
+    } else {
+        info!("compute_ctl HTTP server listening on port {}", port);
+    }
+
+    if let Err(e) = axum::serve(listener, app).await {
+        error!("compute_ctl HTTP server error: {}", e);
+    }
+}
+
+/// Launch a separate HTTP server thread and return its `JoinHandle`.
+pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+    let state = Arc::clone(state);
+
+    Ok(thread::Builder::new()
+        .name("http-server".into())
+        .spawn(move || serve(port, state))?)
+}
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -1,7 +1,6 @@
 use compute_api::responses::{InstalledExtension, InstalledExtensions};
 use metrics::proto::MetricFamily;
 use std::collections::HashMap;
-use std::collections::HashSet;

 use anyhow::Result;
 use postgres::{Client, NoTls};
@@ -38,61 +37,77 @@ fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
 /// Connect to every database (see list_dbs above) and get the list of installed extensions.
 ///
 /// Same extension can be installed in multiple databases with different versions,
-/// we only keep the highest and lowest version across all databases.
+/// so we report a separate metric (number of databases where it is installed)
+/// for each extension version.
 pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<InstalledExtensions> {
    conf.application_name("compute_ctl:get_installed_extensions");
    let mut client = conf.connect(NoTls)?;
-
    let databases: Vec<String> = list_dbs(&mut client)?;

-    let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
+    let mut extensions_map: HashMap<(String, String, String), InstalledExtension> = HashMap::new();
    for db in databases.iter() {
        conf.dbname(db);
        let mut db_client = conf.connect(NoTls)?;
-        let extensions: Vec<(String, String)> = db_client
+        let extensions: Vec<(String, String, i32)> = db_client
            .query(
-                "SELECT extname, extversion FROM pg_catalog.pg_extension;",
+                "SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension",
                &[],
            )?
            .iter()
-            .map(|row| (row.get("extname"), row.get("extversion")))
+            .map(|row| {
+                (
+                    row.get("extname"),
+                    row.get("extversion"),
+                    row.get("extowner"),
+                )
+            })
            .collect();

-        for (extname, v) in extensions.iter() {
+        for (extname, v, extowner) in extensions.iter() {
            let version = v.to_string();

-            // increment the number of databases where the version of extension is installed
-            INSTALLED_EXTENSIONS
-                .with_label_values(&[extname, &version])
-                .inc();
+            // check if the extension is owned by superuser
+            // 10 is the oid of superuser
+            let owned_by_superuser = if *extowner == 10 { "1" } else { "0" };

            extensions_map
-                .entry(extname.to_string())
+                .entry((
+                    extname.to_string(),
+                    version.clone(),
+                    owned_by_superuser.to_string(),
+                ))
                .and_modify(|e| {
-                    e.versions.insert(version.clone());
                    // count the number of databases where the extension is installed
                    e.n_databases += 1;
                })
                .or_insert(InstalledExtension {
                    extname: extname.to_string(),
-                    versions: HashSet::from([version.clone()]),
+                    version: version.clone(),
                    n_databases: 1,
+                    owned_by_superuser: owned_by_superuser.to_string(),
                });
        }
    }

-    let res = InstalledExtensions {
-        extensions: extensions_map.into_values().collect(),
-    };
+    for (key, ext) in extensions_map.iter() {
+        let (extname, version, owned_by_superuser) = key;
+        let n_databases = ext.n_databases as u64;

-    Ok(res)
+        INSTALLED_EXTENSIONS
+            .with_label_values(&[extname, version, owned_by_superuser])
+            .set(n_databases);
+    }
+
+    Ok(InstalledExtensions {
+        extensions: extensions_map.into_values().collect(),
+    })
 }

 static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
        "compute_installed_extensions",
        "Number of databases where the version of extension is installed",
-        &["extension_name", "version"]
+        &["extension_name", "version", "owned_by_superuser"]
    )
    .expect("failed to define a metric")
 });
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -3,8 +3,6 @@
 #![deny(unsafe_code)]
 #![deny(clippy::undocumented_unsafe_blocks)]

-extern crate hyper0 as hyper;
-
 pub mod checker;
 pub mod config;
 pub mod configurator;
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -1,13 +1,16 @@
 use anyhow::{Context, Result};
-use postgres::Client;
+use fail::fail_point;
+use postgres::{Client, Transaction};
 use tracing::info;

+/// Runs a series of migrations on a target database
 pub(crate) struct MigrationRunner<'m> {
    client: &'m mut Client,
    migrations: &'m [&'m str],
 }

 impl<'m> MigrationRunner<'m> {
+    /// Create a new migration runner
    pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
        // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
        assert!(migrations.len() + 1 < i64::MAX as usize);
@@ -15,87 +18,110 @@ impl<'m> MigrationRunner<'m> {
        Self { client, migrations }
    }

+    /// Get the current value neon_migration.migration_id
    fn get_migration_id(&mut self) -> Result<i64> {
-        let query = "SELECT id FROM neon_migration.migration_id";
        let row = self
            .client
-            .query_one(query, &[])
-            .context("run_migrations get migration_id")?;
+            .query_one("SELECT id FROM neon_migration.migration_id", &[])?;

        Ok(row.get::<&str, i64>("id"))
    }

-    fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
-        let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
+    /// Update the neon_migration.migration_id value
+    ///
+    /// This function has a fail point called compute-migration, which can be
+    /// used if you would like to fail the application of a series of migrations
+    /// at some point.
+    fn update_migration_id(txn: &mut Transaction, migration_id: i64) -> Result<()> {
+        // We use this fail point in order to check that failing in the
+        // middle of applying a series of migrations fails in an expected
+        // manner
+        if cfg!(feature = "testing") {
+            let fail = (|| {
+                fail_point!("compute-migration", |fail_migration_id| {
+                    migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()
+                });

+                false
+            })();
+
+            if fail {
+                return Err(anyhow::anyhow!(format!(
+                    "migration {} was configured to fail because of a failpoint",
+                    migration_id
+                )));
+            }
+        }
+
+        txn.query(
+            "UPDATE neon_migration.migration_id SET id = $1",
+            &[&migration_id],
+        )
+        .with_context(|| format!("update neon_migration.migration_id to {migration_id}"))?;
+
+        Ok(())
+    }
+
+    /// Prepare the migrations the target database for handling migrations
+    fn prepare_database(&mut self) -> Result<()> {
        self.client
-            .simple_query(&setval)
-            .context("run_migrations update id")?;
+            .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?;
+        self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?;
+        self.client.simple_query(
+            "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING",
+        )?;
+        self.client
+            .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?;
+        self.client
+            .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?;

        Ok(())
    }

-    fn prepare_migrations(&mut self) -> Result<()> {
-        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
-        self.client.simple_query(query)?;
+    /// Run an individual migration
+    fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
+        if migration.starts_with("-- SKIP") {
+            info!("Skipping migration id={}", migration_id);

-        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
-        self.client.simple_query(query)?;
+            // Even though we are skipping the migration, updating the
+            // migration ID should help keep logic easy to understand when
+            // trying to understand the state of a cluster.
+            Self::update_migration_id(txn, migration_id)?;
+        } else {
+            info!("Running migration id={}:\n{}\n", migration_id, migration);

-        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
-        self.client.simple_query(query)?;
+            txn.simple_query(migration)
+                .with_context(|| format!("apply migration {migration_id}"))?;

-        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
-        self.client.simple_query(query)?;
-
-        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
-        self.client.simple_query(query)?;
+            Self::update_migration_id(txn, migration_id)?;
+        }

        Ok(())
    }

+    /// Run the configured set of migrations
    pub fn run_migrations(mut self) -> Result<()> {
-        self.prepare_migrations()?;
+        self.prepare_database()
+            .context("prepare database to handle migrations")?;

        let mut current_migration = self.get_migration_id()? as usize;
        while current_migration < self.migrations.len() {
-            macro_rules! migration_id {
-                ($cm:expr) => {
-                    ($cm + 1) as i64
-                };
-            }
+            // The index lags the migration ID by 1, so the current migration
+            // ID is also the next index
+            let migration_id = (current_migration + 1) as i64;

-            let migration = self.migrations[current_migration];
+            let mut txn = self
+                .client
+                .transaction()
+                .with_context(|| format!("begin transaction for migration {migration_id}"))?;

-            if migration.starts_with("-- SKIP") {
-                info!("Skipping migration id={}", migration_id!(current_migration));
-            } else {
-                info!(
-                    "Running migration id={}:\n{}\n",
-                    migration_id!(current_migration),
-                    migration
-                );
+            Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
+                .with_context(|| format!("running migration {migration_id}"))?;

-                self.client
-                    .simple_query("BEGIN")
-                    .context("begin migration")?;
+            txn.commit()
+                .with_context(|| format!("commit transaction for migration {migration_id}"))?;

-                self.client.simple_query(migration).with_context(|| {
-                    format!(
-                        "run_migrations migration id={}",
-                        migration_id!(current_migration)
-                    )
-                })?;
-
-                // Migration IDs start at 1
-                self.update_migration_id(migration_id!(current_migration))?;
-
-                self.client
-                    .simple_query("COMMIT")
-                    .context("commit migration")?;
-
-                info!("Finished migration id={}", migration_id!(current_migration));
-            }
+            info!("Finished migration id={}", migration_id);

            current_migration += 1;
        }
--- a/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
+++ b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
@@ -0,0 +1,9 @@
+DO $$
+DECLARE
+    bypassrls boolean;
+BEGIN
+    SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser';
+    IF NOT bypassrls THEN
+        RAISE EXCEPTION 'neon_superuser cannot bypass RLS';
+    END IF;
+END $$;
--- a/compute_tools/src/migrations/tests/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/tests/0002-alter_roles.sql
@@ -0,0 +1,25 @@
+DO $$
+DECLARE
+    role record;
+BEGIN
+    FOR role IN
+        SELECT rolname AS name, rolinherit AS inherit
+        FROM pg_roles
+        WHERE pg_has_role(rolname, 'neon_superuser', 'member')
+    LOOP
+        IF NOT role.inherit THEN
+            RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);
+        END IF;
+    END LOOP;
+
+    FOR role IN
+        SELECT rolname AS name, rolbypassrls AS bypassrls
+        FROM pg_roles
+        WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member')
+            AND NOT starts_with(rolname, 'pg_')
+    LOOP
+        IF role.bypassrls THEN
+            RAISE EXCEPTION  '% can bypass RLS', quote_ident(role.name);
+        END IF;
+    END LOOP;
+END $$;
--- a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
@@ -0,0 +1,10 @@
+DO $$
+BEGIN
+    IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN
+        RETURN;
+    END IF;
+
+    IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';
+    END IF;
+END $$;
--- a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -0,0 +1,19 @@
+DO $$
+DECLARE
+    monitor record;
+BEGIN
+    SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
+            admin_option AS admin
+        INTO monitor
+        FROM pg_auth_members
+        WHERE roleid = 'pg_monitor'::regrole
+            AND member = 'pg_monitor'::regrole;
+
+    IF NOT monitor.member THEN
+        RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
+    END IF;
+
+    IF NOT monitor.admin THEN
+        RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
+    END IF;
+END $$;
--- a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
@@ -0,0 +1,2 @@
+-- This test was never written becuase at the time migration tests were added
+-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
@@ -0,0 +1,2 @@
+-- This test was never written becuase at the time migration tests were added
+-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
@@ -0,0 +1,2 @@
+-- This test was never written becuase at the time migration tests were added
+-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
@@ -0,0 +1,2 @@
+-- This test was never written becuase at the time migration tests were added
+-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
+++ b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
@@ -0,0 +1,2 @@
+-- This test was never written becuase at the time migration tests were added
+-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
@@ -0,0 +1,13 @@
+DO $$
+DECLARE
+    can_execute boolean;
+BEGIN
+    SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute'))
+       INTO can_execute
+       FROM pg_proc
+       WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')
+           AND pronamespace = 'pg_catalog'::regnamespace;
+    IF NOT can_execute THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';
+    END IF;
+END $$;
--- a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -0,0 +1,13 @@
+DO $$
+DECLARE
+    can_execute boolean;
+BEGIN
+    SELECT has_function_privilege('neon_superuser', oid, 'execute')
+       INTO can_execute
+       FROM pg_proc
+       WHERE proname = 'pg_show_replication_origin_status'
+           AND pronamespace = 'pg_catalog'::regnamespace;
+    IF NOT can_execute THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status';
+    END IF;
+END $$;
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -47,6 +47,7 @@ pub enum PerDatabasePhase {
    DeleteDBRoleReferences,
    ChangeSchemaPerms,
    HandleAnonExtension,
+    DropSubscriptionsForDeletedDatabases,
 }

 #[derive(Clone, Debug)]
@@ -74,7 +75,7 @@ pub struct MutableApplyContext {
    pub dbs: HashMap<String, Database>,
 }

-/// Appply the operations that belong to the given spec apply phase.
+/// Apply the operations that belong to the given spec apply phase.
 ///
 /// Commands within a single phase are executed in order of Iterator yield.
 /// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
@@ -326,13 +327,12 @@ async fn get_operations<'a>(

                            // Use FORCE to drop database even if there are active connections.
                            // We run this from `cloud_admin`, so it should have enough privileges.
+                            //
                            // NB: there could be other db states, which prevent us from dropping
                            // the database. For example, if db is used by any active subscription
                            // or replication slot.
-                            // TODO: deal with it once we allow logical replication. Proper fix should
-                            // involve returning an error code to the control plane, so it could
-                            // figure out that this is a non-retryable error, return it to the user
-                            // and fail operation permanently.
+                            // Such cases are handled in the DropSubscriptionsForDeletedDatabases
+                            // phase. We do all the cleanup before actually dropping the database.
                            let drop_db_query: String = format!(
                                "DROP DATABASE IF EXISTS {} WITH (FORCE)",
                                &op.name.pg_quote()
@@ -444,6 +444,30 @@ async fn get_operations<'a>(
        }
        ApplySpecPhase::RunInEachDatabase { db, subphase } => {
            match subphase {
+                PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
+                    match &db {
+                        DB::UserDB(db) => {
+                            let drop_subscription_query: String = format!(
+                                include_str!("sql/drop_subscription_for_drop_dbs.sql"),
+                                datname_str = escape_literal(&db.name),
+                            );
+
+                            let operations = vec![Operation {
+                                query: drop_subscription_query,
+                                comment: Some(format!(
+                                    "optionally dropping subscriptions for DB {}",
+                                    db.name,
+                                )),
+                            }]
+                            .into_iter();
+
+                            Ok(Box::new(operations))
+                        }
+                        // skip this cleanup for the system databases
+                        // because users can't drop them
+                        DB::SystemDB => Ok(Box::new(empty())),
+                    }
+                }
                PerDatabasePhase::DeleteDBRoleReferences => {
                    let ctx = ctx.read().await;

@@ -474,7 +498,19 @@ async fn get_operations<'a>(
                                        ),
                                        comment: None,
                                    },
+                                    // Revoke some potentially blocking privileges (Neon-specific currently)
+                                    Operation {
+                                        query: format!(
+                                            include_str!("sql/pre_drop_role_revoke_privileges.sql"),
+                                            role_name = quoted,
+                                        ),
+                                        comment: None,
+                                    },
                                    // This now will only drop privileges of the role
+                                    // TODO: this is obviously not 100% true because of the above case,
+                                    // there could be still some privileges that are not revoked. Maybe this
+                                    // only drops privileges that were granted *by this* role, not *to this* role,
+                                    // but this has to be checked.
                                    Operation {
                                        query: format!("DROP OWNED BY {}", quoted),
                                        comment: None,
--- a/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
+++ b/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
@@ -0,0 +1,11 @@
+DO $$
+DECLARE
+    subname TEXT;
+BEGIN
+    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
+        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
+        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
+        EXECUTE format('DROP SUBSCRIPTION %I;', subname);
+    END LOOP;
+END;
+$$;
--- a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
+++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
@@ -0,0 +1,28 @@
+SET SESSION ROLE neon_superuser;
+
+DO $$
+DECLARE
+    schema TEXT;
+    revoke_query TEXT;
+BEGIN
+    FOR schema IN
+        SELECT schema_name
+        FROM information_schema.schemata
+        -- So far, we only had issues with 'public' schema. Probably, because we do some additional grants,
+        -- e.g., make DB owner the owner of 'public' schema automatically (when created via API).
+        -- See https://github.com/neondatabase/cloud/issues/13582 for the context.
+        -- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema,
+        -- ii) it's easy to add more schemas to the list if needed.
+        WHERE schema_name IN ('public')
+    LOOP
+        revoke_query := format(
+            'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
+            schema
+        );
+
+        EXECUTE revoke_query;
+    END LOOP;
+END;
+$$;
+
+RESET ROLE;
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -274,6 +274,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
    for env_key in [
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
+        "AWS_SESSION_TOKEN",
        "AWS_PROFILE",
        // HOME is needed in combination with `AWS_PROFILE` to pick up the SSO sessions.
        "HOME",
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -19,6 +19,7 @@ use control_plane::storage_controller::{
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
+use nix::fcntl::{flock, FlockArg};
 use pageserver_api::config::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -36,6 +37,8 @@ use safekeeper_api::{
 };
 use std::borrow::Cow;
 use std::collections::{BTreeSet, HashMap};
+use std::fs::File;
+use std::os::fd::AsRawFd;
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
@@ -689,6 +692,21 @@ struct TimelineTreeEl {
    pub children: BTreeSet<TimelineId>,
 }

+/// A flock-based guard over the neon_local repository directory
+struct RepoLock {
+    _file: File,
+}
+
+impl RepoLock {
+    fn new() -> Result<Self> {
+        let repo_dir = File::open(local_env::base_path())?;
+        let repo_dir_fd = repo_dir.as_raw_fd();
+        flock(repo_dir_fd, FlockArg::LockExclusive)?;
+
+        Ok(Self { _file: repo_dir })
+    }
+}
+
 // Main entry point for the 'neon_local' CLI utility
 //
 // This utility helps to manage neon installation. That includes following:
@@ -700,9 +718,14 @@ fn main() -> Result<()> {
    let cli = Cli::parse();

    // Check for 'neon init' command first.
-    let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
-        handle_init(&args).map(|env| Some(Cow::Owned(env)))
+    let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {
+        (handle_init(&args).map(|env| Some(Cow::Owned(env))), None)
    } else {
+        // This tool uses a collection of simple files to store its state, and consequently
+        // it is not generally safe to run multiple commands concurrently.  Rather than expect
+        // all callers to know this, use a lock file to protect against concurrent execution.
+        let _repo_lock = RepoLock::new().unwrap();
+
        // all other commands need an existing config
        let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
        let original_env = env.clone();
@@ -728,11 +751,12 @@ fn main() -> Result<()> {
            NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
        };

-        if &original_env != env {
+        let subcommand_result = if &original_env != env {
            subcommand_result.map(|()| Some(Cow::Borrowed(env)))
        } else {
            subcommand_result.map(|()| None)
-        }
+        };
+        (subcommand_result, Some(_repo_lock))
    };

    match subcommand_result {
@@ -922,7 +946,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
    } else {
        // User (likely interactive) did not provide a description of the environment, give them the default
        NeonLocalInitConf {
-            control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
+            control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
            broker: NeonBroker {
                listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
            },
@@ -1718,18 +1742,15 @@ async fn handle_start_all_impl(
            broker::start_broker_process(env, &retry_timeout).await
        });

-        // Only start the storage controller if the pageserver is configured to need it
-        if env.control_plane_api.is_some() {
-            js.spawn(async move {
-                let storage_controller = StorageController::from_env(env);
-                storage_controller
-                    .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                        retry_timeout,
-                    ))
-                    .await
-                    .map_err(|e| e.context("start storage_controller"))
-            });
-        }
+        js.spawn(async move {
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
+                .start(NeonStorageControllerStartArgs::with_default_instance_id(
+                    retry_timeout,
+                ))
+                .await
+                .map_err(|e| e.context("start storage_controller"))
+        });

        for ps_conf in &env.pageservers {
            js.spawn(async move {
@@ -1774,10 +1795,6 @@ async fn neon_start_status_check(
    const RETRY_INTERVAL: Duration = Duration::from_millis(100);
    const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);

-    if env.control_plane_api.is_none() {
-        return Ok(());
-    }
-
    let storcon = StorageController::from_env(env);

    let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -62,7 +62,7 @@ use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
 use crate::storage_controller::StorageController;

-use compute_api::responses::{ComputeState, ComputeStatus};
+use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
 use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};

 // contents of a endpoint.json file
@@ -316,6 +316,10 @@ impl Endpoint {
        // and can cause errors like 'no unpinned buffers available', see
        // <https://github.com/neondatabase/neon/issues/9956>
        conf.append("shared_buffers", "1MB");
+        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
+        // batching logic.  Set this to 2 so that we exercise the code a bit without letting
+        // individual tests do a lot of concurrent work on underpowered test machines
+        conf.append("effective_io_concurrency", "2");
        conf.append("fsync", "off");
        conf.append("max_connections", "100");
        conf.append("wal_level", "logical");
@@ -581,6 +585,7 @@ impl Endpoint {
            features: self.features.clone(),
            swap_size_bytes: None,
            disk_quota_bytes: None,
+            disable_lfc_resizing: None,
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
@@ -734,7 +739,7 @@ impl Endpoint {
    }

    // Call the /status HTTP API
-    pub async fn get_status(&self) -> Result<ComputeState> {
+    pub async fn get_status(&self) -> Result<ComputeStatusResponse> {
        let client = reqwest::Client::new();

        let response = client
@@ -810,7 +815,7 @@ impl Endpoint {
        }

        let client = reqwest::Client::builder()
-            .timeout(Duration::from_secs(30))
+            .timeout(Duration::from_secs(120))
            .build()
            .unwrap();
        let response = client
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -76,7 +76,7 @@ pub struct LocalEnv {

    // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will
    // be propagated into each pageserver's configuration.
-    pub control_plane_api: Option<Url>,
+    pub control_plane_api: Url,

    // Control plane upcall API for storage controller.  If set, this will be propagated into the
    // storage controller's configuration.
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf {
    pub storage_controller: Option<NeonStorageControllerConf>,
    pub pageservers: Vec<NeonLocalInitPageserverConf>,
    pub safekeepers: Vec<SafekeeperConf>,
-    pub control_plane_api: Option<Option<Url>>,
+    pub control_plane_api: Option<Url>,
    pub control_plane_compute_hook_api: Option<Option<Url>>,
 }

@@ -180,7 +180,7 @@ impl NeonStorageControllerConf {
    const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);

    // Very tight heartbeat interval to speed up tests
-    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
+    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
 }

 impl Default for NeonStorageControllerConf {
@@ -483,7 +483,6 @@ impl LocalEnv {
            .iter()
            .find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
            .map(|&(_, timeline_id)| timeline_id)
-            .map(TimelineId::from)
    }

    pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
@@ -535,7 +534,7 @@ impl LocalEnv {
                storage_controller,
                pageservers,
                safekeepers,
-                control_plane_api,
+                control_plane_api: control_plane_api.unwrap(),
                control_plane_compute_hook_api,
                branch_name_mappings,
            }
@@ -638,7 +637,7 @@ impl LocalEnv {
                storage_controller: self.storage_controller.clone(),
                pageservers: vec![], // it's skip_serializing anyway
                safekeepers: self.safekeepers.clone(),
-                control_plane_api: self.control_plane_api.clone(),
+                control_plane_api: Some(self.control_plane_api.clone()),
                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
                branch_name_mappings: self.branch_name_mappings.clone(),
            },
@@ -768,7 +767,7 @@ impl LocalEnv {
            storage_controller: storage_controller.unwrap_or_default(),
            pageservers: pageservers.iter().map(Into::into).collect(),
            safekeepers,
-            control_plane_api: control_plane_api.unwrap_or_default(),
+            control_plane_api: control_plane_api.unwrap(),
            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
            branch_name_mappings: Default::default(),
        };
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -95,21 +95,19 @@ impl PageServerNode {

        let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];

-        if let Some(control_plane_api) = &self.env.control_plane_api {
-            overrides.push(format!(
-                "control_plane_api='{}'",
-                control_plane_api.as_str()
-            ));
+        overrides.push(format!(
+            "control_plane_api='{}'",
+            self.env.control_plane_api.as_str()
+        ));

-            // Storage controller uses the same auth as pageserver: if JWT is enabled
-            // for us, we will also need it to talk to them.
-            if matches!(conf.http_auth_type, AuthType::NeonJWT) {
-                let jwt_token = self
-                    .env
-                    .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
-                    .unwrap();
-                overrides.push(format!("control_plane_api_token='{}'", jwt_token));
-            }
+        // Storage controller uses the same auth as pageserver: if JWT is enabled
+        // for us, we will also need it to talk to them.
+        if matches!(conf.http_auth_type, AuthType::NeonJWT) {
+            let jwt_token = self
+                .env
+                .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
+                .unwrap();
+            overrides.push(format!("control_plane_api_token='{}'", jwt_token));
        }

        if !conf.other.contains_key("remote_storage") {
@@ -435,7 +433,7 @@ impl PageServerNode {
    ) -> anyhow::Result<()> {
        let config = Self::parse_config(settings)?;
        self.http_client
-            .tenant_config(&models::TenantConfigRequest { tenant_id, config })
+            .set_tenant_config(&models::TenantConfigRequest { tenant_id, config })
            .await?;

        Ok(())
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -338,7 +338,7 @@ impl StorageController {
                        .port(),
                )
            } else {
-                let listen_url = self.env.control_plane_api.clone().unwrap();
+                let listen_url = self.env.control_plane_api.clone();

                let listen = format!(
                    "{}:{}",
@@ -708,7 +708,7 @@ impl StorageController {
        } else {
            // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
            // for general purpose API access.
-            let listen_url = self.env.control_plane_api.clone().unwrap();
+            let listen_url = self.env.control_plane_api.clone();
            Url::from_str(&format!(
                "http://{}:{}/{path}",
                listen_url.host_str().unwrap(),
@@ -822,10 +822,7 @@ impl StorageController {
        self.dispatch(
            Method::PUT,
            format!("control/v1/tenant/{tenant_shard_id}/migrate"),
-            Some(TenantShardMigrateRequest {
-                tenant_shard_id,
-                node_id,
-            }),
+            Some(TenantShardMigrateRequest { node_id }),
        )
        .await
    }
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,16 +1,22 @@
 use futures::StreamExt;
-use std::{str::FromStr, time::Duration};
+use std::{
+    collections::{HashMap, HashSet},
+    str::FromStr,
+    time::Duration,
+};

 use clap::{Parser, Subcommand};
 use pageserver_api::{
    controller_api::{
        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
+        SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
+        ShardsPreferredAzsRequest, SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse,
+        TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
-        ShardParameters, TenantConfig, TenantConfigRequest, TenantShardSplitRequest,
-        TenantShardSplitResponse,
+        ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest,
+        TenantShardSplitRequest, TenantShardSplitResponse,
    },
    shard::{ShardStripeSize, TenantShardId},
 };
@@ -111,14 +117,31 @@ enum Command {
        #[arg(long)]
        node: NodeId,
    },
+    /// Migrate the secondary location for a tenant shard to a specific pageserver.
+    TenantShardMigrateSecondary {
+        #[arg(long)]
+        tenant_shard_id: TenantShardId,
+        #[arg(long)]
+        node: NodeId,
+    },
    /// Cancel any ongoing reconciliation for this shard
    TenantShardCancelReconcile {
        #[arg(long)]
        tenant_shard_id: TenantShardId,
    },
-    /// Modify the pageserver tenant configuration of a tenant: this is the configuration structure
+    /// Set the pageserver tenant configuration of a tenant: this is the configuration structure
    /// that is passed through to pageservers, and does not affect storage controller behavior.
-    TenantConfig {
+    /// Any previous tenant configs are overwritten.
+    SetTenantConfig {
+        #[arg(long)]
+        tenant_id: TenantId,
+        #[arg(long)]
+        config: String,
+    },
+    /// Patch the pageserver tenant configuration of a tenant. Any fields with null values in the
+    /// provided JSON are unset from the tenant config and all fields with non-null values are set.
+    /// Unspecified fields are not changed.
+    PatchTenantConfig {
        #[arg(long)]
        tenant_id: TenantId,
        #[arg(long)]
@@ -135,6 +158,12 @@ enum Command {
        #[arg(long)]
        tenant_id: TenantId,
    },
+    TenantSetPreferredAz {
+        #[arg(long)]
+        tenant_id: TenantId,
+        #[arg(long)]
+        preferred_az: Option<String>,
+    },
    /// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
    /// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
    TenantDrop {
@@ -201,6 +230,15 @@ enum Command {
        #[arg(long)]
        timeout: humantime::Duration,
    },
+    /// List safekeepers known to the storage controller
+    Safekeepers {},
+    /// Set the scheduling policy of the specified safekeeper
+    SafekeeperScheduling {
+        #[arg(long)]
+        node_id: NodeId,
+        #[arg(long)]
+        scheduling_policy: SkSchedulingPolicyArg,
+    },
 }

 #[derive(Parser)]
@@ -253,6 +291,17 @@ impl FromStr for PlacementPolicyArg {
    }
 }

+#[derive(Debug, Clone)]
+struct SkSchedulingPolicyArg(SkSchedulingPolicy);
+
+impl FromStr for SkSchedulingPolicyArg {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        SkSchedulingPolicy::from_str(s).map(Self)
+    }
+}
+
 #[derive(Debug, Clone)]
 struct ShardSchedulingPolicyArg(ShardSchedulingPolicy);

@@ -382,11 +431,12 @@ async fn main() -> anyhow::Result<()> {
            resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));

            let mut table = comfy_table::Table::new();
-            table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
+            table.set_header(["Id", "Hostname", "AZ", "Scheduling", "Availability"]);
            for node in resp {
                table.add_row([
                    format!("{}", node.id),
                    node.listen_http_addr,
+                    node.availability_zone_id,
                    format!("{:?}", node.scheduling),
                    format!("{:?}", node.availability),
                ]);
@@ -446,33 +496,65 @@ async fn main() -> anyhow::Result<()> {
            println!("{table}");
        }
        Command::Tenants { node_id: None } => {
-            let mut resp = storcon_client
-                .dispatch::<(), Vec<TenantDescribeResponse>>(
-                    Method::GET,
-                    "control/v1/tenant".to_string(),
-                    None,
-                )
-                .await?;
-
-            resp.sort_by(|a, b| a.tenant_id.cmp(&b.tenant_id));
-
+            // Set up output formatting
            let mut table = comfy_table::Table::new();
            table.set_header([
                "TenantId",
+                "Preferred AZ",
                "ShardCount",
                "StripeSize",
                "Placement",
                "Scheduling",
            ]);
-            for tenant in resp {
-                let shard_zero = tenant.shards.into_iter().next().unwrap();
-                table.add_row([
-                    format!("{}", tenant.tenant_id),
-                    format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
-                    format!("{:?}", tenant.stripe_size),
-                    format!("{:?}", tenant.policy),
-                    format!("{:?}", shard_zero.scheduling_policy),
-                ]);
+
+            // Pagination loop over listing API
+            let mut start_after = None;
+            const LIMIT: usize = 1000;
+            loop {
+                let path = match start_after {
+                    None => format!("control/v1/tenant?limit={LIMIT}"),
+                    Some(start_after) => {
+                        format!("control/v1/tenant?limit={LIMIT}&start_after={start_after}")
+                    }
+                };
+
+                let resp = storcon_client
+                    .dispatch::<(), Vec<TenantDescribeResponse>>(Method::GET, path, None)
+                    .await?;
+
+                if resp.is_empty() {
+                    // End of data reached
+                    break;
+                }
+
+                // Give some visual feedback while we're building up the table (comfy_table doesn't have
+                // streaming output)
+                if resp.len() >= LIMIT {
+                    eprint!(".");
+                }
+
+                start_after = Some(resp.last().unwrap().tenant_id);
+
+                for tenant in resp {
+                    let shard_zero = tenant.shards.into_iter().next().unwrap();
+                    table.add_row([
+                        format!("{}", tenant.tenant_id),
+                        shard_zero
+                            .preferred_az_id
+                            .as_ref()
+                            .cloned()
+                            .unwrap_or("".to_string()),
+                        format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
+                        format!("{:?}", tenant.stripe_size),
+                        format!("{:?}", tenant.policy),
+                        format!("{:?}", shard_zero.scheduling_policy),
+                    ]);
+                }
+            }
+
+            // Terminate progress dots
+            if table.row_count() > LIMIT {
+                eprint!("");
            }

            println!("{table}");
@@ -527,10 +609,7 @@ async fn main() -> anyhow::Result<()> {
            tenant_shard_id,
            node,
        } => {
-            let req = TenantShardMigrateRequest {
-                tenant_shard_id,
-                node_id: node,
-            };
+            let req = TenantShardMigrateRequest { node_id: node };

            storcon_client
                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
@@ -540,6 +619,20 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
+        Command::TenantShardMigrateSecondary {
+            tenant_shard_id,
+            node,
+        } => {
+            let req = TenantShardMigrateRequest { node_id: node };
+
+            storcon_client
+                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
+                    Method::PUT,
+                    format!("control/v1/tenant/{tenant_shard_id}/migrate_secondary"),
+                    Some(req),
+                )
+                .await?;
+        }
        Command::TenantShardCancelReconcile { tenant_shard_id } => {
            storcon_client
                .dispatch::<(), ()>(
@@ -549,11 +642,21 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
-        Command::TenantConfig { tenant_id, config } => {
+        Command::SetTenantConfig { tenant_id, config } => {
            let tenant_conf = serde_json::from_str(&config)?;

            vps_client
-                .tenant_config(&TenantConfigRequest {
+                .set_tenant_config(&TenantConfigRequest {
+                    tenant_id,
+                    config: tenant_conf,
+                })
+                .await?;
+        }
+        Command::PatchTenantConfig { tenant_id, config } => {
+            let tenant_conf = serde_json::from_str(&config)?;
+
+            vps_client
+                .patch_tenant_config(&TenantConfigPatchRequest {
                    tenant_id,
                    config: tenant_conf,
                })
@@ -573,6 +676,19 @@ async fn main() -> anyhow::Result<()> {
                    None,
                )
                .await?;
+
+            let nodes = storcon_client
+                .dispatch::<(), Vec<NodeDescribeResponse>>(
+                    Method::GET,
+                    "control/v1/node".to_string(),
+                    None,
+                )
+                .await?;
+            let nodes = nodes
+                .into_iter()
+                .map(|n| (n.id, n))
+                .collect::<HashMap<_, _>>();
+
            println!("Tenant {tenant_id}");
            let mut table = comfy_table::Table::new();
            table.add_row(["Policy", &format!("{:?}", policy)]);
@@ -581,7 +697,14 @@ async fn main() -> anyhow::Result<()> {
            println!("{table}");
            println!("Shards:");
            let mut table = comfy_table::Table::new();
-            table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
+            table.set_header([
+                "Shard",
+                "Attached",
+                "Attached AZ",
+                "Secondary",
+                "Last error",
+                "status",
+            ]);
            for shard in shards {
                let secondary = shard
                    .node_secondary
@@ -604,11 +727,18 @@ async fn main() -> anyhow::Result<()> {
                }
                let status = status_parts.join(",");

+                let attached_node = shard
+                    .node_attached
+                    .as_ref()
+                    .map(|id| nodes.get(id).expect("Shard references nonexistent node"));
+
                table.add_row([
                    format!("{}", shard.tenant_shard_id),
-                    shard
-                        .node_attached
-                        .map(|n| format!("{}", n))
+                    attached_node
+                        .map(|n| format!("{} ({})", n.listen_http_addr, n.id))
+                        .unwrap_or(String::new()),
+                    attached_node
+                        .map(|n| n.availability_zone_id.clone())
                        .unwrap_or(String::new()),
                    secondary,
                    shard.last_error,
@@ -617,6 +747,66 @@ async fn main() -> anyhow::Result<()> {
            }
            println!("{table}");
        }
+        Command::TenantSetPreferredAz {
+            tenant_id,
+            preferred_az,
+        } => {
+            // First learn about the tenant's shards
+            let describe_response = storcon_client
+                .dispatch::<(), TenantDescribeResponse>(
+                    Method::GET,
+                    format!("control/v1/tenant/{tenant_id}"),
+                    None,
+                )
+                .await?;
+
+            // Learn about nodes to validate the AZ ID
+            let nodes = storcon_client
+                .dispatch::<(), Vec<NodeDescribeResponse>>(
+                    Method::GET,
+                    "control/v1/node".to_string(),
+                    None,
+                )
+                .await?;
+
+            if let Some(preferred_az) = &preferred_az {
+                let azs = nodes
+                    .into_iter()
+                    .map(|n| (n.availability_zone_id))
+                    .collect::<HashSet<_>>();
+                if !azs.contains(preferred_az) {
+                    anyhow::bail!(
+                        "AZ {} not found on any node: known AZs are: {:?}",
+                        preferred_az,
+                        azs
+                    );
+                }
+            } else {
+                // Make it obvious to the user that since they've omitted an AZ, we're clearing it
+                eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
+            }
+
+            // Construct a request that modifies all the tenant's shards
+            let req = ShardsPreferredAzsRequest {
+                preferred_az_ids: describe_response
+                    .shards
+                    .into_iter()
+                    .map(|s| {
+                        (
+                            s.tenant_shard_id,
+                            preferred_az.clone().map(AvailabilityZone),
+                        )
+                    })
+                    .collect(),
+            };
+            storcon_client
+                .dispatch::<ShardsPreferredAzsRequest, ()>(
+                    Method::PUT,
+                    "control/v1/preferred_azs".to_string(),
+                    Some(req),
+                )
+                .await?;
+        }
        Command::TenantWarmup { tenant_id } => {
            let describe_response = storcon_client
                .dispatch::<(), TenantDescribeResponse>(
@@ -736,7 +926,7 @@ async fn main() -> anyhow::Result<()> {
            threshold,
        } => {
            vps_client
-                .tenant_config(&TenantConfigRequest {
+                .set_tenant_config(&TenantConfigRequest {
                    tenant_id,
                    config: TenantConfig {
                        eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(
@@ -892,10 +1082,7 @@ async fn main() -> anyhow::Result<()> {
                            .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                                Method::PUT,
                                format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
-                                Some(TenantShardMigrateRequest {
-                                    tenant_shard_id: mv.tenant_shard_id,
-                                    node_id: mv.to,
-                                }),
+                                Some(TenantShardMigrateRequest { node_id: mv.to }),
                            )
                            .await
                            .map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
@@ -1000,6 +1187,57 @@ async fn main() -> anyhow::Result<()> {
                "Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
            );
        }
+        Command::Safekeepers {} => {
+            let mut resp = storcon_client
+                .dispatch::<(), Vec<SafekeeperDescribeResponse>>(
+                    Method::GET,
+                    "control/v1/safekeeper".to_string(),
+                    None,
+                )
+                .await?;
+
+            resp.sort_by(|a, b| a.id.cmp(&b.id));
+
+            let mut table = comfy_table::Table::new();
+            table.set_header([
+                "Id",
+                "Version",
+                "Host",
+                "Port",
+                "Http Port",
+                "AZ Id",
+                "Scheduling",
+            ]);
+            for sk in resp {
+                table.add_row([
+                    format!("{}", sk.id),
+                    format!("{}", sk.version),
+                    sk.host,
+                    format!("{}", sk.port),
+                    format!("{}", sk.http_port),
+                    sk.availability_zone_id.clone(),
+                    String::from(sk.scheduling_policy),
+                ]);
+            }
+            println!("{table}");
+        }
+        Command::SafekeeperScheduling {
+            node_id,
+            scheduling_policy,
+        } => {
+            let scheduling_policy = scheduling_policy.0;
+            storcon_client
+                .dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
+                    Method::POST,
+                    format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
+                    Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
+                )
+                .await?;
+            println!(
+                "Scheduling policy of {node_id} set to {}",
+                String::from(scheduling_policy)
+            );
+        }
    }

    Ok(())
--- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
@@ -132,11 +132,6 @@
                "name": "cron.database",
                "value": "postgres",
                "vartype": "string"
-            },
-            {
-                "name": "session_preload_libraries",
-                "value": "anon",
-                "vartype": "string"
            }
        ]
    },
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -35,11 +35,11 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    echo "clean up containers if exists"
    cleanup
    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-    # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
-    if [ $pg_version -eq 17 ]; then
+    # The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
+    if [ "${pg_version}" -ne 17 ]; then
      SPEC_PATH="compute_wrapper/var/db/postgres/specs"
      mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
-      jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json
+      jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
    fi
    PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d

@@ -106,8 +106,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        fi
    fi
    cleanup
-    # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
-    if [ $pg_version -eq 17 ]; then
-      mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json
+    # Restore the original spec.json
+    if [ "$pg_version" -ne 17 ]; then
+      mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
    fi
 done
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -7,15 +7,11 @@ Currently we build two main images:
 - [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
 - [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/compute-node.Dockerfile).

-And additional intermediate image:
-
- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
-
 ## Build pipeline

 We build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs

-1. `neondatabase/compute-tools` and `neondatabase/compute-node-v16` (and -v15 and -v14)
+1. `neondatabase/compute-node-v17` (and -16, -v15, -v14)

 2. `neondatabase/neon`

--- a/docs/rfcs/035-safekeeper-dynamic-membership-change.md
+++ b/docs/rfcs/035-safekeeper-dynamic-membership-change.md
@@ -81,7 +81,7 @@ configuration generation in them is less than its current one. Namely, it
 refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
 response it sends its current configuration generation to let walproposer know.

-Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration` 
+Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
 accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
 current one and ignores it otherwise. In any case it replies with
 ```
@@ -103,7 +103,7 @@ currently and tries to communicate with all of them. However, the list does not
 define consensus members. Instead, on start walproposer tracks highest
 configuration it receives from `AcceptorGreeting`s. Once it assembles greetings
 from majority of `sk_set` and majority of `new_sk_set` (if it is present), it
-establishes this configuration as its own and moves to voting. 
+establishes this configuration as its own and moves to voting.

 It should stop talking to safekeepers not listed in the configuration at this
 point, though it is not unsafe to continue doing so.
@@ -119,7 +119,7 @@ refusal to accept due to configuration change) it simply restarts.
 The following algorithm can be executed anywhere having access to configuration
 storage and safekeepers. It is safe to interrupt / restart it and run multiple
 instances of it concurrently, though likely one of them won't make
-progress then. It accepts `desired_set: Vec<NodeId>` as input. 
+progress then. It accepts `desired_set: Vec<NodeId>` as input.

 Algorithm will refuse to make the change if it encounters previous interrupted
 change attempt, but in this case it will try to finish it.
@@ -140,7 +140,7 @@ storage are reachable.
   safe. Failed CAS aborts the procedure.
 4) Call `PUT` `configuration` on safekeepers from the current set,
   delivering them `joint_conf`. Collecting responses from majority is required
-   to proceed. If any response returned generation higher than 
+   to proceed. If any response returned generation higher than
   `joint_conf.generation`, abort (another switch raced us). Otherwise, choose
   max `<last_log_term, flush_lsn>` among responses and establish it as
   (in memory) `sync_position`. Also choose max `term` and establish it as (in
@@ -149,49 +149,49 @@ storage are reachable.
   without ack from the new set. Similarly, we'll bump term on new majority
   to `sync_term` so that two computes with the same term are never elected.
 4) Initialize timeline on safekeeper(s) from `new_sk_set` where it
-   doesn't exist yet by doing `pull_timeline` from the majority of the 
+   doesn't exist yet by doing `pull_timeline` from the majority of the
   current set. Doing that on majority of `new_sk_set` is enough to
   proceed, but it is reasonable to ensure that all `new_sk_set` members
   are initialized -- if some of them are down why are we migrating there?
-5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set. 
+5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
   Success on majority is enough.
 6) Repeatedly call `PUT` `configuration` on safekeepers from the new set,
   delivering them `joint_conf` and collecting their positions. This will
-   switch them to the `joint_conf` which generally won't be needed 
+   switch them to the `joint_conf` which generally won't be needed
   because `pull_timeline` already includes it and plus additionally would be
   broadcast by compute. More importantly, we may proceed to the next step
-   only when `<last_log_term, flush_lsn>` on the majority of the new set reached 
-   `sync_position`. Similarly, on the happy path no waiting is not needed because 
+   only when `<last_log_term, flush_lsn>` on the majority of the new set reached
+   `sync_position`. Similarly, on the happy path no waiting is not needed because
   `pull_timeline` already includes it. However, we should double
    check to be safe. For example, timeline could have been created earlier e.g.
-    manually or after try-to-migrate, abort, try-to-migrate-again sequence. 
-7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new 
-   safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration 
+    manually or after try-to-migrate, abort, try-to-migrate-again sequence.
+7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
+   safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
   storage under one more CAS.
 8) Call `PUT` `configuration` on safekeepers from the new set,
-   delivering them `new_conf`. It is enough to deliver it to the majority 
+   delivering them `new_conf`. It is enough to deliver it to the majority
   of the new set; the rest can be updated by compute.

 I haven't put huge effort to make the description above very precise, because it
 is natural language prone to interpretations anyway. Instead I'd like to make TLA+
 spec of it.

-Description above focuses on safety. To make the flow practical and live, here a few more 
+Description above focuses on safety. To make the flow practical and live, here a few more
 considerations.
-1) It makes sense to ping new set to ensure it we are migrating to live node(s) before 
+1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
  step 3.
-2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed 
+2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
   it is safe to rollback to the old conf with one more CAS.
-3) On step 4 timeline might be already created on members of the new set for various reasons; 
+3) On step 4 timeline might be already created on members of the new set for various reasons;
   the simplest is the procedure restart. There are more complicated scenarious like mentioned
-   in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving 
-   generations, so seems simpler to treat existing timeline as success. However, this also 
+   in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
+   generations, so seems simpler to treat existing timeline as success. However, this also
   has a disadvantage: you might imagine an surpassingly unlikely schedule where condition in
   the step 5 is never reached until compute is (re)awaken up to synchronize new member(s).
   I don't think we'll observe this in practice, but can add waking up compute if needed.
 4) In the end timeline should be locally deleted on the safekeeper(s) which are
   in the old set but not in the new one, unless they are unreachable. To be
-   safe this also should be done under generation number (deletion proceeds only if 
+   safe this also should be done under generation number (deletion proceeds only if
   current configuration is <= than one in request and safekeeper is not memeber of it).
 5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
   jump to step 7, using it as `new_conf`.
@@ -202,47 +202,87 @@ The procedure ought to be driven from somewhere. Obvious candidates are control
 plane and storage_controller; and as each of them already has db we don't want
 yet another storage. I propose to manage safekeepers in storage_controller
 because 1) since it is in rust it simplifies simulation testing (more on this
-below) 2) it already manages pageservers. 
+below) 2) it already manages pageservers.

 This assumes that migration will be fully usable only after we migrate all
 tenants/timelines to storage_controller. It is discussible whether we want also
 to manage pageserver attachments for all of these, but likely we do.

-This requires us to define storcon <-> cplane interface.
+This requires us to define storcon <-> cplane interface and changes.

-### storage_controller <-> control plane interface
+### storage_controller <-> control plane interface and changes

 First of all, control plane should
 [change](https://neondb.slack.com/archives/C03438W3FLZ/p1719226543199829)
 storing safekeepers per timeline instead of per tenant because we can't migrate
-tenants atomically. 
+tenants atomically.

 The important question is how updated configuration is delivered from
 storage_controller to control plane to provide it to computes. As always, there
 are two options, pull and push. Let's do it the same push as with pageserver
 `/notify-attach` because 1) it keeps storage_controller out of critical compute
-start path 2) provides easier upgrade: there won't be such a thing as 'timeline
-managed by control plane / storcon', cplane just takes the value out of its db
-when needed 3) uniformity. It makes storage_controller responsible for retrying notifying
-control plane until it succeeds.
+start path 2) uniformity. It makes storage_controller responsible for retrying
+notifying control plane until it succeeds.

-So, cplane `/notify-safekeepers` for the timeline accepts `Configuration` and
-updates it in the db if the provided conf generation is higher (the cplane db
-should also store generations for this). Similarly to [`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365), it
-should update db which makes the call successful, and then try to schedule
-`apply_config` if possible, it is ok if not. storage_controller 
-should rate limit calling the endpoint, but likely this won't be needed, as migration
+It is not needed for the control plane to fully know the `Configuration`. It is
+enough for it to only to be aware of the list of safekeepers in the latest
+configuration to supply it to compute, plus associated generation number to
+protect from stale update requests and to also pass it to compute.
+
+So, cplane `/notify-safekeepers` for the timeline can accept JSON like
+```
+{
+   tenant_id: String,
+   timeline_id: String,
+   generation: u32,
+   safekeepers: Vec<SafekeeperId>,
+}
+```
+where `SafekeeperId` is
+```
+{
+   node_id: u64,
+   host: String
+}
+```
+In principle `host` is redundant, but may be useful for observability.
+
+The request updates list of safekeepers in the db if the provided conf
+generation is higher (the cplane db should also store generations for this).
+Similarly to
+[`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365),
+it should update db which makes the call successful, and then try to schedule
+`apply_config` if possible, it is ok if not. storage_controller should rate
+limit calling the endpoint, but likely this won't be needed, as migration
 throughput is limited by `pull_timeline`.

 Timeline (branch) creation in cplane should call storage_controller POST
 `tenant/:tenant_id/timeline` like it currently does for sharded tenants.
-Response should be augmented with `safekeeper_conf: Configuration`. The call
-should be retried until succeeds.
+Response should be augmented with `safekeepers_generation` and `safekeepers`
+fields like described in `/notify-safekeepers` above. Initially (currently)
+these fields may be absent; in this case cplane chooses safekeepers on its own
+like it currently does. The call should be retried until succeeds.

 Timeline deletion and tenant deletion in cplane should call appropriate
 storage_controller endpoints like it currently does for sharded tenants. The
 calls should be retried until they succeed.

+When compute receives safekeepers list from control plane it needs to know the
+generation to checked whether it should be updated (note that compute may get
+safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
+GUC is just a comma separates list of `host:port`. Let's prefix it with
+`g#<generation>:` to this end, so it will look like
+```
+g#42:safekeeper-0.eu-central-1.aws.neon.tech:6401,safekeeper-2.eu-central-1.aws.neon.tech:6401,safekeeper-1.eu-central-1.aws.neon.tech:6401
+```
+
+To summarize, list of cplane changes:
+- per tenant -> per timeline safekeepers management and addition of int `safekeeper_generation` field.
+- `/notify-safekeepers` endpoint.
+- Branch creation call may return list of safekeepers and when it is
+  present cplane should adopt it instead of choosing on its own like it does currently.
+- `neon.safekeepers` GUC should be prefixed with `g#<generation>:`.
+
 ### storage_controller implementation

 Current 'load everything on startup and keep in memory' easy design is fine.
@@ -360,10 +400,10 @@ source safekeeper might fail, which is not a problem if we are going to
 decomission the node but leaves garbage otherwise. I'd propose in the first version
 1) Don't attempt deletion at all if node status is `offline`.
 2) If it failed, just issue warning.
-And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and 
-remove garbage timelines for manual use. It will 1) list all timelines on the 
-safekeeper 2) compare each one against configuration storage: if timeline 
-doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can 
+And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
+remove garbage timelines for manual use. It will 1) list all timelines on the
+safekeeper 2) compare each one against configuration storage: if timeline
+doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
 be deleted under generation number if node is not member of current generation.

 Automating this is untrivial; we'd need to register all potential missing
@@ -412,8 +452,8 @@ There should be following layers of tests:
 3) Since simulation testing injects at relatively high level points (not
   syscalls), it omits some code, in particular `pull_timeline`. Thus it is
   better to have basic tests covering whole system as well. Extended version of
-   `test_restarts_under_load` would do: start background load and do migration 
-   under it, then restart endpoint and check that no reported commits 
+   `test_restarts_under_load` would do: start background load and do migration
+   under it, then restart endpoint and check that no reported commits
   had been lost. I'd also add one more creating classic network split scenario, with
   one compute talking to AC and another to BD while migration from nodes ABC to ABD
   happens.
@@ -422,35 +462,51 @@ There should be following layers of tests:

 ## Order of implementation and rollout

-Note that 
+Note that
 - Control plane parts and integration with it is fully independent from everything else
  (tests would use simulation and neon_local).
+- It is reasonable to make compute <-> safekeepers protocol change
+  independent of enabling generations.
 - There is a lot of infra work making storage_controller aware of timelines and safekeepers
  and its impl/rollout should be separate from migration itself.
- Initially walproposer can just stop working while it observers joint configuration.
+- Initially walproposer can just stop working while it observes joint configuration.
  Such window would be typically very short anyway.
+- Obviously we want to test the whole thing thoroughly on staging and only then
+  gradually enable in prod.

-To rollout smoothly, both walproposer and safekeeper should have flag
-`configurations_enabled`; when set to false, they would work as currently, i.e.
-walproposer is able to commit on whatever safekeeper set it is provided. Until
-all timelines are managed by storcon we'd need to use current script to migrate
-and update/drop entries in the storage_controller database if it has any.
+Let's have the following implementation bits for gradual rollout:
+- compute gets `neon.safekeepers_proto_version` flag.
+  Initially both compute and safekeepers will be able to talk both
+  versions so that we can delay force restart of them and for
+  simplicity of rollback in case it is needed.
+- storcon gets `-set-safekeepers` config option disabled by
+  default. Timeline creation request chooses safekeepers
+  (and returns them in response to cplane) only when it is set to
+  true.
+- control_plane [see above](storage_controller-<->-control-plane interface-and-changes)
+  prefixes `neon.safekeepers` GUC with generation number. When it is 0
+  (or prefix not present at all), walproposer behaves as currently, committing on
+  the provided safekeeper list -- generations are disabled.
+  If it is non 0 it follows this RFC rules.
+- We provide a script for manual migration to storage controller.
+  It selects timeline(s) from control plane (specified or all of them) db
+  and calls special import endpoint on storage controller which is very
+  similar to timeline creation: it inserts into the db, sets
+  configuration to initial on the safekeepers, calls cplane
+  `notify-safekeepers`.

-Safekeepers would need to be able to talk both current and new protocol version
-with compute to reduce number of computes restarted in prod once v2 protocol is
-deployed (though before completely switching we'd need to force this).
-
-Let's have the following rollout order:
- storage_controller becomes aware of safekeepers;
- storage_controller gets timeline creation for new timelines and deletion requests, but
-  doesn't manage all timelines yet. Migration can be tested on these new timelines.
-  To keep control plane and storage_controller databases in sync while control 
-  plane still chooses the safekeepers initially (until all timelines are imported
-  it can choose better), `TimelineCreateRequest` can get optional safekeepers
-  field with safekeepers chosen by cplane.
- Then we can import all existing timelines from control plane to
-  storage_controller and gradually enable configurations region by region.
+Then the rollout for a region would be:
+- Current situation: safekeepers are choosen by control_plane.
+- We manually migrate some timelines, test moving them around.
+- Then we enable `--set-safekeepers` so that all new timelines
+  are on storage controller.
+- Finally migrate all existing timelines using the script (no
+  compute should be speaking old proto version at this point).

+Until all timelines are managed by storcon we'd need to use current ad hoc
+script to migrate if needed. To keep state clean, all storage controller managed
+timelines must be migrated before that, or controller db and configurations
+state of safekeepers dropped manually.

 Very rough implementation order:
 - Add concept of configurations to safekeepers (including control file),
@@ -458,10 +514,10 @@ Very rough implementation order:
 - Implement walproposer changes, including protocol.
 - Implement storconn part. Use it in neon_local (and pytest).
 - Make cplane store safekeepers per timeline instead of per tenant.
- Implement cplane/storcon integration. Route branch creation/deletion 
+- Implement cplane/storcon integration. Route branch creation/deletion
  through storcon. Then we can test migration of new branches.
- Finally import existing branches. Then we can drop cplane 
-  safekeeper selection code. Gradually enable configurations at 
+- Finally import existing branches. Then we can drop cplane
+  safekeeper selection code. Gradually enable configurations at
  computes and safekeepers. Before that, all computes must talk only
  v3 protocol version.

--- a/docs/rfcs/040-profiling.md
+++ b/docs/rfcs/040-profiling.md
@@ -0,0 +1,247 @@
+# CPU and Memory Profiling
+
+Created 2025-01-12 by Erik Grinaker.
+
+See also [internal user guide](https://www.notion.so/neondatabase/Storage-CPU-Memory-Profiling-14bf189e004780228ec7d04442742324?pvs=4).
+
+## Summary
+
+This document proposes a standard cross-team pattern for CPU and memory profiling across
+applications and languages, using the [pprof](https://github.com/google/pprof) profile format.
+
+It enables both ad hoc profiles via HTTP endpoints, and continuous profiling across the fleet via
+[Grafana Cloud Profiles](https://grafana.com/docs/grafana-cloud/monitor-applications/profiles/).
+Continuous profiling incurs an overhead of about 0.1% CPU usage and 3% slower heap allocations.
+
+## Motivation
+
+CPU and memory profiles are crucial observability tools for understanding performance issues,
+resource exhaustion, and resource costs. They allow answering questions like:
+
+* Why is this process using 100% CPU?
+* How do I make this go faster?
+* Why did this process run out of memory?
+* Why are we paying for all these CPU cores and memory chips?
+
+Go has [first-class support](https://pkg.go.dev/net/http/pprof) for profiling included in its
+standard library, using the [pprof](https://github.com/google/pprof) profile format and associated
+tooling.
+
+This is not the case for Rust and C, where obtaining profiles can be rather cumbersome. It requires
+installing and running additional tools like `perf` as root on production nodes, with analysis tools
+that can be hard to use and often don't give good results. This is not only annoying, but can also
+significantly affect the resolution time of production incidents.
+
+This proposal will:
+
+* Provide CPU and heap profiles in pprof format via HTTP API.
+* Record continuous profiles in Grafana for aggregate historical analysis.
+* Make it easy for anyone to see a flamegraph in less than one minute.
+* Be reasonably consistent across teams and services (Rust, Go, C).
+
+## Non Goals (For Now)
+
+* [Additional profile types](https://grafana.com/docs/pyroscope/next/configure-client/profile-types/)
+  like mutexes, locks, goroutines, etc.
+* [Runtime trace integration](https://grafana.com/docs/pyroscope/next/configure-client/trace-span-profiles/).
+* [Profile-guided optimization](https://en.wikipedia.org/wiki/Profile-guided_optimization).
+
+## Using Profiles
+
+Ready-to-use profiles can be obtained using e.g. `curl`. For Rust services:
+
+```
+$ curl localhost:9898/profile/cpu >profile.pb.gz
+```
+
+pprof profiles can be explored using the [`pprof`](https://github.com/google/pprof) web UI, which
+provides flamegraphs, call graphs, plain text listings, and more:
+
+```
+$ pprof -http :6060 <profile>
+```
+
+Some endpoints (e.g. Rust-based ones) can also generate flamegraph SVGs directly:
+
+```
+$ curl localhost:9898/profile/cpu?format=svg >profile.svg
+$ open profile.svg
+```
+
+Continuous profiles are available in Grafana under Explore → Profiles → Explore Profiles
+(currently only in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)).
+
+## API Requirements
+
+* HTTP endpoints that return a profile in pprof format (with symbols).
+  * CPU: records a profile over the request time interval (`seconds` query parameter).
+  * Memory: returns the current in-use heap allocations.
+* Unauthenticated, as it should not expose user data or pose a denial-of-service risk.
+* Default sample frequency should not impact service (maximum 5% CPU overhead).
+* Linux-compatibility.
+
+Nice to have:
+
+* Return flamegraph SVG directly from the HTTP endpoint if requested.
+* Configurable sample frequency for CPU profiles.
+* Historical heap allocations, by count and bytes.
+* macOS-compatiblity.
+
+## Rust Profiling
+
+[`libs/utils/src/http/endpoint.rs`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs)
+contains ready-to-use HTTP endpoints for CPU and memory profiling:
+[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338) and [`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
+
+### CPU
+
+CPU profiles are provided by [pprof-rs](https://github.com/tikv/pprof-rs) via
+[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338).
+Expose it unauthenticated at `/profile/cpu`.
+
+Parameters:
+
+* `format`: profile output format (`pprof` or `svg`; default `pprof`).
+* `seconds`: duration to collect profile over, in seconds (default `5`).
+* `frequency`: how often to sample thread stacks, in Hz (default `99`).
+* `force`: if `true`, cancel a running profile and start a new one (default `false`).
+
+Works on Linux and macOS.
+
+### Memory
+
+Use the jemalloc allocator via [`tikv-jemallocator`](https://github.com/tikv/jemallocator),
+and enable profiling with samples every 2 MB allocated:
+
+```rust
+#[global_allocator]
+static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
+```
+
+pprof profiles are generated by
+[`jemalloc-pprof`](https://github.com/polarsignals/rust-jemalloc-pprof) via
+[`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
+Expose it unauthenticated at `/profile/heap`.
+
+Parameters:
+
+* `format`: profile output format (`pprof`, `svg`, or `jemalloc`; default `pprof`).
+
+Works on Linux only, due to [jemalloc limitations](https://github.com/jemalloc/jemalloc/issues/26).
+
+## Go Profiling
+
+The Go standard library includes pprof profiling via HTTP API in
+[`net/http/pprof`](https://pkg.go.dev/net/http/pprof). Expose it unauthenticated at
+`/debug/pprof`.
+
+Works on Linux and macOS.
+
+### CPU 
+
+Via `/debug/pprof/profile`. Parameters:
+
+* `debug`: profile output format (`0` is pprof, `1` or above is plaintext; default `0`).
+* `seconds`: duration to collect profile over, in seconds (default `30`).
+
+Does not support a frequency parameter (see [#57488](https://github.com/golang/go/issues/57488)),
+and defaults to 100 Hz. A lower frequency can be hardcoded via `SetCPUProfileRate`, but the default
+is likely ok (estimated 1% overhead).
+
+### Memory
+
+Via `/debug/pprof/heap`. Parameters:
+
+* `seconds`: take a delta profile over the given duration, in seconds (default `0`).
+* `gc`: if `1`, garbage collect before taking profile.
+
+## C Profiling
+
+[gperftools](https://github.com/gperftools/gperftools) provides in-process CPU and heap profiling
+with pprof output.
+
+However, continuous profiling of PostgreSQL is expensive (many computes), and has limited value
+since we don't own the internals anyway.
+
+Ad hoc profiling might still be useful, but the compute team considers existing tooling sufficient,
+so this is not a priority at the moment.
+
+## Grafana Continuous Profiling
+
+[Grafana Alloy](https://grafana.com/docs/alloy/latest/) continually scrapes CPU and memory profiles
+across the fleet, and archives them as time series. This can be used to analyze resource usage over
+time, either in aggregate or zoomed in to specific events and nodes.
+
+Profiles are retained for 30 days. Profile ingestion volume for CPU+heap at 60-second intervals
+is about 0.5 GB/node/day, or about $0.25/node/day = $7.5/node/month ($0.50/GB).
+
+It is currently enabled in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)
+for Pageserver and Safekeeper.
+
+### Scraping
+
+* CPU profiling: 59 seconds at 19 Hz every 60 seconds.
+* Heap profiling: heap snapshot with 2 MB frequency every 60 seconds.
+
+There are two main approaches that can be taken for CPU profiles:
+
+* Continuous low-frequency profiles (e.g. 19 Hz for 60 seconds every 60 seconds).
+* Occasional high-frequency profiles (e.g. 99 Hz for 5 seconds every 60 seconds).
+
+We choose continuous low-frequency profiles where possible. This has a fixed low overhead, instead
+of a spiky high overhead. It likely also gives a more representative view of resource usage.
+However, a 19 Hz rate gives a minimum resolution of 52.6 ms per sample, which may be larger than the
+actual runtime of small functions. Note that Go does not support a frequency parameter, so we must
+use a fixed frequency for all profiles via `SetCPUProfileRate()` (default 100 Hz).
+
+Only one CPU profile can be taken at a time. With continuous profiling, one will always be running.
+To allow also taking an ad hoc CPU profile, the Rust endpoint supports a `force` query parameter to
+cancel a running profile and start a new one.
+
+### Overhead
+
+With Rust:
+
+* CPU profiles at 19 Hz frequency: 0.1% overhead.
+* Heap profiles at 2 MB frequency: 3% allocation overhead.
+* Profile call/encoding/symbolization: 20 ms every 60 seconds, or 0.03% of 1 CPU (for Pageserver).
+* Profile symbolization caches: 125 MB memory, or 0.4% of 32 GB (for Pageserver).
+
+Benchmarks with pprof-rs showed that the CPU time for taking a stack trace of a 40-frame stack was
+11 µs using the `frame-pointer` feature, and 1.4 µs using `libunwind` with DWARF. `libunwind` saw
+frequent seg faults, so we use `frame-pointer` and build binaries with frame pointers (negligible
+overhead).
+
+CPU profiles work by installing an `ITIMER_PROF` for the process, which triggers a `SIGPROF` signal
+after a given amount of cumulative CPU time across all CPUs. The signal handler will run for one
+of the currently executing threads and take a stack trace. Thus, a 19 Hz profile will take 1 stack
+trace every 52.6 ms CPU time -- assuming 11 µs for a stack trace, this is 0.02% overhead, but
+likely 0.1% in practice (given e.g. context switches).
+
+Heap profiles work by probabilistically taking a stack trace on allocations, adjusted for the
+allocation size. A 1 MB allocation takes about 15 µs in benchmarks, and a stack trace about 1 µs,
+so we can estimate that a 2 MB sampling frequency has about 3% allocation overhead -- this is 
+consistent with benchmarks. This is significantly larger than CPU profiles, but mitigated by the
+fact that performance-sensitive code will avoid allocations as far as possible.
+
+Profile symbolization uses in-memory caches for symbol lookups. These take about 125 MB for
+Pageserver.
+
+## Alternatives Considered
+
+* eBPF profiles.
+  * Don't require instrumenting the binary.
+  * Use less resources.
+  * Can profile in kernel space too.
+  * Supported by Grafana.
+  * Less information about stack frames and spans.
+  * Limited tooling for local analysis.
+  * Does not support heap profiles.
+  * Does not work on macOS.
+
+* [Polar Signals](https://www.polarsignals.com) instead of Grafana.
+  * We already use Grafana for everything else. Appears good enough.
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -1,6 +1,5 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.

-use std::collections::HashSet;
 use std::fmt::Display;

 use chrono::{DateTime, Utc};
@@ -16,6 +15,17 @@ pub struct GenericAPIError {
    pub error: String,
 }

+#[derive(Debug, Clone, Serialize)]
+pub struct InfoResponse {
+    pub num_cpus: usize,
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct ExtensionInstallResponse {
+    pub extension: PgIdent,
+    pub version: ExtVersion,
+}
+
 /// Response of the /status API
 #[derive(Serialize, Debug, Deserialize)]
 #[serde(rename_all = "snake_case")]
@@ -29,16 +39,6 @@ pub struct ComputeStatusResponse {
    pub error: Option<String>,
 }

-#[derive(Deserialize, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub struct ComputeState {
-    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: Option<DateTime<Utc>>,
-    pub error: Option<String>,
-}
-
 #[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "snake_case")]
 pub enum ComputeStatus {
@@ -79,7 +79,7 @@ impl Display for ComputeStatus {
    }
 }

-fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
+pub fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
 where
    S: Serializer,
 {
@@ -163,8 +163,9 @@ pub enum ControlPlaneComputeStatus {
 #[derive(Clone, Debug, Default, Serialize)]
 pub struct InstalledExtension {
    pub extname: String,
-    pub versions: HashSet<String>,
+    pub version: String,
    pub n_databases: u32, // Number of databases using this extension
+    pub owned_by_superuser: String,
 }

 #[derive(Clone, Debug, Default, Serialize)]
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -67,6 +67,15 @@ pub struct ComputeSpec {
    #[serde(default)]
    pub disk_quota_bytes: Option<u64>,

+    /// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on
+    /// the initial size of LFC.
+    ///
+    /// This is intended for use when the LFC size is being overridden from the default but
+    /// autoscaling is still enabled, and we don't want the vm-monitor to interfere with the custom
+    /// LFC sizing.
+    #[serde(default)]
+    pub disable_lfc_resizing: Option<bool>,
+
    /// Expected cluster state at the end of transition process.
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,
--- a/libs/desim/src/time.rs
+++ b/libs/desim/src/time.rs
@@ -91,7 +91,7 @@ impl Timing {

    /// Return true if there is a ready event.
    fn is_event_ready(&self, queue: &mut BinaryHeap<Pending>) -> bool {
-        queue.peek().map_or(false, |x| x.time <= self.now())
+        queue.peek().is_some_and(|x| x.time <= self.now())
    }

    /// Clear all pending events.
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -75,7 +75,7 @@ pub struct TenantPolicyRequest {
    pub scheduling: Option<ShardSchedulingPolicy>,
 }

-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
+#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
 pub struct AvailabilityZone(pub String);

 impl Display for AvailabilityZone {
@@ -87,7 +87,7 @@ impl Display for AvailabilityZone {
 #[derive(Serialize, Deserialize)]
 pub struct ShardsPreferredAzsRequest {
    #[serde(flatten)]
-    pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
+    pub preferred_az_ids: HashMap<TenantShardId, Option<AvailabilityZone>>,
 }

 #[derive(Serialize, Deserialize)]
@@ -144,6 +144,8 @@ pub struct NodeDescribeResponse {
    pub availability: NodeAvailabilityWrapper,
    pub scheduling: NodeSchedulingPolicy,

+    pub availability_zone_id: String,
+
    pub listen_http_addr: String,
    pub listen_http_port: u16,

@@ -179,7 +181,6 @@ pub struct TenantDescribeResponseShard {
 /// specifies some constraints, e.g. asking it to get off particular node(s)
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TenantShardMigrateRequest {
-    pub tenant_shard_id: TenantShardId,
    pub node_id: NodeId,
 }

@@ -320,6 +321,42 @@ impl From<NodeSchedulingPolicy> for String {
    }
 }

+#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
+pub enum SkSchedulingPolicy {
+    Active,
+    Pause,
+    Decomissioned,
+}
+
+impl FromStr for SkSchedulingPolicy {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "active" => Self::Active,
+            "pause" => Self::Pause,
+            "decomissioned" => Self::Decomissioned,
+            _ => {
+                return Err(anyhow::anyhow!(
+                    "Unknown scheduling policy '{s}', try active,pause,decomissioned"
+                ))
+            }
+        })
+    }
+}
+
+impl From<SkSchedulingPolicy> for String {
+    fn from(value: SkSchedulingPolicy) -> String {
+        use SkSchedulingPolicy::*;
+        match value {
+            Active => "active",
+            Pause => "pause",
+            Decomissioned => "decomissioned",
+        }
+        .to_string()
+    }
+}
+
 /// Controls how tenant shards are mapped to locations on pageservers, e.g. whether
 /// to create secondary locations.
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
@@ -336,6 +373,16 @@ pub enum PlacementPolicy {
    Detached,
 }

+impl PlacementPolicy {
+    pub fn want_secondaries(&self) -> usize {
+        match self {
+            PlacementPolicy::Attached(secondary_count) => *secondary_count,
+            PlacementPolicy::Secondary => 1,
+            PlacementPolicy::Detached => 0,
+        }
+    }
+}
+
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TenantShardMigrateResponse {}

@@ -372,6 +419,27 @@ pub struct MetadataHealthListOutdatedResponse {
    pub health_records: Vec<MetadataHealthRecord>,
 }

+/// Publicly exposed safekeeper description
+#[derive(Serialize, Deserialize, Clone)]
+pub struct SafekeeperDescribeResponse {
+    pub id: NodeId,
+    pub region_id: String,
+    /// 1 is special, it means just created (not currently posted to storcon).
+    /// Zero or negative is not really expected.
+    /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.
+    pub version: i64,
+    pub host: String,
+    pub port: i32,
+    pub http_port: i32,
+    pub availability_zone_id: String,
+    pub scheduling_policy: SkSchedulingPolicy,
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct SafekeeperSchedulingPolicyRequest {
+    pub scheduling_policy: SkSchedulingPolicy,
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
--- a/Show More
+++ b/Show More