removed anon patch

removed unused imports
Merge branch 'main' into thesuhas/remove_anon_extension
2026-05-20 14:40:37 +00:00 · 2025-03-18 11:14:18 -04:00 · 2025-03-17 13:13:25 -04:00 · 2025-03-17 11:30:40 -04:00 · 2025-03-17 13:53:23 +00:00 · 2025-03-17 09:26:45 +00:00
113 changed files with 2282 additions and 1096 deletions
--- a/.github/workflows/_meta.yml
+++ b/.github/workflows/_meta.yml
@@ -125,5 +125,5 @@ jobs:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Faied to find Build and Test run from  RC PR!" | halt_error(1))')
+          RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from  RC PR!" | halt_error(1))')
          echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -978,16 +978,55 @@ jobs:
      acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
    secrets: inherit

-  # This is a bit of a special case so we're not using a generated image map.
-  add-latest-tag-to-neon-extensions-test-image:
-    if: github.ref_name == 'main'
+  push-neon-test-extensions-image-ghcr:
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    needs: [ meta, compute-node-image ]
    uses: ./.github/workflows/_push-to-container-registry.yml
    with:
      image-map: |
        {
-          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
-          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
+          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
+            "ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
+          ],
+          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
+            "ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
+          ]
+        }
+    secrets: inherit
+
+  add-latest-tag-to-neon-test-extensions-image:
+    if: ${{ needs.meta.outputs.run-kind == 'push-main' }}
+    needs: [ meta, compute-node-image ]
+    uses: ./.github/workflows/_push-to-container-registry.yml
+    with:
+      image-map: |
+        {
+          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
+            "docker.io/neondatabase/neon-test-extensions-v16:latest",
+            "ghcr.io/neondatabase/neon-test-extensions-v16:latest"
+          ],
+          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
+            "docker.io/neondatabase/neon-test-extensions-v17:latest",
+            "ghcr.io/neondatabase/neon-test-extensions-v17:latest"
+          ]
+        }
+    secrets: inherit
+
+  add-release-tag-to-neon-test-extensions-image:
+    if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}
+    needs: [ meta, compute-node-image ]
+    uses: ./.github/workflows/_push-to-container-registry.yml
+    with:
+      image-map: |
+        {
+          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
+            "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}",
+            "ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
+          ],
+          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
+            "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}",
+            "ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
+          ]
        }
    secrets: inherit

--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -2,8 +2,8 @@ name: large oltp benchmark

 on:
  # uncomment to run on push for debugging your PR
-  push:
-    branches: [ bodobolero/synthetic_oltp_workload ]
+  #push:
+  #  branches: [ bodobolero/synthetic_oltp_workload ]

  schedule:
    # * is a special character in YAML so you have to quote this string
@@ -12,7 +12,7 @@ on:
    #          │ │  ┌───────────── day of the month (1 - 31)
    #          │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:   '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
+    - cron:   '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC
  workflow_dispatch: # adds ability to run this manually

 defaults:
@@ -22,7 +22,7 @@ defaults:
 concurrency:
  # Allow only one workflow globally because we need dedicated resources which only exist once
  group: large-oltp-bench-workflow
-  cancel-in-progress: true
+  cancel-in-progress: false

 jobs:
  oltp:
@@ -31,9 +31,9 @@ jobs:
      matrix:
        include:
          - target: new_branch 
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
          - target: reuse_branch 
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
    permissions:
      contents: write
@@ -46,7 +46,6 @@ jobs:
      PG_VERSION: 16 # pre-determined by pre-determined project
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
-      SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
      PLATFORM: ${{ matrix.target }}

    runs-on: [ self-hosted, us-east-2, x64 ]
@@ -57,8 +56,10 @@ jobs:
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

-    # Increase timeout to 8h, default timeout is 6h
-    timeout-minutes: 480
+    # Increase timeout to 2 days (default timeout is 6h) because database maintenance can take a long time:
+    # normally (1h pgbench + 3h vacuum analyze + 3.5h re-index) x 2 = 15h, plus some buffer for regressions.
+    # In one run vacuum didn't finish within 12 hours.
+    timeout-minutes: 2880

    steps:
    - uses: actions/checkout@v4
@@ -89,29 +90,45 @@ jobs:
    - name: Set up Connection String
      id: set-up-connstr
      run: |
-          case "${{ matrix.target }}" in
-              new_branch)
-              CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
-              ;;
-              reuse_branch)
-              CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
-              ;;
-              *)
-              echo >&2 "Unknown target=${{ matrix.target }}"
-              exit 1
-              ;;
-          esac
+        case "${{ matrix.target }}" in
+          new_branch)
+          CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
+          ;;
+          reuse_branch)
+          CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
+          ;;
+          *)
+          echo >&2 "Unknown target=${{ matrix.target }}"
+          exit 1
+          ;;
+        esac

-          echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
+        CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"

-    - name: Benchmark pgbench with custom-scripts
+        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
+        echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
+
+    - name: Delete rows from prior runs in reuse branch
+      if: ${{ matrix.target == 'reuse_branch' }}
+      env:
+          BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
+          PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
+          PSQL: /tmp/neon/pg_install/v16/bin/psql
+          PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
+      run: |
+        echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs"
+        export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
+        ${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
+        echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
+
+    - name: Benchmark pgbench with custom-scripts 
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
        test_selection: performance
        run_in_parallel: false
-        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
-        extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
+        save_perf_report: true
+        extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
        pg_version: ${{ env.PG_VERSION }}
        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
@@ -119,6 +136,21 @@ jobs:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

+    - name: Benchmark database maintenance
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance
+        run_in_parallel: false
+        save_perf_report: true
+        extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
+        pg_version: ${{ env.PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+
    - name: Delete Neon Branch for large tenant
      if: ${{ always() && matrix.target == 'new_branch' }}
      uses: ./.github/actions/neon-branch-delete
@@ -127,6 +159,13 @@ jobs:
        branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}

+    - name: Configure AWS credentials # again because prior steps could have exceeded 5 hours
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
--- a/.github/workflows/pre-merge-checks.yml
+++ b/.github/workflows/pre-merge-checks.yml
@@ -27,7 +27,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
+      - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
        id: python-src
        with:
          files: |
@@ -38,7 +38,7 @@ jobs:
            poetry.lock
            pyproject.toml

-      - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
+      - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
        id: rust-src
        with:
          files: |
@@ -148,7 +148,7 @@ jobs:
          ${{
            always()
            && github.event_name == 'merge_group'
-            && contains(fromJson('["release", "release-proxy", "release-compute"]'), github.base_ref)
+            && contains(fromJson('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch)
          }}
        env:
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1309,6 +1309,7 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "chrono",
+ "indexmap 2.0.1",
 "jsonwebtoken",
 "regex",
 "remote_storage",
@@ -1339,6 +1340,7 @@ dependencies = [
 "flate2",
 "futures",
 "http 1.1.0",
+ "indexmap 2.0.1",
 "jsonwebtoken",
 "metrics",
 "nix 0.27.1",
@@ -1347,17 +1349,20 @@ dependencies = [
 "once_cell",
 "opentelemetry",
 "opentelemetry_sdk",
+ "p256 0.13.2",
 "postgres",
 "postgres_initdb",
 "regex",
 "remote_storage",
 "reqwest",
+ "ring",
 "rlimit",
 "rust-ini",
 "serde",
 "serde_json",
 "serde_with",
 "signal-hook",
+ "spki 0.7.3",
 "tar",
 "thiserror 1.0.69",
 "tokio",
@@ -1377,6 +1382,7 @@ dependencies = [
 "vm_monitor",
 "walkdir",
 "workspace_hack",
+ "x509-cert",
 "zstd",
 ]

@@ -1801,6 +1807,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c"
 dependencies = [
 "const-oid",
+ "der_derive",
+ "flagset",
 "pem-rfc7468",
 "zeroize",
 ]
@@ -1819,6 +1827,17 @@ dependencies = [
 "rusticata-macros",
 ]

+[[package]]
+name = "der_derive"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
 [[package]]
 name = "deranged"
 version = "0.3.11"
@@ -2282,6 +2301,12 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"

+[[package]]
+name = "flagset"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec"
+
 [[package]]
 name = "flate2"
 version = "1.0.26"
@@ -2838,6 +2863,7 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "bytes",
+ "camino",
 "fail",
 "futures",
 "hyper 0.14.30",
@@ -2848,6 +2874,7 @@ dependencies = [
 "pprof",
 "regex",
 "routerify",
+ "rustls-pemfile 2.1.1",
 "serde",
 "serde_json",
 "serde_path_to_error",
@@ -2877,9 +2904,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"

 [[package]]
 name = "humantime"
-version = "2.1.0"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"

 [[package]]
 name = "humantime-serde"
@@ -4302,8 +4329,6 @@ dependencies = [
 "reqwest",
 "rpds",
 "rustls 0.23.18",
- "rustls-pemfile 2.1.1",
- "rustls-pki-types",
 "scopeguard",
 "send-future",
 "serde",
@@ -6019,6 +6044,7 @@ dependencies = [
 "regex",
 "remote_storage",
 "reqwest",
+ "rustls 0.23.18",
 "safekeeper_api",
 "safekeeper_client",
 "scopeguard",
@@ -6035,6 +6061,7 @@ dependencies = [
 "tokio",
 "tokio-io-timeout",
 "tokio-postgres",
+ "tokio-rustls 0.26.0",
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
@@ -6425,9 +6452,9 @@ dependencies = [

 [[package]]
 name = "sha1"
-version = "0.10.5"
+version = "0.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
 dependencies = [
 "cfg-if",
 "cpufeatures",
@@ -7135,6 +7162,27 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"

+[[package]]
+name = "tls_codec"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b"
+dependencies = [
+ "tls_codec_derive",
+ "zeroize",
+]
+
+[[package]]
+name = "tls_codec_derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
 [[package]]
 name = "tokio"
 version = "1.43.0"
@@ -8387,12 +8435,15 @@ dependencies = [
 "chrono",
 "clap",
 "clap_builder",
+ "const-oid",
 "crypto-bigint 0.5.5",
 "der 0.7.8",
 "deranged",
 "digest",
 "displaydoc",
+ "ecdsa 0.16.9",
 "either",
+ "elliptic-curve 0.13.8",
 "env_filter",
 "env_logger",
 "fail",
@@ -8427,6 +8478,7 @@ dependencies = [
 "num-rational",
 "num-traits",
 "once_cell",
+ "p256 0.13.2",
 "parquet",
 "prettyplease",
 "proc-macro2",
@@ -8439,6 +8491,7 @@ dependencies = [
 "reqwest",
 "rustls 0.23.18",
 "scopeguard",
+ "sec1 0.7.3",
 "serde",
 "serde_json",
 "sha2",
@@ -8484,6 +8537,18 @@ version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"

+[[package]]
+name = "x509-cert"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94"
+dependencies = [
+ "const-oid",
+ "der 0.7.8",
+ "spki 0.7.3",
+ "tls_codec",
+]
+
 [[package]]
 name = "x509-certificate"
 version = "0.23.1"
@@ -8612,9 +8677,9 @@ dependencies = [

 [[package]]
 name = "zeroize"
-version = "1.7.0"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
+checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
 dependencies = [
 "serde",
 "zeroize_derive",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -106,13 +106,13 @@ hostname = "0.4"
 http = {version = "1.1.0", features = ["std"]}
 http-types = { version = "2", default-features = false }
 http-body-util = "0.1.2"
-humantime = "2.1"
+humantime = "2.2"
 humantime-serde = "1.1.1"
 hyper0 = { package = "hyper", version = "0.14" }
 hyper = "1.4"
 hyper-util = "0.1"
 tokio-tungstenite = "0.21.0"
-indexmap = "2"
+indexmap = { version = "2", features = ["serde"] }
 indoc = "2"
 ipnet = "2.10.0"
 itertools = "0.10"
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1055,6 +1055,34 @@ RUN  if [ -d pg_embedding-src ]; then \
        make -j $(getconf _NPROCESSORS_ONLN) install; \
    fi

+#########################################################################################
+#
+# Layer "pg_anon-build"
+# compile anon extension
+#
+#########################################################################################
+FROM build-deps AS pg_anon-src
+ARG PG_VERSION
+
+# This is an experimental extension, never got to real production.
+# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
+WORKDIR /ext-src
+RUN case "${PG_VERSION:?}" in "v17") \
+    echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
+    esac && \
+    wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
+    echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9  pg_anon.tar.gz" | sha256sum --check && \
+    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C .
+
+FROM pg-build AS pg_anon-build
+COPY --from=pg_anon-src /ext-src/ /ext-src/
+WORKDIR /ext-src
+RUN if [ -d pg_anon-src ]; then \
+        cd pg_anon-src && \
+        make -j $(getconf _NPROCESSORS_ONLN) install && \
+        echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control; \
+    fi
+
 #########################################################################################
 #
 # Layer "pg build with nonroot user and cargo installed"
@@ -1352,27 +1380,6 @@ COPY --from=pg_session_jwt-src /ext-src/ /ext-src/
 WORKDIR /ext-src/pg_session_jwt-src
 RUN cargo pgrx install --release

-#########################################################################################
-#
-# Layer "pg-anon-pg-build"
-# compile anon extension
-#
-#########################################################################################
-FROM rust-extensions-build-pgrx12 AS pg-anon-pg-build
-ARG PG_VERSION
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# This is an experimental extension, never got to real production.
-# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/latest/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \
-    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
-    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \
-    sed -i 's/pgrx = "0.12.9"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
-    make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control
-
 #########################################################################################
 #
 # Layer "wal2json-build"
@@ -1670,6 +1677,7 @@ COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_embedding-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
+COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1727,6 +1735,8 @@ RUN set -e \
        libevent-dev \
        libtool \
        pkg-config \
+        libcurl4-openssl-dev \
+        libssl-dev \
    && apt clean && rm -rf /var/lib/apt/lists/*

 # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
@@ -1735,7 +1745,7 @@ RUN set -e \
    && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
    && cd pgbouncer \
    && ./autogen.sh \
-    && ./configure --prefix=/usr/local/pgbouncer --without-openssl \
+    && ./configure --prefix=/usr/local/pgbouncer \
    && make -j $(nproc) dist_man_MANS= \
    && make install dist_man_MANS=

--- a/compute/patches/pg_anon.patch
+++ b/compute/patches/pg_anon.patch
@@ -1,265 +0,0 @@
-commit 00aa659afc9c7336ab81036edec3017168aabf40
-Author: Heikki Linnakangas <heikki@neon.tech>
-Date:   Tue Nov 12 16:59:19 2024 +0200
-
-    Temporarily disable test that depends on timezone
-
-diff --git a/tests/expected/generalization.out b/tests/expected/generalization.out
-index 23ef5fa..9e60deb 100644
--- a/ext-src/pg_anon-src/tests/expected/generalization.out
-+++ b/ext-src/pg_anon-src/tests/expected/generalization.out
-@@ -284,12 +284,9 @@ SELECT anon.generalize_tstzrange('19041107','century');
-  ["Tue Jan 01 00:00:00 1901 PST","Mon Jan 01 00:00:00 2001 PST")
- (1 row)
- 
-SELECT anon.generalize_tstzrange('19041107','millennium');
-                      generalize_tstzrange                       
------------------------------------------------------------------
- ["Thu Jan 01 00:00:00 1001 PST","Mon Jan 01 00:00:00 2001 PST")
-(1 row)
-
-+-- temporarily disabled, see:
-+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
-+--SELECT anon.generalize_tstzrange('19041107','millennium');
- -- generalize_daterange
- SELECT anon.generalize_daterange('19041107');
-   generalize_daterange   
-diff --git a/tests/sql/generalization.sql b/tests/sql/generalization.sql
-index b868344..b4fc977 100644
--- a/ext-src/pg_anon-src/tests/sql/generalization.sql
-+++ b/ext-src/pg_anon-src/tests/sql/generalization.sql
-@@ -61,7 +61,9 @@ SELECT anon.generalize_tstzrange('19041107','month');
- SELECT anon.generalize_tstzrange('19041107','year');
- SELECT anon.generalize_tstzrange('19041107','decade');
- SELECT anon.generalize_tstzrange('19041107','century');
-SELECT anon.generalize_tstzrange('19041107','millennium');
-+-- temporarily disabled, see:
-+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
-+--SELECT anon.generalize_tstzrange('19041107','millennium');
- 
- -- generalize_daterange
- SELECT anon.generalize_daterange('19041107');
-
-commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
-Author: Alexey Masterov <alexeymasterov@neon.tech>
-Date:   Fri May 31 06:34:26 2024 +0000
-
-    These alternative expected files were added to consider the neon features
-
-diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
-new file mode 100644
-index 0000000..2539cfd
--- /dev/null
-+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
-@@ -0,0 +1,101 @@
-+BEGIN;
-+CREATE EXTENSION anon CASCADE;
-+NOTICE:  installing required extension "pgcrypto"
-+SELECT anon.init();
-+ init 
-+------
-+ t
-+(1 row)
-+
-+CREATE ROLE mallory_the_masked_user;
-+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
-+CREATE TABLE t1(i INT);
-+ALTER TABLE t1 ADD COLUMN t TEXT;
-+SECURITY LABEL FOR anon ON COLUMN t1.t
-+IS 'MASKED WITH VALUE NULL';
-+INSERT INTO t1 VALUES (1,'test');
-+--
-+-- We're checking the owner's permissions
-+--
-+-- see
-+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
-+--
-+SET ROLE mallory_the_masked_user;
-+SELECT anon.pseudo_first_name(0) IS NOT NULL;
-+ ?column? 
-+----------
-+ t
-+(1 row)
-+
-+-- SHOULD FAIL
-+DO $$
-+BEGIN
-+  PERFORM anon.init();
-+  EXCEPTION WHEN insufficient_privilege
-+  THEN RAISE NOTICE 'insufficient_privilege';
-+END$$;
-+NOTICE:  insufficient_privilege
-+-- SHOULD FAIL
-+DO $$
-+BEGIN
-+  PERFORM anon.anonymize_table('t1');
-+  EXCEPTION WHEN insufficient_privilege
-+  THEN RAISE NOTICE 'insufficient_privilege';
-+END$$;
-+NOTICE:  insufficient_privilege
-+-- SHOULD FAIL
-+SAVEPOINT fail_start_engine;
-+SELECT anon.start_dynamic_masking();
-+ERROR:  Only supersusers can start the dynamic masking engine.
-+CONTEXT:  PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
-+ROLLBACK TO fail_start_engine;
-+RESET ROLE;
-+SELECT anon.start_dynamic_masking();
-+ start_dynamic_masking 
-+-----------------------
-+ t
-+(1 row)
-+
-+SET ROLE mallory_the_masked_user;
-+SELECT * FROM mask.t1;
-+ i | t 
-+---+---
-+ 1 | 
-+(1 row)
-+
-+-- SHOULD FAIL
-+DO $$
-+BEGIN
-+  SELECT * FROM public.t1;
-+  EXCEPTION WHEN insufficient_privilege
-+  THEN RAISE NOTICE 'insufficient_privilege';
-+END$$;
-+NOTICE:  insufficient_privilege
-+-- SHOULD FAIL
-+SAVEPOINT fail_stop_engine;
-+SELECT anon.stop_dynamic_masking();
-+ERROR:  Only supersusers can stop the dynamic masking engine.
-+CONTEXT:  PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
-+ROLLBACK TO fail_stop_engine;
-+RESET ROLE;
-+SELECT anon.stop_dynamic_masking();
-+NOTICE:  The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
-+ stop_dynamic_masking 
-+----------------------
-+ t
-+(1 row)
-+
-+SET ROLE mallory_the_masked_user;
-+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
-+ ?column? 
-+----------
-+ t
-+(1 row)
-+
-+-- SHOULD FAIL
-+SAVEPOINT fail_seclabel_on_role;
-+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
-+ERROR:  permission denied
-+DETAIL:  The current user must have the CREATEROLE attribute.
-+ROLLBACK TO fail_seclabel_on_role;
-+ROLLBACK;
-diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
-new file mode 100644
-index 0000000..8b090fe
--- /dev/null
-+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
-@@ -0,0 +1,104 @@
-+BEGIN;
-+CREATE EXTENSION anon CASCADE;
-+NOTICE:  installing required extension "pgcrypto"
-+SELECT anon.init();
-+ init 
-+------
-+ t
-+(1 row)
-+
-+CREATE ROLE oscar_the_owner;
-+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
-+CREATE ROLE mallory_the_masked_user;
-+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
-+--
-+-- We're checking the owner's permissions
-+--
-+-- see
-+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
-+--
-+SET ROLE oscar_the_owner;
-+SELECT anon.pseudo_first_name(0) IS NOT NULL;
-+ ?column? 
-+----------
-+ t
-+(1 row)
-+
-+-- SHOULD FAIL
-+DO $$
-+BEGIN
-+  PERFORM anon.init();
-+  EXCEPTION WHEN insufficient_privilege
-+  THEN RAISE NOTICE 'insufficient_privilege';
-+END$$;
-+NOTICE:  insufficient_privilege
-+CREATE TABLE t1(i INT);
-+ALTER TABLE t1 ADD COLUMN t TEXT;
-+SECURITY LABEL FOR anon ON COLUMN t1.t
-+IS 'MASKED WITH VALUE NULL';
-+INSERT INTO t1 VALUES (1,'test');
-+SELECT anon.anonymize_table('t1');
-+ anonymize_table 
-+-----------------
-+ t
-+(1 row)
-+
-+SELECT * FROM t1;
-+ i | t 
-+---+---
-+ 1 | 
-+(1 row)
-+
-+UPDATE t1 SET t='test' WHERE i=1;
-+-- SHOULD FAIL
-+SAVEPOINT fail_start_engine;
-+SELECT anon.start_dynamic_masking();
-+ start_dynamic_masking 
-+-----------------------
-+ t
-+(1 row)
-+
-+ROLLBACK TO fail_start_engine;
-+RESET ROLE;
-+SELECT anon.start_dynamic_masking();
-+ start_dynamic_masking 
-+-----------------------
-+ t
-+(1 row)
-+
-+SET ROLE oscar_the_owner;
-+SELECT * FROM t1;
-+ i |  t   
-+---+------
-+ 1 | test
-+(1 row)
-+
-+--SELECT * FROM mask.t1;
-+-- SHOULD FAIL
-+SAVEPOINT fail_stop_engine;
-+SELECT anon.stop_dynamic_masking();
-+ERROR:  permission denied for schema mask
-+CONTEXT:  SQL statement "DROP VIEW mask.t1;"
-+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
-+SQL statement "SELECT anon.mask_drop_view(oid)
-+  FROM pg_catalog.pg_class
-+  WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
-+  AND relkind IN ('r','p','f')"
-+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
-+ROLLBACK TO fail_stop_engine;
-+RESET ROLE;
-+SELECT anon.stop_dynamic_masking();
-+NOTICE:  The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
-+ stop_dynamic_masking 
-+----------------------
-+ t
-+(1 row)
-+
-+SET ROLE oscar_the_owner;
-+-- SHOULD FAIL
-+SAVEPOINT fail_seclabel_on_role;
-+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
-+ERROR:  permission denied
-+DETAIL:  The current user must have the CREATEROLE attribute.
-+ROLLBACK TO fail_seclabel_on_role;
-+ROLLBACK;
--- a/compute/vm-image-spec-bookworm.yaml
+++ b/compute/vm-image-spec-bookworm.yaml
@@ -39,6 +39,13 @@ commands:
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
+  # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
+  # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
+  # use a different path for the socket. The symlink actually points to our custom path.
+  - name: rsyslogd-socket-symlink
+    user: root
+    sysvInitAction: sysinit
+    shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
  - name: rsyslogd
    user: postgres
    sysvInitAction: respawn
@@ -77,6 +84,9 @@ files:
 # compute_ctl will rewrite this file with the actual configuration, if needed.
  - filename: compute_rsyslog.conf
    content: |
+      # SysSock.Name specifies a non-default socket location that is writable by the postgres user.
+      module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
+
      *.*    /dev/null
      $IncludeConfig /etc/rsyslog.d/*.conf
 build: |
@@ -145,7 +155,7 @@ merge: |

  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
  RUN chmod 0666 /etc/compute_rsyslog.conf
-  RUN chmod 0666 /var/log/
+  RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog


  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -39,6 +39,13 @@ commands:
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
+  # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
+  # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
+  # use a different path for the socket. The symlink actually points to our custom path.
+  - name: rsyslogd-socket-symlink
+    user: root
+    sysvInitAction: sysinit
+    shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
  - name: rsyslogd
    user: postgres
    sysvInitAction: respawn
@@ -77,6 +84,9 @@ files:
 # compute_ctl will rewrite this file with the actual configuration, if needed.
  - filename: compute_rsyslog.conf
    content: |
+      # SysSock.Name specifies a non-default socket location that is writable by the postgres user.
+      module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
+
      *.*    /dev/null
      $IncludeConfig /etc/rsyslog.d/*.conf
 build: |
@@ -140,7 +150,7 @@ merge: |

  COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
  RUN chmod 0666 /etc/compute_rsyslog.conf
-  RUN chmod 0666 /var/log/
+  RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog


  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -26,6 +26,7 @@ fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
 http.workspace = true
+indexmap.workspace = true
 jsonwebtoken.workspace = true
 metrics.workspace = true
 nix.workspace = true
@@ -34,16 +35,19 @@ num_cpus.workspace = true
 once_cell.workspace = true
 opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
+p256 = { version = "0.13", features = ["pem"] }
 postgres.workspace = true
 regex.workspace = true
+reqwest = { workspace = true, features = ["json"] }
+ring = "0.17"
 serde.workspace = true
 serde_with.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
+spki = { version = "0.7.3", features = ["std"] }
 tar.workspace = true
 tower.workspace = true
 tower-http.workspace = true
-reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
@@ -57,6 +61,7 @@ thiserror.workspace = true
 url.workspace = true
 uuid.workspace = true
 walkdir.workspace = true
+x509-cert = { version = "0.2.5" }

 postgres_initdb.workspace = true
 compute_api.workspace = true
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -37,10 +37,14 @@ use crate::logger::startup_context_from_env;
 use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
 use crate::monitor::launch_monitor;
 use crate::pg_helpers::*;
-use crate::rsyslog::configure_audit_rsyslog;
+use crate::rsyslog::{
+    PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
+    launch_pgaudit_gc,
+};
 use crate::spec::*;
 use crate::swap::resize_swap;
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
+use crate::tls::watch_cert_for_changes;
 use crate::{config, extension_server, local_proxy};

 pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
@@ -112,6 +116,7 @@ pub struct ComputeNode {

    // key: ext_archive_name, value: started download time, download_completed?
    pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
+    pub compute_ctl_config: ComputeCtlConfig,
 }

 // store some metrics about download size that might impact startup time
@@ -135,8 +140,6 @@ pub struct ComputeState {
    /// passed by the control plane with a /configure HTTP request.
    pub pspec: Option<ParsedSpec>,

-    pub compute_ctl_config: ComputeCtlConfig,
-
    /// If the spec is passed by a /configure request, 'startup_span' is the
    /// /configure request's tracing span. The main thread enters it when it
    /// processes the compute startup, so that the compute startup is considered
@@ -160,7 +163,6 @@ impl ComputeState {
            last_active: None,
            error: None,
            pspec: None,
-            compute_ctl_config: ComputeCtlConfig::default(),
            startup_span: None,
            metrics: ComputeMetrics::default(),
        }
@@ -314,7 +316,6 @@ impl ComputeNode {
            let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
            new_state.pspec = Some(pspec);
        }
-        new_state.compute_ctl_config = compute_ctl_config;

        Ok(ComputeNode {
            params,
@@ -323,6 +324,7 @@ impl ComputeNode {
            state: Mutex::new(new_state),
            state_changed: Condvar::new(),
            ext_download_progress: RwLock::new(HashMap::new()),
+            compute_ctl_config,
        })
    }

@@ -345,7 +347,7 @@ impl ComputeNode {
        // requests while configuration is still in progress.
        crate::http::server::Server::External {
            port: this.params.external_http_port,
-            jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(),
+            config: this.compute_ctl_config.clone(),
            compute_id: this.params.compute_id.clone(),
        }
        .launch(&this);
@@ -524,6 +526,16 @@ impl ComputeNode {
        // Collect all the tasks that must finish here
        let mut pre_tasks = tokio::task::JoinSet::new();

+        // Make sure TLS certificates are properly loaded and in the right place.
+        if self.compute_ctl_config.tls.is_some() {
+            let this = self.clone();
+            pre_tasks.spawn(async move {
+                this.watch_cert_for_changes().await;
+
+                Ok::<(), anyhow::Error>(())
+            });
+        }
+
        // If there are any remote extensions in shared_preload_libraries, start downloading them
        if pspec.spec.remote_extensions.is_some() {
            let (this, spec) = (self.clone(), pspec.spec.clone());
@@ -579,11 +591,13 @@ impl ComputeNode {
        if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
            info!("tuning pgbouncer");

+            let pgbouncer_settings = pgbouncer_settings.clone();
+            let tls_config = self.compute_ctl_config.tls.clone();
+
            // Spawn a background task to do the tuning,
            // so that we don't block the main thread that starts Postgres.
-            let pgbouncer_settings = pgbouncer_settings.clone();
            let _handle = tokio::spawn(async move {
-                let res = tune_pgbouncer(pgbouncer_settings).await;
+                let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
                if let Err(err) = res {
                    error!("error while tuning pgbouncer: {err:?}");
                    // Continue with the startup anyway
@@ -606,7 +620,7 @@ impl ComputeNode {
            });
        }

-        // Configure and start rsyslog if necessary
+        // Configure and start rsyslog for HIPAA if necessary
        if let ComputeAudit::Hipaa = pspec.spec.audit_log_level {
            let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string());
            if remote_endpoint.is_empty() {
@@ -614,13 +628,22 @@ impl ComputeNode {
            }

            let log_directory_path = Path::new(&self.params.pgdata).join("log");
-            // TODO: make this more robust
-            // now rsyslog starts once and there is no monitoring or restart if it fails
-            configure_audit_rsyslog(
-                log_directory_path.to_str().unwrap(),
-                "hipaa",
-                &remote_endpoint,
-            )?;
+            let log_directory_path = log_directory_path.to_string_lossy().to_string();
+            configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;
+
+            // Launch a background task to clean up the audit logs
+            launch_pgaudit_gc(log_directory_path);
+        }
+
+        // Configure and start rsyslog for Postgres logs export
+        if self.has_feature(ComputeFeature::PostgresLogsExport) {
+            if let Some(ref project_id) = pspec.spec.cluster.cluster_id {
+                let host = PostgresLogsRsyslogConfig::default_host(project_id);
+                let conf = PostgresLogsRsyslogConfig::new(Some(&host));
+                configure_postgres_logs_export(conf)?;
+            } else {
+                warn!("not configuring rsyslog for Postgres logs export: project ID is missing")
+            }
        }

        // Launch remaining service threads
@@ -1105,9 +1128,10 @@ impl ComputeNode {
        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
        config::write_postgres_conf(
-            &pgdata_path.join("postgresql.conf"),
+            pgdata_path,
            &pspec.spec,
            self.params.internal_http_port,
+            &self.compute_ctl_config.tls,
        )?;

        // Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1489,11 +1513,13 @@ impl ComputeNode {
        if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
            info!("tuning pgbouncer");

+            let pgbouncer_settings = pgbouncer_settings.clone();
+            let tls_config = self.compute_ctl_config.tls.clone();
+
            // Spawn a background task to do the tuning,
            // so that we don't block the main thread that starts Postgres.
-            let pgbouncer_settings = pgbouncer_settings.clone();
            tokio::spawn(async move {
-                let res = tune_pgbouncer(pgbouncer_settings).await;
+                let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
                if let Err(err) = res {
                    error!("error while tuning pgbouncer: {err:?}");
                }
@@ -1505,7 +1531,8 @@ impl ComputeNode {

            // Spawn a background task to do the configuration,
            // so that we don't block the main thread that starts Postgres.
-            let local_proxy = local_proxy.clone();
+            let mut local_proxy = local_proxy.clone();
+            local_proxy.tls = self.compute_ctl_config.tls.clone();
            tokio::spawn(async move {
                if let Err(err) = local_proxy::configure(&local_proxy) {
                    error!("error while configuring local_proxy: {err:?}");
@@ -1515,8 +1542,12 @@ impl ComputeNode {

        // Write new config
        let pgdata_path = Path::new(&self.params.pgdata);
-        let postgresql_conf_path = pgdata_path.join("postgresql.conf");
-        config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?;
+        config::write_postgres_conf(
+            pgdata_path,
+            &spec,
+            self.params.internal_http_port,
+            &self.compute_ctl_config.tls,
+        )?;

        if !spec.skip_pg_catalog_updates {
            let max_concurrent_connections = spec.reconfigure_concurrency;
@@ -1587,6 +1618,56 @@ impl ComputeNode {
        Ok(())
    }

+    pub async fn watch_cert_for_changes(self: Arc<Self>) {
+        // update status on cert renewal
+        if let Some(tls_config) = &self.compute_ctl_config.tls {
+            let tls_config = tls_config.clone();
+
+            // wait until the cert exists.
+            let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
+
+            tokio::task::spawn_blocking(move || {
+                let handle = tokio::runtime::Handle::current();
+                'cert_update: loop {
+                    // let postgres/pgbouncer/local_proxy know the new cert/key exists.
+                    // we need to wait until it's configurable first.
+
+                    let mut state = self.state.lock().unwrap();
+                    'status_update: loop {
+                        match state.status {
+                            // let's update the state to config pending
+                            ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
+                                state.set_status(
+                                    ComputeStatus::ConfigurationPending,
+                                    &self.state_changed,
+                                );
+                                break 'status_update;
+                            }
+
+                            // exit loop
+                            ComputeStatus::Failed
+                            | ComputeStatus::TerminationPending
+                            | ComputeStatus::Terminated => break 'cert_update,
+
+                            // wait
+                            ComputeStatus::Init
+                            | ComputeStatus::Configuration
+                            | ComputeStatus::Empty => {
+                                state = self.state_changed.wait(state).unwrap();
+                            }
+                        }
+                    }
+                    drop(state);
+
+                    // wait for a new certificate update
+                    if handle.block_on(cert_watch.changed()).is_err() {
+                        break;
+                    }
+                }
+            });
+        }
+    }
+
    /// Update the `last_active` in the shared state, but ensure that it's a more recent one.
    pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
        let mut state = self.state.lock().unwrap();
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,11 +6,13 @@ use std::io::Write;
 use std::io::prelude::*;
 use std::path::Path;

-use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
+use compute_api::responses::TlsConfig;
+use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, GenericOption};

 use crate::pg_helpers::{
    GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
 };
+use crate::tls::{self, SERVER_CRT, SERVER_KEY};

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -38,10 +40,12 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {

 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(
-    path: &Path,
+    pgdata_path: &Path,
    spec: &ComputeSpec,
    extension_server_port: u16,
+    tls_config: &Option<TlsConfig>,
 ) -> Result<()> {
+    let path = pgdata_path.join("postgresql.conf");
    // File::create() destroys the file content if it exists.
    let mut file = File::create(path)?;

@@ -86,6 +90,20 @@ pub fn write_postgres_conf(
        )?;
    }

+    // tls
+    if let Some(tls_config) = tls_config {
+        writeln!(file, "ssl = on")?;
+
+        // postgres requires the keyfile to be in a secure file,
+        // currently too complicated to ensure that at the VM level,
+        // so we just copy them to another file instead. :shrug:
+        tls::update_key_path_blocking(pgdata_path, tls_config);
+
+        // these are the default, but good to be explicit.
+        writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
+        writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
+    }
+
    // Locales
    if cfg!(target_os = "macos") {
        writeln!(file, "lc_messages='C'")?;
@@ -149,7 +167,8 @@ pub fn write_postgres_conf(
        writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
        // This log level is very verbose
        // but this is necessary for HIPAA compliance.
-        writeln!(file, "pgaudit.log='all'")?;
+        // Exclude 'misc' category, because it doesn't contain anything relevant.
+        writeln!(file, "pgaudit.log='all, -misc'")?;
        writeln!(file, "pgaudit.log_parameter=on")?;
        // Disable logging of catalog queries
        // The catalog doesn't contain sensitive data, so we don't need to audit it.
@@ -197,6 +216,12 @@ pub fn write_postgres_conf(
        writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
    }

+    // We need Postgres to send logs to rsyslog so that we can forward them
+    // further to customers' log aggregation systems.
+    if spec.features.contains(&ComputeFeature::PostgresLogsExport) {
+        writeln!(file, "log_destination='stderr,syslog'")?;
+    }
+
    // This is essential to keep this line at the end of the file,
    // because it is intended to override any settings above.
    writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
--- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
+++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
@@ -4,7 +4,8 @@ module(load="imfile")
 # Input configuration for log files in the specified directory
 # Replace {log_directory} with the directory containing the log files
 input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
-global(workDirectory="/var/log")
+# the directory to store rsyslog state files
+global(workDirectory="/var/log/rsyslog")

 # Forward logs to remote syslog server
-*.* @@{remote_endpoint}
+*.* @@{remote_endpoint}
--- a/compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf
+++ b/compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf
@@ -0,0 +1,10 @@
+# Program name comes from postgres' syslog_ident configuration: https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-SYSLOG-IDENT
+# Default value is 'postgres'.
+if $programname == 'postgres' then {{
+    # Forward Postgres logs to telemetry otel collector
+    action(type="omfwd" target="{logs_export_target}" port="{logs_export_port}" protocol="tcp"
+           template="RSYSLOG_SyslogProtocol23Format"
+           action.resumeRetryCount="3"
+           queue.type="linkedList" queue.size="1000")
+    stop
+}}
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -306,6 +306,36 @@ paths:
              schema:
                $ref: "#/components/schemas/GenericError"

+  /configure_telemetry:
+    post:
+      tags:
+        - Configure
+      summary: Configure rsyslog
+      description: |
+        This API endpoint configures rsyslog to forward Postgres logs
+        to a specified otel collector.
+      operationId: configureTelemetry
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                logs_export_host:
+                  type: string
+                  description: |
+                    Hostname and the port of the otel collector. Leave empty to disable logs forwarding.
+                    Example: config-shy-breeze-123-collector-monitoring.neon-telemetry.svc.cluster.local:54526
+      responses:
+        204:
+          description: "Telemetry configured successfully"
+        500:
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
+
 components:
  securitySchemes:
    JWT:
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -1,9 +1,11 @@
 use std::sync::Arc;

+use axum::body::Body;
 use axum::extract::State;
 use axum::response::Response;
-use compute_api::requests::ConfigurationRequest;
+use compute_api::requests::{ConfigurationRequest, ConfigureTelemetryRequest};
 use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
+use compute_api::spec::ComputeFeature;
 use http::StatusCode;
 use tokio::task;
 use tracing::info;
@@ -11,6 +13,7 @@ use tracing::info;
 use crate::compute::{ComputeNode, ParsedSpec};
 use crate::http::JsonResponse;
 use crate::http::extract::Json;
+use crate::rsyslog::{PostgresLogsRsyslogConfig, configure_postgres_logs_export};

 // Accept spec in JSON format and request compute configuration. If anything
 // goes wrong after we set the compute status to `ConfigurationPending` and
@@ -92,3 +95,25 @@ pub(in crate::http) async fn configure(

    JsonResponse::success(StatusCode::OK, body)
 }
+
+pub(in crate::http) async fn configure_telemetry(
+    State(compute): State<Arc<ComputeNode>>,
+    request: Json<ConfigureTelemetryRequest>,
+) -> Response {
+    if !compute.has_feature(ComputeFeature::PostgresLogsExport) {
+        return JsonResponse::error(
+            StatusCode::PRECONDITION_FAILED,
+            "Postgres logs export feature is not enabled".to_string(),
+        );
+    }
+
+    let conf = PostgresLogsRsyslogConfig::new(request.logs_export_host.as_deref());
+    if let Err(err) = configure_postgres_logs_export(conf) {
+        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string());
+    }
+
+    Response::builder()
+        .status(StatusCode::NO_CONTENT)
+        .body(Body::from(""))
+        .unwrap()
+}
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -8,8 +8,8 @@ use axum::Router;
 use axum::middleware::{self};
 use axum::response::IntoResponse;
 use axum::routing::{get, post};
+use compute_api::responses::ComputeCtlConfig;
 use http::StatusCode;
-use jsonwebtoken::jwk::JwkSet;
 use tokio::net::TcpListener;
 use tower::ServiceBuilder;
 use tower_http::{
@@ -41,7 +41,7 @@ pub enum Server {
    },
    External {
        port: u16,
-        jwks: JwkSet,
+        config: ComputeCtlConfig,
        compute_id: String,
    },
 }
@@ -79,7 +79,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                router
            }
            Server::External {
-                jwks, compute_id, ..
+                config, compute_id, ..
            } => {
                let unauthenticated_router =
                    Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                let authenticated_router = Router::<Arc<ComputeNode>>::new()
                    .route("/check_writability", post(check_writability::is_writable))
                    .route("/configure", post(configure::configure))
+                    .route("/configure_telemetry", post(configure::configure_telemetry))
                    .route("/database_schema", get(database_schema::get_schema_dump))
                    .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
                    .route("/insights", get(insights::get_insights))
@@ -95,7 +96,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                    .route("/terminate", post(terminate::terminate))
                    .layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
                        compute_id.clone(),
-                        jwks.clone(),
+                        config.jwks.clone(),
                    )));

                router
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -26,3 +26,4 @@ pub mod spec;
 mod spec_apply;
 pub mod swap;
 pub mod sync_sk;
+pub mod tls;
--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -1,6 +1,8 @@
-use metrics::core::Collector;
+use metrics::core::{AtomicF64, Collector, GenericGauge};
 use metrics::proto::MetricFamily;
-use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
+use metrics::{
+    IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec,
+};
 use once_cell::sync::Lazy;

 pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
@@ -59,10 +61,20 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(||
    .expect("failed to define a metric")
 });

+// Size of audit log directory in bytes
+pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
+    register_gauge!(
+        "compute_audit_log_dir_size",
+        "Size of audit log directory in bytes",
+    )
+    .expect("failed to define a metric")
+});
+
 pub fn collect() -> Vec<MetricFamily> {
    let mut metrics = INSTALLED_EXTENSIONS.collect();
    metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
    metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
    metrics.extend(DB_MIGRATION_FAILED.collect());
+    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
    metrics
 }
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -10,8 +10,10 @@ use std::str::FromStr;
 use std::time::{Duration, Instant};

 use anyhow::{Result, bail};
+use compute_api::responses::TlsConfig;
 use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 use futures::StreamExt;
+use indexmap::IndexMap;
 use ini::Ini;
 use notify::{RecursiveMode, Watcher};
 use postgres::config::Config;
@@ -206,8 +208,8 @@ impl Escaping for PgIdent {
    /// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,
    /// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>
    fn pg_quote_dollar(&self) -> (String, String) {
-        let mut tag: String = "".to_string();
-        let mut outer_tag = "x".to_string();
+        let mut tag: String = "x".to_string();
+        let mut outer_tag = "xx".to_string();

        // Find the first suitable tag that is not present in the string.
        // Postgres' max role/DB name length is 63 bytes, so even in the
@@ -406,7 +408,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {

 /// Update pgbouncer.ini with provided options
 fn update_pgbouncer_ini(
-    pgbouncer_config: HashMap<String, String>,
+    pgbouncer_config: IndexMap<String, String>,
    pgbouncer_ini_path: &str,
 ) -> Result<()> {
    let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
@@ -427,7 +429,10 @@ fn update_pgbouncer_ini(
 /// Tune pgbouncer.
 /// 1. Apply new config using pgbouncer admin console
 /// 2. Add new values to pgbouncer.ini to preserve them after restart
-pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
+pub async fn tune_pgbouncer(
+    mut pgbouncer_config: IndexMap<String, String>,
+    tls_config: Option<TlsConfig>,
+) -> Result<()> {
    let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
        // for VMs use pgbouncer specific way to connect to
        // pgbouncer admin console without password
@@ -473,19 +478,21 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
        }
    };

-    // Apply new config
-    for (option_name, value) in pgbouncer_config.iter() {
-        let query = format!("SET {}={}", option_name, value);
-        // keep this log line for debugging purposes
-        info!("Applying pgbouncer setting change: {}", query);
+    if let Some(tls_config) = tls_config {
+        // pgbouncer starts in a half-ok state if it cannot find these files.
+        // It will default to client_tls_sslmode=deny, which causes proxy to error.
+        // There is a small window at startup where these files don't yet exist in the VM.
+        // Best to wait until it exists.
+        loop {
+            if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
+                break;
+            }
+            tokio::time::sleep(Duration::from_millis(500)).await
+        }

-        if let Err(err) = client.simple_query(&query).await {
-            // Don't fail on error, just print it into log
-            error!(
-                "Failed to apply pgbouncer setting change: {},  {}",
-                query, err
-            );
-        };
+        pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
+        pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
+        pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
    }

    // save values to pgbouncer.ini
@@ -501,6 +508,13 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
    };
    update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;

+    info!("Applying pgbouncer setting change");
+
+    if let Err(err) = client.simple_query("RELOAD").await {
+        // Don't fail on error, just print it into log
+        error!("Failed to apply pgbouncer setting change,  {err}",);
+    };
+
    Ok(())
 }

--- a/compute_tools/src/rsyslog.rs
+++ b/compute_tools/src/rsyslog.rs
@@ -1,8 +1,14 @@
+use std::fs;
+use std::io::ErrorKind;
+use std::path::Path;
 use std::process::Command;
+use std::time::Duration;
 use std::{fs::OpenOptions, io::Write};

-use anyhow::{Context, Result};
-use tracing::info;
+use anyhow::{Context, Result, anyhow};
+use tracing::{error, info, instrument, warn};
+
+const POSTGRES_LOGS_CONF_PATH: &str = "/etc/rsyslog.d/postgres_logs.conf";

 fn get_rsyslog_pid() -> Option<String> {
    let output = Command::new("pgrep")
@@ -43,7 +49,7 @@ fn restart_rsyslog() -> Result<()> {
 }

 pub fn configure_audit_rsyslog(
-    log_directory: &str,
+    log_directory: String,
    tag: &str,
    remote_endpoint: &str,
 ) -> Result<()> {
@@ -75,3 +81,196 @@ pub fn configure_audit_rsyslog(

    Ok(())
 }
+
+/// Configuration for enabling Postgres logs forwarding from rsyslogd
+pub struct PostgresLogsRsyslogConfig<'a> {
+    /// Export destination in `target:port` form (`build` splits it on the first `:`).
+    /// `None` makes `build` return an empty configuration.
+    pub host: Option<&'a str>,
+}
+
+impl<'a> PostgresLogsRsyslogConfig<'a> {
+    /// Creates a configuration exporting to `host` (`target:port`), or a
+    /// disabled one when `host` is `None`.
+    pub fn new(host: Option<&'a str>) -> Self {
+        Self { host }
+    }
+
+    /// Renders the rsyslog configuration text for Postgres logs export.
+    ///
+    /// Returns an empty string when no host is configured. Returns an error
+    /// when the host is not of the form `target:port` with both parts
+    /// non-empty.
+    pub fn build(&self) -> Result<String> {
+        let host = match self.host {
+            Some(host) => host,
+            None => return Ok(String::new()),
+        };
+
+        match host.split_once(':') {
+            // An empty target or port (e.g. "collector:" or ":514") would be
+            // substituted into the template as-is and only fail later, when
+            // rsyslogd is restarted with it. Reject it up front instead.
+            Some((target, port)) if !target.is_empty() && !port.is_empty() => Ok(format!(
+                include_str!("config_template/compute_rsyslog_postgres_export_template.conf"),
+                logs_export_target = target,
+                logs_export_port = port,
+            )),
+            _ => Err(anyhow!("Invalid host format for Postgres logs export")),
+        }
+    }
+
+    /// Reads the configuration currently stored at `POSTGRES_LOGS_CONF_PATH`.
+    /// A missing file is treated as an empty configuration; any other I/O
+    /// error is returned.
+    fn current_config() -> Result<String> {
+        match std::fs::read_to_string(POSTGRES_LOGS_CONF_PATH) {
+            Ok(content) => Ok(content),
+            Err(err) if err.kind() == ErrorKind::NotFound => Ok(String::new()),
+            Err(err) => Err(err.into()),
+        }
+    }
+
+    /// Returns the default host for otel collector that receives Postgres logs
+    pub fn default_host(project_id: &str) -> String {
+        format!(
+            "config-{}-collector.neon-telemetry.svc.cluster.local:10514",
+            project_id
+        )
+    }
+}
+
+/// Brings the on-disk rsyslog configuration for Postgres logs export in line
+/// with `conf`.
+///
+/// Nothing is touched when the rendered configuration already matches the
+/// file. Otherwise the file is removed (empty configuration) or rewritten,
+/// and rsyslogd is restarted.
+pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result<()> {
+    let desired = conf.build()?;
+    let on_disk = PostgresLogsRsyslogConfig::current_config()?;
+
+    if desired == on_disk {
+        info!("postgres logs rsyslog configuration is up-to-date");
+        return Ok(());
+    }
+
+    if desired.is_empty() {
+        // An empty config means export is disabled: drop the file altogether.
+        info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
+        if let Err(err) = std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
+            // The file being gone already is the state we want.
+            if err.kind() != ErrorKind::NotFound {
+                return Err(err.into());
+            }
+        }
+    } else {
+        info!(
+            "configuring rsyslog for postgres logs export to: {:?}",
+            conf.host
+        );
+
+        let mut conf_file = OpenOptions::new()
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .open(POSTGRES_LOGS_CONF_PATH)?;
+        conf_file.write_all(desired.as_bytes())?;
+
+        info!(
+            "rsyslog configuration file {} added successfully. Starting rsyslogd",
+            POSTGRES_LOGS_CONF_PATH
+        );
+    }
+
+    // Both the removal and the rewrite path need rsyslogd to pick up the change.
+    restart_rsyslog()
+}
+
+/// Periodically deletes old pgaudit log files from `log_directory` and
+/// publishes the total size of the remaining log files as a metric.
+///
+/// Runs forever, one pass every 60 seconds. It only returns (with an error)
+/// when the `find` process cannot be run at all.
+#[instrument(skip_all)]
+async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {
+    info!("running pgaudit GC main loop");
+    loop {
+        // Check log_directory for old pgaudit logs and delete them.
+        // New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age
+        // Find files that were not modified in the last 15 minutes and delete them.
+        // This should be enough time for rsyslog to process the logs and for us to catch the alerts.
+        //
+        // In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age.
+        //
+        // TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog
+        // imfile-state files, but for now just do a simple GC to avoid filling up the disk.
+        let find_output = Command::new("find")
+            .arg(&log_directory)
+            .arg("-name")
+            .arg("audit*.log")
+            .arg("-mmin")
+            .arg("+15")
+            .arg("-delete")
+            .output()?;
+
+        // A non-zero exit status means the GC pass did not fully succeed.
+        // Log it (with whatever `find` printed to stderr) and keep looping,
+        // so the next pass can retry instead of the failure going unnoticed.
+        if !find_output.status.success() {
+            warn!(
+                "pgaudit GC: find exited with {}: {}",
+                find_output.status,
+                String::from_utf8_lossy(&find_output.stderr).trim()
+            );
+        }
+
+        // also collect the metric for the size of the log directory
+        async fn get_log_files_size(path: &Path) -> Result<u64> {
+            let mut total_size = 0;
+
+            for entry in fs::read_dir(path)? {
+                let entry = entry?;
+                let entry_path = entry.path();
+
+                // Only regular files whose path ends with "log" are counted.
+                if entry_path.is_file() && entry_path.to_string_lossy().ends_with("log") {
+                    total_size += entry.metadata()?.len();
+                }
+            }
+
+            Ok(total_size)
+        }
+
+        // A failure to measure the directory is not fatal: report 0 and retry next pass.
+        let log_directory_size = get_log_files_size(Path::new(&log_directory))
+            .await
+            .unwrap_or_else(|e| {
+                warn!("Failed to get log directory size: {}", e);
+                0
+            });
+        crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64);
+        tokio::time::sleep(Duration::from_secs(60)).await;
+    }
+}
+
+// Launch the pgaudit GC as a background tokio task that cleans up the old pgaudit
+// logs stored in the log_directory. If the GC loop returns an error, the error is
+// logged and the task ends.
+pub fn launch_pgaudit_gc(log_directory: String) {
+    tokio::spawn(async move {
+        if let Err(e) = pgaudit_gc_main_loop(log_directory).await {
+            error!("pgaudit GC main loop failed: {}", e);
+        }
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::rsyslog::PostgresLogsRsyslogConfig;
+
+    /// Covers `PostgresLogsRsyslogConfig::build` for a disabled config, a valid
+    /// `target:port` host, a host without a port, and the default collector host.
+    #[test]
+    fn test_postgres_logs_config() {
+        {
+            // Verify empty config
+            let conf = PostgresLogsRsyslogConfig::new(None);
+            let res = conf.build();
+            assert!(res.is_ok());
+            let conf_str = res.unwrap();
+            assert_eq!(&conf_str, "");
+        }
+
+        {
+            // Verify config
+            let conf = PostgresLogsRsyslogConfig::new(Some("collector.cvc.local:514"));
+            let res = conf.build();
+            assert!(res.is_ok());
+            let conf_str = res.unwrap();
+            assert!(conf_str.contains("omfwd"));
+            assert!(conf_str.contains(r#"target="collector.cvc.local""#));
+            assert!(conf_str.contains(r#"port="514""#));
+        }
+
+        {
+            // Verify invalid config (no `:` separator between target and port)
+            let conf = PostgresLogsRsyslogConfig::new(Some("invalid"));
+            let res = conf.build();
+            assert!(res.is_err());
+        }
+
+        {
+            // Verify config with default host
+            let host = PostgresLogsRsyslogConfig::default_host("shy-breeze-123");
+            let conf = PostgresLogsRsyslogConfig::new(Some(&host));
+            let res = conf.build();
+            assert!(res.is_ok());
+            let conf_str = res.unwrap();
+            assert!(conf_str.contains(r#"shy-breeze-123"#));
+            assert!(conf_str.contains(r#"port="10514""#));
+        }
+    }
+}
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -8,13 +8,12 @@ use compute_api::responses::{
 use compute_api::spec::ComputeSpec;
 use reqwest::StatusCode;
 use tokio_postgres::Client;
-use tracing::{error, info, instrument, warn};
+use tracing::{error, info, instrument};

 use crate::config;
 use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
 use crate::migration::MigrationRunner;
 use crate::params::PG_HBA_ALL_MD5;
-use crate::pg_helpers::*;

 // Do control plane request and return response if any. In case of error it
 // returns a bool flag indicating whether it makes sense to retry the request
@@ -212,122 +211,3 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {

    Ok(())
 }
-
-/// Connect to the database as superuser and pre-create anon extension
-/// if it is present in shared_preload_libraries
-#[instrument(skip_all)]
-pub async fn handle_extension_anon(
-    spec: &ComputeSpec,
-    db_owner: &str,
-    db_client: &mut Client,
-    grants_only: bool,
-) -> Result<()> {
-    info!("handle extension anon");
-
-    if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
-        if libs.contains("anon") {
-            if !grants_only {
-                // check if extension is already initialized using anon.is_initialized()
-                let query = "SELECT anon.is_initialized()";
-                match db_client.query(query, &[]).await {
-                    Ok(rows) => {
-                        if !rows.is_empty() {
-                            let is_initialized: bool = rows[0].get(0);
-                            if is_initialized {
-                                info!("anon extension is already initialized");
-                                return Ok(());
-                            }
-                        }
-                    }
-                    Err(e) => {
-                        warn!(
-                            "anon extension is_installed check failed with expected error: {}",
-                            e
-                        );
-                    }
-                };
-
-                // Create anon extension if this compute needs it
-                // Users cannot create it themselves, because superuser is required.
-                let mut query = "CREATE EXTENSION IF NOT EXISTS anon CASCADE";
-                info!("creating anon extension with query: {}", query);
-                match db_client.query(query, &[]).await {
-                    Ok(_) => {}
-                    Err(e) => {
-                        error!("anon extension creation failed with error: {}", e);
-                        return Ok(());
-                    }
-                }
-
-                // check that extension is installed
-                query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
-                let rows = db_client.query(query, &[]).await?;
-                if rows.is_empty() {
-                    error!("anon extension is not installed");
-                    return Ok(());
-                }
-
-                // Initialize anon extension
-                // This also requires superuser privileges, so users cannot do it themselves.
-                query = "SELECT anon.init()";
-                match db_client.query(query, &[]).await {
-                    Ok(_) => {}
-                    Err(e) => {
-                        error!("anon.init() failed with error: {}", e);
-                        return Ok(());
-                    }
-                }
-            }
-
-            // check that extension is installed, if not bail early
-            let query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
-            match db_client.query(query, &[]).await {
-                Ok(rows) => {
-                    if rows.is_empty() {
-                        error!("anon extension is not installed");
-                        return Ok(());
-                    }
-                }
-                Err(e) => {
-                    error!("anon extension check failed with error: {}", e);
-                    return Ok(());
-                }
-            };
-
-            let query = format!("GRANT ALL ON SCHEMA anon TO {}", db_owner);
-            info!("granting anon extension permissions with query: {}", query);
-            db_client.simple_query(&query).await?;
-
-            // Grant permissions to db_owner to use anon extension functions
-            let query = format!("GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", db_owner);
-            info!("granting anon extension permissions with query: {}", query);
-            db_client.simple_query(&query).await?;
-
-            // This is needed, because some functions are defined as SECURITY DEFINER.
-            // In Postgres SECURITY DEFINER functions are executed with the privileges
-            // of the owner.
-            // In anon extension this it is needed to access some GUCs, which are only accessible to
-            // superuser. But we've patched postgres to allow db_owner to access them as well.
-            // So we need to change owner of these functions to db_owner.
-            let query = format!("
-                SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};'
-                from pg_proc p
-                join pg_namespace nsp ON p.pronamespace = nsp.oid
-                where nsp.nspname = 'anon';", db_owner);
-
-            info!("change anon extension functions owner to db owner");
-            db_client.simple_query(&query).await?;
-
-            //  affects views as well
-            let query = format!("GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", db_owner);
-            info!("granting anon extension permissions with query: {}", query);
-            db_client.simple_query(&query).await?;
-
-            let query = format!("GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", db_owner);
-            info!("granting anon extension permissions with query: {}", query);
-            db_client.simple_query(&query).await?;
-        }
-    }
-
-    Ok(())
-}
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -6,7 +6,7 @@ use std::sync::Arc;

 use anyhow::{Context, Result};
 use compute_api::responses::ComputeStatus;
-use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role};
+use compute_api::spec::{ComputeAudit, ComputeSpec, Database, PgIdent, Role};
 use futures::future::join_all;
 use tokio::sync::RwLock;
 use tokio_postgres::Client;
@@ -26,7 +26,7 @@ use crate::spec_apply::ApplySpecPhase::{
    RunInEachDatabase,
 };
 use crate::spec_apply::PerDatabasePhase::{
-    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions,
 };

 impl ComputeNode {
@@ -238,7 +238,6 @@ impl ComputeNode {
                    let mut phases = vec![
                        DeleteDBRoleReferences,
                        ChangeSchemaPerms,
-                        HandleAnonExtension,
                    ];

                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
@@ -458,7 +457,6 @@ impl Debug for DB {
 pub enum PerDatabasePhase {
    DeleteDBRoleReferences,
    ChangeSchemaPerms,
-    HandleAnonExtension,
    /// This is a shared phase, used for both i) dropping dangling LR subscriptions
    /// before dropping the DB, and ii) dropping all subscriptions after creating
    /// a fresh branch.
@@ -1012,98 +1010,6 @@ async fn get_operations<'a>(
                    ]
                    .into_iter();

-                    Ok(Box::new(operations))
-                }
-                // TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
-                PerDatabasePhase::HandleAnonExtension => {
-                    // Only install Anon into user databases
-                    let db = match &db {
-                        DB::SystemDB => return Ok(Box::new(empty())),
-                        DB::UserDB(db) => db,
-                    };
-                    // Never install Anon when it's not enabled as feature
-                    if !spec.features.contains(&ComputeFeature::AnonExtension) {
-                        return Ok(Box::new(empty()));
-                    }
-
-                    // Only install Anon when it's added in preload libraries
-                    let opt_libs = spec.cluster.settings.find("shared_preload_libraries");
-
-                    let libs = match opt_libs {
-                        Some(libs) => libs,
-                        None => return Ok(Box::new(empty())),
-                    };
-
-                    if !libs.contains("anon") {
-                        return Ok(Box::new(empty()));
-                    }
-
-                    let db_owner = db.owner.pg_quote();
-
-                    let operations = vec![
-                        // Create anon extension if this compute needs it
-                        // Users cannot create it themselves, because superuser is required.
-                        Operation {
-                            query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"),
-                            comment: Some(String::from("creating anon extension")),
-                        },
-                        // Initialize anon extension
-                        // This also requires superuser privileges, so users cannot do it themselves.
-                        Operation {
-                            query: String::from("SELECT anon.init()"),
-                            comment: Some(String::from("initializing anon extension data")),
-                        },
-                        Operation {
-                            query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner),
-                            comment: Some(String::from(
-                                "granting anon extension schema permissions",
-                            )),
-                        },
-                        Operation {
-                            query: format!(
-                                "GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}",
-                                db_owner
-                            ),
-                            comment: Some(String::from(
-                                "granting anon extension schema functions permissions",
-                            )),
-                        },
-                        // We need this, because some functions are defined as SECURITY DEFINER.
-                        // In Postgres SECURITY DEFINER functions are executed with the privileges
-                        // of the owner.
-                        // In anon extension this it is needed to access some GUCs, which are only accessible to
-                        // superuser. But we've patched postgres to allow db_owner to access them as well.
-                        // So we need to change owner of these functions to db_owner.
-                        Operation {
-                            query: format!(
-                                include_str!("sql/anon_ext_fn_reassign.sql"),
-                                db_owner = db_owner,
-                            ),
-                            comment: Some(String::from(
-                                "change anon extension functions owner to database_owner",
-                            )),
-                        },
-                        Operation {
-                            query: format!(
-                                "GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}",
-                                db_owner,
-                            ),
-                            comment: Some(String::from(
-                                "granting anon extension tables permissions",
-                            )),
-                        },
-                        Operation {
-                            query: format!(
-                                "GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}",
-                                db_owner,
-                            ),
-                            comment: Some(String::from(
-                                "granting anon extension sequences permissions",
-                            )),
-                        },
-                    ]
-                    .into_iter();
-
                    Ok(Box::new(operations))
                }
            }
--- a/compute_tools/src/tls.rs
+++ b/compute_tools/src/tls.rs
@@ -0,0 +1,118 @@
+use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
+
+use anyhow::{Context, Result, bail};
+use compute_api::responses::TlsConfig;
+use ring::digest;
+use spki::ObjectIdentifier;
+use spki::der::{Decode, PemReader};
+use x509_cert::Certificate;
+
+/// Digest of a certificate file's contents. It is compared between polls to
+/// detect that the certificate has changed.
+#[derive(Clone, Copy)]
+pub struct CertDigest(digest::Digest);
+
+/// Watches the certificate file at `cert_path` for content changes.
+///
+/// Computes the initial digest first (this waits until the file can be read),
+/// then spawns a background task that recomputes the digest every 60 seconds
+/// and publishes it on the returned watch channel whenever it differs from
+/// the previous one. The task stops once the channel is closed.
+pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
+    let mut digest = compute_digest(&cert_path).await;
+    let (tx, rx) = tokio::sync::watch::channel(digest);
+    tokio::spawn(async move {
+        while !tx.is_closed() {
+            let new_digest = compute_digest(&cert_path).await;
+            // Only notify receivers when the file contents actually changed.
+            if digest.0.as_ref() != new_digest.0.as_ref() {
+                digest = new_digest;
+                // A send error is ignored here; the loop condition handles a closed channel.
+                _ = tx.send(digest);
+            }
+
+            tokio::time::sleep(Duration::from_secs(60)).await
+        }
+    });
+    rx
+}
+
+/// Computes the digest of the certificate at `cert_path`, retrying once per
+/// second (and logging every failure) until the file can be read.
+async fn compute_digest(cert_path: &str) -> CertDigest {
+    loop {
+        let e = match try_compute_digest(cert_path).await {
+            Ok(digest) => return digest,
+            Err(e) => e,
+        };
+        tracing::error!("could not read cert file {e:?}");
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+}
+
+/// Reads the file at `cert_path` and returns the SHA-256 digest of its contents.
+/// Fails if the file cannot be read.
+async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
+    let data = tokio::fs::read(cert_path).await?;
+    // SHA-256 is extremely collision resistant, so the digest can safely be assumed to be unique.
+    Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
+}
+
+/// File name (joined onto `pg_data`) that the certificate copy is written to.
+pub const SERVER_CRT: &str = "server.crt";
+/// File name (joined onto `pg_data`) that the private key copy is written to.
+pub const SERVER_KEY: &str = "server.key";
+
+/// Copies the TLS key and certificate into `pg_data`, blocking the calling
+/// thread and retrying once per second (logging each failure) until it succeeds.
+pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
+    while let Err(e) = try_update_key_path_blocking(pg_data, tls_config) {
+        tracing::error!("could not create key file {e:?}");
+        std::thread::sleep(Duration::from_secs(1));
+    }
+}
+
+// Postgres requires the keypath be "secure". This means
+// 1. Owned by the postgres user.
+// 2. Have permission 600.
+//
+// Reads the key and certificate from the paths in `tls_config`, checks that
+// they belong together, and then writes copies to `SERVER_KEY` / `SERVER_CRT`
+// under `pg_data`. Any read, verification, open or write failure is returned.
+fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
+    let key = std::fs::read_to_string(&tls_config.key_path)?;
+    let crt = std::fs::read_to_string(&tls_config.cert_path)?;
+
+    // to mitigate a race condition during renewal.
+    verify_key_cert(&key, &crt)?;
+
+    // NOTE(review): `mode(0o600)` is documented as the mode a *new* file is
+    // created with; presumably a pre-existing file keeps whatever permissions
+    // it already has — confirm this is acceptable.
+    let mut key_file = std::fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .mode(0o600)
+        .open(pg_data.join(SERVER_KEY))?;
+
+    let mut crt_file = std::fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .mode(0o600)
+        .open(pg_data.join(SERVER_CRT))?;
+
+    // Both files are opened (and truncated) before either one is written.
+    key_file.write_all(key.as_bytes())?;
+    crt_file.write_all(crt.as_bytes())?;
+
+    Ok(())
+}
+
+/// Checks that the PEM private key `key` matches the public key embedded in
+/// the PEM certificate `cert`.
+///
+/// Only certificates whose signature algorithm OID is ECDSA-with-SHA256 are
+/// handled; the key is then parsed as a SEC1-encoded P-256 secret key. Any
+/// other OID, a parse failure, or a public-key mismatch yields an error.
+fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
+    const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
+
+    let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
+        .context("decode cert")?;
+
+    // NOTE(review): this dispatches on the certificate's *signature* algorithm,
+    // not on the subject public key algorithm — presumably the two always
+    // coincide for the certificates used here; confirm.
+    match cert.signature_algorithm.oid {
+        ECDSA_WITH_SHA256 => {
+            let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
+
+            // Compare the public key derived from the private key with the
+            // raw subject public key bytes stored in the certificate.
+            let a = key.public_key().to_sec1_bytes();
+            let b = cert
+                .tbs_certificate
+                .subject_public_key_info
+                .subject_public_key
+                .raw_bytes();
+
+            if *a != *b {
+                bail!("private key file does not match certificate")
+            }
+        }
+        _ => bail!("unknown TLS key type"),
+    }
+
+    Ok(())
+}
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -64,7 +64,8 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
    #[test]
    fn ident_pg_quote_dollar() {
        let test_cases = vec![
-            ("name", ("$$name$$", "x")),
+            ("name", ("$x$name$x$", "xx")),
+            ("name$", ("$x$name$$x$", "xx")),
            ("name$$", ("$x$name$$$x$", "xx")),
            ("name$$$", ("$x$name$$$$x$", "xx")),
            ("name$$$$", ("$x$name$$$$$x$", "xx")),
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -979,7 +979,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
            neon_distrib_dir: None,
            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
            storage_controller: None,
-            control_plane_compute_hook_api: None,
+            control_plane_hooks_api: None,
            generate_local_ssl_certs: false,
        }
    };
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -72,9 +72,9 @@ pub struct LocalEnv {
    // be propagated into each pageserver's configuration.
    pub control_plane_api: Url,

-    // Control plane upcall API for storage controller.  If set, this will be propagated into the
+    // Control plane upcall APIs for storage controller.  If set, this will be propagated into the
    // storage controller's configuration.
-    pub control_plane_compute_hook_api: Option<Url>,
+    pub control_plane_hooks_api: Option<Url>,

    /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
@@ -104,6 +104,7 @@ pub struct OnDiskConfig {
    pub pageservers: Vec<PageServerConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    pub control_plane_api: Option<Url>,
+    pub control_plane_hooks_api: Option<Url>,
    pub control_plane_compute_hook_api: Option<Url>,
    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
    // Note: skip serializing because in compat tests old storage controller fails
@@ -136,7 +137,7 @@ pub struct NeonLocalInitConf {
    pub pageservers: Vec<NeonLocalInitPageserverConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    pub control_plane_api: Option<Url>,
-    pub control_plane_compute_hook_api: Option<Option<Url>>,
+    pub control_plane_hooks_api: Option<Url>,
    pub generate_local_ssl_certs: bool,
 }

@@ -148,7 +149,7 @@ pub struct NeonBroker {
    pub listen_addr: SocketAddr,
 }

-/// Broker config for cluster internal communication.
+/// A part of storage controller's config the neon_local knows about.
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
 pub struct NeonStorageControllerConf {
@@ -175,10 +176,11 @@ pub struct NeonStorageControllerConf {
    #[serde(with = "humantime_serde")]
    pub long_reconcile_threshold: Option<Duration>,

-    #[serde(default)]
    pub use_https_pageserver_api: bool,

    pub timelines_onto_safekeepers: bool,
+
+    pub use_https_safekeeper_api: bool,
 }

 impl NeonStorageControllerConf {
@@ -204,6 +206,7 @@ impl Default for NeonStorageControllerConf {
            long_reconcile_threshold: None,
            use_https_pageserver_api: false,
            timelines_onto_safekeepers: false,
+            use_https_safekeeper_api: false,
        }
    }
 }
@@ -301,6 +304,7 @@ pub struct SafekeeperConf {
    pub pg_port: u16,
    pub pg_tenant_only_port: Option<u16>,
    pub http_port: u16,
+    pub https_port: Option<u16>,
    pub sync: bool,
    pub remote_storage: Option<String>,
    pub backup_threads: Option<u32>,
@@ -315,6 +319,7 @@ impl Default for SafekeeperConf {
            pg_port: 0,
            pg_tenant_only_port: None,
            http_port: 0,
+            https_port: None,
            sync: true,
            remote_storage: None,
            backup_threads: None,
@@ -573,7 +578,8 @@ impl LocalEnv {
                pageservers,
                safekeepers,
                control_plane_api,
-                control_plane_compute_hook_api,
+                control_plane_hooks_api,
+                control_plane_compute_hook_api: _,
                branch_name_mappings,
                generate_local_ssl_certs,
            } = on_disk_config;
@@ -588,7 +594,7 @@ impl LocalEnv {
                pageservers,
                safekeepers,
                control_plane_api: control_plane_api.unwrap(),
-                control_plane_compute_hook_api,
+                control_plane_hooks_api,
                branch_name_mappings,
                generate_local_ssl_certs,
            }
@@ -695,7 +701,8 @@ impl LocalEnv {
                pageservers: vec![], // it's skip_serializing anyway
                safekeepers: self.safekeepers.clone(),
                control_plane_api: Some(self.control_plane_api.clone()),
-                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
+                control_plane_hooks_api: self.control_plane_hooks_api.clone(),
+                control_plane_compute_hook_api: None,
                branch_name_mappings: self.branch_name_mappings.clone(),
                generate_local_ssl_certs: self.generate_local_ssl_certs,
            },
@@ -779,8 +786,8 @@ impl LocalEnv {
            pageservers,
            safekeepers,
            control_plane_api,
-            control_plane_compute_hook_api,
            generate_local_ssl_certs,
+            control_plane_hooks_api,
        } = conf;

        // Find postgres binaries.
@@ -827,7 +834,7 @@ impl LocalEnv {
            pageservers: pageservers.iter().map(Into::into).collect(),
            safekeepers,
            control_plane_api: control_plane_api.unwrap(),
-            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
+            control_plane_hooks_api,
            branch_name_mappings: Default::default(),
            generate_local_ssl_certs,
        };
@@ -842,6 +849,9 @@ impl LocalEnv {
        // create safekeeper dirs
        for safekeeper in &env.safekeepers {
            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
+            SafekeeperNode::from_env(&env, safekeeper)
+                .initialize()
+                .context("safekeeper init failed")?;
        }

        // initialize pageserver state
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -111,6 +111,18 @@ impl SafekeeperNode {
            .expect("non-Unicode path")
    }

+    /// Prepares the on-disk state a safekeeper node needs before it is started.
+    /// Currently this only generates the SSL certificate and key in the node's
+    /// data directory, and only when local SSL certificate generation is enabled.
+    pub fn initialize(&self) -> anyhow::Result<()> {
+        if !self.env.generate_local_ssl_certs {
+            return Ok(());
+        }
+
+        let datadir = self.datadir_path();
+        let cert_path = datadir.join("server.crt");
+        let key_path = datadir.join("server.key");
+        self.env.generate_ssl_cert(&cert_path, &key_path)?;
+        Ok(())
+    }
+
    pub async fn start(
        &self,
        extra_opts: &[String],
@@ -196,6 +208,16 @@ impl SafekeeperNode {
            ]);
        }

+        if let Some(https_port) = self.conf.https_port {
+            args.extend([
+                "--listen-https".to_owned(),
+                format!("{}:{}", self.listen_addr, https_port),
+            ]);
+        }
+        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
+            args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
+        }
+
        args.extend_from_slice(extra_opts);

        background_process::start_process(
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -538,6 +538,10 @@ impl StorageController {
            args.push("--use-https-pageserver-api".to_string());
        }

+        if self.config.use_https_safekeeper_api {
+            args.push("--use-https-safekeeper-api".to_string());
+        }
+
        if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
            args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
        }
@@ -558,10 +562,8 @@ impl StorageController {
            args.push(format!("--public-key=\"{public_key}\""));
        }

-        if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
-            args.push(format!(
-                "--compute-hook-url={control_plane_compute_hook_api}"
-            ));
+        if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {
+            args.push(format!("--control-plane-url={control_plane_hooks_api}"));
        }

        if let Some(split_threshold) = self.config.split_threshold.as_ref() {
--- a/deny.toml
+++ b/deny.toml
@@ -31,10 +31,6 @@ reason = "the marvin attack only affects private key decryption, not public key
 id = "RUSTSEC-2024-0436"
 reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact."

-[[advisories.ignore]]
-id = "RUSTSEC-2025-0014"
-reason = "The humantime is widely used and is not easy to replace right now. It is unmaintained, but it has no known vulnerabilities to care about. #11179"
-
 # This section is considered when running `cargo deny check licenses`
 # More documentation for the licenses section can be found here:
 # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
--- a/docs/rfcs/001-cluster-size-limits.md
+++ b/docs/rfcs/001-cluster-size-limits.md
--- a/docs/rfcs/README.md
+++ b/docs/rfcs/README.md
@@ -1,3 +1,7 @@
+# Neon RFCs
+
+## Overview
+
 This directory contains Request for Comments documents, or RFCs, for
 features or concepts that have been proposed. Alternative names:
 technical design doc, ERD, one-pager
@@ -59,37 +63,10 @@ RFC lifecycle:

 ### RFC template

+Use the `YYYY-MM-DD-copy-me.md` template as a starting point. The timestamp prefix helps avoid awkward 'id' collisions.
+
+```sh
+cp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +"%Y-%m-%d")-<name>.md
+```
+
 Note, a lot of the sections are marked as ‘if relevant’. They are included into the template as a reminder and to help inspiration.
-
-```
-# Name
-Created on ..
-Implemented on ..
-
-## Summary
-
-## Motivation
-
-## Non Goals (if relevant)
-
-## Impacted components (e.g. pageserver, safekeeper, console, etc)
-
-## Proposed implementation
-
-### Reliability, failure modes and corner cases (if relevant)
-
-### Interaction/Sequence diagram (if relevant)
-
-### Scalability (if relevant)
-
-### Security implications (if relevant)
-
-### Unresolved questions (if relevant)
-
-## Alternative implementation (if relevant)
-
-## Pros/cons of proposed approaches (if relevant)
-
-## Definition of Done (if relevant)
-
-```
--- a/docs/rfcs/YYYY-MM-DD-copy-me.md
+++ b/docs/rfcs/YYYY-MM-DD-copy-me.md
@@ -0,0 +1,30 @@
+# Name
+
+Created on YYYY-MM-DD
+Implemented on _TBD_
+
+## Summary
+
+## Motivation
+
+## Non Goals (if relevant)
+
+## Impacted components (e.g. pageserver, safekeeper, console, etc)
+
+## Proposed implementation
+
+### Reliability, failure modes and corner cases (if relevant)
+
+### Interaction/Sequence diagram (if relevant)
+
+### Scalability (if relevant)
+
+### Security implications (if relevant)
+
+### Unresolved questions (if relevant)
+
+## Alternative implementation (if relevant)
+
+## Pros/cons of proposed approaches (if relevant)
+
+## Definition of Done (if relevant)
--- a/docs/storage_controller.md
+++ b/docs/storage_controller.md
@@ -101,15 +101,25 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co
 postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
 location changes.

-The hook is configured using the storage controller's `--control-plane-url` CLI option. If the hook requires
-JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
+The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.

-In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
+Currently, there are two hooks, each computed by appending the name to the provided control plane URL prefix:
+
+- `notify-attach`, called whenever attachment for pageservers changes
+- `notify-safekeepers`, called whenever attachment for safekeepers changes
+
+If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.
+The hooks will be invoked with a `PUT` request.
+
+In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,
 the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
 the compute hook.

-When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
-the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
+When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.
+
+### `notify-attach` body
+
+The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.

 ```
 struct ComputeHookNotifyRequestShard {
@@ -128,15 +138,15 @@ When a notification is received:

 1. Modify postgres configuration for this tenant:

-   - set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
+   - set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
     shards identified by `NodeId` must be converted to the address+port of the node.
-   - if stripe_size is not None, set `neon.stripe_size` to this value
+   - if stripe_size is not None, set `neon.shard_stripe_size` to this value

 2. Send SIGHUP to postgres to reload configuration
 3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
   will retry the notification until it succeeds..

-### Example notification body
+Example body:

 ```
 {
@@ -148,3 +158,34 @@ When a notification is received:
  ],
 }
 ```
+
+### `notify-safekeepers` body
+
+The `notify-safekeepers` request body follows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.
+
+```
+pub struct SafekeeperInfo {
+    pub id: NodeId,
+    pub hostname: String,
+}
+
+pub struct SafekeepersNotifyRequest {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub generation: u32,
+    pub safekeepers: Vec<SafekeeperInfo>,
+}
+```
+
+When a notification is received:
+
+1. Modify postgres configuration for this tenant:
+
+   - set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The
+     safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.
+     The hostname is provided for debugging purposes only, so we reserve the right to change how we pass it.
+   - set `neon.safekeepers_generation` to the provided `generation` value.
+
+2. Send SIGHUP to postgres to reload configuration
+3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
+   will retry the notification until it succeeds.
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
+indexmap.workspace = true
 jsonwebtoken.workspace = true
 serde.workspace = true
 serde_json.workspace = true
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -30,3 +30,9 @@ pub struct SetRoleGrantsRequest {
    pub privileges: Vec<Privilege>,
    pub role: PgIdent,
 }
+
+/// Request of the /configure_telemetry API
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ConfigureTelemetryRequest {
+    pub logs_export_host: Option<String>,
+}
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -139,6 +139,7 @@ pub struct ComputeCtlConfig {
    /// Set of JSON web keys that the compute can use to authenticate
    /// communication from the control plane.
    pub jwks: JwkSet,
+    pub tls: Option<TlsConfig>,
 }

 impl Default for ComputeCtlConfig {
@@ -147,10 +148,17 @@ impl Default for ComputeCtlConfig {
            jwks: JwkSet {
                keys: Vec::default(),
            },
+            tls: None,
        }
    }
 }

+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct TlsConfig {
+    pub key_path: String,
+    pub cert_path: String,
+}
+
 /// Response of the `/computes/{compute_id}/spec` control-plane API.
 #[derive(Deserialize, Debug)]
 pub struct ControlPlaneSpecResponse {
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -5,12 +5,15 @@
 //! and connect it to the storage nodes.
 use std::collections::HashMap;

+use indexmap::IndexMap;
 use regex::Regex;
 use remote_storage::RemotePath;
 use serde::{Deserialize, Serialize};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

+use crate::responses::TlsConfig;
+
 /// String type alias representing Postgres identifier and
 /// intended to be used for DB / role names.
 pub type PgIdent = String;
@@ -125,7 +128,7 @@ pub struct ComputeSpec {
    // information about available remote extensions
    pub remote_extensions: Option<RemoteExtSpec>,

-    pub pgbouncer_settings: Option<HashMap<String, String>>,
+    pub pgbouncer_settings: Option<IndexMap<String, String>>,

    // Stripe size for pageserver sharding, in pages
    #[serde(default)]
@@ -176,8 +179,8 @@ pub enum ComputeFeature {
    /// track short-lived connections as user activity.
    ActivityMonitorExperimental,

-    /// Pre-install and initialize anon extension for every database in the cluster
-    AnonExtension,
+    /// Allow configuring rsyslog for Postgres logs export
+    PostgresLogsExport,

    /// This is a special feature flag that is used to represent unknown feature flags.
    /// Basically all unknown to enum flags are represented as this one. See unit test
@@ -357,6 +360,9 @@ pub struct LocalProxySpec {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub jwks: Option<Vec<JwksSettings>>,
+    #[serde(default)]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tls: Option<TlsConfig>,
 }

 #[derive(Clone, Debug, Deserialize, Serialize)]
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -208,7 +208,6 @@
    ],
    "remote_extensions": {
        "library_index": {
-          "anon": "anon",
          "postgis-3": "postgis",
          "libpgrouting-3.4": "postgis",
          "postgis_raster-3": "postgis",
@@ -217,12 +216,6 @@
          "address_standardizer-3": "postgis"
        },
        "extension_data": {
-          "anon": {
-            "archive_path": "5834329303/v15/extensions/anon.tar.zst",
-            "control_data": {
-              "anon.control": "# PostgreSQL Anonymizer (anon) extension\ncomment = ''Data anonymization tools''\ndefault_version = ''1.1.0''\ndirectory=''extension/anon''\nrelocatable = false\nrequires = ''pgcrypto''\nsuperuser = false\nmodule_pathname = ''$libdir/anon''\ntrusted = true\n"
-            }
-          },
          "postgis": {
            "archive_path": "5834329303/v15/extensions/postgis.tar.zst",
            "control_data": {
@@ -238,7 +231,6 @@
          }
        },
        "custom_extensions": [
-          "anon"
        ],
        "public_extensions": [
          "postgis"
--- a/libs/http-utils/Cargo.toml
+++ b/libs/http-utils/Cargo.toml
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 anyhow.workspace = true
 bytes.workspace = true
+camino.workspace = true
 fail.workspace = true
 futures.workspace = true
 hyper0.workspace = true
@@ -16,6 +17,7 @@ once_cell.workspace = true
 pprof.workspace = true
 regex.workspace = true
 routerify.workspace = true
+rustls-pemfile.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_path_to_error.workspace = true
--- a/libs/http-utils/src/lib.rs
+++ b/libs/http-utils/src/lib.rs
@@ -4,6 +4,7 @@ pub mod failpoints;
 pub mod json;
 pub mod request;
 pub mod server;
+pub mod tls_certs;

 extern crate hyper0 as hyper;

--- a/libs/http-utils/src/tls_certs.rs
+++ b/libs/http-utils/src/tls_certs.rs
@@ -0,0 +1,21 @@
+use camino::Utf8Path;
+use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer};
+
+pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
+    let file = std::fs::File::open(filename)?;
+    let mut reader = std::io::BufReader::new(file);
+
+    Ok(rustls_pemfile::certs(&mut reader).collect::<Result<Vec<_>, _>>()?)
+}
+
+pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
+    let file = std::fs::File::open(filename)?;
+    let mut reader = std::io::BufReader::new(file);
+
+    let key = rustls_pemfile::private_key(&mut reader)?;
+
+    key.ok_or(anyhow::anyhow!(
+        "no private key found in {}",
+        filename.as_str(),
+    ))
+}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -272,15 +272,16 @@ pub struct TenantConfigToml {
    /// size exceeds `compaction_upper_limit * checkpoint_distance`.
    pub compaction_upper_limit: usize,
    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
-    /// If true, compact down L0 across all tenant timelines before doing regular compaction.
+    /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
+    /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
    pub compaction_l0_first: bool,
    /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
-    /// has an effect if `compaction_l0_first` is `true`.
+    /// has an effect if `compaction_l0_first` is true. Defaults to true.
    pub compaction_l0_semaphore: bool,
-    /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
-    /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
-    /// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
-    /// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
+    /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
+    /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
+    /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
+    /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
    pub l0_flush_delay_threshold: Option<usize>,
    /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
    /// to avoid deadlock. 0 to disable. Disabled by default.
@@ -288,6 +289,8 @@ pub struct TenantConfigToml {
    /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
    /// layer. This is a temporary backpressure mechanism which should be removed once
    /// l0_flush_{delay,stall}_threshold is fully enabled.
+    ///
+    /// TODO: this is no longer enabled, remove it when the config option is no longer set.
    pub l0_flush_wait_upload: bool,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
@@ -567,13 +570,15 @@ pub mod tenant_conf_defaults {
    // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
    // with this config, we can get a maximum peak compaction usage of 9 GB.
    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
-    pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
+    // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
+    // read amp.
+    pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
    pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;

    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
        crate::models::CompactionAlgorithm::Legacy;

-    pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
+    pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;

    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;

@@ -584,9 +589,8 @@ pub mod tenant_conf_defaults {
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
-    // layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
-    // want to enable this feature.
-    pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
+    // layer creation will end immediately. Set to 0 to disable.
+    pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -176,6 +176,39 @@ impl LsnLease {
    }
 }

+/// Controls the detach ancestor behavior.
+/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
+/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
+#[derive(Debug, Clone, Copy, Default)]
+pub enum DetachBehavior {
+    #[default]
+    NoAncestorAndReparent,
+    MultiLevelAndNoReparent,
+}
+
+impl std::str::FromStr for DetachBehavior {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
+            "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
+            "v1" => Ok(DetachBehavior::NoAncestorAndReparent),
+            "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
+            _ => Err("cannot parse detach behavior"),
+        }
+    }
+}
+
+impl std::fmt::Display for DetachBehavior {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"),
+            DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"),
+        }
+    }
+}
+
 /// The only [`TenantState`] variants we could be `TenantState::Activating` from.
 ///
 /// XXX: We used to have more variants here, but now it's just one, which makes this rather
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -221,6 +221,11 @@ pub struct TimelineMembershipSwitchResponse {
    pub current_conf: Configuration,
 }

+#[derive(Clone, Copy, Serialize, Deserialize)]
+pub struct TimelineDeleteResult {
+    pub dir_existed: bool,
+}
+
 fn lsn_invalid() -> Lsn {
    Lsn::INVALID
 }
--- a/libs/utils/benches/benchmarks.rs
+++ b/libs/utils/benches/benchmarks.rs
@@ -49,7 +49,13 @@ pub fn bench_log_slow(c: &mut Criterion) {
        // performance too. Use a simple noop future that yields once, to avoid any scheduler fast
        // paths for a ready future.
        if enabled {
-            b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now())));
+            b.iter(|| {
+                runtime.block_on(log_slow(
+                    "ready",
+                    THRESHOLD,
+                    std::pin::pin!(tokio::task::yield_now()),
+                ))
+            });
        } else {
            b.iter(|| runtime.block_on(tokio::task::yield_now()));
        }
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -331,37 +331,90 @@ impl std::fmt::Debug for SecretString {
 ///
 /// TODO: consider upgrading this to a warning, but currently it fires too often.
 #[inline]
-pub async fn log_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
-    // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
-    // won't fit on the stack.
-    let mut f = Box::pin(f);
+pub async fn log_slow<F, O>(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O
+where
+    F: Future<Output = O>,
+{
+    monitor_slow_future(
+        threshold,
+        threshold, // period = threshold
+        f,
+        |MonitorSlowFutureCallback {
+             ready,
+             is_slow,
+             elapsed_total,
+             elapsed_since_last_callback: _,
+         }| {
+            if !is_slow {
+                return;
+            }
+            if ready {
+                info!(
+                    "slow {name} completed after {:.3}s",
+                    elapsed_total.as_secs_f64()
+                );
+            } else {
+                info!(
+                    "slow {name} still running after {:.3}s",
+                    elapsed_total.as_secs_f64()
+                );
+            }
+        },
+    )
+    .await
+}

+/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every
+/// `period` afterwards, and also unconditionally when the future completes.
+#[inline]
+pub async fn monitor_slow_future<F, O>(
+    threshold: Duration,
+    period: Duration,
+    mut fut: std::pin::Pin<&mut F>,
+    mut cb: impl FnMut(MonitorSlowFutureCallback),
+) -> O
+where
+    F: Future<Output = O>,
+{
    let started = Instant::now();
    let mut attempt = 1;
-
+    let mut last_cb = started;
    loop {
        // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
        // case where the timeout doesn't fire.
-        let deadline = started + attempt * threshold;
-        if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
-            // NB: we check if we exceeded the threshold even if the timeout never fired, because
-            // scheduling or execution delays may cause the future to succeed even if it exceeds the
-            // timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
-            // false negatives.
-            let elapsed = started.elapsed();
-            if elapsed >= threshold {
-                info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
-            }
+        let deadline = started + threshold + (attempt - 1) * period;
+        // TODO: still call the callback if the future panics? Copy how we do it for the page_service flush_in_progress counter.
+        let res = tokio::time::timeout_at(deadline, &mut fut).await;
+        let now = Instant::now();
+        let elapsed_total = now - started;
+        cb(MonitorSlowFutureCallback {
+            ready: res.is_ok(),
+            is_slow: elapsed_total >= threshold,
+            elapsed_total,
+            elapsed_since_last_callback: now - last_cb,
+        });
+        last_cb = now;
+        if let Ok(output) = res {
            return output;
        }
-
-        let elapsed = started.elapsed().as_secs_f64();
-        info!("slow {name} still running after {elapsed:.3}s",);
-
        attempt += 1;
    }
 }

+/// See [`monitor_slow_future`].
+pub struct MonitorSlowFutureCallback {
+    /// Whether the future completed. If true, there will be no more callbacks.
+    pub ready: bool,
+    /// Whether the future is taking `>=` the specified threshold duration to complete.
+    /// Monotonic: if true in one callback invocation, true in all subsequent ones.
+    pub is_slow: bool,
+    /// The time elapsed since the [`monitor_slow_future`] was first polled.
+    pub elapsed_total: Duration,
+    /// The time elapsed since the last callback invocation.
+    /// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled.
+    pub elapsed_since_last_callback: Duration,
+}
+
 #[cfg(test)]
 mod tests {
    use metrics::IntCounterVec;
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -48,8 +48,6 @@ pprof.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
-rustls-pemfile.workspace = true
-rustls-pki-types.workspace = true
 rustls.workspace = true
 scopeguard.workspace = true
 send-future.workspace = true
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
 use pageserver_api::models::*;
 use pageserver_api::shard::TenantShardId;
 pub use reqwest::Body as ReqwestBody;
-use reqwest::{Certificate, IntoUrl, Method, StatusCode};
+use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

@@ -458,13 +458,21 @@ impl Client {
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
+        behavior: Option<DetachBehavior>,
    ) -> Result<AncestorDetached> {
        let uri = format!(
            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor",
            self.mgmt_api_endpoint
        );
+        let mut uri = Url::parse(&uri)
+            .map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")))?;

-        self.request(Method::PUT, &uri, ())
+        if let Some(behavior) = behavior {
+            uri.query_pairs_mut()
+                .append_pair("detach_behavior", &behavior.to_string());
+        }
+
+        self.request(Method::PUT, uri, ())
            .await?
            .json()
            .await
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -30,7 +30,6 @@ use pageserver::{
 };
 use postgres_backend::AuthType;
 use remote_storage::GenericRemoteStorage;
-use rustls_pki_types::{CertificateDer, PrivateKeyDer};
 use tokio::signal::unix::SignalKind;
 use tokio::time::Instant;
 use tokio_util::sync::CancellationToken;
@@ -622,8 +621,8 @@ fn start_pageserver(

        let https_task = match https_listener {
            Some(https_listener) => {
-                let certs = load_certs(&conf.ssl_cert_file)?;
-                let key = load_private_key(&conf.ssl_key_file)?;
+                let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
+                let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;

                let server_config = rustls::ServerConfig::builder()
                    .with_no_client_auth()
@@ -735,25 +734,6 @@ fn start_pageserver(
    })
 }

-fn load_certs(filename: &Utf8Path) -> std::io::Result<Vec<CertificateDer<'static>>> {
-    let file = std::fs::File::open(filename)?;
-    let mut reader = std::io::BufReader::new(file);
-
-    rustls_pemfile::certs(&mut reader).collect()
-}
-
-fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
-    let file = std::fs::File::open(filename)?;
-    let mut reader = std::io::BufReader::new(file);
-
-    let key = rustls_pemfile::private_key(&mut reader)?;
-
-    key.ok_or(anyhow::anyhow!(
-        "no private key found in {}",
-        filename.as_str(),
-    ))
-}
-
 async fn create_remote_storage_client(
    conf: &'static PageServerConf,
 ) -> anyhow::Result<GenericRemoteStorage> {
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -28,9 +28,9 @@ use hyper::{Body, Request, Response, StatusCode, Uri, header};
 use metrics::launch_timestamp::LaunchTimestamp;
 use pageserver_api::models::virtual_file::IoMode;
 use pageserver_api::models::{
-    DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest,
-    LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest,
-    OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
+    DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest,
+    ListAuxFilesRequest, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
+    LsnLeaseRequest, OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
    TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,
    TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,
    TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,
@@ -72,7 +72,6 @@ use crate::tenant::remote_timeline_client::{
 use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
-use crate::tenant::timeline::detach_ancestor::DetachBehavior;
 use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
 use crate::tenant::timeline::{
    CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
@@ -2392,6 +2391,7 @@ async fn timeline_checkpoint_handler(
    let state = get_state(&request);

    let mut flags = EnumSet::empty();
+    flags |= CompactFlags::NoYield; // run compaction to completion
    if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
        flags |= CompactFlags::ForceL0Compaction;
    }
@@ -2507,6 +2507,7 @@ async fn timeline_detach_ancestor_handler(
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    let behavior: Option<DetachBehavior> = parse_query_param(&request, "detach_behavior")?;
+
    let behavior = behavior.unwrap_or_default();

    let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -465,12 +465,40 @@ pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
 pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wait_lsn_seconds",
-        "Time spent waiting for WAL to arrive",
+        "Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.",
        CRITICAL_OP_BUCKETS.into(),
    )
    .expect("failed to define a metric")
 });

+pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "pageserver_wait_lsn_started_count",
+        "Number of wait_lsn operations started.",
+        "pageserver_wait_lsn_finished_count",
+        "Number of wait_lsn operations finished.",
+        &["tenant_id", "shard_id", "timeline_id"],
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_wait_lsn_in_progress_micros",
+        "Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.",
+        &["tenant_id", "shard_id", "timeline_id"],
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_wait_lsn_in_progress_micros_global",
+        "Time spent waiting for WAL to arrive, globally. Updated periodically while waiting."
+    )
+    .expect("failed to define a metric")
+});
+
 static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
    register_gauge_vec!(
        "pageserver_flush_wait_upload_seconds",
@@ -2830,7 +2858,6 @@ impl StorageTimeMetrics {
    }
 }

-#[derive(Debug)]
 pub(crate) struct TimelineMetrics {
    tenant_id: String,
    shard_id: String,
@@ -2863,6 +2890,8 @@ pub(crate) struct TimelineMetrics {
    pub valid_lsn_lease_count_gauge: UIntGauge,
    pub wal_records_received: IntCounter,
    pub storage_io_size: StorageIoSizeMetrics,
+    pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,
+    pub wait_lsn_start_finish_counterpair: IntCounterPair,
    shutdown: std::sync::atomic::AtomicBool,
 }

@@ -3000,6 +3029,17 @@ impl TimelineMetrics {

        let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);

+        let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {
+            global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),
+            per_tenant: WAIT_LSN_IN_PROGRESS_MICROS
+                .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
+                .unwrap(),
+        };
+
+        let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
+            .unwrap();
+
        TimelineMetrics {
            tenant_id,
            shard_id,
@@ -3032,6 +3072,8 @@ impl TimelineMetrics {
            storage_io_size,
            valid_lsn_lease_count_gauge,
            wal_records_received,
+            wait_lsn_in_progress_micros,
+            wait_lsn_start_finish_counterpair,
            shutdown: std::sync::atomic::AtomicBool::default(),
        }
    }
@@ -3224,6 +3266,15 @@ impl TimelineMetrics {
            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
        }

+        let _ =
+            WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
+
+        {
+            let mut res = [Ok(()), Ok(())];
+            WAIT_LSN_START_FINISH_COUNTERPAIR
+                .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);
+        }
+
        let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
            SmgrQueryType::GetPageAtLsn.into(),
            tenant_id,
@@ -3836,27 +3887,29 @@ pub mod tokio_epoll_uring {
    });
 }

+pub(crate) struct GlobalAndPerTenantIntCounter {
+    global: IntCounter,
+    per_tenant: IntCounter,
+}
+
+impl GlobalAndPerTenantIntCounter {
+    #[inline(always)]
+    pub(crate) fn inc(&self) {
+        self.inc_by(1)
+    }
+    #[inline(always)]
+    pub(crate) fn inc_by(&self, n: u64) {
+        self.global.inc_by(n);
+        self.per_tenant.inc_by(n);
+    }
+}
+
 pub(crate) mod tenant_throttling {
-    use metrics::{IntCounter, register_int_counter_vec};
+    use metrics::register_int_counter_vec;
    use once_cell::sync::Lazy;
    use utils::shard::TenantShardId;

-    pub(crate) struct GlobalAndPerTenantIntCounter {
-        global: IntCounter,
-        per_tenant: IntCounter,
-    }
-
-    impl GlobalAndPerTenantIntCounter {
-        #[inline(always)]
-        pub(crate) fn inc(&self) {
-            self.inc_by(1)
-        }
-        #[inline(always)]
-        pub(crate) fn inc_by(&self, n: u64) {
-            self.global.inc_by(n);
-            self.per_tenant.inc_by(n);
-        }
-    }
+    use super::GlobalAndPerTenantIntCounter;

    pub(crate) struct Metrics<const KIND: usize> {
        pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
@@ -4102,6 +4155,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
        &CIRCUIT_BREAKERS_BROKEN,
        &CIRCUIT_BREAKERS_UNBROKEN,
        &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
+        &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,
    ]
    .into_iter()
    .for_each(|c| {
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -1106,12 +1106,19 @@ impl PageServerHandler {
        };

        // Dispatch the batch to the appropriate request handler.
-        let (mut handler_results, span) = log_slow(
-            batch.as_static_str(),
-            LOG_SLOW_GETPAGE_THRESHOLD,
-            self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx),
-        )
-        .await?;
+        let log_slow_name = batch.as_static_str();
+        let (mut handler_results, span) = {
+            // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
+            // won't fit on the stack.
+            let mut boxpinned =
+                Box::pin(self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx));
+            log_slow(
+                log_slow_name,
+                LOG_SLOW_GETPAGE_THRESHOLD,
+                boxpinned.as_mut(),
+            )
+            .await?
+        };

        // We purposefully don't count flush time into the smgr operation timer.
        //
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -6559,7 +6559,11 @@ mod tests {

        tline.freeze_and_flush().await?;
        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+            .compact(
+                &CancellationToken::new(),
+                CompactFlags::NoYield.into(),
+                &ctx,
+            )
            .await?;

        let mut writer = tline.writer().await;
@@ -6576,7 +6580,11 @@ mod tests {

        tline.freeze_and_flush().await?;
        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+            .compact(
+                &CancellationToken::new(),
+                CompactFlags::NoYield.into(),
+                &ctx,
+            )
            .await?;

        let mut writer = tline.writer().await;
@@ -6593,7 +6601,11 @@ mod tests {

        tline.freeze_and_flush().await?;
        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+            .compact(
+                &CancellationToken::new(),
+                CompactFlags::NoYield.into(),
+                &ctx,
+            )
            .await?;

        let mut writer = tline.writer().await;
@@ -6610,7 +6622,11 @@ mod tests {

        tline.freeze_and_flush().await?;
        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+            .compact(
+                &CancellationToken::new(),
+                CompactFlags::NoYield.into(),
+                &ctx,
+            )
            .await?;

        assert_eq!(
@@ -6693,7 +6709,9 @@ mod tests {
            timeline.freeze_and_flush().await?;
            if compact {
                // this requires timeline to be &Arc<Timeline>
-                timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
+                timeline
+                    .compact(&cancel, CompactFlags::NoYield.into(), ctx)
+                    .await?;
            }

            // this doesn't really need to use the timeline_id target, but it is closer to what it
@@ -7020,6 +7038,7 @@ mod tests {
        child_timeline.freeze_and_flush().await?;
        let mut flags = EnumSet::new();
        flags.insert(CompactFlags::ForceRepartition);
+        flags.insert(CompactFlags::NoYield);
        child_timeline
            .compact(&CancellationToken::new(), flags, &ctx)
            .await?;
@@ -7398,7 +7417,9 @@ mod tests {

            // Perform a cycle of flush, compact, and GC
            tline.freeze_and_flush().await?;
-            tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
+            tline
+                .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
+                .await?;
            tenant
                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
                .await?;
@@ -7727,6 +7748,7 @@ mod tests {
                            let mut flags = EnumSet::new();
                            flags.insert(CompactFlags::ForceImageLayerCreation);
                            flags.insert(CompactFlags::ForceRepartition);
+                            flags.insert(CompactFlags::NoYield);
                            flags
                        } else {
                            EnumSet::empty()
@@ -7777,7 +7799,9 @@ mod tests {
        let before_num_l0_delta_files =
            tline.layers.read().await.layer_map()?.level0_deltas().len();

-        tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
+        tline
+            .compact(&cancel, CompactFlags::NoYield.into(), &ctx)
+            .await?;

        let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();

@@ -7893,7 +7917,6 @@ mod tests {
            Ok((res, reconstruct_state.get_delta_layers_visited() as usize))
        }

-        #[allow(clippy::needless_range_loop)]
        for blknum in 0..NUM_KEYS {
            lsn = Lsn(lsn.0 + 0x10);
            test_key.field6 = (blknum * STEP) as u32;
@@ -7943,6 +7966,7 @@ mod tests {
                            let mut flags = EnumSet::new();
                            flags.insert(CompactFlags::ForceImageLayerCreation);
                            flags.insert(CompactFlags::ForceRepartition);
+                            flags.insert(CompactFlags::NoYield);
                            flags
                        },
                        &ctx,
@@ -8405,6 +8429,7 @@ mod tests {
                    let mut flags = EnumSet::new();
                    flags.insert(CompactFlags::ForceImageLayerCreation);
                    flags.insert(CompactFlags::ForceRepartition);
+                    flags.insert(CompactFlags::NoYield);
                    flags
                },
                &ctx,
@@ -8472,6 +8497,7 @@ mod tests {
                    let mut flags = EnumSet::new();
                    flags.insert(CompactFlags::ForceImageLayerCreation);
                    flags.insert(CompactFlags::ForceRepartition);
+                    flags.insert(CompactFlags::NoYield);
                    flags
                },
                &ctx,
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -14,7 +14,7 @@ use futures::StreamExt;
 use itertools::Itertools;
 use once_cell::sync::Lazy;
 use pageserver_api::key::Key;
-use pageserver_api::models::LocationConfigMode;
+use pageserver_api::models::{DetachBehavior, LocationConfigMode};
 use pageserver_api::shard::{
    ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
 };
@@ -1914,7 +1914,7 @@ impl TenantManager {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        prepared: PreparedTimelineDetach,
-        behavior: detach_ancestor::DetachBehavior,
+        behavior: DetachBehavior,
        mut attempt: detach_ancestor::Attempt,
        ctx: &RequestContext,
    ) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -45,8 +45,9 @@ use pageserver_api::key::{
 use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};
 use pageserver_api::models::{
    CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
-    DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
-    InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, TimelineState,
+    DetachBehavior, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
+    EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration,
+    TimelineState,
 };
 use pageserver_api::reltag::{BlockNumber, RelTag};
 use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};
@@ -67,6 +68,7 @@ use tracing::*;
 use utils::generation::Generation;
 use utils::guard_arc_swap::GuardArcSwap;
 use utils::id::TimelineId;
+use utils::logging::{MonitorSlowFutureCallback, monitor_slow_future};
 use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
 use utils::postgres_client::PostgresClientProtocol;
 use utils::rate_limit::RateLimit;
@@ -439,6 +441,8 @@ pub struct Timeline {
    heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,

    pub(crate) rel_size_v2_status: ArcSwapOption<RelSizeMigration>,
+
+    wait_lsn_log_slow: tokio::sync::Semaphore,
 }

 pub(crate) enum PreviousHeatmap {
@@ -1479,17 +1483,67 @@ impl Timeline {
            WaitLsnTimeout::Default => self.conf.wait_lsn_timeout,
        };

-        let _timer = crate::metrics::WAIT_LSN_TIME.start_timer();
+        let timer = crate::metrics::WAIT_LSN_TIME.start_timer();
+        let start_finish_counterpair_guard = self.metrics.wait_lsn_start_finish_counterpair.guard();

-        match self.last_record_lsn.wait_for_timeout(lsn, timeout).await {
+        let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout);
+        let wait_for_timeout = std::pin::pin!(wait_for_timeout);
+        // Use a threshold of 1 second because even 1 second of wait for ingest is very much abnormal.
+        let log_slow_threshold = Duration::from_secs(1);
+        // Use a period of 10 seconds to avoid flooding logs during an outage that affects all timelines.
+        let log_slow_period = Duration::from_secs(10);
+        let mut logging_permit = None;
+        let wait_for_timeout = monitor_slow_future(
+            log_slow_threshold,
+            log_slow_period,
+            wait_for_timeout,
+            |MonitorSlowFutureCallback {
+                 ready,
+                 is_slow,
+                 elapsed_total,
+                 elapsed_since_last_callback,
+             }| {
+                self.metrics
+                    .wait_lsn_in_progress_micros
+                    .inc_by(u64::try_from(elapsed_since_last_callback.as_micros()).unwrap());
+                if !is_slow {
+                    return;
+                }
+                // It's slow, see if we should log it.
+                // (Only one wait_lsn call per timeline logs at a time, to avoid excessive
+                // logging during an extended broker / networking outage that affects all timelines.)
+                if logging_permit.is_none() {
+                    logging_permit = self.wait_lsn_log_slow.try_acquire().ok();
+                }
+                if logging_permit.is_none() {
+                    return;
+                }
+                // We log it.
+                if ready {
+                    info!(
+                        "slow wait_lsn completed after {:.3}s",
+                        elapsed_total.as_secs_f64()
+                    );
+                } else {
+                    info!(
+                        "slow wait_lsn still running for {:.3}s",
+                        elapsed_total.as_secs_f64()
+                    );
+                }
+            },
+        );
+        let res = wait_for_timeout.await;
+        // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
+        drop(logging_permit);
+        drop(start_finish_counterpair_guard);
+        drop(timer);
+        match res {
            Ok(()) => Ok(()),
            Err(e) => {
                use utils::seqwait::SeqWaitError::*;
                match e {
                    Shutdown => Err(WaitLsnError::Shutdown),
                    Timeout => {
-                        // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
-                        drop(_timer);
                        let walreceiver_status = self.walreceiver_status();
                        Err(WaitLsnError::Timeout(format!(
                            "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
@@ -2423,8 +2477,9 @@ impl Timeline {
    }

    fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
-        // Disable L0 flushes by default. This and compaction needs further tuning.
-        const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
+        // By default, delay L0 flushes at 3x the compaction threshold. The compaction threshold
+        // defaults to 10, and L0 compaction is generally able to keep L0 counts below 30.
+        const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;

        // If compaction is disabled, don't delay.
        if self.get_compaction_period() == Duration::ZERO {
@@ -2452,8 +2507,9 @@ impl Timeline {
    }

    fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
-        // Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
-        // minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
+        // Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't
+        // responsive, and it can e.g. block on other compaction via the compaction semaphore or
+        // sibling timelines. We need more confidence before enabling this.
        const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5

        // If compaction is disabled, don't stall.
@@ -2821,6 +2877,8 @@ impl Timeline {
                heatmap_layers_downloader: Mutex::new(None),

                rel_size_v2_status: ArcSwapOption::from_pointee(rel_size_v2_status),
+
+                wait_lsn_log_slow: tokio::sync::Semaphore::new(1),
            };

            result.repartition_threshold =
@@ -5388,7 +5446,7 @@ impl Timeline {
        self: &Arc<Timeline>,
        tenant: &crate::tenant::Tenant,
        options: detach_ancestor::Options,
-        behavior: detach_ancestor::DetachBehavior,
+        behavior: DetachBehavior,
        ctx: &RequestContext,
    ) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {
        detach_ancestor::prepare(self, tenant, behavior, options, ctx).await
@@ -5409,7 +5467,7 @@ impl Timeline {
        prepared: detach_ancestor::PreparedTimelineDetach,
        ancestor_timeline_id: TimelineId,
        ancestor_lsn: Lsn,
-        behavior: detach_ancestor::DetachBehavior,
+        behavior: DetachBehavior,
        ctx: &RequestContext,
    ) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {
        detach_ancestor::detach_and_reparent(
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -3189,7 +3189,11 @@ impl Timeline {
        }

        // TODO: move the below part to the loop body
-        let last_key = last_key.expect("no keys produced during compaction");
+        let Some(last_key) = last_key else {
+            return Err(CompactionError::Other(anyhow!(
+                "no keys produced during compaction"
+            )));
+        };
        stat.on_unique_key_visited();

        let retention = self
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;

 use anyhow::Context;
 use http_utils::error::ApiError;
+use pageserver_api::models::DetachBehavior;
 use pageserver_api::models::detach_ancestor::AncestorDetached;
 use pageserver_api::shard::ShardIdentity;
 use tokio::sync::Semaphore;
@@ -139,30 +140,6 @@ pub(crate) struct Options {
    pub(crate) copy_concurrency: std::num::NonZeroUsize,
 }

-/// Controls the detach ancestor behavior.
-/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
-/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
-#[derive(Debug, Clone, Copy, Default)]
-pub enum DetachBehavior {
-    #[default]
-    NoAncestorAndReparent,
-    MultiLevelAndNoReparent,
-}
-
-impl std::str::FromStr for DetachBehavior {
-    type Err = &'static str;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
-            "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
-            "v1" => Ok(DetachBehavior::NoAncestorAndReparent),
-            "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
-            _ => Err("cannot parse detach behavior"),
-        }
-    }
-}
-
 impl Default for Options {
    fn default() -> Self {
        Self {
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -2898,6 +2898,11 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 						relpath(reln->smgr_rlocator, forkNum),
 						InvalidBlockNumber)));

+#ifdef DEBUG_COMPARE_LOCAL
+	if (IS_LOCAL_REL(reln))
+		mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
+#endif
+
 	/* Don't log any pages if we're not allowed to do so. */
 	if (!XLogInsertAllowed())
 		return;
@@ -4171,8 +4176,10 @@ neon_start_unlogged_build(SMgrRelation reln)
 	 * FIXME: should we pass isRedo true to create the tablespace dir if it
 	 * doesn't exist? Is it needed?
 	 */
-	if (!IsParallelWorker())
+#ifndef DEBUG_COMPARE_LOCAL
+ 	if (!IsParallelWorker())
 		mdcreate(reln, MAIN_FORKNUM, false);
+#endif
 }

 /*
@@ -4247,8 +4254,10 @@ neon_end_unlogged_build(SMgrRelation reln)

 			forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
 			mdclose(reln, forknum);
+#ifndef DEBUG_COMPARE_LOCAL
 			/* use isRedo == true, so that we drop it immediately */
 			mdunlink(rinfob, forknum, true);
+#endif
 		}
 	}

--- a/poetry.lock
+++ b/poetry.lock
@@ -1491,14 +1491,38 @@ files = [

 [[package]]
 name = "jsonnet"
-version = "0.20.0"
-description = "Python bindings for Jsonnet - The data templating language"
+version = "0.21.0rc2"
+description = "Python bindings for Jsonnet - The data templating language "
 optional = false
 python-versions = "*"
 groups = ["main"]
-markers = "python_version < \"3.13\""
 files = [
-    {file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"},
+    {file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8779ac6820fee44ef736df2baedc3ae93e8cd5d672ee105015c2a47fe627a727"},
+    {file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99affe8c71e2551465064a8039bb3d1cba27a0b73b2b9ff1b652e06f17d4ea8b"},
+    {file = "jsonnet-0.21.0rc2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a9dffb9aa01013d100ddfb7230d1eeb80f2a8eef712b1825a60cad57106d8bd"},
+    {file = "jsonnet-0.21.0rc2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cca6c95f2879dcab52650b7aa09a4e82a139b084931b1f6f8c840f834fecc08a"},
+    {file = "jsonnet-0.21.0rc2-cp310-cp310-win_amd64.whl", hash = "sha256:016d6afdb302a6d00bf3bce6a0c3d9c093b992e33f9bc67c64a868035892258e"},
+    {file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e893ab2c9bf10d8ec9e9b0cee8961879c88d0619cc6d8f75ea284a78e06ae32b"},
+    {file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06b353cd3daa2781e6cd308e05f2f116396376994bcb5f59aaadbc6a752c7f2"},
+    {file = "jsonnet-0.21.0rc2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eb2bc8e62b73101329072da322f7e2a1bdb3ac530b94669128d1b480e311e55"},
+    {file = "jsonnet-0.21.0rc2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:113766fd0c25620807bcf04d4c739f461c971a4f0e4aece9ba62b4e762de9598"},
+    {file = "jsonnet-0.21.0rc2-cp311-cp311-win_amd64.whl", hash = "sha256:8dab208c2c2760be60f87d1ceb8b28c86b51ed0e31129a7d90cd5fe890b41225"},
+    {file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:95f5b9dd26a41d6f258d1baa8d22e557051beeed8c52a6202584f1becca9dcb5"},
+    {file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cecc6d76e2b377260fae0a060097c113e6ac361b8f739903ea7f3f5f64cdebdf"},
+    {file = "jsonnet-0.21.0rc2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaa2d18224af7e63872ef4a101e93962505456cf5f5439c3cfc25dad6845f8b1"},
+    {file = "jsonnet-0.21.0rc2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2a9063f811554487ed552445e964aeec969cafb266b965029c8d6b091ce47950"},
+    {file = "jsonnet-0.21.0rc2-cp312-cp312-win_amd64.whl", hash = "sha256:80d171182c169761f744ba50068a4ad35d48e52b91d25bf4c7bb9a72f0a04f71"},
+    {file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3657938f87cb6bc6da20ca631d437b5faf469ca060a7c7def9c8fd2f25a5e06"},
+    {file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3dcebc30cb991b58bc416ee05e9387004d04716d5c0b89714ff042bd069af5c8"},
+    {file = "jsonnet-0.21.0rc2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac52c95482df3ed93c908468ca2f40d4825b6baba284b395ddc47bd663b8c3a"},
+    {file = "jsonnet-0.21.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b34450823a7a1861de892fef9f29de1b4c19e1a79e27d81ffe7e57646cc89d6"},
+    {file = "jsonnet-0.21.0rc2-cp313-cp313-win_amd64.whl", hash = "sha256:573fd2580e46f4875ec505f1732f9e804b7063cba790342ed6fdafe9a6b30556"},
+    {file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:871ca1411de3626499bda60b330d37f85a592918f99ba4809089bbb8d4f5bfe4"},
+    {file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d33b25a9c5bf9099100b9b16cb385a2876d891fbe639ee9d476fc75c861903a"},
+    {file = "jsonnet-0.21.0rc2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2bac374565c7f89a4675f19fd2b624ed1376519267f4e444f49b6fc0368f6e5"},
+    {file = "jsonnet-0.21.0rc2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fab7bbd88f9159f88a7350701a97bda24de9e3b9eef14c2501ba8b9224160d60"},
+    {file = "jsonnet-0.21.0rc2-cp39-cp39-win_amd64.whl", hash = "sha256:ed71ffba0fd233a1bca7b0f7be79730792c5383e562a9dc7da152478d9ee1612"},
+    {file = "jsonnet-0.21.0rc2.tar.gz", hash = "sha256:2b83ec4b5a771c3732e0972be23a71f042ad2940db6918d3a52aade69bc394fb"},
 ]

 [[package]]
@@ -3820,4 +3844,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "010ffce959bb256880ab5a267048c182e4612b3151f9a94e3bf5d3a7807962fe"
+content-hash = "715fc8c896dcfa1b15054deeddcdec557ef93af91b26e1c8e4688fe4dbef5296"
--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -5,6 +5,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, bail, ensure};
+use arc_swap::ArcSwapOption;
 use camino::{Utf8Path, Utf8PathBuf};
 use clap::Parser;
 use compute_api::spec::LocalProxySpec;
@@ -27,6 +28,7 @@ use crate::config::{
 };
 use crate::control_plane::locks::ApiLocks;
 use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings};
+use crate::ext::TaskExt;
 use crate::http::health_server::AppMetrics;
 use crate::intern::RoleNameInt;
 use crate::metrics::{Metrics, ThreadPoolMetrics};
@@ -190,7 +192,11 @@ pub async fn run() -> anyhow::Result<()> {
    // 2. The config file is written but the signal hook is not yet received
    // 3. local_proxy completes startup but has no config loaded, despite there being a registerd config.
    refresh_config_notify.notify_one();
-    tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
+    tokio::spawn(refresh_config_loop(
+        config,
+        args.config_path,
+        refresh_config_notify,
+    ));

    maintenance_tasks.spawn(crate::http::health_server::task_main(
        metrics_listener,
@@ -269,7 +275,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
    };

    Ok(Box::leak(Box::new(ProxyConfig {
-        tls_config: None,
+        tls_config: ArcSwapOption::from(None),
        metric_collection: None,
        http_config,
        authentication_config: AuthenticationConfig {
@@ -311,14 +317,16 @@ enum RefreshConfigError {
    Parse(#[from] serde_json::Error),
    #[error(transparent)]
    Validate(anyhow::Error),
+    #[error(transparent)]
+    Tls(anyhow::Error),
 }

-async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
+async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<Notify>) {
    let mut init = true;
    loop {
        rx.notified().await;

-        match refresh_config_inner(&path).await {
+        match refresh_config_inner(config, &path).await {
            Ok(()) => {}
            // don't log for file not found errors if this is the first time we are checking
            // for computes that don't use local_proxy, this is not an error.
@@ -327,6 +335,9 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
            {
                debug!(error=?e, ?path, "could not read config file");
            }
+            Err(RefreshConfigError::Tls(e)) => {
+                error!(error=?e, ?path, "could not read TLS certificates");
+            }
            Err(e) => {
                error!(error=?e, ?path, "could not read config file");
            }
@@ -336,7 +347,10 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
    }
 }

-async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> {
+async fn refresh_config_inner(
+    config: &ProxyConfig,
+    path: &Utf8Path,
+) -> Result<(), RefreshConfigError> {
    let bytes = tokio::fs::read(&path).await?;
    let data: LocalProxySpec = serde_json::from_slice(&bytes)?;

@@ -406,5 +420,20 @@ async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError>
    info!("successfully loaded new config");
    JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));

+    if let Some(tls_config) = data.tls {
+        let tls_config = tokio::task::spawn_blocking(move || {
+            crate::tls::server_config::configure_tls(
+                &tls_config.key_path,
+                &tls_config.cert_path,
+                None,
+                false,
+            )
+        })
+        .await
+        .propagate_task_panic()
+        .map_err(RefreshConfigError::Tls)?;
+        config.tls_config.store(Some(Arc::new(tls_config)));
+    }
+
    Ok(())
 }
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -4,6 +4,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::bail;
+use arc_swap::ArcSwapOption;
 use futures::future::Either;
 use remote_storage::RemoteStorageConfig;
 use tokio::net::TcpListener;
@@ -563,6 +564,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        (None, None) => None,
        _ => bail!("either both or neither tls-key and tls-cert must be specified"),
    };
+    let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));

    let backup_metric_collection_config = config::MetricBackupCollectionConfig {
        remote_storage_config: args.metric_backup_collection_remote_storage.clone(),
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, Ok, bail, ensure};
+use arc_swap::ArcSwapOption;
 use clap::ValueEnum;
 use remote_storage::RemoteStorageConfig;

@@ -17,7 +18,7 @@ pub use crate::tls::server_config::{TlsConfig, configure_tls};
 use crate::types::Host;

 pub struct ProxyConfig {
-    pub tls_config: Option<TlsConfig>,
+    pub tls_config: ArcSwapOption<TlsConfig>,
    pub metric_collection: Option<MetricCollectionConfig>,
    pub http_config: HttpConfig,
    pub authentication_config: AuthenticationConfig,
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -177,7 +177,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let proto = ctx.protocol();
    let request_gauge = metrics.connection_requests.guard(proto);

-    let tls = config.tls_config.as_ref();
+    let tls = config.tls_config.load();
+    let tls = tls.as_deref();

    let record_handshake_error = !ctx.has_private_peer_addr();
    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
--- a/proxy/src/metrics.rs
+++ b/proxy/src/metrics.rs
@@ -30,7 +30,16 @@ pub struct Metrics {
 static SELF: OnceLock<Metrics> = OnceLock::new();
 impl Metrics {
    pub fn install(thread_pool: Arc<ThreadPoolMetrics>) {
-        SELF.set(Metrics::new(thread_pool))
+        let mut metrics = Metrics::new(thread_pool);
+
+        metrics.proxy.errors_total.init_all_dense();
+        metrics.proxy.redis_errors_total.init_all_dense();
+        metrics.proxy.redis_events_count.init_all_dense();
+        metrics.proxy.retries_metric.init_all_dense();
+        metrics.proxy.invalid_endpoints_total.init_all_dense();
+        metrics.proxy.connection_failures_total.init_all_dense();
+
+        SELF.set(metrics)
            .ok()
            .expect("proxy metrics must not be installed more than once");
    }
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -114,7 +114,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(

                        let mut read_buf = read_buf.reader();
                        let mut res = Ok(());
-                        let accept = tokio_rustls::TlsAcceptor::from(tls.to_server_config())
+                        let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())
                            .accept_with(raw, |session| {
                                // push the early data to the tls session
                                while !read_buf.get_ref().is_empty() {
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -278,7 +278,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let proto = ctx.protocol();
    let request_gauge = metrics.connection_requests.guard(proto);

-    let tls = config.tls_config.as_ref();
+    let tls = config.tls_config.load();
+    let tls = tls.as_deref();

    let record_handshake_error = !ctx.has_private_peer_addr();
    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -10,7 +10,7 @@ use crate::config::ComputeConfig;
 use crate::control_plane::messages::MetricsAuxInfo;
 use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
 use crate::stream::Stream;
-use crate::usage_metrics::{Ids, MetricCounterRecorder, TrafficDirection, USAGE_METRICS};
+use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};

 /// Forward bytes in both directions (client <-> compute).
 #[tracing::instrument(skip_all)]
@@ -24,7 +24,6 @@ pub(crate) async fn proxy_pass(
    let usage_tx = USAGE_METRICS.register(Ids {
        endpoint_id: aux.endpoint_id,
        branch_id: aux.branch_id,
-        direction: TrafficDirection::Egress,
        private_link_id,
    });

@@ -47,6 +46,7 @@ pub(crate) async fn proxy_pass(
        |cnt| {
            // Number of bytes the client sent to the compute node (inbound).
            metrics.get_metric(m_recv).inc_by(cnt as u64);
+            usage_tx.record_ingress(cnt as u64);
        },
    );

--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -96,16 +96,18 @@ fn generate_tls_config<'a>(
                .with_safe_default_protocol_versions()
                .context("ring should support the default protocol versions")?
                .with_no_client_auth()
-                .with_single_cert(vec![cert.clone()], key.clone_key())?
-                .into();
+                .with_single_cert(vec![cert.clone()], key.clone_key())?;

        let mut cert_resolver = CertResolver::new();
        cert_resolver.add_cert(key, vec![cert], true)?;

        let common_names = cert_resolver.get_common_names();

+        let config = Arc::new(config);
+
        TlsConfig {
-            config,
+            http_config: config.clone(),
+            pg_config: config,
            common_names,
            cert_resolver: Arc::new(cert_resolver),
        }
--- a/proxy/src/serverless/conn_pool_lib.rs
+++ b/proxy/src/serverless/conn_pool_lib.rs
@@ -22,7 +22,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
 use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
 use crate::protocol2::ConnectionInfoExtra;
 use crate::types::{DbName, EndpointCacheKey, RoleName};
-use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
+use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};

 #[derive(Debug, Clone)]
 pub(crate) struct ConnInfo {
@@ -639,11 +639,7 @@ impl<C: ClientInnerExt> Client<C> {
        (&mut inner.inner, Discard { conn_info, pool })
    }

-    pub(crate) fn metrics(
-        &self,
-        direction: TrafficDirection,
-        ctx: &RequestContext,
-    ) -> Arc<MetricCounter> {
+    pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
        let aux = &self
            .inner
            .as_ref()
@@ -659,7 +655,6 @@ impl<C: ClientInnerExt> Client<C> {
        USAGE_METRICS.register(Ids {
            endpoint_id: aux.endpoint_id,
            branch_id: aux.branch_id,
-            direction,
            private_link_id,
        })
    }
--- a/proxy/src/serverless/http_conn_pool.rs
+++ b/proxy/src/serverless/http_conn_pool.rs
@@ -19,7 +19,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
 use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
 use crate::protocol2::ConnectionInfoExtra;
 use crate::types::EndpointCacheKey;
-use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
+use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};

 pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>;
 pub(crate) type Connect = http2::Connection<TokioIo<AsyncRW>, hyper::body::Incoming, TokioExecutor>;
@@ -265,11 +265,7 @@ impl<C: ClientInnerExt + Clone> Client<C> {
        Self { inner }
    }

-    pub(crate) fn metrics(
-        &self,
-        direction: TrafficDirection,
-        ctx: &RequestContext,
-    ) -> Arc<MetricCounter> {
+    pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
        let aux = &self.inner.aux;

        let private_link_id = match ctx.extra() {
@@ -281,7 +277,6 @@ impl<C: ClientInnerExt + Clone> Client<C> {
        USAGE_METRICS.register(Ids {
            endpoint_id: aux.endpoint_id,
            branch_id: aux.branch_id,
-            direction,
            private_link_id,
        })
    }
--- a/proxy/src/serverless/mod.rs
+++ b/proxy/src/serverless/mod.rs
@@ -19,6 +19,7 @@ use std::pin::{Pin, pin};
 use std::sync::Arc;

 use anyhow::Context;
+use arc_swap::ArcSwapOption;
 use async_trait::async_trait;
 use atomic_take::AtomicTake;
 use bytes::Bytes;
@@ -117,18 +118,7 @@ pub async fn task_main(
        auth_backend,
        endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
    });
-    let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = match config.tls_config.as_ref() {
-        Some(config) => {
-            let mut tls_server_config = rustls::ServerConfig::clone(&config.to_server_config());
-            // prefer http2, but support http/1.1
-            tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
-            Arc::new(tls_server_config)
-        }
-        None => {
-            warn!("TLS config is missing");
-            Arc::new(NoTls)
-        }
-    };
+    let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = Arc::new(&config.tls_config);

    let connections = tokio_util::task::task_tracker::TaskTracker::new();
    connections.close(); // allows `connections.wait to complete`
@@ -216,22 +206,20 @@ pub(crate) type AsyncRW = Pin<Box<dyn AsyncReadWrite>>;

 #[async_trait]
 trait MaybeTlsAcceptor: Send + Sync + 'static {
-    async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
+    async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
 }

 #[async_trait]
-impl MaybeTlsAcceptor for rustls::ServerConfig {
-    async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
-        Ok(Box::pin(TlsAcceptor::from(self).accept(conn).await?))
-    }
-}
-
-struct NoTls;
-
-#[async_trait]
-impl MaybeTlsAcceptor for NoTls {
-    async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
-        Ok(Box::pin(conn))
+impl MaybeTlsAcceptor for &'static ArcSwapOption<crate::config::TlsConfig> {
+    async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
+        match &*self.load() {
+            Some(config) => Ok(Box::pin(
+                TlsAcceptor::from(config.http_config.clone())
+                    .accept(conn)
+                    .await?,
+            )),
+            None => Ok(Box::pin(conn)),
+        }
    }
 }

--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -42,7 +42,7 @@ use crate::metrics::{HttpDirection, Metrics};
 use crate::proxy::{NeonOptions, run_until_cancelled};
 use crate::serverless::backend::HttpConnError;
 use crate::types::{DbName, RoleName};
-use crate::usage_metrics::{MetricCounter, MetricCounterRecorder, TrafficDirection};
+use crate::usage_metrics::{MetricCounter, MetricCounterRecorder};

 #[derive(serde::Deserialize)]
 #[serde(rename_all = "camelCase")]
@@ -614,7 +614,9 @@ async fn handle_inner(
        &config.authentication_config,
        ctx,
        request.headers(),
-        config.tls_config.as_ref(),
+        // todo: race condition?
+        // we're unlikely to change the common names.
+        config.tls_config.load().as_deref(),
    )?;
    info!(
        user = conn_info.conn_info.user_info.user.as_str(),
@@ -661,6 +663,7 @@ async fn handle_db_inner(

    let parsed_headers = HttpHeaders::try_parse(headers)?;

+    let mut request_len = 0;
    let fetch_and_process_request = Box::pin(
        async {
            let body = read_body_with_limit(
@@ -669,6 +672,8 @@ async fn handle_db_inner(
            )
            .await?;

+            request_len = body.len();
+
            Metrics::get()
                .proxy
                .http_conn_content_length_bytes
@@ -763,7 +768,7 @@ async fn handle_db_inner(
        }
    };

-    let metrics = client.metrics(TrafficDirection::Egress, ctx);
+    let metrics = client.metrics(ctx);

    let len = json_output.len();
    let response = response
@@ -779,6 +784,8 @@ async fn handle_db_inner(
    // count the egress bytes - we miss the TLS and header overhead but oh well...
    // moving this later in the stack is going to be a lot of effort and ehhhh
    metrics.record_egress(len as u64);
+    metrics.record_ingress(request_len as u64);
+
    Metrics::get()
        .proxy
        .http_conn_content_length_bytes
@@ -836,7 +843,7 @@ async fn handle_auth_broker_inner(
        .expect("all headers and params received via hyper should be valid for request");

    // todo: map body to count egress
-    let _metrics = client.metrics(TrafficDirection::Egress, ctx);
+    let _metrics = client.metrics(ctx);

    Ok(client
        .inner
@@ -1166,10 +1173,10 @@ enum Discard<'a> {
 }

 impl Client {
-    fn metrics(&self, direction: TrafficDirection, ctx: &RequestContext) -> Arc<MetricCounter> {
+    fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
        match self {
-            Client::Remote(client) => client.metrics(direction, ctx),
-            Client::Local(local_client) => local_client.metrics(direction, ctx),
+            Client::Remote(client) => client.metrics(ctx),
+            Client::Local(local_client) => local_client.metrics(ctx),
        }
    }

--- a/proxy/src/tls/server_config.rs
+++ b/proxy/src/tls/server_config.rs
@@ -9,17 +9,14 @@ use rustls::pki_types::{CertificateDer, PrivateKeyDer};
 use super::{PG_ALPN_PROTOCOL, TlsServerEndPoint};

 pub struct TlsConfig {
-    pub config: Arc<rustls::ServerConfig>,
+    // unfortunate split since we cannot change the ALPN on demand.
+    // <https://github.com/rustls/rustls/issues/2260>
+    pub http_config: Arc<rustls::ServerConfig>,
+    pub pg_config: Arc<rustls::ServerConfig>,
    pub common_names: HashSet<String>,
    pub cert_resolver: Arc<CertResolver>,
 }

-impl TlsConfig {
-    pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
-        self.config.clone()
-    }
-}
-
 /// Configure TLS for the main endpoint.
 pub fn configure_tls(
    key_path: &str,
@@ -71,8 +68,15 @@ pub fn configure_tls(
        config.key_log = Arc::new(rustls::KeyLogFile::new());
    }

+    let mut http_config = config.clone();
+    let mut pg_config = config;
+
+    http_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
+    pg_config.alpn_protocols = vec![b"postgresql".to_vec()];
+
    Ok(TlsConfig {
-        config: Arc::new(config),
+        http_config: Arc::new(http_config),
+        pg_config: Arc::new(pg_config),
        common_names,
        cert_resolver,
    })
--- a/proxy/src/usage_metrics.rs
+++ b/proxy/src/usage_metrics.rs
@@ -44,11 +44,17 @@ const HTTP_REPORTING_RETRY_DURATION: Duration = Duration::from_secs(60);
 pub(crate) struct Ids {
    pub(crate) endpoint_id: EndpointIdInt,
    pub(crate) branch_id: BranchIdInt,
-    pub(crate) direction: TrafficDirection,
    #[serde(with = "none_as_empty_string")]
    pub(crate) private_link_id: Option<SmolStr>,
 }

+#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)]
+struct Extra {
+    #[serde(flatten)]
+    ids: Ids,
+    direction: TrafficDirection,
+}
+
 mod none_as_empty_string {
    use serde::Deserialize;
    use smol_str::SmolStr;
@@ -76,18 +82,23 @@ pub(crate) enum TrafficDirection {
 pub(crate) trait MetricCounterRecorder {
    /// Record that some bytes were sent from the proxy to the client
    fn record_egress(&self, bytes: u64);
+
+    /// Record that some bytes were sent from the client to the proxy
+    fn record_ingress(&self, bytes: u64);
+
    /// Record that some connections were opened
    fn record_connection(&self, count: usize);
 }

 trait MetricCounterReporter {
-    fn get_metrics(&mut self) -> (u64, usize);
-    fn move_metrics(&self) -> (u64, usize);
+    fn get_metrics(&mut self) -> MetricsData;
+    fn move_metrics(&self) -> MetricsData;
 }

 #[derive(Debug)]
 pub(crate) struct MetricCounter {
    transmitted: AtomicU64,
+    received: AtomicU64,
    opened_connections: AtomicUsize,
 }

@@ -97,6 +108,11 @@ impl MetricCounterRecorder for MetricCounter {
        self.transmitted.fetch_add(bytes, Ordering::Relaxed);
    }

+    /// Record that some bytes were sent from the client to the proxy
+    fn record_ingress(&self, bytes: u64) {
+        self.received.fetch_add(bytes, Ordering::Relaxed);
+    }
+
    /// Record that some connections were opened
    fn record_connection(&self, count: usize) {
        self.opened_connections.fetch_add(count, Ordering::Relaxed);
@@ -104,29 +120,43 @@ impl MetricCounterRecorder for MetricCounter {
 }

 impl MetricCounterReporter for MetricCounter {
-    fn get_metrics(&mut self) -> (u64, usize) {
-        (
-            *self.transmitted.get_mut(),
-            *self.opened_connections.get_mut(),
-        )
+    fn get_metrics(&mut self) -> MetricsData {
+        MetricsData {
+            received: *self.received.get_mut(),
+            transmitted: *self.transmitted.get_mut(),
+            connections: *self.opened_connections.get_mut(),
+        }
    }
-    fn move_metrics(&self) -> (u64, usize) {
-        (
-            self.transmitted.swap(0, Ordering::Relaxed),
-            self.opened_connections.swap(0, Ordering::Relaxed),
-        )
+
+    fn move_metrics(&self) -> MetricsData {
+        MetricsData {
+            received: self.received.swap(0, Ordering::Relaxed),
+            transmitted: self.transmitted.swap(0, Ordering::Relaxed),
+            connections: self.opened_connections.swap(0, Ordering::Relaxed),
+        }
    }
 }

+struct MetricsData {
+    transmitted: u64,
+    received: u64,
+    connections: usize,
+}
+
+struct BytesSent {
+    transmitted: u64,
+    received: u64,
+}
+
 trait Clearable {
    /// extract the value that should be reported
-    fn should_report(self: &Arc<Self>) -> Option<u64>;
+    fn should_report(self: &Arc<Self>) -> Option<BytesSent>;
    /// Determine whether the counter should be cleared from the global map.
    fn should_clear(self: &mut Arc<Self>) -> bool;
 }

 impl<C: MetricCounterReporter> Clearable for C {
-    fn should_report(self: &Arc<Self>) -> Option<u64> {
+    fn should_report(self: &Arc<Self>) -> Option<BytesSent> {
        // heuristic to see if the branch is still open
        // if a clone happens while we are observing, the heuristic will be incorrect.
        //
@@ -139,14 +169,21 @@ impl<C: MetricCounterReporter> Clearable for C {
        // (to avoid sending the same metrics twice)
        // see the relevant discussion on why to do so even if the status is not success:
        // https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956
-        let (value, opened) = self.move_metrics();
+        let MetricsData {
+            transmitted,
+            received,
+            connections,
+        } = self.move_metrics();

        // Our only requirement is that we report in every interval if there was an open connection
        // if there were no opened connections since, then we don't need to report
-        if value == 0 && !is_open && opened == 0 {
+        if transmitted == 0 && received == 0 && !is_open && connections == 0 {
            None
        } else {
-            Some(value)
+            Some(BytesSent {
+                transmitted,
+                received,
+            })
        }
    }
    fn should_clear(self: &mut Arc<Self>) -> bool {
@@ -154,9 +191,13 @@ impl<C: MetricCounterReporter> Clearable for C {
        let Some(counter) = Arc::get_mut(self) else {
            return false;
        };
-        let (opened, value) = counter.get_metrics();
+        let MetricsData {
+            transmitted,
+            received,
+            connections,
+        } = counter.get_metrics();
        // clear if there's no data to report
-        value == 0 && opened == 0
+        transmitted == 0 && received == 0 && connections == 0
    }
 }

@@ -178,6 +219,7 @@ impl Metrics {
                .entry(ids)
                .or_insert_with(|| {
                    Arc::new(MetricCounter {
+                        received: AtomicU64::new(0),
                        transmitted: AtomicU64::new(0),
                        opened_connections: AtomicUsize::new(0),
                    })
@@ -242,10 +284,10 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<Infall

 fn collect_and_clear_metrics<C: Clearable>(
    endpoints: &ClashMap<Ids, Arc<C>, FastHasher>,
-) -> Vec<(Ids, u64)> {
+) -> Vec<(Ids, BytesSent)> {
    let mut metrics_to_clear = Vec::new();

-    let metrics_to_send: Vec<(Ids, u64)> = endpoints
+    let metrics_to_send: Vec<(Ids, BytesSent)> = endpoints
        .iter()
        .filter_map(|counter| {
            let key = counter.key().clone();
@@ -271,26 +313,46 @@ fn collect_and_clear_metrics<C: Clearable>(
 }

 fn create_event_chunks<'a>(
-    metrics_to_send: &'a [(Ids, u64)],
+    metrics_to_send: &'a [(Ids, BytesSent)],
    hostname: &'a str,
    prev: DateTime<Utc>,
    now: DateTime<Utc>,
    chunk_size: usize,
-) -> impl Iterator<Item = EventChunk<'a, Event<Ids, &'static str>>> + 'a {
+) -> impl Iterator<Item = EventChunk<'a, Event<Extra, &'static str>>> + 'a {
    metrics_to_send
        .chunks(chunk_size)
        .map(move |chunk| EventChunk {
            events: chunk
                .iter()
-                .map(|(ids, value)| Event {
-                    kind: EventType::Incremental {
-                        start_time: prev,
-                        stop_time: now,
-                    },
-                    metric: PROXY_IO_BYTES_PER_CLIENT,
-                    idempotency_key: idempotency_key(hostname),
-                    value: *value,
-                    extra: ids.clone(),
+                .flat_map(|(ids, bytes)| {
+                    [
+                        Event {
+                            kind: EventType::Incremental {
+                                start_time: prev,
+                                stop_time: now,
+                            },
+                            metric: PROXY_IO_BYTES_PER_CLIENT,
+                            idempotency_key: idempotency_key(hostname),
+                            value: bytes.transmitted,
+                            extra: Extra {
+                                ids: ids.clone(),
+                                direction: TrafficDirection::Egress,
+                            },
+                        },
+                        Event {
+                            kind: EventType::Incremental {
+                                start_time: prev,
+                                stop_time: now,
+                            },
+                            metric: PROXY_IO_BYTES_PER_CLIENT,
+                            idempotency_key: idempotency_key(hostname),
+                            value: bytes.received,
+                            extra: Extra {
+                                ids: ids.clone(),
+                                direction: TrafficDirection::Ingress,
+                            },
+                        },
+                    ]
                })
                .collect(),
        })
@@ -350,7 +412,7 @@ fn create_remote_path_prefix(now: DateTime<Utc>) -> String {
 async fn upload_main_events_chunked(
    client: &http::ClientWithMiddleware,
    metric_collection_endpoint: &reqwest::Url,
-    chunk: &EventChunk<'_, Event<Ids, &str>>,
+    chunk: &EventChunk<'_, Event<Extra, &str>>,
    subchunk_size: usize,
 ) {
    // Split into smaller chunks to avoid exceeding the max request size
@@ -384,7 +446,7 @@ async fn upload_main_events_chunked(

 async fn upload_backup_events(
    storage: Option<&GenericRemoteStorage>,
-    chunk: &EventChunk<'_, Event<Ids, &'static str>>,
+    chunk: &EventChunk<'_, Event<Extra, &'static str>>,
    path_prefix: &str,
    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
@@ -461,7 +523,7 @@ mod tests {

    #[tokio::test]
    async fn metrics() {
-        type Report = EventChunk<'static, Event<Ids, String>>;
+        type Report = EventChunk<'static, Event<Extra, String>>;
        let reports: Arc<Mutex<Vec<Report>>> = Arc::default();

        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
@@ -533,7 +595,6 @@ mod tests {
        let counter = metrics.register(Ids {
            endpoint_id: (&EndpointId::from("e1")).into(),
            branch_id: (&BranchId::from("b1")).into(),
-            direction: TrafficDirection::Egress,
            private_link_id: None,
        });

@@ -551,13 +612,19 @@ mod tests {
        .await;
        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert_eq!(r.len(), 1);
-        assert_eq!(r[0].events.len(), 1);
+        assert_eq!(r[0].events.len(), 2);
        assert_eq!(r[0].events[0].value, 0);
+        assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);
+        assert_eq!(r[0].events[1].value, 0);
+        assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);
        pushed_chunks.extend(r);

        // record egress
        counter.record_egress(1);

+        // record ingress
+        counter.record_ingress(2);
+
        // egress should be observered
        collect_metrics_iteration(
            &metrics.endpoints,
@@ -572,8 +639,11 @@ mod tests {
        .await;
        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert_eq!(r.len(), 1);
-        assert_eq!(r[0].events.len(), 1);
+        assert_eq!(r[0].events.len(), 2);
        assert_eq!(r[0].events[0].value, 1);
+        assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);
+        assert_eq!(r[0].events[1].value, 2);
+        assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);
        pushed_chunks.extend(r);

        // release counter
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,8 +48,8 @@ types-jwcrypto = "^1.5.0.20240925"
 pyyaml = "^6.0.2"
 types-pyyaml = "^6.0.12.20240917"
 testcontainers = "^4.9.0"
-# Jsonnet doesn't support Python 3.13 yet
-jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" }
+# Install a release candidate of `jsonnet`, as it supports Python 3.13
+jsonnet = "^0.21.0-rc2"

 [tool.poetry.group.dev.dependencies]
 mypy = "==1.13.0"
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -35,8 +35,9 @@ postgres-protocol.workspace = true
 pprof.workspace = true
 rand.workspace = true
 regex.workspace = true
-scopeguard.workspace = true
 reqwest = { workspace = true, features = ["json"] }
+rustls.workspace = true
+scopeguard.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 smallvec.workspace = true
@@ -45,10 +46,11 @@ strum_macros.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tokio = { workspace = true, features = ["fs"] }
-tokio-util = { workspace = true }
 tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
+tokio-rustls.workspace = true
 tokio-tar.workspace = true
+tokio-util = { workspace = true }
 tracing.workspace = true
 url.workspace = true
 metrics.workspace = true
--- a/safekeeper/client/src/mgmt_api.rs
+++ b/safekeeper/client/src/mgmt_api.rs
@@ -8,7 +8,7 @@ use std::error::Error as _;
 use http_utils::error::HttpErrorBody;
 use reqwest::{IntoUrl, Method, StatusCode};
 use safekeeper_api::models::{
-    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
+    self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
    TimelineStatus,
 };
 use utils::id::{NodeId, TenantId, TimelineId};
@@ -96,11 +96,25 @@ impl Client {
        resp.json().await.map_err(Error::ReceiveBody)
    }

+    pub async fn exclude_timeline(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        req: &models::TimelineMembershipSwitchRequest,
+    ) -> Result<models::TimelineDeleteResult> {
+        let uri = format!(
+            "{}/v1/tenant/{}/timeline/{}/exclude",
+            self.mgmt_api_endpoint, tenant_id, timeline_id
+        );
+        let resp = self.put(&uri, req).await?;
+        resp.json().await.map_err(Error::ReceiveBody)
+    }
+
    pub async fn delete_timeline(
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
-    ) -> Result<TimelineStatus> {
+    ) -> Result<models::TimelineDeleteResult> {
        let uri = format!(
            "{}/v1/tenant/{}/timeline/{}",
            self.mgmt_api_endpoint, tenant_id, timeline_id
@@ -109,6 +123,20 @@ impl Client {
        resp.json().await.map_err(Error::ReceiveBody)
    }

+    pub async fn bump_timeline_term(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        req: &models::TimelineTermBumpRequest,
+    ) -> Result<models::TimelineTermBumpResponse> {
+        let uri = format!(
+            "{}/v1/tenant/{}/timeline/{}/term_bump",
+            self.mgmt_api_endpoint, tenant_id, timeline_id
+        );
+        let resp = self.post(&uri, req).await?;
+        resp.json().await.map_err(Error::ReceiveBody)
+    }
+
    pub async fn timeline_status(
        &self,
        tenant_id: TenantId,
@@ -149,6 +177,14 @@ impl Client {
        self.request(Method::POST, uri, body).await
    }

+    async fn put<B: serde::Serialize, U: IntoUrl>(
+        &self,
+        uri: U,
+        body: B,
+    ) -> Result<reqwest::Response> {
+        self.request(Method::PUT, uri, body).await
+    }
+
    async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
        self.request(Method::GET, uri, ()).await
    }
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -16,10 +16,12 @@ use futures::stream::FuturesUnordered;
 use futures::{FutureExt, StreamExt};
 use metrics::set_build_info_metric;
 use remote_storage::RemoteStorageConfig;
+use reqwest::Certificate;
 use safekeeper::defaults::{
    DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT,
    DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
-    DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR,
+    DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, DEFAULT_SSL_CERT_FILE,
+    DEFAULT_SSL_KEY_FILE,
 };
 use safekeeper::{
    BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf, WAL_SERVICE_RUNTIME, broker,
@@ -94,6 +96,9 @@ struct Args {
    /// Listen http endpoint for management and metrics in the form host:port.
    #[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)]
    listen_http: String,
+    /// Listen https endpoint for management and metrics in the form host:port.
+    #[arg(long, default_value = None)]
+    listen_https: Option<String>,
    /// Advertised endpoint for receiving/sending WAL in the form host:port. If not
    /// specified, listen_pg is used to advertise instead.
    #[arg(long, default_value = None)]
@@ -203,6 +208,15 @@ struct Args {
    /// and the current position of the reader is smaller than this value.
    #[arg(long)]
    max_delta_for_fanout: Option<u64>,
+    /// Path to a file with certificate's private key for https API.
+    #[arg(long, default_value = DEFAULT_SSL_KEY_FILE)]
+    ssl_key_file: Utf8PathBuf,
+    /// Path to a file with a X509 certificate for https API.
+    #[arg(long, default_value = DEFAULT_SSL_CERT_FILE)]
+    ssl_cert_file: Utf8PathBuf,
+    /// Trusted root CA certificate to use in https APIs.
+    #[arg(long)]
+    ssl_ca_file: Option<Utf8PathBuf>,
 }

 // Like PathBufValueParser, but allows empty string.
@@ -336,12 +350,22 @@ async fn main() -> anyhow::Result<()> {
        }
    };

+    let ssl_ca_cert = match args.ssl_ca_file.as_ref() {
+        Some(ssl_ca_file) => {
+            tracing::info!("Using ssl root CA file: {ssl_ca_file:?}");
+            let buf = tokio::fs::read(ssl_ca_file).await?;
+            Some(Certificate::from_pem(&buf)?)
+        }
+        None => None,
+    };
+
    let conf = Arc::new(SafeKeeperConf {
        workdir,
        my_id: id,
        listen_pg_addr: args.listen_pg,
        listen_pg_addr_tenant_only: args.listen_pg_tenant_only,
        listen_http_addr: args.listen_http,
+        listen_https_addr: args.listen_https,
        advertise_pg_addr: args.advertise_pg,
        availability_zone: args.availability_zone,
        no_sync: args.no_sync,
@@ -368,6 +392,9 @@ async fn main() -> anyhow::Result<()> {
        eviction_min_resident: args.eviction_min_resident,
        wal_reader_fanout: args.wal_reader_fanout,
        max_delta_for_fanout: args.max_delta_for_fanout,
+        ssl_key_file: args.ssl_key_file,
+        ssl_cert_file: args.ssl_cert_file,
+        ssl_ca_cert,
    });

    // initialize sentry if SENTRY_DSN is provided
@@ -428,6 +455,17 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
        e
    })?;

+    let https_listener = match conf.listen_https_addr.as_ref() {
+        Some(listen_https_addr) => {
+            info!("starting safekeeper HTTPS service on {}", listen_https_addr);
+            Some(tcp_listener::bind(listen_https_addr).map_err(|e| {
+                error!("failed to bind to address {}: {}", listen_https_addr, e);
+                e
+            })?)
+        }
+        None => None,
+    };
+
    let global_timelines = Arc::new(GlobalTimelines::new(conf.clone()));

    // Register metrics collector for active timelines. It's important to do this
@@ -501,7 +539,7 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
    let http_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| HTTP_RUNTIME.handle())
-        .spawn(http::task_main(
+        .spawn(http::task_main_http(
            conf.clone(),
            http_listener,
            global_timelines.clone(),
@@ -509,6 +547,19 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
        .map(|res| ("HTTP service main".to_owned(), res));
    tasks_handles.push(Box::pin(http_handle));

+    if let Some(https_listener) = https_listener {
+        let https_handle = current_thread_rt
+            .as_ref()
+            .unwrap_or_else(|| HTTP_RUNTIME.handle())
+            .spawn(http::task_main_https(
+                conf.clone(),
+                https_listener,
+                global_timelines.clone(),
+            ))
+            .map(|res| ("HTTPS service main".to_owned(), res));
+        tasks_handles.push(Box::pin(https_handle));
+    }
+
    let broker_task_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| BROKER_RUNTIME.handle())
--- a/safekeeper/src/http/mod.rs
+++ b/safekeeper/src/http/mod.rs
@@ -3,10 +3,11 @@ use std::sync::Arc;

 pub use routes::make_router;
 pub use safekeeper_api::models;
+use tokio_util::sync::CancellationToken;

 use crate::{GlobalTimelines, SafeKeeperConf};

-pub async fn task_main(
+pub async fn task_main_http(
    conf: Arc<SafeKeeperConf>,
    http_listener: std::net::TcpListener,
    global_timelines: Arc<GlobalTimelines>,
@@ -14,8 +15,37 @@ pub async fn task_main(
    let router = make_router(conf, global_timelines)
        .build()
        .map_err(|err| anyhow::anyhow!(err))?;
-    let service = http_utils::RouterService::new(router).unwrap();
-    let server = hyper::Server::from_tcp(http_listener)?;
-    server.serve(service).await?;
+
+    let service = Arc::new(
+        http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?,
+    );
+    let server = http_utils::server::Server::new(service, http_listener, None)?;
+    server.serve(CancellationToken::new()).await?;
+    Ok(()) // unreachable
+}
+
+pub async fn task_main_https(
+    conf: Arc<SafeKeeperConf>,
+    https_listener: std::net::TcpListener,
+    global_timelines: Arc<GlobalTimelines>,
+) -> anyhow::Result<()> {
+    let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
+    let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
+
+    let server_config = rustls::ServerConfig::builder()
+        .with_no_client_auth()
+        .with_single_cert(certs, key)?;
+
+    let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));
+
+    let router = make_router(conf, global_timelines)
+        .build()
+        .map_err(|err| anyhow::anyhow!(err))?;
+
+    let service = Arc::new(
+        http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?,
+    );
+    let server = http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?;
+    server.serve(CancellationToken::new()).await?;
    Ok(()) // unreachable
 }
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -17,7 +17,8 @@ use hyper::{Body, Request, Response, StatusCode};
 use postgres_ffi::WAL_SEGMENT_SIZE;
 use safekeeper_api::models::{
    AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry,
-    TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest,
+    TimelineCopyRequest, TimelineCreateRequest, TimelineDeleteResult, TimelineStatus,
+    TimelineTermBumpRequest,
 };
 use safekeeper_api::{ServerInfo, membership, models};
 use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId};
@@ -32,7 +33,7 @@ use utils::lsn::Lsn;

 use crate::debug_dump::TimelineDigestRequest;
 use crate::safekeeper::TermLsn;
-use crate::timelines_global_map::{DeleteOrExclude, TimelineDeleteResult};
+use crate::timelines_global_map::DeleteOrExclude;
 use crate::{
    GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, patch_control_file, pull_timeline,
 };
@@ -231,9 +232,14 @@ async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Bo
    let conf = get_conf(&request);
    let global_timelines = get_global_timelines(&request);

-    let resp = pull_timeline::handle_request(data, conf.sk_auth_token.clone(), global_timelines)
-        .await
-        .map_err(ApiError::InternalServerError)?;
+    let resp = pull_timeline::handle_request(
+        data,
+        conf.sk_auth_token.clone(),
+        conf.ssl_ca_cert.clone(),
+        global_timelines,
+    )
+    .await
+    .map_err(ApiError::InternalServerError)?;
    json_response(StatusCode::OK, resp)
 }

--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -7,6 +7,7 @@ use std::time::Duration;
 use camino::Utf8PathBuf;
 use once_cell::sync::Lazy;
 use remote_storage::RemoteStorageConfig;
+use reqwest::Certificate;
 use storage_broker::Uri;
 use tokio::runtime::Runtime;
 use utils::auth::SwappableJwtAuth;
@@ -69,6 +70,9 @@ pub mod defaults {
    // before uploading a partial segment, so that in normal operation the eviction can happen
    // as soon as we have done the partial segment upload.
    pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT;
+
+    pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
+    pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
 }

 #[derive(Debug, Clone)]
@@ -84,6 +88,7 @@ pub struct SafeKeeperConf {
    pub listen_pg_addr: String,
    pub listen_pg_addr_tenant_only: Option<String>,
    pub listen_http_addr: String,
+    pub listen_https_addr: Option<String>,
    pub advertise_pg_addr: Option<String>,
    pub availability_zone: Option<String>,
    pub no_sync: bool,
@@ -111,6 +116,9 @@ pub struct SafeKeeperConf {
    pub eviction_min_resident: Duration,
    pub wal_reader_fanout: bool,
    pub max_delta_for_fanout: Option<u64>,
+    pub ssl_key_file: Utf8PathBuf,
+    pub ssl_cert_file: Utf8PathBuf,
+    pub ssl_ca_cert: Option<Certificate>,
 }

 impl SafeKeeperConf {
@@ -127,6 +135,7 @@ impl SafeKeeperConf {
            listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            listen_pg_addr_tenant_only: None,
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
+            listen_https_addr: None,
            advertise_pg_addr: None,
            availability_zone: None,
            remote_storage: None,
@@ -155,6 +164,9 @@ impl SafeKeeperConf {
            eviction_min_resident: Duration::ZERO,
            wal_reader_fanout: false,
            max_delta_for_fanout: None,
+            ssl_key_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_KEY_FILE),
+            ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE),
+            ssl_ca_cert: None,
        }
    }
 }
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -8,6 +8,7 @@ use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use futures::{SinkExt, StreamExt, TryStreamExt};
 use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo};
+use reqwest::Certificate;
 use safekeeper_api::Term;
 use safekeeper_api::models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus};
 use safekeeper_client::mgmt_api;
@@ -392,6 +393,7 @@ pub struct DebugDumpResponse {
 pub async fn handle_request(
    request: PullTimelineRequest,
    sk_auth_token: Option<SecretString>,
+    ssl_ca_cert: Option<Certificate>,
    global_timelines: Arc<GlobalTimelines>,
 ) -> Result<PullTimelineResponse> {
    let existing_tli = global_timelines.get(TenantTimelineId::new(
@@ -402,9 +404,11 @@ pub async fn handle_request(
        bail!("Timeline {} already exists", request.timeline_id);
    }

-    // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's
-    // part of https support (#24836).
-    let http_client = reqwest::Client::new();
+    let mut http_client = reqwest::Client::builder();
+    if let Some(ssl_ca_cert) = ssl_ca_cert {
+        http_client = http_client.add_root_certificate(ssl_ca_cert);
+    }
+    let http_client = http_client.build()?;

    let http_hosts = request.http_hosts.clone();

@@ -441,13 +445,21 @@ pub async fn handle_request(
    assert!(status.tenant_id == request.tenant_id);
    assert!(status.timeline_id == request.timeline_id);

-    pull_timeline(status, safekeeper_host, sk_auth_token, global_timelines).await
+    pull_timeline(
+        status,
+        safekeeper_host,
+        sk_auth_token,
+        http_client,
+        global_timelines,
+    )
+    .await
 }

 async fn pull_timeline(
    status: TimelineStatus,
    host: String,
    sk_auth_token: Option<SecretString>,
+    http_client: reqwest::Client,
    global_timelines: Arc<GlobalTimelines>,
 ) -> Result<PullTimelineResponse> {
    let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
@@ -464,9 +476,6 @@ async fn pull_timeline(
    let conf = &global_timelines.get_global_config();

    let (_tmp_dir, tli_dir_path) = create_temp_timeline_dir(conf, ttid).await?;
-    // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's
-    // part of https support (#24836).
-    let http_client = reqwest::Client::new();
    let client = Client::new(http_client, host.clone(), sk_auth_token.clone());
    // Request stream with basebackup archive.
    let bb_resp = client
--- a/safekeeper/src/timelines_global_map.rs
+++ b/safekeeper/src/timelines_global_map.rs
@@ -11,9 +11,8 @@ use anyhow::{Context, Result, bail};
 use camino::Utf8PathBuf;
 use camino_tempfile::Utf8TempDir;
 use safekeeper_api::membership::Configuration;
-use safekeeper_api::models::SafekeeperUtilization;
+use safekeeper_api::models::{SafekeeperUtilization, TimelineDeleteResult};
 use safekeeper_api::{ServerInfo, membership};
-use serde::Serialize;
 use tokio::fs;
 use tracing::*;
 use utils::crashsafe::{durable_rename, fsync_async_opt};
@@ -579,11 +578,6 @@ impl GlobalTimelines {
    }
 }

-#[derive(Clone, Copy, Serialize)]
-pub struct TimelineDeleteResult {
-    pub dir_existed: bool,
-}
-
 /// Action for delete_or_exclude.
 #[derive(Clone, Debug)]
 pub enum DeleteOrExclude {
--- a/safekeeper/tests/walproposer_sim/safekeeper.rs
+++ b/safekeeper/tests/walproposer_sim/safekeeper.rs
@@ -152,6 +152,7 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
        my_id: NodeId(os.id() as u64),
        listen_pg_addr: String::new(),
        listen_http_addr: String::new(),
+        listen_https_addr: None,
        no_sync: false,
        broker_endpoint: "/".parse::<Uri>().unwrap(),
        broker_keepalive_interval: Duration::from_secs(0),
@@ -179,6 +180,9 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
        eviction_min_resident: Duration::ZERO,
        wal_reader_fanout: false,
        max_delta_for_fanout: None,
+        ssl_key_file: Utf8PathBuf::from(""),
+        ssl_cert_file: Utf8PathBuf::from(""),
+        ssl_ca_cert: None,
    };

    let mut global = GlobalMap::new(disk, conf.clone())?;
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -24,7 +24,7 @@ use pageserver_api::controller_api::{
    ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest,
 };
 use pageserver_api::models::{
-    TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
+    DetachBehavior, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
    TenantShardSplitRequest, TenantTimeTravelRequest, TimelineArchivalConfigRequest,
    TimelineCreateRequest,
 };
@@ -525,6 +525,7 @@ async fn handle_tenant_timeline_detach_ancestor(
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    let behavior: Option<DetachBehavior> = parse_query_param(&req, "detach_behavior")?;

    check_permissions(&req, Scope::PageServerApi)?;
    maybe_rate_limit(&req, tenant_id).await;
@@ -537,7 +538,7 @@ async fn handle_tenant_timeline_detach_ancestor(
    };

    let res = service
-        .tenant_timeline_detach_ancestor(tenant_id, timeline_id)
+        .tenant_timeline_detach_ancestor(tenant_id, timeline_id, behavior)
        .await?;

    json_response(StatusCode::OK, res)
--- a/storage_controller/src/pageserver_client.rs
+++ b/storage_controller/src/pageserver_client.rs
@@ -1,9 +1,9 @@
 use pageserver_api::models::detach_ancestor::AncestorDetached;
 use pageserver_api::models::{
-    LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
-    TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
-    TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo,
-    TopTenantShardsRequest, TopTenantShardsResponse,
+    DetachBehavior, LocationConfig, LocationConfigListResponse, PageserverUtilization,
+    SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest,
+    TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest,
+    TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::BlockUnblock;
@@ -252,13 +252,14 @@ impl PageserverClient {
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
+        behavior: Option<DetachBehavior>,
    ) -> Result<AncestorDetached> {
        measured_request!(
            "timeline_detach_ancestor",
            crate::metrics::Method::Put,
            &self.node_id_label,
            self.inner
-                .timeline_detach_ancestor(tenant_shard_id, timeline_id)
+                .timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
                .await
        )
    }
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -967,10 +967,26 @@ impl Persistence {
        &self,
        split_tenant_id: TenantId,
        old_shard_count: ShardCount,
+        new_shard_count: ShardCount,
    ) -> DatabaseResult<()> {
        use crate::schema::tenant_shards::dsl::*;
        self.with_measured_conn(DatabaseOperation::CompleteShardSplit, move |conn| {
            Box::pin(async move {
+                // Sanity: child shards must still exist, as we're deleting parent shards
+                let child_shards_query = tenant_shards
+                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                    .filter(shard_count.eq(new_shard_count.literal() as i32));
+                let child_shards = child_shards_query
+                    .load::<TenantShardPersistence>(conn)
+                    .await?;
+                if child_shards.len() != new_shard_count.count() as usize {
+                    return Err(DatabaseError::Logical(format!(
+                        "Unexpected child shard count {} while completing split to \
+                            count {new_shard_count:?} on tenant {split_tenant_id}",
+                        child_shards.len()
+                    )));
+                }
+
                // Drop parent shards
                diesel::delete(tenant_shards)
                    .filter(tenant_id.eq(split_tenant_id.to_string()))
--- a/storage_controller/src/safekeeper_client.rs
+++ b/storage_controller/src/safekeeper_client.rs
@@ -1,5 +1,5 @@
 use safekeeper_api::models::{
-    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
+    self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
    TimelineStatus,
 };
 use safekeeper_client::mgmt_api::{Client, Result};
@@ -69,11 +69,28 @@ impl SafekeeperClient {
        )
    }

+    #[allow(unused)]
+    pub(crate) async fn exclude_timeline(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        req: &models::TimelineMembershipSwitchRequest,
+    ) -> Result<models::TimelineDeleteResult> {
+        measured_request!(
+            "exclude_timeline",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner
+                .exclude_timeline(tenant_id, timeline_id, req)
+                .await
+        )
+    }
+
    pub(crate) async fn delete_timeline(
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
-    ) -> Result<TimelineStatus> {
+    ) -> Result<models::TimelineDeleteResult> {
        measured_request!(
            "delete_timeline",
            crate::metrics::Method::Delete,
@@ -94,6 +111,23 @@ impl SafekeeperClient {
        )
    }

+    #[allow(unused)]
+    pub(crate) async fn bump_timeline_term(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        req: &models::TimelineTermBumpRequest,
+    ) -> Result<models::TimelineTermBumpResponse> {
+        measured_request!(
+            "term_bump",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner
+                .bump_timeline_term(tenant_id, timeline_id, req)
+                .await
+        )
+    }
+
    pub(crate) async fn get_utilization(&self) -> Result<SafekeeperUtilization> {
        measured_request!(
            "utilization",
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -34,9 +34,9 @@ use pageserver_api::controller_api::{
    TenantShardMigrateResponse,
 };
 use pageserver_api::models::{
-    self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization,
-    SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters, TenantConfig,
-    TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
+    self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode,
+    PageserverUtilization, SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters,
+    TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
    TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest,
    TenantShardSplitResponse, TenantSorting, TenantTimeTravelRequest,
    TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateResponseStorcon,
@@ -4041,6 +4041,7 @@ impl Service {
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
+        behavior: Option<DetachBehavior>,
    ) -> Result<models::detach_ancestor::AncestorDetached, ApiError> {
        tracing::info!("Detaching timeline {tenant_id}/{timeline_id}",);

@@ -4064,6 +4065,7 @@ impl Service {
                node: Node,
                jwt: Option<String>,
                ssl_ca_cert: Option<Certificate>,
+                behavior: Option<DetachBehavior>,
            ) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> {
                tracing::info!(
                    "Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
@@ -4073,7 +4075,7 @@ impl Service {
                    .map_err(|e| passthrough_api_error(&node, e))?;

                client
-                    .timeline_detach_ancestor(tenant_shard_id, timeline_id)
+                    .timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
                    .await
                    .map_err(|e| {
                        use mgmt_api::Error;
@@ -4111,6 +4113,7 @@ impl Service {
                        node,
                        self.config.pageserver_jwt_token.clone(),
                        self.config.ssl_ca_cert.clone(),
+                        behavior,
                    ))
                })
                .await?;
@@ -4265,7 +4268,8 @@ impl Service {

    /// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
    ///
-    /// On success, the returned vector contains exactly the same number of elements as the input `locations`.
+    /// On success, the returned vector contains exactly the same number of elements as the input `locations`
+    /// and the returned element at index `i` is the result of `req_fn` for `locations[i]`.
    async fn tenant_for_shards<F, R>(
        &self,
        locations: Vec<(TenantShardId, Node)>,
@@ -4281,18 +4285,23 @@ impl Service {
        let mut futs = FuturesUnordered::new();
        let mut results = Vec::with_capacity(locations.len());

-        for (tenant_shard_id, node) in locations {
-            futs.push(req_fn(tenant_shard_id, node));
+        for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() {
+            let fut = req_fn(tenant_shard_id, node);
+            futs.push(async move { (idx, fut.await) });
        }

-        while let Some(r) = futs.next().await {
-            results.push(r?);
+        while let Some((idx, r)) = futs.next().await {
+            results.push((idx, r?));
        }

-        Ok(results)
+        results.sort_by_key(|(idx, _)| *idx);
+        Ok(results.into_iter().map(|(_, r)| r).collect())
    }

-    /// Concurrently invoke a pageserver API call on many shards at once
+    /// Concurrently invoke a pageserver API call on many shards at once.
+    ///
+    /// The returned Vec has the same length as the `locations` Vec,
+    /// and returned element at index `i` is the result for `op(locations[i])`.
    pub(crate) async fn tenant_for_shards_api<T, O, F>(
        &self,
        locations: Vec<(TenantShardId, Node)>,
@@ -4309,27 +4318,29 @@ impl Service {
        let mut futs = FuturesUnordered::new();
        let mut results = Vec::with_capacity(locations.len());

-        for (tenant_shard_id, node) in locations {
+        for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() {
            futs.push(async move {
-                node.with_client_retries(
-                    |client| op(tenant_shard_id, client),
-                    &self.config.pageserver_jwt_token,
-                    &self.config.ssl_ca_cert,
-                    warn_threshold,
-                    max_retries,
-                    timeout,
-                    cancel,
-                )
-                .await
+                let r = node
+                    .with_client_retries(
+                        |client| op(tenant_shard_id, client),
+                        &self.config.pageserver_jwt_token,
+                        &self.config.ssl_ca_cert,
+                        warn_threshold,
+                        max_retries,
+                        timeout,
+                        cancel,
+                    )
+                    .await;
+                (idx, r)
            });
        }

-        while let Some(r) = futs.next().await {
-            let r = r.unwrap_or(Err(mgmt_api::Error::Cancelled));
-            results.push(r);
+        while let Some((idx, r)) = futs.next().await {
+            results.push((idx, r.unwrap_or(Err(mgmt_api::Error::Cancelled))));
        }

-        results
+        results.sort_by_key(|(idx, _)| *idx);
+        results.into_iter().map(|(_, r)| r).collect()
    }

    /// Helper for safely working with the shards in a tenant remotely on pageservers, for example
@@ -5742,7 +5753,7 @@ impl Service {
        //  it doesn't match, but that requires more retry logic on this side)

        self.persistence
-            .complete_shard_split(tenant_id, old_shard_count)
+            .complete_shard_split(tenant_id, old_shard_count, new_shard_count)
            .await?;

        fail::fail_point!("shard-split-post-complete", |_| Err(
--- a/test_runner/fixtures/compute_reconfigure.py
+++ b/test_runner/fixtures/compute_reconfigure.py
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
 class ComputeReconfigure:
    def __init__(self, server: HTTPServer):
        self.server = server
-        self.control_plane_compute_hook_api = f"http://{server.host}:{server.port}/notify-attach"
+        self.control_plane_hooks_api = f"http://{server.host}:{server.port}/"
        self.workloads: dict[TenantId, Any] = {}
        self.on_notify: Callable[[Any], None] | None = None

--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -175,6 +175,9 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
    counter("pageserver_tenant_throttling_count"),
    counter("pageserver_timeline_wal_records_received"),
    counter("pageserver_page_service_pagestream_flush_in_progress_micros"),
+    counter("pageserver_wait_lsn_in_progress_micros"),
+    counter("pageserver_wait_lsn_started_count"),
+    counter("pageserver_wait_lsn_finished_count"),
    *histogram("pageserver_page_service_batch_size"),
    *histogram("pageserver_page_service_pagestream_batch_wait_time_seconds"),
    *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -460,12 +460,15 @@ class NeonEnvBuilder:
        self.overlay_mounts_created_by_us: list[tuple[str, Path]] = []
        self.config_init_force: str | None = None
        self.top_output_dir = top_output_dir
-        self.control_plane_compute_hook_api: str | None = None
+        self.control_plane_hooks_api: str | None = None
        self.storage_controller_config: dict[Any, Any] | None = None

        # Flag to enable https listener in pageserver, generate local ssl certs,
        # and force storage controller to use https for pageserver api.
        self.use_https_pageserver_api: bool = False
+        # Flag to enable https listener in safekeeper, generate local ssl certs,
+        # and force storage controller to use https for safekeeper api.
+        self.use_https_safekeeper_api: bool = False

        self.pageserver_virtual_file_io_engine: str | None = pageserver_virtual_file_io_engine
        self.pageserver_get_vectored_concurrent_io: str | None = (
@@ -1063,7 +1066,9 @@ class NeonEnv:
        self.initial_tenant = config.initial_tenant
        self.initial_timeline = config.initial_timeline

-        self.generate_local_ssl_certs = config.use_https_pageserver_api
+        self.generate_local_ssl_certs = (
+            config.use_https_pageserver_api or config.use_https_safekeeper_api
+        )
        self.ssl_ca_file = (
            self.repo_dir.joinpath("rootCA.crt") if self.generate_local_ssl_certs else None
        )
@@ -1116,7 +1121,7 @@ class NeonEnv:
        self.control_plane_api: str = self.storage_controller.upcall_api_endpoint()

        # For testing this with a fake HTTP server, enable passing through a URL from config
-        self.control_plane_compute_hook_api = config.control_plane_compute_hook_api
+        self.control_plane_hooks_api = config.control_plane_hooks_api

        self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
        self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode
@@ -1137,8 +1142,8 @@ class NeonEnv:
        if self.control_plane_api is not None:
            cfg["control_plane_api"] = self.control_plane_api

-        if self.control_plane_compute_hook_api is not None:
-            cfg["control_plane_compute_hook_api"] = self.control_plane_compute_hook_api
+        if self.control_plane_hooks_api is not None:
+            cfg["control_plane_hooks_api"] = self.control_plane_hooks_api

        storage_controller_config = self.storage_controller_config

@@ -1146,6 +1151,10 @@ class NeonEnv:
            storage_controller_config = storage_controller_config or {}
            storage_controller_config["use_https_pageserver_api"] = True

+        if config.use_https_safekeeper_api:
+            storage_controller_config = storage_controller_config or {}
+            storage_controller_config["use_https_safekeeper_api"] = True
+
        if storage_controller_config is not None:
            cfg["storage_controller"] = storage_controller_config

@@ -1248,6 +1257,7 @@ class NeonEnv:
                pg=self.port_distributor.get_port(),
                pg_tenant_only=self.port_distributor.get_port(),
                http=self.port_distributor.get_port(),
+                https=self.port_distributor.get_port() if config.use_https_safekeeper_api else None,
            )
            id = config.safekeepers_id_start + i  # assign ids sequentially
            sk_cfg: dict[str, Any] = {
@@ -1255,6 +1265,7 @@ class NeonEnv:
                "pg_port": port.pg,
                "pg_tenant_only_port": port.pg_tenant_only,
                "http_port": port.http,
+                "https_port": port.https,
                "sync": config.safekeepers_enable_fsync,
            }
            if config.auth_enabled:
@@ -4475,6 +4486,7 @@ class SafekeeperPort:
    pg: int
    pg_tenant_only: int
    http: int
+    https: int | None


@dataclass
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -1070,14 +1070,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        batch_size: int | None = None,
-        behavior_v2: bool = False,
+        detach_behavior: str | None = None,
        **kwargs,
    ) -> set[TimelineId]:
        params: dict[str, Any] = {}
        if batch_size is not None:
            params["batch_size"] = batch_size
-        if behavior_v2:
-            params["detach_behavior"] = "v2"
+        if detach_behavior:
+            params["detach_behavior"] = detach_behavior
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor",
            params=params,
--- a/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql
+++ b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql
@@ -0,0 +1,162 @@
+\set min_id 1
+\set max_id 1500000000
+\set range_size 100
+
+-- Use uniform random instead of random_zipfian
+\set random_id random(:min_id, :max_id)
+\set random_mar_id random(1, 65536)
+\set random_delete_id random(:min_id, :max_id)
+
+-- Update exactly one row (if it exists) using the uniformly chosen random_id
+UPDATE transaction.transaction
+   SET state = 'COMPLETED',
+       settlement_date = CURRENT_DATE,
+       mar_identifier = (:random_mar_id)::int
+ WHERE id = (:random_id)::bigint;
+
+-- Insert exactly one row
+INSERT INTO transaction.transaction (
+    user_id,
+    card_id,
+    business_id,
+    preceding_transaction_id,
+    is_last,
+    is_mocked,
+    type,
+    state,
+    network,
+    subnetwork,
+    user_transaction_time,
+    settlement_date,
+    request_amount,
+    amount,
+    currency_code,
+    approval_code,
+    response,
+    gpa,
+    gpa_order_unload,
+    gpa_order,
+    program_transfer,
+    fee_transfer,
+    peer_transfer,
+    msa_orders,
+    risk_assessment,
+    auto_reload,
+    direct_deposit,
+    polarity,
+    real_time_fee_group,
+    fee,
+    chargeback,
+    standin_approved_by,
+    acquirer_fee_amount,
+    funded_account_holder,
+    digital_wallet_token,
+    network_fees,
+    card_security_code_verification,
+    fraud,
+    cardholder_authentication_data,
+    currency_conversion,
+    merchant,
+    store,
+    card_acceptor,
+    acquirer,
+    pos,
+    avs,
+    mar_token,
+    mar_preceding_related_transaction_token,
+    mar_business_token,
+    mar_acting_user_token,
+    mar_card_token,
+    mar_duration,
+    mar_created_time,
+    issuer_interchange_amount,
+    offer_orders,
+    transaction_canonical_id,
+    mar_identifier,
+    created_at,
+    card_acceptor_mid,
+    card_acceptor_name,
+    address_verification,
+    issuing_product,
+    mar_enhanced_data_token,
+    standin_reason
+)
+SELECT
+    (:random_id % 100000) + 1 AS user_id,
+    (:random_id % 500000) + 1 AS card_id,
+    (:random_id % 20000) + 1  AS business_id,
+    NULL                     AS preceding_transaction_id,
+    (:random_id % 2) = 0     AS is_last,
+    (:random_id % 5) = 0     AS is_mocked,
+    'authorization'          AS type,
+    'PENDING'                AS state,
+    'VISA'                   AS network,
+    'VISANET'                AS subnetwork,
+    now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time,
+    now() - ((:random_id % 100) || ' days')::interval AS settlement_date,
+    random() * 1000          AS request_amount,
+    random() * 1000          AS amount,
+    'USD'                    AS currency_code,
+    md5((:random_id)::text)  AS approval_code,
+    '{}'::jsonb              AS response,
+    '{}'::jsonb              AS gpa,
+    '{}'::jsonb              AS gpa_order_unload,
+    '{}'::jsonb              AS gpa_order,
+    '{}'::jsonb              AS program_transfer,
+    '{}'::jsonb              AS fee_transfer,
+    '{}'::jsonb              AS peer_transfer,
+    '{}'::jsonb              AS msa_orders,
+    '{}'::jsonb              AS risk_assessment,
+    '{}'::jsonb              AS auto_reload,
+    '{}'::jsonb              AS direct_deposit,
+    '{}'::jsonb              AS polarity,
+    '{}'::jsonb              AS real_time_fee_group,
+    '{}'::jsonb              AS fee,
+    '{}'::jsonb              AS chargeback,
+    NULL                     AS standin_approved_by,
+    random() * 100           AS acquirer_fee_amount,
+    '{}'::jsonb              AS funded_account_holder,
+    '{}'::jsonb              AS digital_wallet_token,
+    '{}'::jsonb              AS network_fees,
+    '{}'::jsonb              AS card_security_code_verification,
+    '{}'::jsonb              AS fraud,
+    '{}'::jsonb              AS cardholder_authentication_data,
+    '{}'::jsonb              AS currency_conversion,
+    '{}'::jsonb              AS merchant,
+    '{}'::jsonb              AS store,
+    '{}'::jsonb              AS card_acceptor,
+    '{}'::jsonb              AS acquirer,
+    '{}'::jsonb              AS pos,
+    '{}'::jsonb              AS avs,
+    md5((:random_id)::text || 'token') AS mar_token,
+    NULL                     AS mar_preceding_related_transaction_token,
+    NULL                     AS mar_business_token,
+    NULL                     AS mar_acting_user_token,
+    NULL                     AS mar_card_token,
+    random() * 1000          AS mar_duration,
+    now()                    AS mar_created_time,
+    random() * 100           AS issuer_interchange_amount,
+    '{}'::jsonb              AS offer_orders,
+    (:random_id % 500) + 1   AS transaction_canonical_id,
+    :random_id::integer      AS mar_identifier,
+    now()                    AS created_at,
+    NULL                     AS card_acceptor_mid,
+    NULL                     AS card_acceptor_name,
+    '{}'::jsonb              AS address_verification,
+    'DEFAULT_PRODUCT'        AS issuing_product,
+    NULL                     AS mar_enhanced_data_token,
+    NULL                     AS standin_reason
+FROM (SELECT 1) AS dummy;
+
+-- Delete at most one row: the lowest existing id in [random_delete_id, random_delete_id + range_size)
+WITH to_delete AS (
+    SELECT id
+      FROM transaction.transaction
+     WHERE id >= (:random_delete_id)::bigint
+       AND id < ((:random_delete_id)::bigint + :range_size)
+     ORDER BY id
+     LIMIT 1
+)
+DELETE FROM transaction.transaction
+USING to_delete
+WHERE transaction.transaction.id = to_delete.id;
--- a/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql
+++ b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql
@@ -0,0 +1,25 @@
+-- Issue a controlled number of getpage prefetch requests against the first
+-- 40 million pages (320 GB) of a 500 GiB table.
+-- The table has 55 million pages.
+
+
+-- Zipfian distributions model real-world access patterns where:
+--   - a few values (here: start pages) are accessed frequently,
+--   - many values are accessed rarely.
+-- This is useful for simulating realistic workloads.
+-- alpha is the Zipfian parameter; start pages are drawn from [min_page, max_page].
+\set alpha 1.2  
+\set min_page 1
+\set max_page 40000000 
+
+\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha)
+
+-- Read 500 consecutive pages, i.e. the ctid range [(start_page,1), (start_page+500,1)),
+-- from a Zipfian-distributed random start page. This enforces PostgreSQL prefetching.
+WITH random_page AS (
+    SELECT :zipf_random_page::int AS start_page
+)
+SELECT MAX(created_at)
+FROM webhook.incoming_webhooks
+WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page)
+AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page);
--- a/test_runner/performance/test_perf_oltp_large_tenant.py
+++ b/test_runner/performance/test_perf_oltp_large_tenant.py
@@ -2,11 +2,13 @@ from __future__ import annotations

 import os
 import timeit
+from contextlib import closing
 from pathlib import Path

 import pytest
 from fixtures.benchmark_fixture import PgBenchRunResult
 from fixtures.compare_fixtures import PgCompare
+from fixtures.log_helper import log

 from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp

@@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None):
    env.zenbenchmark.record_pg_bench_result(prefix, res)


+def run_database_maintenance(env: PgCompare):
+    with closing(env.pg.connect()) as conn:
+        with conn.cursor() as cur:
+            log.info("start vacuum analyze transaction.transaction")
+            with env.zenbenchmark.record_duration("vacuum_analyze"):
+                cur.execute("SET statement_timeout = 0;")
+                cur.execute("SET max_parallel_maintenance_workers = 7;")
+                cur.execute("SET maintenance_work_mem = '10GB';")
+                cur.execute("vacuum analyze transaction.transaction;")
+            log.info("finished vacuum analyze transaction.transaction")
+
+            # recover previously failed or canceled re-indexing
+            cur.execute(
+                """
+                DO $$
+                DECLARE
+                    invalid_index TEXT;
+                BEGIN
+                    FOR invalid_index IN
+                        SELECT c.relname
+                        FROM pg_class c
+                        JOIN pg_index i ON i.indexrelid = c.oid
+                        JOIN pg_namespace n ON n.oid = c.relnamespace
+                        WHERE n.nspname = 'transaction'
+                        AND i.indisvalid = FALSE
+                        AND c.relname LIKE '%_ccnew%'
+                    LOOP
+                        EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index;
+                    END LOOP;
+                END $$;
+                """
+            )
+            # also recover failed or canceled re-indexing on toast part of table
+            cur.execute(
+                """
+                DO $$
+                DECLARE
+                    invalid_index TEXT;
+                BEGIN
+                    FOR invalid_index IN
+                        SELECT c.relname
+                        FROM pg_class c
+                        JOIN pg_index i ON i.indexrelid = c.oid
+                        JOIN pg_namespace n ON n.oid = c.relnamespace
+                        WHERE n.nspname = 'pg_toast'
+                        AND i.indisvalid = FALSE
+                        AND c.relname LIKE '%_ccnew%'
+                        AND i.indrelid = (
+                            SELECT reltoastrelid FROM pg_class
+                            WHERE relname = 'transaction'
+                            AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction')
+                        )
+                    LOOP
+                        EXECUTE 'DROP INDEX IF EXISTS pg_toast.' || invalid_index;
+                    END LOOP;
+                END $$;
+                """
+            )
+
+            log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction")
+            with env.zenbenchmark.record_duration("reindex concurrently"):
+                cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;")
+            log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction")
+
+
@pytest.mark.parametrize("custom_scripts", get_custom_scripts())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
-def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int):
+def test_perf_oltp_large_tenant_pgbench(
+    remote_compare: PgCompare, custom_scripts: str, duration: int
+):
    run_test_pgbench(remote_compare, custom_scripts, duration)
-    # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration
+
+
+@pytest.mark.remote_cluster
+def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
+    # run vacuum analyze and re-index after the pgbench test; measure and report their durations
+    run_database_maintenance(remote_compare)
--- a/test_runner/performance/test_storage_controller_scale.py
+++ b/test_runner/performance/test_storage_controller_scale.py
@@ -83,9 +83,7 @@ def test_storage_controller_many_tenants(
        "max_offline": "30s",
        "max_warming_up": "300s",
    }
-    neon_env_builder.control_plane_compute_hook_api = (
-        compute_reconfigure_listener.control_plane_compute_hook_api
-    )
+    neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api

    AZS = ["alpha", "bravo", "charlie"]

--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -144,7 +144,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
        "compaction_l0_semaphore": False,
        "l0_flush_delay_threshold": 25,
        "l0_flush_stall_threshold": 42,
-        "l0_flush_wait_upload": False,
+        "l0_flush_wait_upload": True,
        "compaction_target_size": 1048576,
        "checkpoint_distance": 10000,
        "checkpoint_timeout": "13m",
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Suhas Thalanki	33a55ba8f8	removed anon patch	2025-03-18 11:14:18 -04:00
Suhas Thalanki	90cf481ad8	removed unused imports	2025-03-17 13:13:25 -04:00
Suhas Thalanki	9a9e479b32	Merge branch 'main' into thesuhas/remove_anon_extension	2025-03-17 11:30:40 -04:00
Roman Zaynetdinov	db30e1669c	Add /configure_telemetry API endpoint (#11117 ) Work on https://github.com/neondatabase/cloud/issues/23721 and https://github.com/neondatabase/cloud/issues/23714 Depends on https://github.com/neondatabase/neon/pull/11111 - Add `/configure_telemetry` API endpoint - Support second rsyslog configuration for Postgres logs export - Enable logs export when compute feature is enabled and configure Postgres to send logs to syslog I have used `/configure_telemetry` name because in the future I see it also being used for configuring a `pg_tracing` extension to export traces. Let me know if you'd rather have these APIs separate. In this case we can rename it to `/configure_rsyslog`.	2025-03-17 13:53:23 +00:00
JC Grünhage	fdf04d4d81	fix(ci): use correct branch ref for checking whether this is a release merge queue (#11270 ) ## Problem https://github.com/neondatabase/neon/actions/runs/13894288475/job/38871819190 shows the "Add fast-fordward label to PR to trigger fast-forward merge" job being skipped. This is due to not using the right variable for checking which branch the merge queue is merging into. ## Summary of changes Use the `branch` output of the `meta` task for checking the target branch of a merge group.	2025-03-17 09:26:45 +00:00
Alexander Bayandin	136cae76c2	fix(ci): correct regex to detect release-compute RC PRs (#11269 ) ## Problem The regex in `_meta.yml` workflow doesn't detect RC PRs for compute releases: https://neondb.slack.com/archives/C059ZC138NR/p1742164884669389 ## Summary of changes - Fix regex --------- Co-authored-by: Peter Bendel <peterbendel@neon.tech>	2025-03-17 07:25:12 +00:00
Konstantin Knizhnik	15e63afe7d	Support DEBUG_COMPARE_LOCAL mode for unloggedindex build (#11257 ) ## Problem In an unlogged index build (used for GIST/SPGIST/GIN indexes), files are created on disk and then removed at the end. This contradicts the logic of DEBUG_COMPARE_LOCAL mode. ## Summary of changes Do not create and unlink files in unlogged build in DEBUG_COMPARE_LOCAL mode. Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>	2025-03-17 06:07:24 +00:00
Alexey Kondratov	966abd3bd6	fix(compute_ctl): Dollar escaping helper fixes (#11263 ) ## Problem In the previous PR #11045, one edge-case wasn't covered, when an ident contains only one `$`, we were picking `$$` as a 'wrapper'. Yet, when this `$` is at the beginning or at the end of the ident, then we end up with `$$$` in a row which breaks the escaping. ## Summary of changes Start from `x` tag instead of a blank string. Slack: https://neondb.slack.com/archives/C08HV951W2W/p1742076675079769?thread_ts=1742004205.461159&cid=C08HV951W2W	2025-03-16 18:39:54 +00:00
Alexey Kondratov	8566cad23b	chore(docs): Refresh RFC guide to suggest using YYYY-MM-DD prefix (#11252 ) ## Problem Serial/numeric IDs lead to collisions, which is not critical but looks awkward. Previous discussion: https://neondb.slack.com/archives/C033A2WE6BZ/p1741891345869979 ## Summary of changes Suggest using the `YYYY-MM-DD` prefix, which i) has less chance of collision; ii) provides out-of-the-box lexicographic sorting; iii) even if it collides, it's not a big deal -- just two RFCs have been started on the same day. --------- Co-authored-by: Alexander Bayandin <alexander@neon.tech>	2025-03-16 17:17:58 +00:00
Peter Bendel	228bb75354	Extend large tenant OLTP workload ... (#11166 ) ... to better match the workload characteristics of real Neon customers ## Problem We analyzed workloads of large Neon users and want to extend the oltp workload to include characteristics seen in those workloads. ## Summary of changes - for re-use branch delete inserted rows from last run - adjust expected run-time (time-outs) in GitHub workflow - add queries that exposes the prefetch getpages path - add I/U/D transactions for another table (so far the workload was insert/append-only) - add an explicit vacuum analyze step and measure its time - add reindex concurrently step and measure its time (and take care that this step succeeds even if prior reindex runs have failed or were canceled) - create a second connection string for the pooled connection that removes the `-pooler` suffix from the hostname because we want to run long-running statements (database maintenance) and bypass the pooler which doesn't support unlimited statement timeout ## Test run https://github.com/neondatabase/neon/actions/runs/13851772887/job/38760172415	2025-03-16 14:04:48 +00:00
Cihan Demirci	a5b00b87ba	CI(pre-merge-checks): use step-security/changed-files (#11265 ) Use Step Security maintained version of `tj-actions/changed-files`. https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised#use-the-stepsecurity-maintained-changed-files-action	2025-03-16 13:53:27 +00:00
John Spray	a674ed8caf	storcon: safety check when completing shard split (#11256 ) ## Problem There is a rare race between controller graceful deployment and shard splitting where we may incorrectly both abort _and_ complete the split (on different pods), and thereby leave no shards at all in the database. Related: #11254 ## Summary of changes - In complete_shard_split, refuse to delete anything if child shards are not found	2025-03-14 20:08:24 +00:00
Erik Grinaker	53d50c7ea5	pageserver: deflake compaction tests (#11246 ) These need to set `NoYield`, otherwise they may be preempted by pending L0 compaction.	2025-03-14 17:45:18 +00:00
Dmitrii Kovalkov	3168bd0e3a	tests: suppress "Cancelled request finished with an error" in test_timeline_archive (#11241 ) ## Problem Previous PR https://github.com/neondatabase/neon/pull/11190 didn't suppress `Cancelled request finished with an error` messages, which are also expected, so the test https://github.com/neondatabase/neon/issues/11177 is still flaky. ## Summary of changes - Suppress `Cancelled request finished with an error` in `test_timeline_archive`	2025-03-14 17:42:09 +00:00
Alexander Bayandin	4a97cd0b7e	test_runner: fix tests with jsonnet for Python 3.13 (#11240 ) ## Problem Python's `jsonnet` 0.20.0 doesn't support Python 3.13, so we have a couple of tests xfailed because of that. ## Summary of changes - Bump `jsonnet` to `0.21.0rc2` which supports Python 3.13 - Unxfail `test_sql_exporter_metrics_e2e` and `test_sql_exporter_metrics_smoke` on Python 3.13	2025-03-14 17:02:55 +00:00
Anastasia Lubennikova	b7c6738524	feat(compute_ctl): add pgaudt log gc to compute_ctl (#11169 ) - add pgaudt_gc thread to compute_ctl to cleanup old pgaudit logs if they exist. pgaudit can rotate files, but it doesn't delete the old files - Add AUDIT_LOG_DIR_SIZE metric to compute_ctl to track the size of the audit log directory in bytes. - Fix permissions for rsyslog state files directory	2025-03-14 14:08:16 +00:00
Conrad Ludgate	7fe5a689b4	feat(proxy): export ingress metrics (#11244 ) ## Problem We exposed the direction tag in #10925 but didn't actually include the ingress tag in the export to allow for an adaption period. ## Summary of changes We now export the ingress direction	2025-03-14 13:54:57 +00:00
Dmitrii Kovalkov	b0922967e0	Bump humantime version and remove advisories.ignore (#11242 ) ## Problem - Closes: https://github.com/neondatabase/neon/issues/11179#issuecomment-2724222041 ## Summary of changes - Bump humantime version to `2.2` - Remove `RUSTSEC-2025-0014` from `advisories.ignore`	2025-03-14 11:51:11 +00:00
Dmitrii Kovalkov	f68be2b5e2	safekeeper: https for management API (#11171 ) ## Problem Storage controller uses unencrypted HTTP requests for safekeeper management API. - Closes: https://github.com/neondatabase/cloud/issues/24836 ## Summary of changes - Replace `hyper0::server::Server` with `http_utils::server::Server` in safekeeper. - Add HTTPS handler for safekeeper management API.	2025-03-14 11:41:22 +00:00
Christian Schwarz	04370b48b3	fix(storcon): optimization validation makes decisions based on wrong SecondaryProgress (#11229 ) # Refs - fixes https://github.com/neondatabase/neon/issues/11228 # Problem High-Level When storcon validates whether a `ScheduleOptimizationAction` should be applied, it retrieves the `tenant_secondary_status` to determine whether a secondary is ready for the optimization. When collecting results, it associates secondary statuses with the wrong optimization actions in the batch of optimizations that we're validating. The result is that we make the decision for shard/location X based on the SecondaryStatus of a random secondary location Y in the current batch of optimizations. A possible symptom is an early cutover, as seen in this engineering investigation here: - https://github.com/neondatabase/cloud/issues/25734 # Problem Code-Level This code here in `optimize_all_validate` `97e2e27f68/storage_controller/src/service.rs (L7012-L7029)` zips the `want_secondary_status` with the Vec returned from `tenant_for_shards_api` . However, the Vec returned from `want_secondary_status` is not ordered (it uses FuturesUnordered internally). # Solution Sort the Vec in input order before returning it. `optimize_all_validate` was the only caller affected by this problem While at it, also future-proof similar-looking function `tenant_for_shards`. None of its callers care about the order, but this type of function signature is easy to use incorrectly. # Future Work Avoid the additional iteration, map, and allocation. Change API to leverage AsyncFn (async closure). And/or invert `tenant_for_shards_api` into a Future ext trait / iterator adaptor thing.	2025-03-14 11:21:16 +00:00
Arpad Müller	5359cf717c	storcon: add API definitions for exclude_timeline and term_bump (#11197 ) Adds API definitions for the safekeeper API endpoints `exclude_timeline` and `term_bump`. Also does a bugfix to return the correct type from `delete_timeline`. Part of #8614	2025-03-14 00:00:37 +00:00
Erik Grinaker	d6d78a050f	pageserver: disable `l0_flush_wait_upload` by default (#11215 ) ## Problem This is already disabled in production, as it is replaced by L0 flush delays. It will be removed in a later PR, once the config option is no longer specified in production. ## Summary of changes Disable `l0_flush_wait_upload` by default.	2025-03-13 21:08:28 +00:00
Erik Grinaker	4ff000c042	pageserver: deflake `test_metadata_image_creation` (#11230 ) ## Problem `test_metadata_image_creation ` became flaky with #11212, since image compaction may yield to L0 compaction. ## Summary of changes Set `NoYield` when compacting in tenant tests.	2025-03-13 20:46:21 +00:00
Conrad Ludgate	9a3020d2ce	chore(proxy): pre-initialise metricvecs (#11226 ) ## Problem We noticed that error metrics didn't show for some services with light load. This is not great and can cause problems for dashboards/alerts ## Summary of changes Pre-initialise some metricvecs.	2025-03-13 20:23:53 +00:00
Alex Chi Z.	23b713900e	feat(storcon): passthrough ancestor detach behavior (#11199 ) ## Problem https://github.com/neondatabase/neon/issues/10310 https://github.com/neondatabase/neon/pull/11158 ## Summary of changes We need to passthrough the new detach behavior through the storcon API. Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-03-13 20:21:23 +00:00
Arpad Müller	b1a1be6a4c	switch pytests and neon_local to control_plane_hooks_api (#11195 ) We want to switch away from and deprecate the `--compute-hook-url` param for the storcon in favour of `--control-plane-url` because it allows us to construct urls with `notify-safekeepers`. This PR switches the pytests and neon_local from a `control_plane_compute_hook_api` to a new param named `control_plane_hooks_api` which is supposed to point to the parent of the `notify-attach` URL. We still support reading the old url from disk to not be too disruptive with existing deployments, but we just ignore it. Also add docs for the `notify-safekeepers` upcall API. Follow-up of #11173 Part of https://github.com/neondatabase/neon/issues/11163	2025-03-13 19:50:52 +00:00
Erik Grinaker	8afae9d03c	pageserver: enable `l0_flush_delay_threshold` by default (#11214 ) ## Problem `l0_flush_delay_threshold` has already been set to 30 in production for a couple of weeks. Let's harmonize the default. ## Summary of changes Update `DEFAULT_L0_FLUSH_DELAY_FACTOR` to 3 such that the default `l0_flush_delay_threshold` is `3 * compaction_threshold`. This differs from the production setting, which is hardcoded to 30 (with `compaction_threshold` at 10), and is more appropriate for any tenants that have custom `compaction_threshold` overrides.	2025-03-13 19:15:22 +00:00
JC Grünhage	066b0a1be9	fix(ci): correctly push neon-test-extensions in releases and to ghcr (#11225 ) ## Problem `ef0d4a48a` adjusted how we build container images and how we push them, and the neon-test-extensions image was overlooked. Additionally, it was also missed in `1f0dea9a1`, which pushed our container images to GHCR. ## Summary of changes Push neon-test-extensions to GHCR and also push release tags for it.	2025-03-13 18:18:55 +00:00
Konstantin Knizhnik	398d2794eb	Handle DEBUG_COMPARE_LOCAL mode in neon_zeroextend (#11220 ) ## Problem DEBUG_COMPARE_LOCAL is not supported in neon_zeroextend added in PG16 ## Summary of changes Add support of DEBUG_COMPARE_LOCAL in neon_zeroextend Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>	2025-03-13 16:30:32 +00:00
Erik Grinaker	3c3b9dc919	pageserver: enable `image_creation_preempt_threshold` by default (#11216 ) ## Problem This is already set in production, we should harmonize the default. ## Summary of changes Default `image_creation_preempt_threshold` to 3.	2025-03-13 16:28:21 +00:00
Christian Schwarz	ed31dd2a3c	pageserver: better observability for slow wait_lsn (#11176 ) # Problem We leave too few observability breadcrumbs in the case where wait_lsn is exceptionally slow. # Changes - refactor: extract the monitoring logic out of `log_slow` into `monitor_slow_future` - add global + per-timeline counter for time spent waiting for wait_lsn - It is updated while we're still waiting, similar to what we do for page_service response flush. - add per-timeline counterpair for started & finished wait_lsn count - add slow-logging to leave breadcrumbs in logs, not just metrics For the slow-logging, we need to consider not flooding the logs during a broker or network outage/blip. The solution is a "log-streak-level" concurrency limit per timeline. At any given time, there is at most one slow wait_lsn that is logging the "still running" and "completed" sequence of logs. Other concurrent slow wait_lsn's don't log at all. This leaves at least one breadcrumb in each timeline's logs if some wait_lsn was exceptionally slow during a given period. The full degree of slowness can then be determined by looking at the per-timeline metric. # Performance Reran the `bench_log_slow` benchmark, no difference, so, existing call sites are fine. We do use a Semaphore, but only try_acquire it _after_ things have already been determined to be slow. So, no baseline overhead anticipated. # Refs - https://github.com/neondatabase/cloud/issues/23486#issuecomment-2711587222	2025-03-13 15:03:53 +00:00
Conrad Ludgate	3dec117572	feat(compute_ctl): use TLS if configured (#10972 ) Closes: https://github.com/neondatabase/cloud/issues/22998 If control-plane reports that TLS should be used, load the certificates (and watch for updates), make sure postgres use them, and detects updates. Procedure: 1. Load certificates 2. Reconfigure postgres/pgbouncer 3. Loop on a timer until certificates have loaded 4. Go to 1 Notes: 1. We only run this procedure if requested on startup by control plane. 2. We needed to compile pgbouncer with openssl enabled 3. Postgres doesn't allow tls keys to be globally accessible - must be read only to the postgres user. I couldn't convince the autoscaling team to let me put this logic into the VM settings, so instead compute_ctl will copy the keys to be read-only by postgres. 4. To mitigate a race condition, we also verify that the key matches the cert.	2025-03-13 15:03:22 +00:00
Alex Chi Z.	b2286f5bcb	fix(pageserver): don't panic if gc-compaction find no keys (#11200 ) ## Problem There was a panic on staging because compaction didn't find any keys. This is possible if none of the layers selected for compaction contain any keys within the current shard. ## Summary of changes Make panic an error. In the future, we can try creating an empty image layer so that GC can clean up those layers. Otherwise, for now, we can only rely on shard ancestor compaction to remove these data. Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-03-13 14:38:45 +00:00
Erik Grinaker	c036fec065	pageserver: enable `compaction_l0_first` by default (#11212 ) ## Problem `compaction_l0_first` has already been enabled in production for a couple of weeks. ## Summary of changes Enable `compaction_l0_first` by default. Also set `CompactFlags::NoYield` in `timeline_checkpoint_handler`, to ensure explicitly requested compaction runs to completion. This endpoint is mainly used in tests, and caused some flakiness where tests expected compaction to complete.	2025-03-13 14:28:42 +00:00
Suhas Thalanki	8af19c6a13	added a copy statement back to reset dockerfile changes	2025-02-24 14:51:43 -05:00
Suhas Thalanki	2e7c56182f	Merge branch 'thesuhas/remove_anon_extension' of github.com:neondatabase/neon into thesuhas/remove_anon_extension	2025-02-24 14:50:25 -05:00
Suhas Thalanki	370dfee4c8	reset dockerfile to open a new PR for just the dockerfile	2025-02-24 14:50:09 -05:00
Suhas Thalanki	dc75717bc0	Merge branch 'main' into thesuhas/remove_anon_extension	2025-02-24 12:22:03 -05:00
Suhas Thalanki	b1ef701a06	Merge branch 'main' into thesuhas/remove_anon_extension	2025-02-24 11:27:23 -05:00
Suhas Thalanki	3a86c48367	Merge branch 'main' into thesuhas/remove_anon_extension	2025-02-24 09:32:05 -05:00
Suhas Thalanki	418305d250	fix: removed unused imports	2025-02-24 09:30:56 -05:00
Suhas Thalanki	9ba7421ec5	fix: cargo fmt file	2025-02-21 17:49:47 -05:00
Suhas Thalanki	f2d94e3cf3	fix: removed anon pg extension	2025-02-21 17:18:13 -05:00