update readme

s/ssl/tls
refactor pgbouncer tuning
2026-06-01 20:40:37 +00:00 · 2025-07-31 11:51:44 +01:00 · 2025-07-30 14:03:22 +01:00 · 2025-07-30 12:34:36 +01:00 · 2025-07-30 12:34:31 +01:00 · 2025-07-30 12:32:23 +01:00
127 changed files with 1257 additions and 4257 deletions
--- a/.github/workflows/large_oltp_growth.yml
+++ b/.github/workflows/large_oltp_growth.yml
@@ -2,6 +2,9 @@ name: large oltp growth
 # workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)

 on:
+  # uncomment to run on push for debugging your PR
+  # push:
+  #  branches: [ bodobolero/increase_large_oltp_workload ]

  schedule:
    # * is a special character in YAML so you have to quote this string
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -145,9 +145,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.98"
+version = "1.0.94"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7"
 dependencies = [
 "backtrace",
 ]
@@ -2402,9 +2402,9 @@ dependencies = [

 [[package]]
 name = "futures"
-version = "0.3.31"
+version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
 dependencies = [
 "futures-channel",
 "futures-core",
@@ -2433,9 +2433,9 @@ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"

 [[package]]
 name = "futures-executor"
-version = "0.3.31"
+version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
 dependencies = [
 "futures-core",
 "futures-task",
@@ -2510,33 +2510,6 @@ dependencies = [
 "slab",
 ]

-[[package]]
-name = "gcp_auth"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dbf67f30198e045a039264c01fb44659ce82402d7771c50938beb41a5ac87733"
-dependencies = [
- "async-trait",
- "base64 0.22.1",
- "bytes",
- "chrono",
- "home",
- "http 1.3.1",
- "http-body-util",
- "hyper 1.4.1",
- "hyper-rustls 0.27.5",
- "hyper-util",
- "ring",
- "rustls-pemfile 2.1.1",
- "serde",
- "serde_json",
- "thiserror 1.0.69",
- "tokio",
- "tracing",
- "tracing-futures",
- "url",
-]
-
 [[package]]
 name = "gen_ops"
 version = "0.4.0"
@@ -2867,15 +2840,6 @@ dependencies = [
 "digest",
 ]

-[[package]]
-name = "home"
-version = "0.5.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
-dependencies = [
- "windows-sys 0.59.0",
-]
-
 [[package]]
 name = "hostname"
 version = "0.4.0"
@@ -3111,24 +3075,6 @@ dependencies = [
 "tower-service",
 ]

-[[package]]
-name = "hyper-rustls"
-version = "0.27.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
-dependencies = [
- "futures-util",
- "http 1.3.1",
- "hyper 1.4.1",
- "hyper-util",
- "rustls 0.23.29",
- "rustls-native-certs 0.8.0",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls 0.26.2",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-timeout"
 version = "0.5.1"
@@ -3906,16 +3852,6 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"

-[[package]]
-name = "mime_guess"
-version = "2.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
-dependencies = [
- "mime",
- "unicase",
-]
-
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -5936,11 +5872,8 @@ dependencies = [
 "bytes",
 "camino",
 "camino-tempfile",
- "chrono",
 "futures",
 "futures-util",
- "gcp_auth",
- "http 1.3.1",
 "http-body-util",
 "http-types",
 "humantime-serde",
@@ -5961,9 +5894,7 @@ dependencies = [
 "tokio-util",
 "toml_edit",
 "tracing",
- "url",
 "utils",
- "uuid",
 ]

 [[package]]
@@ -5993,7 +5924,6 @@ dependencies = [
 "js-sys",
 "log",
 "mime",
- "mime_guess",
 "once_cell",
 "percent-encoding",
 "pin-project-lite",
@@ -8103,16 +8033,6 @@ dependencies = [
 "tracing-subscriber",
 ]

-[[package]]
-name = "tracing-futures"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
-dependencies = [
- "pin-project",
- "tracing",
-]
-
 [[package]]
 name = "tracing-log"
 version = "0.2.0"
@@ -8288,12 +8208,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "unicase"
-version = "2.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
-
 [[package]]
 name = "unicode-bidi"
 version = "0.3.17"
--- a/8
+++ b/8
@@ -78,7 +78,6 @@ WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
 ARG ADDITIONAL_RUSTFLAGS=""
-ARG IO_ALIGNMENT=512
 ENV CARGO_FEATURES="default"

 # 3. Build cargo dependencies. Note that this step doesn't depend on anything else than
@@ -102,12 +101,7 @@ COPY --chown=nonroot --from=plan     /home/nonroot/Cargo.lock               Carg
 RUN  --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \
    set -e \
    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \
-        export CARGO_FEATURES="${CARGO_FEATURES},rest_broker"; \
-    fi \
-    && if [ "$IO_ALIGNMENT" = "4k" ]; then \
-        export CARGO_FEATURES="${CARGO_FEATURES},io-align-4k"; \
-    elif [ "$IO_ALIGNMENT" = "512" ]; then \
-        export CARGO_FEATURES="${CARGO_FEATURES},io-align-512"; \
+        export CARGO_FEATURES="rest_broker"; \
    fi \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo auditable build \
      --features $CARGO_FEATURES \
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
-[![Neon](https://github.com/user-attachments/assets/fd91da5f-44a9-41c7-9075-36a5b5608083)](https://neon.com)
+[![Neon](https://github.com/neondatabase/neon/assets/11527560/f15a17f0-836e-40c5-b35d-030606a6b660)](https://neon.tech)



 # Neon

-Neon is an open-source serverless Postgres database platform. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
+Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.

 ## Quick start
-Try the [Neon Free Tier](https://neon.com/signup) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.com/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.com/docs/connect/connect-from-any-app/) for connection instructions.
+Try the [Neon Free Tier](https://neon.tech/github) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.

 Alternatively, compile and run the project [locally](#running-local-installation).

@@ -301,8 +301,8 @@ See also README files in some source directories, and `rustdoc` style documentat

 Other resources:

- [SELECT 'Hello, World'](https://neon.com/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.com/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
+- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
+- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
 - [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series

 ### Postgres-specific terms
--- a/compute/etc/sql_exporter/checkpoints_req.17.sql
+++ b/compute/etc/sql_exporter/checkpoints_req.17.sql
@@ -1 +1 @@
-SELECT num_requested AS checkpoints_req FROM pg_catalog.pg_stat_checkpointer;
+SELECT num_requested AS checkpoints_req FROM pg_stat_checkpointer;
--- a/compute/etc/sql_exporter/checkpoints_req.sql
+++ b/compute/etc/sql_exporter/checkpoints_req.sql
@@ -1 +1 @@
-SELECT checkpoints_req FROM pg_catalog.pg_stat_bgwriter;
+SELECT checkpoints_req FROM pg_stat_bgwriter;
--- a/compute/etc/sql_exporter/checkpoints_timed.sql
+++ b/compute/etc/sql_exporter/checkpoints_timed.sql
@@ -1 +1 @@
-SELECT checkpoints_timed FROM pg_catalog.pg_stat_bgwriter;
+SELECT checkpoints_timed FROM pg_stat_bgwriter;
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
@@ -1 +1 @@
-SELECT (neon.backpressure_throttling_time()::pg_catalog.float8 / 1000000) AS throttled;
+SELECT (neon.backpressure_throttling_time()::float8 / 1000000) AS throttled;
--- a/compute/etc/sql_exporter/compute_current_lsn.sql
+++ b/compute/etc/sql_exporter/compute_current_lsn.sql
@@ -1,4 +1,4 @@
 SELECT CASE
-  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_catalog.pg_last_wal_replay_lsn() - '0/0')::pg_catalog.FLOAT8
-  ELSE (pg_catalog.pg_current_wal_lsn() - '0/0')::pg_catalog.FLOAT8
+  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
+  ELSE (pg_current_wal_lsn() - '0/0')::FLOAT8
 END AS lsn;
--- a/compute/etc/sql_exporter/compute_logical_snapshot_files.sql
+++ b/compute/etc/sql_exporter/compute_logical_snapshot_files.sql
@@ -1,7 +1,7 @@
 SELECT
-  (SELECT setting FROM pg_catalog.pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
+  (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
  -- These temporary snapshot files are renamed to the actual snapshot files
  -- after they are completely built. We only WAL-log the completely built
  -- snapshot files
-  (SELECT COUNT(*) FROM pg_catalog.pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
+  (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
--- a/compute/etc/sql_exporter/compute_logical_snapshots_bytes.15.sql
+++ b/compute/etc/sql_exporter/compute_logical_snapshots_bytes.15.sql
@@ -1,7 +1,7 @@
 SELECT
-  (SELECT pg_catalog.current_setting('neon.timeline_id')) AS timeline_id,
+  (SELECT current_setting('neon.timeline_id')) AS timeline_id,
  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
  -- These temporary snapshot files are renamed to the actual snapshot files
  -- after they are completely built. We only WAL-log the completely built
  -- snapshot files
-  (SELECT COALESCE(pg_catalog.sum(size), 0) FROM pg_catalog.pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS logical_snapshots_bytes;
+  (SELECT COALESCE(sum(size), 0) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS logical_snapshots_bytes;
--- a/compute/etc/sql_exporter/compute_logical_snapshots_bytes.sql
+++ b/compute/etc/sql_exporter/compute_logical_snapshots_bytes.sql
@@ -1,9 +1,9 @@
 SELECT
-  (SELECT setting FROM pg_catalog.pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
+  (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
  -- These temporary snapshot files are renamed to the actual snapshot files
  -- after they are completely built. We only WAL-log the completely built
  -- snapshot files
-  (SELECT COALESCE(pg_catalog.sum((pg_catalog.pg_stat_file('pg_logical/snapshots/' || name, missing_ok => true)).size), 0)
-   FROM (SELECT * FROM pg_catalog.pg_ls_dir('pg_logical/snapshots') WHERE pg_ls_dir LIKE '%.snap') AS name
+  (SELECT COALESCE(sum((pg_stat_file('pg_logical/snapshots/' || name, missing_ok => true)).size), 0)
+    FROM (SELECT * FROM pg_ls_dir('pg_logical/snapshots') WHERE pg_ls_dir LIKE '%.snap') AS name
  ) AS logical_snapshots_bytes;
--- a/compute/etc/sql_exporter/compute_max_connections.sql
+++ b/compute/etc/sql_exporter/compute_max_connections.sql
@@ -1 +1 @@
-SELECT pg_catalog.current_setting('max_connections') AS max_connections;
+SELECT current_setting('max_connections') as max_connections;
--- a/compute/etc/sql_exporter/compute_pg_oldest_frozen_xid_age.sql
+++ b/compute/etc/sql_exporter/compute_pg_oldest_frozen_xid_age.sql
@@ -1,4 +1,4 @@
 SELECT datname database_name,
-   pg_catalog.age(datfrozenxid) frozen_xid_age
-FROM pg_catalog.pg_database
+  age(datfrozenxid) frozen_xid_age
+FROM pg_database
 ORDER BY frozen_xid_age DESC LIMIT 10;
--- a/compute/etc/sql_exporter/compute_pg_oldest_mxid_age.sql
+++ b/compute/etc/sql_exporter/compute_pg_oldest_mxid_age.sql
@@ -1,4 +1,4 @@
 SELECT datname database_name,
-  pg_catalog.mxid_age(datminmxid) min_mxid_age
-FROM pg_catalog.pg_database
+  mxid_age(datminmxid) min_mxid_age
+FROM pg_database
 ORDER BY min_mxid_age DESC LIMIT 10;
--- a/compute/etc/sql_exporter/compute_receive_lsn.sql
+++ b/compute/etc/sql_exporter/compute_receive_lsn.sql
@@ -1,4 +1,4 @@
 SELECT CASE
-  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_catalog.pg_last_wal_receive_lsn() - '0/0')::pg_catalog.FLOAT8
+  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
  ELSE 0
 END AS lsn;
--- a/compute/etc/sql_exporter/compute_subscriptions_count.sql
+++ b/compute/etc/sql_exporter/compute_subscriptions_count.sql
@@ -1 +1 @@
-SELECT subenabled::pg_catalog.text AS enabled, pg_catalog.count(*) AS subscriptions_count FROM pg_catalog.pg_subscription GROUP BY subenabled;
+SELECT subenabled::text AS enabled, count(*) AS subscriptions_count FROM pg_subscription GROUP BY subenabled;
--- a/compute/etc/sql_exporter/connection_counts.sql
+++ b/compute/etc/sql_exporter/connection_counts.sql
@@ -1 +1 @@
-SELECT datname, state, pg_catalog.count(*) AS count FROM pg_catalog.pg_stat_activity WHERE state <> '' GROUP BY datname, state;
+SELECT datname, state, count(*) AS count FROM pg_stat_activity WHERE state <> '' GROUP BY datname, state;
--- a/compute/etc/sql_exporter/db_total_size.sql
+++ b/compute/etc/sql_exporter/db_total_size.sql
@@ -1,5 +1,5 @@
-SELECT pg_catalog.sum(pg_catalog.pg_database_size(datname)) AS total
-FROM pg_catalog.pg_database
+SELECT sum(pg_database_size(datname)) AS total
+FROM pg_database
 -- Ignore invalid databases, as we will likely have problems with
 -- getting their size from the Pageserver.
 WHERE datconnlimit != -2;
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql
@@ -3,6 +3,6 @@
 -- minutes.

 SELECT
-  x::pg_catalog.text AS duration_seconds,
+  x::text as duration_seconds,
  neon.approximate_working_set_size_seconds(x) AS size
 FROM (SELECT generate_series * 60 AS x FROM generate_series(1, 60)) AS t (x);
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.sql
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.sql
@@ -3,6 +3,6 @@

 SELECT
  x AS duration,
-  neon.approximate_working_set_size_seconds(extract('epoch' FROM x::pg_catalog.interval)::pg_catalog.int4) AS size FROM (
+  neon.approximate_working_set_size_seconds(extract('epoch' FROM x::interval)::int) AS size FROM (
    VALUES ('5m'), ('15m'), ('1h')
  ) AS t (x);
--- a/compute/etc/sql_exporter/lfc_cache_size_limit.sql
+++ b/compute/etc/sql_exporter/lfc_cache_size_limit.sql
@@ -1 +1 @@
-SELECT pg_catalog.pg_size_bytes(pg_catalog.current_setting('neon.file_cache_size_limit')) AS lfc_cache_size_limit;
+SELECT pg_size_bytes(current_setting('neon.file_cache_size_limit')) AS lfc_cache_size_limit;
--- a/compute/etc/sql_exporter/logical_slot_restart_lsn.sql
+++ b/compute/etc/sql_exporter/logical_slot_restart_lsn.sql
@@ -1,3 +1,3 @@
-SELECT slot_name, (restart_lsn - '0/0')::pg_catalog.FLOAT8 AS restart_lsn
-FROM pg_catalog.pg_replication_slots
+SELECT slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
+FROM pg_replication_slots
 WHERE slot_type = 'logical';
--- a/compute/etc/sql_exporter/max_cluster_size.sql
+++ b/compute/etc/sql_exporter/max_cluster_size.sql
@@ -1 +1 @@
-SELECT setting::pg_catalog.int4 AS max_cluster_size FROM pg_catalog.pg_settings WHERE name = 'neon.max_cluster_size';
+SELECT setting::int AS max_cluster_size FROM pg_settings WHERE name = 'neon.max_cluster_size';
--- a/compute/etc/sql_exporter/pg_stats_userdb.sql
+++ b/compute/etc/sql_exporter/pg_stats_userdb.sql
@@ -1,13 +1,13 @@
 -- We export stats for 10 non-system databases. Without this limit it is too
 -- easy to abuse the system by creating lots of databases.

-SELECT pg_catalog.pg_database_size(datname) AS db_size,
+SELECT pg_database_size(datname) AS db_size,
  deadlocks,
  tup_inserted AS inserted,
  tup_updated AS updated,
  tup_deleted AS deleted,
  datname
-FROM pg_catalog.pg_stat_database
+FROM pg_stat_database
 WHERE datname IN (
  SELECT datname FROM pg_database
  -- Ignore invalid databases, as we will likely have problems with
--- a/compute/etc/sql_exporter/replication_delay_bytes.sql
+++ b/compute/etc/sql_exporter/replication_delay_bytes.sql
@@ -3,4 +3,4 @@
 -- replay LSN may have advanced past the receive LSN we are using for the
 -- calculation.

-SELECT GREATEST(0, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_receive_lsn(), pg_catalog.pg_last_wal_replay_lsn())) AS replication_delay_bytes;
+SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
--- a/compute/etc/sql_exporter/replication_delay_seconds.sql
+++ b/compute/etc/sql_exporter/replication_delay_seconds.sql
@@ -1,5 +1,5 @@
 SELECT
  CASE
-    WHEN pg_catalog.pg_last_wal_receive_lsn() = pg_catalog.pg_last_wal_replay_lsn() THEN 0
-    ELSE GREATEST(0, EXTRACT (EPOCH FROM pg_catalog.now() - pg_catalog.pg_last_xact_replay_timestamp()))
+    WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
+    ELSE GREATEST(0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
  END AS replication_delay_seconds;
--- a/compute/etc/sql_exporter/retained_wal.sql
+++ b/compute/etc/sql_exporter/retained_wal.sql
@@ -1,10 +1,10 @@
 SELECT
  slot_name,
-  pg_catalog.pg_wal_lsn_diff(
+  pg_wal_lsn_diff(
    CASE
-      WHEN pg_catalog.pg_is_in_recovery() THEN pg_catalog.pg_last_wal_replay_lsn()
-      ELSE pg_catalog.pg_current_wal_lsn()
+      WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn()
+      ELSE pg_current_wal_lsn()
    END,
-    restart_lsn)::pg_catalog.FLOAT8 AS retained_wal
-FROM pg_catalog.pg_replication_slots
+    restart_lsn)::FLOAT8 AS retained_wal
+FROM pg_replication_slots
 WHERE active = false;
--- a/compute/etc/sql_exporter/wal_is_lost.sql
+++ b/compute/etc/sql_exporter/wal_is_lost.sql
@@ -4,4 +4,4 @@ SELECT
    WHEN wal_status = 'lost' THEN 1
    ELSE 0
  END AS wal_is_lost
-FROM pg_catalog.pg_replication_slots;
+FROM pg_replication_slots;
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -57,6 +57,9 @@ stateDiagram-v2
  RefreshConfigurationPending --> RefreshConfiguration: Received compute spec and started configuration
  RefreshConfiguration --> Running : Compute has been re-configured
  RefreshConfiguration --> RefreshConfigurationPending : Configuration failed and to be retried
+  Running --> Reloading : Local changes (TLS certificate renewal) were detected and postgres is being reloaded
+  Reloading --> Running : Postgres was reloaded
+  Reloading --> Failed : Failed to reload postgres
  TerminationPendingFast --> Terminated compute with 30s delay for cplane to inspect status
  TerminationPendingImmediate --> Terminated : Terminated compute immediately
  Failed --> RefreshConfigurationPending : Received a /refresh_configuration request
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -279,7 +279,7 @@ fn main() -> Result<()> {
        config,
    )?;

-    let exit_code = compute_node.run().context("running compute node")?;
+    let exit_code = compute_node.run()?;

    scenario.teardown();

--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -24,9 +24,9 @@ pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
    });

    let query = "
-    INSERT INTO public.health_check VALUES (1, pg_catalog.now())
+    INSERT INTO health_check VALUES (1, now())
        ON CONFLICT (id) DO UPDATE
-         SET updated_at = pg_catalog.now();";
+         SET updated_at = now();";

    match client.simple_query(query).await {
        Result::Ok(result) => {
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -28,16 +28,12 @@ use std::path::Path;
 use std::process::{Command, Stdio};
 use std::str::FromStr;
 use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-use std::sync::{Arc, Condvar, Mutex, RwLock};
+use std::sync::{Arc, Condvar, Mutex, MutexGuard, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
 use tokio::{spawn, sync::watch, task::JoinHandle, time};
-use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
-use utils::backoff::{
-    DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff_duration,
-};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 use utils::measured_stream::MeasuredReader;
@@ -61,7 +57,6 @@ use crate::rsyslog::{
 use crate::spec::*;
 use crate::swap::resize_swap;
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
-use crate::tls::watch_cert_for_changes;
 use crate::{config, extension_server, local_proxy};

 pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
@@ -196,7 +191,6 @@ pub struct ComputeState {
    pub startup_span: Option<tracing::span::Span>,

    pub lfc_prewarm_state: LfcPrewarmState,
-    pub lfc_prewarm_token: CancellationToken,
    pub lfc_offload_state: LfcOffloadState,

    /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
@@ -222,7 +216,6 @@ impl ComputeState {
            lfc_offload_state: LfcOffloadState::default(),
            terminate_flush_lsn: None,
            promote_state: None,
-            lfc_prewarm_token: CancellationToken::new(),
        }
    }

@@ -589,7 +582,7 @@ impl ComputeNode {
        // that can affect `compute_ctl` and prevent it from properly configuring the database schema.
        // Unset them via connection string options before connecting to the database.
        // N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
-        const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path='' -c statement_timeout=0 -c pgaudit.log=none";
+        const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0 -c pgaudit.log=none";
        let options = match conn_conf.get_options() {
            // Allow the control plane to override any options set by the
            // compute
@@ -848,14 +841,11 @@ impl ComputeNode {
        let mut pre_tasks = tokio::task::JoinSet::new();

        // Make sure TLS certificates are properly loaded and in the right place.
-        if self.compute_ctl_config.tls.is_some() {
+        let tls_task = self.compute_ctl_config.tls.as_ref().map(|tls_config| {
            let this = self.clone();
-            pre_tasks.spawn(async move {
-                this.watch_cert_for_changes().await;
-
-                Ok::<(), anyhow::Error>(())
-            });
-        }
+            let tls_config = tls_config.clone();
+            tokio::task::spawn_blocking(|| this.watch_cert_for_changes(tls_config))
+        });

        let tls_config = self.tls_config(&pspec.spec);

@@ -910,6 +900,13 @@ impl ComputeNode {
            });
        }

+        // Wait for TLS certificates to be issued before updating pgbouncer and local proxy.
+        let rt = tokio::runtime::Handle::current();
+        if let Some(tls_task) = tls_task {
+            rt.block_on(tls_task)
+                .context("TLS certificate renewal task panicked")?;
+        }
+
        // tune pgbouncer
        if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
            info!("tuning pgbouncer");
@@ -992,7 +989,6 @@ impl ComputeNode {
        let _configurator_handle = launch_configurator(self);

        // Wait for all the pre-tasks to finish before starting postgres
-        let rt = tokio::runtime::Handle::current();
        while let Some(res) = rt.block_on(pre_tasks.join_next()) {
            res??;
        }
@@ -1560,41 +1556,6 @@ impl ComputeNode {
        Ok(lsn)
    }

-    fn sync_safekeepers_with_retries(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
-        let max_retries = 5;
-        let mut attempts = 0;
-        loop {
-            let result = self.sync_safekeepers(storage_auth_token.clone());
-            match &result {
-                Ok(_) => {
-                    if attempts > 0 {
-                        tracing::info!("sync_safekeepers succeeded after {attempts} retries");
-                    }
-                    return result;
-                }
-                Err(e) if attempts < max_retries => {
-                    tracing::info!(
-                        "sync_safekeepers failed, will retry (attempt {attempts}): {e:#}"
-                    );
-                }
-                Err(err) => {
-                    tracing::warn!(
-                        "sync_safekeepers still failed after {attempts} retries, giving up: {err:?}"
-                    );
-                    return result;
-                }
-            }
-            // sleep and retry
-            let backoff = exponential_backoff_duration(
-                attempts,
-                DEFAULT_BASE_BACKOFF_SECONDS,
-                DEFAULT_MAX_BACKOFF_SECONDS,
-            );
-            std::thread::sleep(backoff);
-            attempts += 1;
-        }
-    }
-
    /// Do all the preparations like PGDATA directory creation, configuration,
    /// safekeepers sync, basebackup, etc.
    #[instrument(skip_all)]
@@ -1630,7 +1591,7 @@ impl ComputeNode {
                    lsn
                } else {
                    info!("starting safekeepers syncing");
-                    self.sync_safekeepers_with_retries(pspec.storage_auth_token.clone())
+                    self.sync_safekeepers(pspec.storage_auth_token.clone())
                        .with_context(|| "failed to sync safekeepers")?
                };
                info!("safekeepers synced at LSN {}", lsn);
@@ -1925,7 +1886,7 @@ impl ComputeNode {

                    // It doesn't matter what were the options before, here we just want
                    // to connect and create a new superuser role.
-                    const ZENITH_OPTIONS: &str = "-c role=zenith_admin -c default_transaction_read_only=off -c search_path='' -c statement_timeout=0";
+                    const ZENITH_OPTIONS: &str = "-c role=zenith_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
                    zenith_admin_conf.options(ZENITH_OPTIONS);

                    let mut client =
@@ -1990,10 +1951,7 @@ impl ComputeNode {
                .clone(),
        );

-        let mut tls_config = None::<TlsConfig>;
-        if spec.features.contains(&ComputeFeature::TlsExperimental) {
-            tls_config = self.compute_ctl_config.tls.clone();
-        }
+        let tls_config = self.tls_config(&spec);

        self.update_installed_extensions_collection_interval(&spec);

@@ -2175,6 +2133,60 @@ impl ComputeNode {
        Ok(())
    }

+    /// Reload pgbouncer and local_proxy (each only if configured in the spec), then postgres.
+    #[instrument(skip_all)]
+    pub fn reload(&self, spec: ComputeSpec) -> Result<()> {
+        let rt = tokio::runtime::Handle::current();
+        if spec.pgbouncer_settings.is_some() {
+            rt.block_on(reload_pgbouncer())?;
+        }
+        if spec.local_proxy_config.is_some() {
+            local_proxy::reload()?;
+        }
+        self.pg_reload_conf()?;
+
+        let unknown_op = "unknown".to_string();
+        let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
+        info!("finished reload of compute node for operation {op_id}");
+
+        Ok(())
+    }
+
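+    /// Run `f` with the compute status set to `Reloading`, intended to ensure this
+    /// thread is the only one issuing signals to postgres in the meantime.
+    ///
+    /// The `state` guard is dropped while `f` runs and the mutex is re-locked afterwards.
+    /// On success the previous status is restored; if `f` errors, it becomes `Failed`.
+    /// Panics if `pspec` is unset, or if the status is not `Reloading` after `f` returns.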
+    pub fn lock_while_reloading<T>(
+        &self,
+        mut state: MutexGuard<'_, ComputeState>,
+        f: impl FnOnce(ComputeSpec) -> Result<T>,
+    ) -> Result<T> {
+        let old_status = state.status;
+
+        // transition to the reloading state.
+        state.set_status(ComputeStatus::Reloading, &self.state_changed);
+        let spec = state.pspec.as_ref().unwrap().spec.clone();
+        // unlock while reloading, so we don't block other tasks.
+        drop(state);
+
+        let res = f(spec);
+
+        let new_status = if res.is_ok() {
+            old_status
+        } else {
+            ComputeStatus::Failed
+        };
+
+        let mut state = self.state.lock().unwrap();
+        // make sure our invariants are upheld
+        assert_eq!(state.status, ComputeStatus::Reloading);
+        state.set_status(new_status, &self.state_changed);
+
+        res
+    }
+
    #[instrument(skip_all)]
    pub fn configure_as_primary(&self, compute_state: &ComputeState) -> Result<()> {
        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
@@ -2209,57 +2221,103 @@ impl ComputeNode {
        Ok(())
    }

-    pub async fn watch_cert_for_changes(self: Arc<Self>) {
-        // update status on cert renewal
-        if let Some(tls_config) = &self.compute_ctl_config.tls {
-            let tls_config = tls_config.clone();
+    pub fn watch_cert_for_changes(self: Arc<Self>, tls_config: TlsConfig) {
+        // wait until the cert exists.
+        let mut digest = crate::tls::compute_digest(&tls_config.cert_path);
+        info!(
+            cert_path = tls_config.cert_path,
+            key_path = tls_config.key_path,
+            "TLS certificates found"
+        );

-            // wait until the cert exists.
-            let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
+        // ensure the keys are saved before continuing.
+        let key_pair = crate::tls::load_certs_blocking(&tls_config);
+        while let Err(e) =
+            crate::tls::update_key_path_blocking(Path::new(&self.params.pgdata), &key_pair)
+        {
+            error!("could not save TLS certificates: {e}");
+            std::thread::sleep(Duration::from_millis(20));
+        }

-            tokio::task::spawn_blocking(move || {
-                let handle = tokio::runtime::Handle::current();
-                'cert_update: loop {
-                    // let postgres/pgbouncer/local_proxy know the new cert/key exists.
-                    // we need to wait until it's configurable first.
+        tokio::task::spawn_blocking(move || {
+            'cert_update: loop {
+                // wait for a new certificate update
+                let new_digest = crate::tls::wait_until_cert_changed(digest, &tls_config.cert_path);

-                    let mut state = self.state.lock().unwrap();
-                    'status_update: loop {
-                        match state.status {
-                            // let's update the state to config pending
-                            ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
-                                state.set_status(
-                                    ComputeStatus::ConfigurationPending,
-                                    &self.state_changed,
-                                );
-                                break 'status_update;
-                            }
+                // load the corresponding keys
+                let key_pair = crate::tls::load_certs_blocking(&tls_config);

-                            // exit loop
-                            ComputeStatus::Failed
-                            | ComputeStatus::TerminationPendingFast
-                            | ComputeStatus::TerminationPendingImmediate
-                            | ComputeStatus::Terminated => break 'cert_update,
+                // let postgres/pgbouncer/local_proxy know the new cert/key exists.
+                // we need to wait until it's configurable first.

-                            // wait
-                            ComputeStatus::Init
-                            | ComputeStatus::Configuration
-                            | ComputeStatus::RefreshConfiguration
-                            | ComputeStatus::RefreshConfigurationPending
-                            | ComputeStatus::Empty => {
-                                state = self.state_changed.wait(state).unwrap();
-                            }
+                let mut state = self.state.lock().unwrap();
+                'status_update: loop {
+                    match state.status {
+                        // compute is running, so we can proceed with the reload
+                        ComputeStatus::Running => {
+                            info!("reloading compute due to TLS certificate renewal");
+                            break 'status_update;
+                        }
+
+                        // exit loop
+                        ComputeStatus::Failed
+                        | ComputeStatus::TerminationPendingFast
+                        | ComputeStatus::TerminationPendingImmediate
+                        | ComputeStatus::Terminated => break 'cert_update,
+
+                        // wait
+                        ComputeStatus::Init
+                        | ComputeStatus::Configuration
+                        | ComputeStatus::ConfigurationPending
+                        | ComputeStatus::RefreshConfiguration
+                        | ComputeStatus::RefreshConfigurationPending
+                        | ComputeStatus::Reloading
+                        | ComputeStatus::Empty => {
+                            state = self.state_changed.wait(state).unwrap();
                        }
                    }
-                    drop(state);
+                }

-                    // wait for a new certificate update
-                    if handle.block_on(cert_watch.changed()).is_err() {
-                        break;
+                let result = self.lock_while_reloading(state, |spec| {
+                    // ensure the keys are saved before continuing.
+                    // we do this while holding the 'reloading' state so that we know we're not interfering with any
+                    // active configuration stages.
+                    if let Err(e) = crate::tls::update_key_path_blocking(
+                        Path::new(&self.params.pgdata),
+                        &key_pair,
+                    ) {
+                        return Ok(Err(e));
+                    }
+
+                    // reload postgres/pgbouncer/local_proxy to pick up our new certificates.
+                    self.reload(spec)?;
+
+                    Ok(Ok(()))
+                });
+
+                match result {
+                    // Reload failed. Compute is in a bad state.
+                    Err(e) => {
+                        error!("could not reload compute node: {}", e);
+                        return;
+                    }
+                    // Updating the certificates failed. Retry
+                    Ok(Err(e)) => {
+                        error!("could not save TLS certificates: {e}");
+                        std::thread::sleep(Duration::from_millis(20));
+                    }
+                    // Successful. Acknowledge that we've saved these certificates.
+                    Ok(Ok(())) => {
+                        digest = new_digest;
+                        info!(
+                            cert_path = tls_config.cert_path,
+                            key_path = tls_config.key_path,
+                            "TLS certificates renewed",
+                        );
                    }
                }
-            });
-        }
+            }
+        });
    }

    pub fn tls_config(&self, spec: &ComputeSpec) -> &Option<TlsConfig> {
@@ -2380,13 +2438,13 @@ impl ComputeNode {
        let result = client
            .simple_query(
                "SELECT
-    pg_catalog.row_to_json(pss)
+    row_to_json(pg_stat_statements)
 FROM
-    public.pg_stat_statements pss
+    pg_stat_statements
 WHERE
-    pss.userid != 'cloud_admin'::pg_catalog.regrole::pg_catalog.oid
+    userid != 'cloud_admin'::regrole::oid
 ORDER BY
-    (pss.mean_exec_time + pss.mean_plan_time) DESC
+    (mean_exec_time + mean_plan_time) DESC
 LIMIT 100",
            )
            .await;
@@ -2514,11 +2572,11 @@ LIMIT 100",

        // check the role grants first - to gracefully handle read-replicas.
        let select = "SELECT privilege_type
-            FROM pg_catalog.pg_namespace
-                JOIN LATERAL (SELECT * FROM aclexplode(nspacl) AS x) AS acl ON true
-                JOIN pg_catalog.pg_user users ON acl.grantee = users.usesysid
-            WHERE users.usename OPERATOR(pg_catalog.=) $1::pg_catalog.name
-                AND nspname OPERATOR(pg_catalog.=) $2::pg_catalog.name";
+            FROM pg_namespace
+                JOIN LATERAL (SELECT * FROM aclexplode(nspacl) AS x) acl ON true
+                JOIN pg_user users ON acl.grantee = users.usesysid
+            WHERE users.usename = $1
+                AND nspname = $2";
        let rows = db_client
            .query(select, &[role_name, schema_name])
            .await
@@ -2587,9 +2645,8 @@ LIMIT 100",
                .await
                .with_context(|| format!("Failed to execute query: {query}"))?;
        } else {
-            let query = format!(
-                "CREATE EXTENSION IF NOT EXISTS {ext_name} WITH SCHEMA public VERSION {quoted_version}"
-            );
+            let query =
+                format!("CREATE EXTENSION IF NOT EXISTS {ext_name} WITH VERSION {quoted_version}");
            db_client
                .simple_query(&query)
                .await
@@ -2780,7 +2837,7 @@ LIMIT 100",
                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
                if matches!(
                    prewarm_state,
-                    LfcPrewarmState::Completed { .. }
+                    LfcPrewarmState::Completed
                        | LfcPrewarmState::NotPrewarmed
                        | LfcPrewarmState::Skipped
                ) {
--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -7,11 +7,18 @@ use http::StatusCode;
 use reqwest::Client;
 use std::mem::replace;
 use std::sync::Arc;
-use std::time::Instant;
-use tokio::{io::AsyncReadExt, select, spawn};
-use tokio_util::sync::CancellationToken;
+use tokio::{io::AsyncReadExt, spawn};
 use tracing::{error, info};

+#[derive(serde::Serialize, Default)]
+pub struct LfcPrewarmStateWithProgress {
+    #[serde(flatten)]
+    base: LfcPrewarmState,
+    total: i32,
+    prewarmed: i32,
+    skipped: i32,
+}
+
 /// A pair of url and a token to query endpoint storage for LFC prewarm-related tasks
 struct EndpointStoragePair {
    url: String,
@@ -20,7 +27,7 @@ struct EndpointStoragePair {

 const KEY: &str = "lfc_state";
 impl EndpointStoragePair {
-    /// endpoint_id is set to None while prewarming from other endpoint, see compute_promote.rs
+    /// endpoint_id is set to None while prewarming from other endpoint, see replica promotion
    /// If not None, takes precedence over pspec.spec.endpoint_id
    fn from_spec_and_endpoint(
        pspec: &crate::compute::ParsedSpec,
@@ -46,8 +53,36 @@ impl EndpointStoragePair {
 }

 impl ComputeNode {
-    pub async fn lfc_prewarm_state(&self) -> LfcPrewarmState {
-        self.state.lock().unwrap().lfc_prewarm_state.clone()
+    // Returns the stored prewarm state plus total/prewarmed/skipped segment counts from postgres,
+    // e.g. to see progress after a failed prewarm. Counts stay 0 if postgres can't be queried.
+    pub async fn lfc_prewarm_state(&self) -> LfcPrewarmStateWithProgress {
+        info!("requesting LFC prewarm state from postgres");
+        let mut state = LfcPrewarmStateWithProgress::default();
+        {
+            state.base = self.state.lock().unwrap().lfc_prewarm_state.clone();
+        }
+
+        let client = match ComputeNode::get_maintenance_client(&self.tokio_conn_conf).await {
+            Ok(client) => client,
+            Err(err) => {
+                error!(%err, "connecting to postgres");
+                return state;
+            }
+        };
+        let row = match client
+            .query_one("select * from neon.get_prewarm_info()", &[])
+            .await
+        {
+            Ok(row) => row,
+            Err(err) => {
+                error!(%err, "querying LFC prewarm status");
+                return state;
+            }
+        };
+        state.total = row.try_get(0).unwrap_or_default();
+        state.prewarmed = row.try_get(1).unwrap_or_default();
+        state.skipped = row.try_get(2).unwrap_or_default();
+        state
    }

    pub fn lfc_offload_state(&self) -> LfcOffloadState {
@@ -57,35 +92,34 @@ impl ComputeNode {
    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
    /// Has a failpoint "compute-prewarm"
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
-        let token: CancellationToken;
        {
-            let state = &mut self.state.lock().unwrap();
-            token = state.lfc_prewarm_token.clone();
-            if let LfcPrewarmState::Prewarming =
-                replace(&mut state.lfc_prewarm_state, LfcPrewarmState::Prewarming)
-            {
+            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
+            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
                return false;
            }
        }
        crate::metrics::LFC_PREWARMS.inc();

-        let this = self.clone();
+        let cloned = self.clone();
        spawn(async move {
-            let prewarm_state = match this.prewarm_impl(from_endpoint, token).await {
-                Ok(state) => state,
+            let state = match cloned.prewarm_impl(from_endpoint).await {
+                Ok(true) => LfcPrewarmState::Completed,
+                Ok(false) => {
+                    info!(
+                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
+                    );
+                    LfcPrewarmState::Skipped
+                }
                Err(err) => {
                    crate::metrics::LFC_PREWARM_ERRORS.inc();
                    error!(%err, "could not prewarm LFC");
-                    let error = format!("{err:#}");
-                    LfcPrewarmState::Failed { error }
+                    LfcPrewarmState::Failed {
+                        error: format!("{err:#}"),
+                    }
                }
            };

-            let state = &mut this.state.lock().unwrap();
-            if let LfcPrewarmState::Cancelled = prewarm_state {
-                state.lfc_prewarm_token = CancellationToken::new();
-            }
-            state.lfc_prewarm_state = prewarm_state;
+            cloned.state.lock().unwrap().lfc_prewarm_state = state;
        });
        true
    }
@@ -97,101 +131,55 @@ impl ComputeNode {
    }

    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
-    async fn prewarm_impl(
-        &self,
-        from_endpoint: Option<String>,
-        token: CancellationToken,
-    ) -> Result<LfcPrewarmState> {
-        let EndpointStoragePair {
-            url,
-            token: storage_token,
-        } = self.endpoint_storage_pair(from_endpoint)?;
+    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
+    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
+        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;

        #[cfg(feature = "testing")]
-        fail::fail_point!("compute-prewarm", |_| bail!("compute-prewarm failpoint"));
+        fail::fail_point!("compute-prewarm", |_| {
+            bail!("prewarm configured to fail because of a failpoint")
+        });

        info!(%url, "requesting LFC state from endpoint storage");
-        let mut now = Instant::now();
-        let request = Client::new().get(&url).bearer_auth(storage_token);
-        let response = select! {
-            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
-            response = request.send() => response
-        }
-        .context("querying endpoint storage")?;
-
-        match response.status() {
+        let request = Client::new().get(&url).bearer_auth(token);
+        let res = request.send().await.context("querying endpoint storage")?;
+        match res.status() {
            StatusCode::OK => (),
-            StatusCode::NOT_FOUND => return Ok(LfcPrewarmState::Skipped),
+            StatusCode::NOT_FOUND => {
+                return Ok(false);
+            }
            status => bail!("{status} querying endpoint storage"),
        }
-        let state_download_time_ms = now.elapsed().as_millis() as u32;
-        now = Instant::now();

        let mut uncompressed = Vec::new();
-        let lfc_state = select! {
-            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
-            lfc_state = response.bytes() => lfc_state
-        }
-        .context("getting request body from endpoint storage")?;
-
-        let mut decoder = ZstdDecoder::new(lfc_state.iter().as_slice());
-        select! {
-            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
-            read = decoder.read_to_end(&mut uncompressed) => read
-        }
-        .context("decoding LFC state")?;
-        let uncompress_time_ms = now.elapsed().as_millis() as u32;
-        now = Instant::now();
-
+        let lfc_state = res
+            .bytes()
+            .await
+            .context("getting request body from endpoint storage")?;
+        ZstdDecoder::new(lfc_state.iter().as_slice())
+            .read_to_end(&mut uncompressed)
+            .await
+            .context("decoding LFC state")?;
        let uncompressed_len = uncompressed.len();
-        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}");

-        // Client connection and prewarm info querying are fast and therefore don't need
-        // cancellation
-        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
+        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
+
+        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
-            .context("connecting to postgres")?;
-        let pg_token = client.cancel_token();
-
-        let params: Vec<&(dyn postgres_types::ToSql + Sync)> = vec![&uncompressed];
-        select! {
-            res = client.query_one("select neon.prewarm_local_cache($1)", &params) => res,
-            _ = token.cancelled() => {
-                pg_token.cancel_query(postgres::NoTls).await
-                    .context("cancelling neon.prewarm_local_cache()")?;
-                return Ok(LfcPrewarmState::Cancelled)
-            }
-        }
-        .context("loading LFC state into postgres")
-        .map(|_| ())?;
-        let prewarm_time_ms = now.elapsed().as_millis() as u32;
-
-        let row = client
-            .query_one("select * from neon.get_prewarm_info()", &[])
+            .context("connecting to postgres")?
+            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
            .await
-            .context("querying prewarm info")?;
-        let total = row.try_get(0).unwrap_or_default();
-        let prewarmed = row.try_get(1).unwrap_or_default();
-        let skipped = row.try_get(2).unwrap_or_default();
+            .context("loading LFC state into postgres")
+            .map(|_| ())?;

-        Ok(LfcPrewarmState::Completed {
-            total,
-            prewarmed,
-            skipped,
-            state_download_time_ms,
-            uncompress_time_ms,
-            prewarm_time_ms,
-        })
+        Ok(true)
    }

    /// If offload request is ongoing, return false, true otherwise
    pub fn offload_lfc(self: &Arc<Self>) -> bool {
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if matches!(
-                replace(state, LfcOffloadState::Offloading),
-                LfcOffloadState::Offloading
-            ) {
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
                return false;
            }
        }
@@ -203,10 +191,7 @@ impl ComputeNode {
    pub async fn offload_lfc_async(self: &Arc<Self>) {
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if matches!(
-                replace(state, LfcOffloadState::Offloading),
-                LfcOffloadState::Offloading
-            ) {
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
                return;
            }
        }
@@ -215,23 +200,23 @@ impl ComputeNode {

    async fn offload_lfc_with_state_update(&self) {
        crate::metrics::LFC_OFFLOADS.inc();
-        let state = match self.offload_lfc_impl().await {
-            Ok(state) => state,
-            Err(err) => {
-                crate::metrics::LFC_OFFLOAD_ERRORS.inc();
-                error!(%err, "could not offload LFC");
-                let error = format!("{err:#}");
-                LfcOffloadState::Failed { error }
-            }
+
+        let Err(err) = self.offload_lfc_impl().await else {
+            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
+            return;
+        };
+
+        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
+        error!(%err, "could not offload LFC state to endpoint storage");
+        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
+            error: format!("{err:#}"),
        };
-        self.state.lock().unwrap().lfc_offload_state = state;
    }

-    async fn offload_lfc_impl(&self) -> Result<LfcOffloadState> {
+    async fn offload_lfc_impl(&self) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
        info!(%url, "requesting LFC state from Postgres");

-        let mut now = Instant::now();
        let row = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?
@@ -243,41 +228,26 @@ impl ComputeNode {
            .context("deserializing LFC state")?;
        let Some(state) = state else {
            info!(%url, "empty LFC state, not exporting");
-            return Ok(LfcOffloadState::Skipped);
+            return Ok(());
        };
-        let state_query_time_ms = now.elapsed().as_millis() as u32;
-        now = Instant::now();

        let mut compressed = Vec::new();
        ZstdEncoder::new(state)
            .read_to_end(&mut compressed)
            .await
            .context("compressing LFC state")?;
-        let compress_time_ms = now.elapsed().as_millis() as u32;
-        now = Instant::now();

        let compressed_len = compressed.len();
-        info!(%url, "downloaded LFC state, compressed size {compressed_len}");
+        info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");

        let request = Client::new().put(url).bearer_auth(token).body(compressed);
-        let response = request
-            .send()
-            .await
-            .context("writing to endpoint storage")?;
-        let state_upload_time_ms = now.elapsed().as_millis() as u32;
-        let status = response.status();
-        if status != StatusCode::OK {
-            bail!("request to endpoint storage failed: {status}");
+        match request.send().await {
+            Ok(res) if res.status() == StatusCode::OK => Ok(()),
+            Ok(res) => bail!(
+                "Request to endpoint storage failed with status: {}",
+                res.status()
+            ),
+            Err(err) => Err(err).context("writing to endpoint storage"),
        }
-
-        Ok(LfcOffloadState::Completed {
-            compress_time_ms,
-            state_query_time_ms,
-            state_upload_time_ms,
-        })
-    }
-
-    pub fn cancel_prewarm(self: &Arc<Self>) {
-        self.state.lock().unwrap().lfc_prewarm_token.cancel();
    }
 }
--- a/compute_tools/src/compute_promote.rs
+++ b/compute_tools/src/compute_promote.rs
@@ -1,24 +1,32 @@
 use crate::compute::ComputeNode;
-use anyhow::{Context, bail};
+use anyhow::{Context, Result, bail};
 use compute_api::responses::{LfcPrewarmState, PromoteConfig, PromoteState};
-use std::time::Instant;
+use compute_api::spec::ComputeMode;
+use itertools::Itertools;
+use std::collections::HashMap;
+use std::{sync::Arc, time::Duration};
+use tokio::time::sleep;
 use tracing::info;
+use utils::lsn::Lsn;

 impl ComputeNode {
-    /// Returns only when promote fails or succeeds. If http client calling this function
-    /// disconnects, this does not stop promotion, and subsequent calls block until promote finishes.
+    /// Returns only when promote fails or succeeds. If a network error occurs
+    /// and http client disconnects, this does not stop promotion, and subsequent
+    /// calls block until promote finishes.
    /// Called by control plane on secondary after primary endpoint is terminated
    /// Has a failpoint "compute-promotion"
-    pub async fn promote(self: &std::sync::Arc<Self>, cfg: PromoteConfig) -> PromoteState {
-        let this = self.clone();
-        let promote_fn = async move || match this.promote_impl(cfg).await {
-            Ok(state) => state,
-            Err(err) => {
-                tracing::error!(%err, "promoting replica");
-                let error = format!("{err:#}");
-                PromoteState::Failed { error }
+    pub async fn promote(self: &Arc<Self>, cfg: PromoteConfig) -> PromoteState {
+        let cloned = self.clone();
+        let promote_fn = async move || {
+            let Err(err) = cloned.promote_impl(cfg).await else {
+                return PromoteState::Completed;
+            };
+            tracing::error!(%err, "promoting");
+            PromoteState::Failed {
+                error: format!("{err:#}"),
            }
        };
+
        let start_promotion = || {
            let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
            tokio::spawn(async move { tx.send(promote_fn().await) });
@@ -26,31 +34,36 @@ impl ComputeNode {
        };

        let mut task;
-        // promote_impl locks self.state so we need to unlock it before calling task.changed()
+        // the self.state guard is released by the end of this block: promote_impl locks self.state,
+        // and the guard must not be held while awaiting task.changed()
        {
-            let promote_state = &mut self.state.lock().unwrap().promote_state;
-            task = promote_state.get_or_insert_with(start_promotion).clone()
-        }
-        if task.changed().await.is_err() {
-            let error = "promote sender dropped".to_string();
-            return PromoteState::Failed { error };
+            task = self
+                .state
+                .lock()
+                .unwrap()
+                .promote_state
+                .get_or_insert_with(start_promotion)
+                .clone()
        }
+        task.changed().await.expect("promote sender dropped");
        task.borrow().clone()
    }

-    async fn promote_impl(&self, cfg: PromoteConfig) -> anyhow::Result<PromoteState> {
+    async fn promote_impl(&self, mut cfg: PromoteConfig) -> Result<()> {
        {
            let state = self.state.lock().unwrap();
            let mode = &state.pspec.as_ref().unwrap().spec.mode;
-            if *mode != compute_api::spec::ComputeMode::Replica {
-                bail!("compute mode \"{}\" is not replica", mode.to_type_str());
+            if *mode != ComputeMode::Replica {
+                bail!("{} is not replica", mode.to_type_str());
            }
+
+            // read the cached state; self.lfc_prewarm_state() would needlessly query Postgres
            match &state.lfc_prewarm_state {
-                status @ (LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming) => {
-                    bail!("compute {status}")
+                LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
+                    bail!("prewarm not requested or pending")
                }
                LfcPrewarmState::Failed { error } => {
-                    tracing::warn!(%error, "compute prewarm failed")
+                    tracing::warn!(%error, "replica prewarm failed")
                }
                _ => {}
            }
@@ -59,29 +72,26 @@ impl ComputeNode {
        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?;
-        let mut now = Instant::now();

        let primary_lsn = cfg.wal_flush_lsn;
-        let mut standby_lsn = utils::lsn::Lsn::INVALID;
+        let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
        const RETRIES: i32 = 20;
        for i in 0..=RETRIES {
            let row = client
-                .query_one("SELECT pg_catalog.pg_last_wal_replay_lsn()", &[])
+                .query_one("SELECT pg_last_wal_replay_lsn()", &[])
                .await
                .context("getting last replay lsn")?;
            let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
-            standby_lsn = lsn.into();
-            if standby_lsn >= primary_lsn {
+            last_wal_replay_lsn = lsn.into();
+            if last_wal_replay_lsn >= primary_lsn {
                break;
            }
-            info!(%standby_lsn, %primary_lsn, "catching up, try {i}");
-            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+            info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
+            sleep(Duration::from_secs(1)).await;
        }
-        if standby_lsn < primary_lsn {
+        if last_wal_replay_lsn < primary_lsn {
            bail!("didn't catch up with primary in {RETRIES} retries");
        }
-        let lsn_wait_time_ms = now.elapsed().as_millis() as u32;
-        now = Instant::now();

        // using $1 doesn't work with ALTER SYSTEM SET
        let safekeepers_sql = format!(
@@ -93,32 +103,26 @@ impl ComputeNode {
            .await
            .context("setting safekeepers")?;
        client
-            .query(
-                "ALTER SYSTEM SET synchronous_standby_names=walproposer",
-                &[],
-            )
-            .await
-            .context("setting synchronous_standby_names")?;
-        client
-            .query("SELECT pg_catalog.pg_reload_conf()", &[])
+            .query("SELECT pg_reload_conf()", &[])
            .await
            .context("reloading postgres config")?;

        #[cfg(feature = "testing")]
-        fail::fail_point!("compute-promotion", |_| bail!(
-            "compute-promotion failpoint"
-        ));
+        fail::fail_point!("compute-promotion", |_| {
+            bail!("promotion configured to fail because of a failpoint")
+        });

        let row = client
-            .query_one("SELECT * FROM pg_catalog.pg_promote()", &[])
+            .query_one("SELECT * FROM pg_promote()", &[])
            .await
            .context("pg_promote")?;
        if !row.get::<usize, bool>(0) {
-            bail!("pg_promote() failed");
+            bail!("pg_promote() returned false");
        }
-        let pg_promote_time_ms = now.elapsed().as_millis() as u32;
-        let now = Instant::now();

+        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
+            .await
+            .context("connecting to postgres")?;
        let row = client
            .query_one("SHOW transaction_read_only", &[])
            .await
@@ -127,47 +131,36 @@ impl ComputeNode {
            bail!("replica in read only mode after promotion");
        }

-        // Already checked validity in http handler
-        #[allow(unused_mut)]
-        let mut new_pspec = crate::compute::ParsedSpec::try_from(cfg.spec).expect("invalid spec");
        {
            let mut state = self.state.lock().unwrap();
-
-            // Local setup has different ports for pg process (port=) for primary and secondary.
-            // Primary is stopped so we need secondary's "port" value
-            #[cfg(feature = "testing")]
-            {
-                let old_spec = &state.pspec.as_ref().unwrap().spec;
-                let Some(old_conf) = old_spec.cluster.postgresql_conf.as_ref() else {
-                    bail!("pspec.spec.cluster.postgresql_conf missing for endpoint");
-                };
-                let set: std::collections::HashMap<&str, &str> = old_conf
-                    .split_terminator('\n')
-                    .map(|e| e.split_once("=").expect("invalid item"))
-                    .collect();
-
-                let Some(new_conf) = new_pspec.spec.cluster.postgresql_conf.as_mut() else {
-                    bail!("pspec.spec.cluster.postgresql_conf missing for supplied config");
-                };
-                new_conf.push_str(&format!("port={}\n", set["port"]));
-            }
-
-            tracing::debug!("applied spec: {:#?}", new_pspec.spec);
-            if self.params.lakebase_mode {
-                ComputeNode::set_spec(&self.params, &mut state, new_pspec);
-            } else {
-                state.pspec = Some(new_pspec);
-            }
+            let spec = &mut state.pspec.as_mut().unwrap().spec;
+            spec.mode = ComputeMode::Primary;
+            let new_conf = cfg.spec.cluster.postgresql_conf.as_mut().unwrap();
+            let existing_conf = spec.cluster.postgresql_conf.as_ref().unwrap();
+            Self::merge_spec(new_conf, existing_conf);
        }
-
        info!("applied new spec, reconfiguring as primary");
-        self.reconfigure()?;
-        let reconfigure_time_ms = now.elapsed().as_millis() as u32;
+        self.reconfigure()
+    }

-        Ok(PromoteState::Completed {
-            lsn_wait_time_ms,
-            pg_promote_time_ms,
-            reconfigure_time_ms,
-        })
+    /// Merge old and new Postgres conf specs to apply on secondary.
+    /// Drops `neon.safekeepers` from the new conf and takes `port` from the
+    /// existing conf, since they are supplied differently.
+    fn merge_spec(new_conf: &mut String, existing_conf: &str) {
+        let mut new_conf_set: HashMap<&str, &str> = new_conf
+            .split_terminator('\n')
+            .map(|e| e.split_once("=").expect("invalid item"))
+            .collect();
+        new_conf_set.remove("neon.safekeepers");
+
+        let existing_conf_set: HashMap<&str, &str> = existing_conf
+            .split_terminator('\n')
+            .map(|e| e.split_once("=").expect("invalid item"))
+            .collect();
+        new_conf_set.insert("port", existing_conf_set["port"]);
+        *new_conf = new_conf_set
+            .iter()
+            .map(|(k, v)| format!("{k}={v}"))
+            .join("\n");
    }
 }
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -16,7 +16,7 @@ use crate::pg_helpers::{
    DatabricksSettingsExt as _, GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize,
    escape_conf_value,
 };
-use crate::tls::{self, SERVER_CRT, SERVER_KEY};
+use crate::tls::{SERVER_CRT, SERVER_KEY};

 use utils::shard::{ShardIndex, ShardNumber};

@@ -65,19 +65,14 @@ pub fn write_postgres_conf(
        writeln!(file, "{conf}")?;
    }

+    // Stripe size GUC should be defined prior to connection string
+    if let Some(stripe_size) = spec.shard_stripe_size {
+        writeln!(file, "neon.stripe_size={stripe_size}")?;
+    }
    // Add options for connecting to storage
    writeln!(file, "# Neon storage settings")?;
    writeln!(file)?;
    if let Some(conninfo) = &spec.pageserver_connection_info {
-        // Stripe size GUC should be defined prior to connection string
-        if let Some(stripe_size) = conninfo.stripe_size {
-            writeln!(
-                file,
-                "# from compute spec's pageserver_connection_info.stripe_size field"
-            )?;
-            writeln!(file, "neon.stripe_size={stripe_size}")?;
-        }
-
        let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
        let num_shards = if conninfo.shard_count.0 == 0 {
            1 // unsharded, treat it as a single shard
@@ -115,7 +110,7 @@ pub fn write_postgres_conf(
        if let Some(libpq_urls) = libpq_urls {
            writeln!(
                file,
-                "# derived from compute spec's pageserver_connection_info field"
+                "# derived from compute spec's pageserver_conninfo field"
            )?;
            writeln!(
                file,
@@ -125,16 +120,24 @@ pub fn write_postgres_conf(
        } else {
            writeln!(file, "# no neon.pageserver_connstring")?;
        }
-    } else {
-        // Stripe size GUC should be defined prior to connection string
-        if let Some(stripe_size) = spec.shard_stripe_size {
-            writeln!(file, "# from compute spec's shard_stripe_size field")?;
+
+        if let Some(stripe_size) = conninfo.stripe_size {
+            writeln!(
+                file,
+                "# from compute spec's pageserver_conninfo.stripe_size field"
+            )?;
            writeln!(file, "neon.stripe_size={stripe_size}")?;
        }
+    } else {
        if let Some(s) = &spec.pageserver_connstring {
            writeln!(file, "# from compute spec's pageserver_connstring field")?;
            writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
        }
+
+        if let Some(stripe_size) = spec.shard_stripe_size {
+            writeln!(file, "# from compute spec's shard_stripe_size field")?;
+            writeln!(file, "neon.stripe_size={stripe_size}")?;
+        }
    }

    if !spec.safekeeper_connstrings.is_empty() {
@@ -175,14 +178,9 @@ pub fn write_postgres_conf(
    }

    // tls
-    if let Some(tls_config) = tls_config {
+    if tls_config.is_some() {
        writeln!(file, "ssl = on")?;

-        // postgres requires the keyfile to be in a secure file,
-        // currently too complicated to ensure that at the VM level,
-        // so we just copy them to another file instead. :shrug:
-        tls::update_key_path_blocking(pgdata_path, tls_config);
-
        // these are the default, but good to be explicit.
        writeln!(file, "ssl_cert_file = '{SERVER_CRT}'")?;
        writeln!(file, "ssl_key_file = '{SERVER_KEY}'")?;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -139,15 +139,6 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/LfcPrewarmState"
-    delete:
-      tags:
-        - Prewarm
-      summary: Cancel ongoing LFC prewarm
-      description: ""
-      operationId: cancelLfcPrewarm
-      responses:
-        202:
-          description: Prewarm cancelled

  /lfc/offload:
    post:
@@ -617,6 +608,9 @@ components:
      type: object
      required:
        - status
+        - total
+        - prewarmed
+        - skipped
      properties:
        status:
          description: LFC prewarm status
@@ -634,15 +628,6 @@ components:
        skipped:
          description: Pages processed but not prewarmed
          type: integer
-        state_download_time_ms:
-          description: Time it takes to download LFC state to compute
-          type: integer
-        uncompress_time_ms:
-          description: Time it takes to uncompress LFC state
-          type: integer
-        prewarm_time_ms:
-          description: Time it takes to prewarm LFC state in Postgres
-          type: integer

    LfcOffloadState:
      type: object
@@ -651,21 +636,11 @@ components:
      properties:
        status:
          description: LFC offload status
-          enum: [not_offloaded, offloading, completed, skipped, failed]
+          enum: [not_offloaded, offloading, completed, failed]
          type: string
        error:
          description: LFC offload error, if any
          type: string
-        state_query_time_ms:
-          description: Time it takes to get LFC state from Postgres
-          type: integer
-        compress_time_ms:
-          description: Time it takes to compress LFC state
-          type: integer
-        state_upload_time_ms:
-          description: Time it takes to upload LFC state to endpoint storage
-          type: integer
-

    PromoteState:
      type: object
@@ -679,15 +654,6 @@ components:
        error:
          description: Promote error, if any
          type: string
-        lsn_wait_time_ms:
-          description: Time it takes for secondary to catch up with primary WAL flush LSN
-          type: integer
-        pg_promote_time_ms:
-          description: Time it takes to call pg_promote on secondary
-          type: integer
-        reconfigure_time_ms:
-          description: Time it takes to reconfigure promoted secondary
-          type: integer

    SetRoleGrantsRequest:
      type: object
--- a/compute_tools/src/http/routes/check_writability.rs
+++ b/compute_tools/src/http/routes/check_writability.rs
@@ -12,8 +12,10 @@ use crate::http::JsonResponse;
 /// Check that the compute is currently running.
 pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
    let status = compute.get_status();
-    if status != ComputeStatus::Running {
-        return JsonResponse::invalid_status(status);
+    match status {
+        // If we are running, or just reloading the config, we are ok to write a new config.
+        ComputeStatus::Running | ComputeStatus::Reloading => {}
+        _ => return JsonResponse::invalid_status(status),
    }

    match check_writability(&compute).await {
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -27,32 +27,6 @@ pub(in crate::http) async fn configure(
        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
    };

-    // XXX: wrap state update under lock in a code block. Otherwise, we will try
-    // to `Send` `mut state` into the spawned thread bellow, which will cause
-    // the following rustc error:
-    //
-    // error: future cannot be sent between threads safely
-    {
-        let mut state = compute.state.lock().unwrap();
-        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
-            return JsonResponse::invalid_status(state.status);
-        }
-
-        // Pass the tracing span to the main thread that performs the startup,
-        // so that the start_compute operation is considered a child of this
-        // configure request for tracing purposes.
-        state.startup_span = Some(tracing::Span::current());
-
-        if compute.params.lakebase_mode {
-            ComputeNode::set_spec(&compute.params, &mut state, pspec);
-        } else {
-            state.pspec = Some(pspec);
-        }
-
-        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
-        drop(state);
-    }
-
    // Spawn a blocking thread to wait for compute to become Running. This is
    // needed to not block the main pool of workers and to be able to serve
    // other requests while some particular request is waiting for compute to
@@ -60,6 +34,32 @@ pub(in crate::http) async fn configure(
    let c = compute.clone();
    let completed = task::spawn_blocking(move || {
        let mut state = c.state.lock().unwrap();
+        loop {
+            match state.status {
+                // ideal state.
+                ComputeStatus::Empty | ComputeStatus::Running => break,
+                // we need to wait until reloaded
+                ComputeStatus::Reloading => {
+                    state = c.state_changed.wait(state).unwrap();
+                }
+                // All other cases are unexpected.
+                _ => return Err(JsonResponse::invalid_status(state.status)),
+            }
+        }
+
+        // Pass the tracing span to the main thread that performs the startup,
+        // so that the start_compute operation is considered a child of this
+        // configure request for tracing purposes.
+        state.startup_span = Some(tracing::Span::current());
+
+        if c.params.lakebase_mode {
+            ComputeNode::set_spec(&c.params, &mut state, pspec);
+        } else {
+            state.pspec = Some(pspec);
+        }
+
+        state.set_status(ComputeStatus::ConfigurationPending, &c.state_changed);
+
        while state.status != ComputeStatus::Running {
            state = c.state_changed.wait(state).unwrap();
            info!(
@@ -71,7 +71,7 @@ pub(in crate::http) async fn configure(
            if state.status == ComputeStatus::Failed {
                let err = state.error.as_ref().map_or("unknown error", |x| x);
                let msg = format!("compute configuration failed: {err:?}");
-                return Err(msg);
+                return Err(JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, msg));
            }
        }

@@ -81,7 +81,7 @@ pub(in crate::http) async fn configure(
    .unwrap();

    if let Err(e) = completed {
-        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
+        return e;
    }

    // Return current compute state if everything went well.
--- a/compute_tools/src/http/routes/lfc.rs
+++ b/compute_tools/src/http/routes/lfc.rs
@@ -1,11 +1,12 @@
+use crate::compute_prewarm::LfcPrewarmStateWithProgress;
 use crate::http::JsonResponse;
 use axum::response::{IntoResponse, Response};
 use axum::{Json, http::StatusCode};
 use axum_extra::extract::OptionalQuery;
-use compute_api::responses::{LfcOffloadState, LfcPrewarmState};
+use compute_api::responses::LfcOffloadState;
 type Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;

-pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmState> {
+pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmStateWithProgress> {
    Json(compute.lfc_prewarm_state().await)
 }

@@ -45,8 +46,3 @@ pub(in crate::http) async fn offload(compute: Compute) -> Response {
        )
    }
 }
-
-pub(in crate::http) async fn cancel_prewarm(compute: Compute) -> StatusCode {
-    compute.cancel_prewarm();
-    StatusCode::ACCEPTED
-}
--- a/compute_tools/src/http/routes/promote.rs
+++ b/compute_tools/src/http/routes/promote.rs
@@ -1,22 +1,11 @@
 use crate::http::JsonResponse;
 use axum::extract::Json;
-use compute_api::responses::PromoteConfig;
 use http::StatusCode;

 pub(in crate::http) async fn promote(
    compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
-    Json(cfg): Json<PromoteConfig>,
+    Json(cfg): Json<compute_api::responses::PromoteConfig>,
 ) -> axum::response::Response {
-    // Return early at the cost of extra parsing spec
-    let pspec = match crate::compute::ParsedSpec::try_from(cfg.spec) {
-        Ok(p) => p,
-        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
-    };
-
-    let cfg = PromoteConfig {
-        spec: pspec.spec,
-        wal_flush_lsn: cfg.wal_flush_lsn,
-    };
    let state = compute.promote(cfg).await;
    if let compute_api::responses::PromoteState::Failed { error: _ } = state {
        return JsonResponse::create_response(StatusCode::INTERNAL_SERVER_ERROR, state);
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -99,12 +99,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                    );

                let authenticated_router = Router::<Arc<ComputeNode>>::new()
-                    .route(
-                        "/lfc/prewarm",
-                        get(lfc::prewarm_state)
-                            .post(lfc::prewarm)
-                            .delete(lfc::cancel_prewarm),
-                    )
+                    .route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
                    .route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
                    .route("/promote", post(promote::promote))
                    .route("/check_writability", post(check_writability::is_writable))
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -19,7 +19,7 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
        .query(
            "SELECT datname FROM pg_catalog.pg_database
                WHERE datallowconn
-                AND datconnlimit OPERATOR(pg_catalog.<>) (OPERATOR(pg_catalog.-) 2::pg_catalog.int4)
+                AND datconnlimit <> - 2
                LIMIT 500",
            &[],
        )
@@ -67,7 +67,7 @@ pub async fn get_installed_extensions(

        let extensions: Vec<(String, String, i32)> = client
            .query(
-                "SELECT extname, extversion, extowner::pg_catalog.int4 FROM pg_catalog.pg_extension",
+                "SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension",
                &[],
            )
            .await?
--- a/compute_tools/src/local_proxy.rs
+++ b/compute_tools/src/local_proxy.rs
@@ -11,9 +11,11 @@ use utils::pid_file::{self, PidFileRead};

 pub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {
    write_local_proxy_conf("/etc/local_proxy/config.json".as_ref(), local_proxy)?;
-    notify_local_proxy("/etc/local_proxy/pid".as_ref())?;
+    reload()
+}

-    Ok(())
+pub fn reload() -> Result<()> {
+    notify_local_proxy("/etc/local_proxy/pid".as_ref())
 }

 /// Create or completely rewrite configuration file specified by `path`
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -76,7 +76,7 @@ impl<'m> MigrationRunner<'m> {
        self.client
            .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")
            .await?;
-        self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key pg_catalog.int4 NOT NULL PRIMARY KEY, id pg_catalog.int8 NOT NULL DEFAULT 0)").await?;
+        self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)").await?;
        self.client
            .simple_query(
                "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING",
--- a/compute_tools/src/migrations/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/0002-alter_roles.sql
@@ -15,17 +15,17 @@ DO $$
 DECLARE
    role_name text;
 BEGIN
-    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles WHERE pg_catalog.pg_has_role(rolname, '{privileged_role_name}', 'member')
+    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
    LOOP
-        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', pg_catalog.quote_ident(role_name);
-        EXECUTE pg_catalog.format('ALTER ROLE %I INHERIT;', role_name);
+        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
+        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
    END LOOP;

-    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles
+    FOR role_name IN SELECT rolname FROM pg_roles
        WHERE
-            NOT pg_catalog.pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT pg_catalog.starts_with(rolname, 'pg_')
+            NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
    LOOP
-        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', pg_catalog.quote_ident(role_name);
-        EXECUTE pg_catalog.format('ALTER ROLE %I NOBYPASSRLS;', role_name);
+        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
+        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
    END LOOP;
 END $$;
--- a/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
@@ -1,6 +1,6 @@
 DO $$
 BEGIN
-    IF (SELECT setting::pg_catalog.numeric >= 160000 FROM pg_catalog.pg_settings WHERE name = 'server_version_num') THEN
+    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
        EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0009-revoke_replication_for_previously_allowed_roles.sql
+++ b/compute_tools/src/migrations/0009-revoke_replication_for_previously_allowed_roles.sql
@@ -5,9 +5,9 @@ DO $$
 DECLARE
    role_name TEXT;
 BEGIN
-    FOR role_name IN SELECT rolname FROM pg_catalog.pg_roles WHERE rolreplication IS TRUE
+    FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
    LOOP
-        RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', pg_catalog.quote_ident(role_name);
-        EXECUTE pg_catalog.format('ALTER ROLE %I NOREPLICATION;', role_name);
+        RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
+        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
    END LOOP;
 END $$;
--- a/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
@@ -1,6 +1,6 @@
 DO $$
 BEGIN
-    IF (SELECT setting::pg_catalog.numeric >= 160000 FROM pg_catalog.pg_settings WHERE name OPERATOR(pg_catalog.=) 'server_version_num'::pg_catalog.text) THEN
+    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
    END IF;
--- a/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
@@ -2,7 +2,7 @@ DO $$
 DECLARE
    bypassrls boolean;
 BEGIN
-    SELECT rolbypassrls INTO bypassrls FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser';
+    SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser';
    IF NOT bypassrls THEN
        RAISE EXCEPTION 'neon_superuser cannot bypass RLS';
    END IF;
--- a/compute_tools/src/migrations/tests/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/tests/0002-alter_roles.sql
@@ -4,8 +4,8 @@ DECLARE
 BEGIN
    FOR role IN
        SELECT rolname AS name, rolinherit AS inherit
-        FROM pg_catalog.pg_roles
-        WHERE pg_catalog.pg_has_role(rolname, 'neon_superuser', 'member')
+        FROM pg_roles
+        WHERE pg_has_role(rolname, 'neon_superuser', 'member')
    LOOP
        IF NOT role.inherit THEN
            RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);
@@ -14,12 +14,12 @@ BEGIN

    FOR role IN
        SELECT rolname AS name, rolbypassrls AS bypassrls
-        FROM pg_catalog.pg_roles
-        WHERE NOT pg_catalog.pg_has_role(rolname, 'neon_superuser', 'member')
-            AND NOT pg_catalog.starts_with(rolname, 'pg_')
+        FROM pg_roles
+        WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member')
+            AND NOT starts_with(rolname, 'pg_')
    LOOP
        IF role.bypassrls THEN
-            RAISE EXCEPTION  '% can bypass RLS', pg_catalog.quote_ident(role.name);
+            RAISE EXCEPTION  '% can bypass RLS', quote_ident(role.name);
        END IF;
    END LOOP;
 END $$;
--- a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
@@ -1,10 +1,10 @@
 DO $$
 BEGIN
-    IF (SELECT pg_catalog.current_setting('server_version_num')::pg_catalog.numeric < 160000) THEN
+    IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN
        RETURN;
    END IF;

-    IF NOT (SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
+    IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
        RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
@@ -2,12 +2,12 @@ DO $$
 DECLARE
    monitor record;
 BEGIN
-    SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
+    SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
            admin_option AS admin
        INTO monitor
-        FROM pg_catalog.pg_auth_members
-        WHERE roleid = 'pg_monitor'::pg_catalog.regrole
-            AND member = 'neon_superuser'::pg_catalog.regrole;
+        FROM pg_auth_members
+        WHERE roleid = 'pg_monitor'::regrole
+            AND member = 'neon_superuser'::regrole;

    IF monitor IS NULL THEN
        RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_monitor';
--- a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
@@ -2,11 +2,11 @@ DO $$
 DECLARE
    can_execute boolean;
 BEGIN
-    SELECT pg_catalog.bool_and(pg_catalog.has_function_privilege('neon_superuser', oid, 'execute'))
+    SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute'))
       INTO can_execute
-       FROM pg_catalog.pg_proc
+       FROM pg_proc
       WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')
-           AND pronamespace = 'pg_catalog'::pg_catalog.regnamespace;
+           AND pronamespace = 'pg_catalog'::regnamespace;
    IF NOT can_execute THEN
        RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';
    END IF;
--- a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
@@ -2,9 +2,9 @@ DO $$
 DECLARE
    can_execute boolean;
 BEGIN
-    SELECT pg_catalog.has_function_privilege('neon_superuser', oid, 'execute')
+    SELECT has_function_privilege('neon_superuser', oid, 'execute')
       INTO can_execute
-       FROM pg_catalog.pg_proc
+       FROM pg_proc
       WHERE proname = 'pg_show_replication_origin_status'
           AND pronamespace = 'pg_catalog'::regnamespace;
    IF NOT can_execute THEN
--- a/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
@@ -2,10 +2,10 @@ DO $$
 DECLARE
    signal_backend record;
 BEGIN
-    SELECT pg_catalog.pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,
+    SELECT pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,
            admin_option AS admin
        INTO signal_backend
-        FROM pg_catalog.pg_auth_members
+        FROM pg_auth_members
        WHERE roleid = 'pg_signal_backend'::regrole
            AND member = 'neon_superuser'::regrole;

--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -407,9 +407,9 @@ fn get_database_stats(cli: &mut Client) -> anyhow::Result<(f64, i64)> {
    // like `postgres_exporter` use it to query Postgres statistics.
    // Use explicit 8 bytes type casts to match Rust types.
    let stats = cli.query_one(
-        "SELECT pg_catalog.coalesce(pg_catalog.sum(active_time), 0.0)::pg_catalog.float8 AS total_active_time,
-            pg_catalog.coalesce(pg_catalog.sum(sessions), 0)::pg_catalog.bigint AS total_sessions
-        FROM pg_catalog.pg_stat_database
+        "SELECT coalesce(sum(active_time), 0.0)::float8 AS total_active_time,
+            coalesce(sum(sessions), 0)::bigint AS total_sessions
+        FROM pg_stat_database
        WHERE datname NOT IN (
                'postgres',
                'template0',
@@ -445,11 +445,11 @@ fn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime
    let mut last_active: Option<DateTime<Utc>> = None;
    // Get all running client backends except ourself, use RFC3339 DateTime format.
    let backends = cli.query(
-        "SELECT state, pg_catalog.to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"'::pg_catalog.text) AS state_change
+        "SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
                FROM pg_stat_activity
-                    WHERE backend_type OPERATOR(pg_catalog.=) 'client backend'::pg_catalog.text
-                    AND pid OPERATOR(pg_catalog.!=) pg_catalog.pg_backend_pid()
-                    AND usename OPERATOR(pg_catalog.!=) 'cloud_admin'::pg_catalog.name;", // XXX: find a better way to filter other monitors?
+                    WHERE backend_type = 'client backend'
+                    AND pid != pg_backend_pid()
+                    AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
        &[],
    );

--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -299,9 +299,9 @@ pub async fn get_existing_dbs_async(
        .query_raw::<str, &String, &[String; 0]>(
            "SELECT
                datname AS name,
-                (SELECT rolname FROM pg_catalog.pg_roles WHERE oid OPERATOR(pg_catalog.=) datdba) AS owner,
+                (SELECT rolname FROM pg_roles WHERE oid = datdba) AS owner,
                NOT datallowconn AS restrict_conn,
-                datconnlimit OPERATOR(pg_catalog.=) (OPERATOR(pg_catalog.-) 2) AS invalid
+                datconnlimit = - 2 AS invalid
            FROM
                pg_catalog.pg_database;",
            &[],
@@ -466,13 +466,7 @@ fn update_pgbouncer_ini(
    Ok(())
 }

-/// Tune pgbouncer.
-/// 1. Apply new config using pgbouncer admin console
-/// 2. Add new values to pgbouncer.ini to preserve them after restart
-pub async fn tune_pgbouncer(
-    mut pgbouncer_config: IndexMap<String, String>,
-    tls_config: Option<TlsConfig>,
-) -> Result<()> {
+async fn connect() -> Result<tokio_postgres::Client> {
    let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
        // for VMs use pgbouncer specific way to connect to
        // pgbouncer admin console without password
@@ -518,18 +512,17 @@ pub async fn tune_pgbouncer(
        }
    };

-    if let Some(tls_config) = tls_config {
-        // pgbouncer starts in a half-ok state if it cannot find these files.
-        // It will default to client_tls_sslmode=deny, which causes proxy to error.
-        // There is a small window at startup where these files don't yet exist in the VM.
-        // Best to wait until it exists.
-        loop {
-            if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
-                break;
-            }
-            tokio::time::sleep(Duration::from_millis(500)).await
-        }
+    Ok(client)
+}

+/// Tune pgbouncer.
+/// 1. Apply new config to pgbouncer.ini
+/// 2. Notify pgbouncer to reload
+pub async fn tune_pgbouncer(
+    mut pgbouncer_config: IndexMap<String, String>,
+    tls_config: Option<TlsConfig>,
+) -> Result<()> {
+    if let Some(tls_config) = tls_config {
        pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
        pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
        pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
@@ -550,10 +543,17 @@ pub async fn tune_pgbouncer(

    info!("Applying pgbouncer setting change");

+    reload_pgbouncer().await
+}
+
+/// Reload pgbouncer.
+pub async fn reload_pgbouncer() -> Result<()> {
+    let client = connect().await?;
+
    if let Err(err) = client.simple_query("RELOAD").await {
        // Don't fail on error, just print it into log
-        error!("Failed to apply pgbouncer setting change,  {err}",);
-    };
+        error!("Failed to apply pgbouncer setting change: {err}",);
+    }

    Ok(())
 }
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -82,7 +82,7 @@ impl ComputeNode {
                info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);

                drop_subscriptions_done = match
-                    client.query("select 1 from neon.drop_subscriptions_done where timeline_id OPERATOR(pg_catalog.=) $1", &[&timeline_id.to_string()]).await {
+                    client.query("select 1 from neon.drop_subscriptions_done where timeline_id = $1", &[&timeline_id.to_string()]).await {
                    Ok(result) => !result.is_empty(),
                    Err(e) =>
                    {
@@ -1142,9 +1142,7 @@ async fn get_operations<'a>(
            if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
                if libs.contains("pg_stat_statements") {
                    return Ok(Box::new(once(Operation {
-                        query: String::from(
-                            "CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA public",
-                        ),
+                        query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"),
                        comment: Some(String::from("create system extensions")),
                    })));
                }
@@ -1152,13 +1150,11 @@ async fn get_operations<'a>(
            Ok(Box::new(empty()))
        }
        ApplySpecPhase::CreatePgauditExtension => Ok(Box::new(once(Operation {
-            query: String::from("CREATE EXTENSION IF NOT EXISTS pgaudit WITH SCHEMA public"),
+            query: String::from("CREATE EXTENSION IF NOT EXISTS pgaudit"),
            comment: Some(String::from("create pgaudit extensions")),
        }))),
        ApplySpecPhase::CreatePgauditlogtofileExtension => Ok(Box::new(once(Operation {
-            query: String::from(
-                "CREATE EXTENSION IF NOT EXISTS pgauditlogtofile WITH SCHEMA public",
-            ),
+            query: String::from("CREATE EXTENSION IF NOT EXISTS pgauditlogtofile"),
            comment: Some(String::from("create pgauditlogtofile extensions")),
        }))),
        // Disable pgaudit logging for postgres database.
@@ -1182,7 +1178,7 @@ async fn get_operations<'a>(
                },
                Operation {
                    query: String::from(
-                        "UPDATE pg_catalog.pg_extension SET extrelocatable = true WHERE extname OPERATOR(pg_catalog.=) 'neon'::pg_catalog.name AND extrelocatable OPERATOR(pg_catalog.=) false",
+                        "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'",
                    ),
                    comment: Some(String::from("compat/fix: make neon relocatable")),
                },
--- a/compute_tools/src/sql/add_availabilitycheck_tables.sql
+++ b/compute_tools/src/sql/add_availabilitycheck_tables.sql
@@ -3,17 +3,16 @@ BEGIN
    IF NOT EXISTS(
        SELECT 1
        FROM pg_catalog.pg_tables
-        WHERE tablename::pg_catalog.name OPERATOR(pg_catalog.=) 'health_check'::pg_catalog.name
-        AND schemaname::pg_catalog.name OPERATOR(pg_catalog.=) 'public'::pg_catalog.name
+        WHERE tablename = 'health_check' AND schemaname = 'public' -- only public.health_check counts
    )
    THEN
-    CREATE TABLE public.health_check (
-        id pg_catalog.int4 primary key generated by default as identity,
-        updated_at pg_catalog.timestamptz default pg_catalog.now()
+    CREATE TABLE public.health_check ( -- same table the existence check above looks for
+        id serial primary key,
+        updated_at timestamptz default now()
    );
-    INSERT INTO public.health_check VALUES (1, pg_catalog.now())
+    INSERT INTO public.health_check VALUES (1, now())
        ON CONFLICT (id) DO UPDATE
-         SET updated_at = pg_catalog.now();
+         SET updated_at = now();
    END IF;
 END
 $$
--- a/compute_tools/src/sql/anon_ext_fn_reassign.sql
+++ b/compute_tools/src/sql/anon_ext_fn_reassign.sql
@@ -0,0 +1,12 @@
+DO $$
+DECLARE
+    query varchar; -- holds one generated ALTER FUNCTION statement per loop iteration
+BEGIN
+    FOR query IN SELECT 'ALTER FUNCTION '||quote_ident(nsp.nspname)||'.'||quote_ident(p.proname)||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};'
+    FROM pg_proc p
+        JOIN pg_namespace nsp ON p.pronamespace = nsp.oid
+    WHERE nsp.nspname = 'anon' LOOP -- every pg_proc entry in schema anon; names are quote_ident-escaped
+        EXECUTE query;
+    END LOOP;
+END
+$$;
--- a/compute_tools/src/sql/create_privileged_role.sql
+++ b/compute_tools/src/sql/create_privileged_role.sql
@@ -1,6 +1,6 @@
 DO $$
    BEGIN
-        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname OPERATOR(pg_catalog.=) '{privileged_role_name}'::pg_catalog.name)
+        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
        THEN
            CREATE ROLE {privileged_role_name} {privileges} IN ROLE pg_read_all_data, pg_write_all_data;
        END IF;
--- a/compute_tools/src/sql/default_grants.sql
+++ b/compute_tools/src/sql/default_grants.sql
@@ -4,14 +4,14 @@ $$
        IF EXISTS(
            SELECT nspname
            FROM pg_catalog.pg_namespace
-            WHERE nspname OPERATOR(pg_catalog.=) 'public'
+            WHERE nspname = 'public'
        ) AND
-           pg_catalog.current_setting('server_version_num')::int OPERATOR(pg_catalog./) 10000 OPERATOR(pg_catalog.>=) 15
+           current_setting('server_version_num')::int / 10000 >= 15
        THEN
            IF EXISTS(
                SELECT rolname
                FROM pg_catalog.pg_roles
-                WHERE rolname OPERATOR(pg_catalog.=) 'web_access'
+                WHERE rolname = 'web_access'
            )
            THEN
                GRANT CREATE ON SCHEMA public TO web_access;
@@ -20,7 +20,7 @@ $$
        IF EXISTS(
            SELECT nspname
            FROM pg_catalog.pg_namespace
-            WHERE nspname OPERATOR(pg_catalog.=) 'public'
+            WHERE nspname = 'public'
        )
        THEN
            ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
--- a/compute_tools/src/sql/drop_subscriptions.sql
+++ b/compute_tools/src/sql/drop_subscriptions.sql
@@ -2,17 +2,11 @@ DO ${outer_tag}$
 DECLARE
    subname TEXT;
 BEGIN
-    LOCK TABLE pg_catalog.pg_subscription IN ACCESS EXCLUSIVE MODE;
-    FOR subname IN
-        SELECT pg_subscription.subname
-        FROM pg_catalog.pg_subscription
-        WHERE subdbid OPERATOR(pg_catalog.=) (
-            SELECT oid FROM pg_database WHERE datname OPERATOR(pg_catalog.=) {datname_str}::pg_catalog.name
-        )
-    LOOP
-        EXECUTE pg_catalog.format('ALTER SUBSCRIPTION %I DISABLE;', subname);
-        EXECUTE pg_catalog.format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
-        EXECUTE pg_catalog.format('DROP SUBSCRIPTION %I;', subname);
+    LOCK TABLE pg_subscription IN ACCESS EXCLUSIVE MODE;
+    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
+        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
+        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
+        EXECUTE format('DROP SUBSCRIPTION %I;', subname);
    END LOOP;
 END;
 ${outer_tag}$;
--- a/compute_tools/src/sql/finalize_drop_subscriptions.sql
+++ b/compute_tools/src/sql/finalize_drop_subscriptions.sql
@@ -3,19 +3,19 @@ BEGIN
    IF NOT EXISTS(
        SELECT 1
        FROM pg_catalog.pg_tables
-        WHERE tablename OPERATOR(pg_catalog.=) 'drop_subscriptions_done'::pg_catalog.name
-        AND schemaname OPERATOR(pg_catalog.=) 'neon'::pg_catalog.name
+        WHERE tablename = 'drop_subscriptions_done'
+        AND schemaname = 'neon'
    )
    THEN
        CREATE TABLE neon.drop_subscriptions_done
-        (id pg_catalog.int4 primary key generated by default as identity, timeline_id pg_catalog.text);
+        (id serial primary key, timeline_id text);
    END IF;

    -- preserve the timeline_id of the last drop_subscriptions run
    -- to ensure that the cleanup of a timeline is executed only once.
    -- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
-    INSERT INTO neon.drop_subscriptions_done VALUES (1, pg_catalog.current_setting('neon.timeline_id'))
+    INSERT INTO neon.drop_subscriptions_done VALUES (1, current_setting('neon.timeline_id'))
    ON CONFLICT (id) DO UPDATE
-    SET timeline_id = pg_catalog.current_setting('neon.timeline_id')::pg_catalog.text;
+    SET timeline_id = current_setting('neon.timeline_id');
 END
 $$
--- a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
+++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
@@ -15,15 +15,15 @@ BEGIN
        WHERE schema_name IN ('public')
    LOOP
        FOR grantor IN EXECUTE
-            pg_catalog.format(
-                'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee OPERATOR(pg_catalog.=) %s',
+            format(
+                'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee = %s',
                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
                quote_literal({role_name})
            )
        LOOP
-            EXECUTE pg_catalog.format('SET LOCAL ROLE %I', grantor);
+            EXECUTE format('SET LOCAL ROLE %I', grantor);

-            revoke_query := pg_catalog.format(
+            revoke_query := format(
                'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY %I',
                schema,
                -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
--- a/compute_tools/src/sql/set_public_schema_owner.sql
+++ b/compute_tools/src/sql/set_public_schema_owner.sql
@@ -5,17 +5,17 @@ DO ${outer_tag}$
        IF EXISTS(
            SELECT nspname
            FROM pg_catalog.pg_namespace
-            WHERE nspname OPERATOR(pg_catalog.=) 'public'::pg_catalog.name
+            WHERE nspname = 'public'
        )
        THEN
            SELECT nspowner::regrole::text
            FROM pg_catalog.pg_namespace
-            WHERE nspname OPERATOR(pg_catalog.=) 'public'::pg_catalog.text
+            WHERE nspname = 'public'
            INTO schema_owner;

-            IF schema_owner OPERATOR(pg_catalog.=) 'cloud_admin'::pg_catalog.text OR schema_owner OPERATOR(pg_catalog.=) 'zenith_admin'::pg_catalog.text
+            IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'
            THEN
-                EXECUTE pg_catalog.format('ALTER SCHEMA public OWNER TO %I', {db_owner});
+                EXECUTE format('ALTER SCHEMA public OWNER TO %I', {db_owner});
            END IF;
        END IF;
    END
--- a/compute_tools/src/sql/unset_template_for_drop_dbs.sql
+++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql
@@ -3,10 +3,10 @@ DO ${outer_tag}$
        IF EXISTS(
            SELECT 1
            FROM pg_catalog.pg_database
-            WHERE datname OPERATOR(pg_catalog.=) {datname}::pg_catalog.name
+            WHERE datname = {datname}
        )
        THEN
-            EXECUTE pg_catalog.format('ALTER DATABASE %I is_template false', {datname});
+            EXECUTE format('ALTER DATABASE %I is_template false', {datname});
        END IF;
    END
 ${outer_tag}$;
--- a/compute_tools/src/tls.rs
+++ b/compute_tools/src/tls.rs
@@ -3,42 +3,43 @@ use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
 use anyhow::{Context, Result, bail};
 use compute_api::responses::TlsConfig;
 use ring::digest;
-use x509_cert::Certificate;

 #[derive(Clone, Copy)]
 pub struct CertDigest(digest::Digest);

-pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
-    let mut digest = compute_digest(&cert_path).await;
-    let (tx, rx) = tokio::sync::watch::channel(digest);
-    tokio::spawn(async move {
-        while !tx.is_closed() {
-            let new_digest = compute_digest(&cert_path).await;
-            if digest.0.as_ref() != new_digest.0.as_ref() {
-                digest = new_digest;
-                _ = tx.send(digest);
-            }
-
-            tokio::time::sleep(Duration::from_secs(60)).await
-        }
-    });
-    rx
+impl PartialEq for CertDigest {
+    fn eq(&self, other: &Self) -> bool {
+        self.0.as_ref() == other.0.as_ref()
+    }
 }

-async fn compute_digest(cert_path: &str) -> CertDigest {
+pub fn wait_until_cert_changed(digest: CertDigest, cert_path: &str) -> CertDigest {
    loop {
-        match try_compute_digest(cert_path).await {
+        let new_digest = compute_digest(cert_path);
+        if digest != new_digest {
+            break new_digest;
+        }
+
+        // Wait a while before checking the certificates.
+        // We renew on a daily basis, so there's no rush.
+        std::thread::sleep(Duration::from_secs(60));
+    }
+}
+
+pub fn compute_digest(cert_path: &str) -> CertDigest {
+    loop {
+        match try_compute_digest(cert_path) {
            Ok(d) => break d,
            Err(e) => {
                tracing::error!("could not read cert file {e:?}");
-                tokio::time::sleep(Duration::from_secs(1)).await
+                std::thread::sleep(Duration::from_secs(1))
            }
        }
    }
 }

-async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
-    let data = tokio::fs::read(cert_path).await?;
+fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
+    let data = std::fs::read(cert_path)?;
    // sha256 is extremely collision resistent. can safely assume the digest to be unique
    Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
 }
@@ -46,28 +47,37 @@ async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
 pub const SERVER_CRT: &str = "server.crt";
 pub const SERVER_KEY: &str = "server.key";

-pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
+pub struct KeyPair {
+    crt: String,
+    key: String,
+}
+
+pub fn load_certs_blocking(tls_config: &TlsConfig) -> KeyPair {
    loop {
-        match try_update_key_path_blocking(pg_data, tls_config) {
-            Ok(()) => break,
+        match try_load_certs_blocking(tls_config) {
+            Ok(key_pair) => break key_pair,
            Err(e) => {
-                tracing::error!(error = ?e, "could not create key file");
+                tracing::error!(error = ?e, "could not load certs");
                std::thread::sleep(Duration::from_secs(1))
            }
        }
    }
 }

-// Postgres requires the keypath be "secure". This means
-// 1. Owned by the postgres user.
-// 2. Have permission 600.
-fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
+fn try_load_certs_blocking(tls_config: &TlsConfig) -> Result<KeyPair> {
    let key = std::fs::read_to_string(&tls_config.key_path)?;
    let crt = std::fs::read_to_string(&tls_config.cert_path)?;

    // to mitigate a race condition during renewal.
    verify_key_cert(&key, &crt)?;

+    Ok(KeyPair { key, crt })
+}
+
+// Postgres requires the keypath be "secure". This means
+// 1. Owned by the postgres user.
+// 2. Have permission 600.
+pub fn update_key_path_blocking(pg_data: &Path, key_pair: &KeyPair) -> Result<()> {
    let mut key_file = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
@@ -82,14 +92,22 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
        .mode(0o600)
        .open(pg_data.join(SERVER_CRT))?;

-    key_file.write_all(key.as_bytes())?;
-    crt_file.write_all(crt.as_bytes())?;
+    // NOTE: We currently ensure that an explicit reload does not happen during TLS renewal, but
+    // there's a chance that postgres/pgbouncer/local_proxy reloads implicitly halfway between
+    // these writes. This could allow them to read a key that does not match the cert.
+    // There doesn't seem to be any way to prevent that. However, we will issue a reload shortly
+    // after which should at least correct it.
+    key_file.write_all(key_pair.key.as_bytes())?;
+    crt_file.write_all(key_pair.crt.as_bytes())?;

    Ok(())
 }

 fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
+    use x509_cert::Certificate;
    use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;
+    use x509_cert::der::oid::db::rfc8410::ID_ED_25519;
+    use x509_cert::der::pem;

    let certs = Certificate::load_pem_chain(cert.as_bytes())
        .context("decoding PEM encoded certificates")?;
@@ -100,22 +118,30 @@ fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
        bail!("no certificates found");
    };

+    let pubkey = cert
+        .tbs_certificate
+        .subject_public_key_info
+        .subject_public_key
+        .raw_bytes();
+
    match cert.signature_algorithm.oid {
        ECDSA_WITH_SHA_256 => {
            let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
-
-            let a = key.public_key().to_sec1_bytes();
-            let b = cert
-                .tbs_certificate
-                .subject_public_key_info
-                .subject_public_key
-                .raw_bytes();
-
-            if *a != *b {
+            if *key.public_key().to_sec1_bytes() != *pubkey {
                bail!("private key file does not match certificate")
            }
        }
-        _ => bail!("unknown TLS key type"),
+        ID_ED_25519 => {
+            use ring::signature::{Ed25519KeyPair, KeyPair};
+
+            let (_, bytes) = pem::decode_vec(key.as_bytes())
+                .map_err(|_| anyhow::anyhow!("invalid key encoding"))?;
+            let key = Ed25519KeyPair::from_pkcs8_maybe_unchecked(&bytes).context("parse key")?;
+            if *key.public_key().as_ref() != *pubkey {
+                bail!("private key file does not match certificate")
+            }
+        }
+        oid => bail!("unknown TLS key type: {oid}"),
    }

    Ok(())
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -71,9 +71,8 @@ const DEFAULT_PG_VERSION_NUM: &str = "17";

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

-/// Neon CLI.
 #[derive(clap::Parser)]
-#[command(version = GIT_VERSION, name = "Neon CLI")]
+#[command(version = GIT_VERSION, about, name = "Neon CLI")]
 struct Cli {
    #[command(subcommand)]
    command: NeonLocalCmd,
@@ -108,31 +107,30 @@ enum NeonLocalCmd {
    Stop(StopCmdArgs),
 }

-/// Initialize a new Neon repository, preparing configs for services to start with.
 #[derive(clap::Args)]
+#[clap(about = "Initialize a new Neon repository, preparing configs for services to start with")]
 struct InitCmdArgs {
-    /// How many pageservers to create (default 1).
-    #[clap(long)]
+    #[clap(long, help("How many pageservers to create (default 1)"))]
    num_pageservers: Option<u16>,

    #[clap(long)]
    config: Option<PathBuf>,

-    /// Force initialization even if the repository is not empty.
-    #[clap(long, default_value = "must-not-exist")]
+    #[clap(long, help("Force initialization even if the repository is not empty"))]
    #[arg(value_parser)]
+    #[clap(default_value = "must-not-exist")]
    force: InitForceMode,
 }

-/// Start pageserver and safekeepers.
 #[derive(clap::Args)]
+#[clap(about = "Start pageserver and safekeepers")]
 struct StartCmdArgs {
    #[clap(long = "start-timeout", default_value = "10s")]
    timeout: humantime::Duration,
 }

-/// Stop pageserver and safekeepers.
 #[derive(clap::Args)]
+#[clap(about = "Stop pageserver and safekeepers")]
 struct StopCmdArgs {
    #[arg(value_enum)]
    #[clap(long, default_value_t = StopMode::Fast)]
@@ -145,8 +143,8 @@ enum StopMode {
    Immediate,
 }

-/// Manage tenants.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage tenants")]
 enum TenantCmd {
    List,
    Create(TenantCreateCmdArgs),
@@ -157,36 +155,38 @@ enum TenantCmd {

 #[derive(clap::Args)]
 struct TenantCreateCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,

-    /// Use a specific timeline id when creating a tenant and its initial timeline.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Use a specific timeline id when creating a tenant and its initial timeline"
+    )]
    timeline_id: Option<TimelineId>,

    #[clap(short = 'c')]
    config: Vec<String>,

-    /// Postgres version to use for the initial timeline.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long)]
+    #[clap(long, help = "Postgres version to use for the initial timeline")]
    pg_version: PgMajorVersion,

-    /// Use this tenant in future CLI commands where tenant_id is needed, but not specified.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Use this tenant in future CLI commands where tenant_id is needed, but not specified"
+    )]
    set_default: bool,

-    /// Number of shards in the new tenant.
-    #[clap(long)]
+    #[clap(long, help = "Number of shards in the new tenant")]
    #[arg(default_value_t = 0)]
    shard_count: u8,
-    /// Sharding stripe size in pages.
-    #[clap(long)]
+    #[clap(long, help = "Sharding stripe size in pages")]
    shard_stripe_size: Option<u32>,

-    /// Placement policy shards in this tenant.
-    #[clap(long)]
+    #[clap(long, help = "Placement policy shards in this tenant")]
    #[arg(value_parser = parse_placement_policy)]
    placement_policy: Option<PlacementPolicy>,
 }
@@ -195,35 +195,44 @@ fn parse_placement_policy(s: &str) -> anyhow::Result<PlacementPolicy> {
    Ok(serde_json::from_str::<PlacementPolicy>(s)?)
 }

-/// Set a particular tenant as default in future CLI commands where tenant_id is needed, but not
-/// specified.
 #[derive(clap::Args)]
+#[clap(
+    about = "Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"
+)]
 struct TenantSetDefaultCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: TenantId,
 }

 #[derive(clap::Args)]
 struct TenantConfigCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,

    #[clap(short = 'c')]
    config: Vec<String>,
 }

-/// Import a tenant that is present in remote storage, and create branches for its timelines.
 #[derive(clap::Args)]
+#[clap(
+    about = "Import a tenant that is present in remote storage, and create branches for its timelines"
+)]
 struct TenantImportCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: TenantId,
 }

-/// Manage timelines.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage timelines")]
 enum TimelineCmd {
    List(TimelineListCmdArgs),
    Branch(TimelineBranchCmdArgs),
@@ -231,87 +240,98 @@ enum TimelineCmd {
    Import(TimelineImportCmdArgs),
 }

-/// List all timelines available to this pageserver.
 #[derive(clap::Args)]
+#[clap(about = "List all timelines available to this pageserver")]
 struct TimelineListCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_shard_id: Option<TenantShardId>,
 }

-/// Create a new timeline, branching off from another timeline.
 #[derive(clap::Args)]
+#[clap(about = "Create a new timeline, branching off from another timeline")]
 struct TimelineBranchCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,
-    /// New timeline's ID, as a 32-byte hexadecimal string.
-    #[clap(long)]
+
+    #[clap(long, help = "New timeline's ID")]
    timeline_id: Option<TimelineId>,
-    /// Human-readable alias for the new timeline.
-    #[clap(long)]
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
    branch_name: String,
-    /// Use last Lsn of another timeline (and its data) as base when creating the new timeline. The
-    /// timeline gets resolved by its branch name.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name."
+    )]
    ancestor_branch_name: Option<String>,
-    /// When using another timeline as base, use a specific Lsn in it instead of the latest one.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "When using another timeline as base, use a specific Lsn in it instead of the latest one"
+    )]
    ancestor_start_lsn: Option<Lsn>,
 }

-/// Create a new blank timeline.
 #[derive(clap::Args)]
+#[clap(about = "Create a new blank timeline")]
 struct TimelineCreateCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,
-    /// New timeline's ID, as a 32-byte hexadecimal string.
-    #[clap(long)]
+
+    #[clap(long, help = "New timeline's ID")]
    timeline_id: Option<TimelineId>,
-    /// Human-readable alias for the new timeline.
-    #[clap(long)]
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
    branch_name: String,

-    /// Postgres version.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long)]
+    #[clap(long, help = "Postgres version")]
    pg_version: PgMajorVersion,
 }

-/// Import a timeline from a basebackup directory.
 #[derive(clap::Args)]
+#[clap(about = "Import timeline from a basebackup directory")]
 struct TimelineImportCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,
-    /// New timeline's ID, as a 32-byte hexadecimal string.
-    #[clap(long)]
+
+    #[clap(long, help = "New timeline's ID")]
    timeline_id: TimelineId,
-    /// Human-readable alias for the new timeline.
-    #[clap(long)]
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
    branch_name: String,
-    /// Basebackup tarfile to import.
-    #[clap(long)]
+
+    #[clap(long, help = "Basebackup tarfile to import")]
    base_tarfile: PathBuf,
-    /// LSN the basebackup starts at.
-    #[clap(long)]
+
+    #[clap(long, help = "Lsn the basebackup starts at")]
    base_lsn: Lsn,
-    /// WAL to add after base.
-    #[clap(long)]
+
+    #[clap(long, help = "Wal to add after base")]
    wal_tarfile: Option<PathBuf>,
-    /// LSN the basebackup ends at.
-    #[clap(long)]
+
+    #[clap(long, help = "Lsn the basebackup ends at")]
    end_lsn: Option<Lsn>,

-    /// Postgres version of the basebackup being imported.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long)]
+    #[clap(long, help = "Postgres version of the backup being imported")]
    pg_version: PgMajorVersion,
 }

-/// Manage pageservers.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage pageservers")]
 enum PageserverCmd {
    Status(PageserverStatusCmdArgs),
    Start(PageserverStartCmdArgs),
@@ -319,202 +339,223 @@ enum PageserverCmd {
    Restart(PageserverRestartCmdArgs),
 }

-/// Show status of a local pageserver.
 #[derive(clap::Args)]
+#[clap(about = "Show status of a local pageserver")]
 struct PageserverStatusCmdArgs {
-    /// Pageserver ID.
-    #[clap(long = "id")]
+    #[clap(long = "id", help = "pageserver id")]
    pageserver_id: Option<NodeId>,
 }

-/// Start local pageserver.
 #[derive(clap::Args)]
+#[clap(about = "Start local pageserver")]
 struct PageserverStartCmdArgs {
-    /// Pageserver ID.
-    #[clap(long = "id")]
+    #[clap(long = "id", help = "pageserver id")]
    pageserver_id: Option<NodeId>,
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Stop local pageserver.
 #[derive(clap::Args)]
+#[clap(about = "Stop local pageserver")]
 struct PageserverStopCmdArgs {
-    /// Pageserver ID.
-    #[clap(long = "id")]
+    #[clap(long = "id", help = "pageserver id")]
    pageserver_id: Option<NodeId>,
-    /// If 'immediate', don't flush repository data at shutdown
-    #[clap(short = 'm')]
+
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
 }

-/// Restart local pageserver.
 #[derive(clap::Args)]
+#[clap(about = "Restart local pageserver")]
 struct PageserverRestartCmdArgs {
-    /// Pageserver ID.
-    #[clap(long = "id")]
+    #[clap(long = "id", help = "pageserver id")]
    pageserver_id: Option<NodeId>,
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Manage storage controller.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage storage controller")]
 enum StorageControllerCmd {
    Start(StorageControllerStartCmdArgs),
    Stop(StorageControllerStopCmdArgs),
 }

-/// Start storage controller.
 #[derive(clap::Args)]
+#[clap(about = "Start storage controller")]
 struct StorageControllerStartCmdArgs {
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
-    /// Identifier used to distinguish storage controller instances.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Identifier used to distinguish storage controller instances"
+    )]
    #[arg(default_value_t = 1)]
    instance_id: u8,
-    /// Base port for the storage controller instance identified by instance-id (defaults to
-    /// pageserver cplane api).
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Base port for the storage controller instance identified by instance-id (defaults to pageserver cplane api)"
+    )]
    base_port: Option<u16>,

-    /// Whether the storage controller should handle pageserver-reported local disk loss events.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Whether the storage controller should handle pageserver-reported local disk loss events."
+    )]
    handle_ps_local_disk_loss: Option<bool>,
 }

-/// Stop storage controller.
 #[derive(clap::Args)]
+#[clap(about = "Stop storage controller")]
 struct StorageControllerStopCmdArgs {
-    /// If 'immediate', don't flush repository data at shutdown
-    #[clap(short = 'm')]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
-    /// Identifier used to distinguish storage controller instances.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Identifier used to distinguish storage controller instances"
+    )]
    #[arg(default_value_t = 1)]
    instance_id: u8,
 }

-/// Manage storage broker.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage storage broker")]
 enum StorageBrokerCmd {
    Start(StorageBrokerStartCmdArgs),
    Stop(StorageBrokerStopCmdArgs),
 }

-/// Start broker.
 #[derive(clap::Args)]
+#[clap(about = "Start broker")]
 struct StorageBrokerStartCmdArgs {
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long, default_value = "10s")]
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Stop broker.
 #[derive(clap::Args)]
+#[clap(about = "Stop broker")]
 struct StorageBrokerStopCmdArgs {
-    /// If 'immediate', don't flush repository data on shutdown.
-    #[clap(short = 'm')]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
 }

-/// Manage safekeepers.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage safekeepers")]
 enum SafekeeperCmd {
    Start(SafekeeperStartCmdArgs),
    Stop(SafekeeperStopCmdArgs),
    Restart(SafekeeperRestartCmdArgs),
 }

-/// Manage object storage.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage object storage")]
 enum EndpointStorageCmd {
    Start(EndpointStorageStartCmd),
    Stop(EndpointStorageStopCmd),
 }

-/// Start object storage.
 #[derive(clap::Args)]
+#[clap(about = "Start object storage")]
 struct EndpointStorageStartCmd {
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Stop object storage.
 #[derive(clap::Args)]
+#[clap(about = "Stop object storage")]
 struct EndpointStorageStopCmd {
-    /// If 'immediate', don't flush repository data on shutdown.
-    #[clap(short = 'm')]
    #[arg(value_enum, default_value = "fast")]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    stop_mode: StopMode,
 }

-/// Start local safekeeper.
 #[derive(clap::Args)]
+#[clap(about = "Start local safekeeper")]
 struct SafekeeperStartCmdArgs {
-    /// Safekeeper ID.
+    #[clap(help = "safekeeper id")]
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

-    /// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
-    #[clap(short = 'e', long = "safekeeper-extra-opt")]
+    #[clap(
+        short = 'e',
+        long = "safekeeper-extra-opt",
+        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
+    )]
    extra_opt: Vec<String>,

-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Stop local safekeeper.
 #[derive(clap::Args)]
+#[clap(about = "Stop local safekeeper")]
 struct SafekeeperStopCmdArgs {
-    /// Safekeeper ID.
+    #[clap(help = "safekeeper id")]
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

-    /// If 'immediate', don't flush repository data on shutdown.
    #[arg(value_enum, default_value = "fast")]
-    #[clap(short = 'm')]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    stop_mode: StopMode,
 }

-/// Restart local safekeeper.
 #[derive(clap::Args)]
+#[clap(about = "Restart local safekeeper")]
 struct SafekeeperRestartCmdArgs {
-    /// Safekeeper ID.
+    #[clap(help = "safekeeper id")]
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

-    /// If 'immediate', don't flush repository data on shutdown.
    #[arg(value_enum, default_value = "fast")]
-    #[clap(short = 'm')]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
    stop_mode: StopMode,

-    /// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
-    #[clap(short = 'e', long = "safekeeper-extra-opt")]
+    #[clap(
+        short = 'e',
+        long = "safekeeper-extra-opt",
+        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
+    )]
    extra_opt: Vec<String>,

-    /// Timeout until we fail the command.
-    #[clap(short = 't', long)]
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

-/// Manage Postgres instances.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage Postgres instances")]
 enum EndpointCmd {
    List(EndpointListCmdArgs),
    Create(EndpointCreateCmdArgs),
@@ -526,27 +567,33 @@ enum EndpointCmd {
    GenerateJwt(EndpointGenerateJwtCmdArgs),
 }

-/// List endpoints.
 #[derive(clap::Args)]
+#[clap(about = "List endpoints")]
 struct EndpointListCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_shard_id: Option<TenantShardId>,
 }

-/// Create a compute endpoint.
 #[derive(clap::Args)]
+#[clap(about = "Create a compute endpoint")]
 struct EndpointCreateCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,
-    /// Postgres endpoint ID.
+
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: Option<String>,
-    /// Name of the branch the endpoint will run on.
-    #[clap(long)]
+    #[clap(long, help = "Name of the branch the endpoint will run on")]
    branch_name: Option<String>,
-    /// Specify LSN on the timeline to start from. By default, end of the timeline would be used.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Specify Lsn on the timeline to start from. By default, end of the timeline would be used"
+    )]
    lsn: Option<Lsn>,
    #[clap(long)]
    pg_port: Option<u16>,
@@ -557,13 +604,16 @@ struct EndpointCreateCmdArgs {
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,

-    /// Don't do basebackup, create endpoint directory with only config files.
-    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
+    #[clap(
+        long,
+        help = "Don't do basebackup, create endpoint directory with only config files",
+        action = clap::ArgAction::Set,
+        default_value_t = false
+    )]
    config_only: bool,

-    /// Postgres version.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long)]
+    #[clap(long, help = "Postgres version")]
    pg_version: PgMajorVersion,

    /// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
@@ -574,140 +624,170 @@ struct EndpointCreateCmdArgs {
    #[clap(long)]
    grpc: bool,

-    /// If set, the node will be a hot replica on the specified timeline.
-    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
+    #[clap(
+        long,
+        help = "If set, the node will be a hot replica on the specified timeline",
+        action = clap::ArgAction::Set,
+        default_value_t = false
+    )]
    hot_standby: bool,
-    /// If set, will set up the catalog for neon_superuser.
-    #[clap(long)]
+
+    #[clap(long, help = "If set, will set up the catalog for neon_superuser")]
    update_catalog: bool,
-    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
-    /// useful for tests.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
+    )]
    allow_multiple: bool,

-    /// Name of the privileged role for the endpoint.
-    // Only allow changing it on creation.
-    #[clap(long)]
+    /// Only allow changing it on creation
+    #[clap(long, help = "Name of the privileged role for the endpoint")]
    privileged_role_name: Option<String>,
 }

-/// Start Postgres. If the endpoint doesn't exist yet, it is created.
 #[derive(clap::Args)]
+#[clap(about = "Start postgres. If the endpoint doesn't exist yet, it is created.")]
 struct EndpointStartCmdArgs {
-    /// Postgres endpoint ID.
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,
-    /// Pageserver ID.
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,
-    /// Safekeepers membership generation to prefix neon.safekeepers with.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
+    )]
    safekeepers_generation: Option<u32>,
-    /// List of safekeepers endpoint will talk to.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
+    )]
    safekeepers: Option<String>,
-    /// Configure the remote extensions storage proxy gateway URL to request for extensions.
-    #[clap(long, alias = "remote-ext-config")]
+
+    #[clap(
+        long,
+        help = "Configure the remote extensions storage proxy gateway URL to request for extensions.",
+        alias = "remote-ext-config"
+    )]
    remote_ext_base_url: Option<String>,
-    /// If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`"
+    )]
    create_test_user: bool,
-    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
-    /// useful for tests.
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
+    )]
    allow_multiple: bool,
-    /// Timeout until we fail the command.
-    #[clap(short = 't', long, value_parser= humantime::parse_duration)]
+
+    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
    #[arg(default_value = "90s")]
    start_timeout: Duration,

-    /// Download LFC cache from endpoint storage on endpoint startup
-    #[clap(long, default_value = "false")]
+    #[clap(
+        long,
+        help = "Download LFC cache from endpoint storage on endpoint startup",
+        default_value = "false"
+    )]
    autoprewarm: bool,

-    /// Upload LFC cache to endpoint storage periodically
-    #[clap(long)]
+    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,

-    /// Run in development mode, skipping VM-specific operations like process termination
-    #[clap(long, action = clap::ArgAction::SetTrue)]
+    #[clap(
+        long,
+        help = "Run in development mode, skipping VM-specific operations like process termination",
+        action = clap::ArgAction::SetTrue
+    )]
    dev: bool,
 }

-/// Reconfigure an endpoint.
 #[derive(clap::Args)]
+#[clap(about = "Reconfigure an endpoint")]
 struct EndpointReconfigureCmdArgs {
-    /// Tenant id. Represented as a hexadecimal string 32 symbols length
-    #[clap(long = "tenant-id")]
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: Option<TenantId>,
-    /// Postgres endpoint ID.
+
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,
-    /// Pageserver ID.
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,
+
    #[clap(long)]
    safekeepers: Option<String>,
 }

-/// Refresh the endpoint's configuration by forcing it reload it's spec
 #[derive(clap::Args)]
+#[clap(about = "Refresh the endpoint's configuration by forcing it to reload its spec")]
 struct EndpointRefreshConfigurationArgs {
-    /// Postgres endpoint id
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,
 }

-/// Stop an endpoint.
 #[derive(clap::Args)]
+#[clap(about = "Stop an endpoint")]
 struct EndpointStopCmdArgs {
-    /// Postgres endpoint ID.
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,
-    /// Also delete data directory (now optional, should be default in future).
-    #[clap(long)]
+
+    #[clap(
+        long,
+        help = "Also delete data directory (now optional, should be default in future)"
+    )]
    destroy: bool,

-    /// Postgres shutdown mode, passed to `pg_ctl -m <mode>`.
-    #[clap(long)]
+    #[clap(long, help = "Postgres shutdown mode")]
    #[clap(default_value = "fast")]
    mode: EndpointTerminateMode,
 }

-/// Update the pageservers in the spec file of the compute endpoint
 #[derive(clap::Args)]
+#[clap(about = "Update the pageservers in the spec file of the compute endpoint")]
 struct EndpointUpdatePageserversCmdArgs {
-    /// Postgres endpoint id
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,

-    /// Specified pageserver id
-    #[clap(short = 'p', long)]
+    #[clap(short = 'p', long, help = "Specified pageserver id")]
    pageserver_id: Option<NodeId>,
 }

-/// Generate a JWT for an endpoint.
 #[derive(clap::Args)]
+#[clap(about = "Generate a JWT for an endpoint")]
 struct EndpointGenerateJwtCmdArgs {
-    /// Postgres endpoint ID.
+    #[clap(help = "Postgres endpoint id")]
    endpoint_id: String,
-    /// Scope to generate the JWT with.
-    #[clap(short = 's', long, value_parser = ComputeClaimsScope::from_str)]
+
+    #[clap(short = 's', long, help = "Scope to generate the JWT with", value_parser = ComputeClaimsScope::from_str)]
    scope: Option<ComputeClaimsScope>,
 }

-/// Manage neon_local branch name mappings.
 #[derive(clap::Subcommand)]
+#[clap(about = "Manage neon_local branch name mappings")]
 enum MappingsCmd {
    Map(MappingsMapCmdArgs),
 }

-/// Create new mapping which cannot exist already.
 #[derive(clap::Args)]
+#[clap(about = "Create new mapping which cannot exist already")]
 struct MappingsMapCmdArgs {
-    /// Tenant ID, as a 32-byte hexadecimal string.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
    tenant_id: TenantId,
-    /// Timeline ID, as a 32-byte hexadecimal string.
-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Timeline id. Represented as a hexadecimal string 32 symbols length"
+    )]
    timeline_id: TimelineId,
-    /// Branch name to give to the timeline.
-    #[clap(long)]
+    #[clap(long, help = "Branch name to give to the timeline")]
    branch_name: String,
 }

@@ -1009,7 +1089,8 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
            storage_controller: None,
            control_plane_hooks_api: None,
-            generate_local_ssl_certs: false,
+            generate_local_tls_certs: false,
+            generate_compute_tls_certs: false,
        }
    };

--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -23,7 +23,7 @@ impl StorageBroker {
    }

    pub fn initialize(&self) -> anyhow::Result<()> {
-        if self.env.generate_local_ssl_certs {
+        if self.env.generate_local_tls_certs {
            self.env.generate_ssl_cert(
                &self.env.storage_broker_data_dir().join("server.crt"),
                &self.env.storage_broker_data_dir().join("server.key"),
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -54,7 +54,6 @@ use compute_api::requests::{
 };
 use compute_api::responses::{
    ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TerminateResponse,
-    TlsConfig,
 };
 use compute_api::spec::{
    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
@@ -213,8 +212,13 @@ impl ComputeControlPlane {
        let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);
        let compute_ctl_config = ComputeCtlConfig {
            jwks: Self::create_jwks_from_pem(&self.env.read_public_key()?)?,
-            tls: None::<TlsConfig>,
+            tls: self.env.get_tls_config()?,
        };
+        let mut features = vec![];
+        if compute_ctl_config.tls.is_some() {
+            features.push(ComputeFeature::TlsExperimental);
+        }
+
        let ep = Arc::new(Endpoint {
            endpoint_id: endpoint_id.to_owned(),
            pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),
@@ -241,7 +245,7 @@ impl ComputeControlPlane {
            drop_subscriptions_before_start,
            grpc,
            reconfigure_concurrency: 1,
-            features: vec![],
+            features: features.clone(),
            cluster: None,
            compute_ctl_config: compute_ctl_config.clone(),
            privileged_role_name: privileged_role_name.clone(),
@@ -263,7 +267,7 @@ impl ComputeControlPlane {
                skip_pg_catalog_updates,
                drop_subscriptions_before_start,
                reconfigure_concurrency: 1,
-                features: vec![],
+                features,
                cluster: None,
                compute_ctl_config,
                privileged_role_name,
@@ -953,7 +957,7 @@ impl Endpoint {
                            }
                            // keep retrying
                        }
-                        ComputeStatus::Running => {
+                        ComputeStatus::Reloading | ComputeStatus::Running => {
                            // All good!
                            break;
                        }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -12,6 +12,7 @@ use std::{env, fs};

 use anyhow::{Context, bail};
 use clap::ValueEnum;
+use compute_api::responses::TlsConfig;
 use pageserver_api::config::PostHogConfig;
 use pem::Pem;
 use postgres_backend::AuthType;
@@ -95,7 +96,10 @@ pub struct LocalEnv {

    /// Flag to generate SSL certificates for components that need it.
    /// Also generates root CA certificate that is used to sign all other certificates.
-    pub generate_local_ssl_certs: bool,
+    pub generate_local_tls_certs: bool,
+
+    /// Flag to generate TLS certificates for compute.
+    pub generate_compute_tls_certs: bool,
 }

 /// On-disk state stored in `.neon/config`.
@@ -123,7 +127,11 @@ pub struct OnDiskConfig {
    // Note: skip serializing because in compat tests old storage controller fails
    // to load new config file. May be removed after this field is in release branch.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
-    pub generate_local_ssl_certs: bool,
+    pub generate_local_tls_certs: bool,
+    // Note: skip serializing because in compat tests old storage controller fails
+    // to load new config file. May be removed after this field is in release branch.
+    #[serde(skip_serializing_if = "std::ops::Not::not")]
+    pub generate_compute_tls_certs: bool,
 }

 fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
@@ -152,7 +160,8 @@ pub struct NeonLocalInitConf {
    pub endpoint_storage: EndpointStorageConf,
    pub control_plane_api: Option<Url>,
    pub control_plane_hooks_api: Option<Url>,
-    pub generate_local_ssl_certs: bool,
+    pub generate_local_tls_certs: bool,
+    pub generate_compute_tls_certs: bool,
 }

 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -511,7 +520,7 @@ impl LocalEnv {
    }

    pub fn ssl_ca_cert_path(&self) -> Option<PathBuf> {
-        if self.generate_local_ssl_certs {
+        if self.generate_local_tls_certs {
            Some(self.base_data_dir.join("rootCA.crt"))
        } else {
            None
@@ -519,7 +528,7 @@ impl LocalEnv {
    }

    pub fn ssl_ca_key_path(&self) -> Option<PathBuf> {
-        if self.generate_local_ssl_certs {
+        if self.generate_local_tls_certs {
            Some(self.base_data_dir.join("rootCA.key"))
        } else {
            None
@@ -545,6 +554,33 @@ impl LocalEnv {
        )
    }

+    fn compute_ssl_paths(&self) -> Option<(PathBuf, PathBuf)> {
+        if self.generate_compute_tls_certs {
+            Some((
+                self.base_data_dir.join("compute_server.crt"),
+                self.base_data_dir.join("compute_server.key"),
+            ))
+        } else {
+            None
+        }
+    }
+
+    pub fn generate_compute_ssl_cert(&self) -> anyhow::Result<()> {
+        self.generate_ssl_ca_cert()?;
+
+        let (cert_path, key_path) = self.compute_ssl_paths().unwrap();
+        if !fs::exists(&cert_path)? {
+            generate_ssl_cert(
+                &cert_path,
+                &key_path,
+                self.ssl_ca_cert_path().unwrap().as_path(),
+                self.ssl_ca_key_path().unwrap().as_path(),
+            )?;
+        }
+
+        Ok(())
+    }
+
    /// Creates HTTP client with local SSL CA certificates.
    pub fn create_http_client(&self) -> reqwest::Client {
        let ssl_ca_certs = self.ssl_ca_cert_path().map(|ssl_ca_file| {
@@ -673,7 +709,8 @@ impl LocalEnv {
                control_plane_hooks_api,
                control_plane_compute_hook_api: _,
                branch_name_mappings,
-                generate_local_ssl_certs,
+                generate_local_tls_certs,
+                generate_compute_tls_certs,
                endpoint_storage,
            } = on_disk_config;
            LocalEnv {
@@ -690,7 +727,8 @@ impl LocalEnv {
                control_plane_api: control_plane_api.unwrap(),
                control_plane_hooks_api,
                branch_name_mappings,
-                generate_local_ssl_certs,
+                generate_local_tls_certs,
+                generate_compute_tls_certs,
                endpoint_storage,
            }
        };
@@ -806,7 +844,8 @@ impl LocalEnv {
                control_plane_hooks_api: self.control_plane_hooks_api.clone(),
                control_plane_compute_hook_api: None,
                branch_name_mappings: self.branch_name_mappings.clone(),
-                generate_local_ssl_certs: self.generate_local_ssl_certs,
+                generate_local_tls_certs: self.generate_local_tls_certs,
+                generate_compute_tls_certs: self.generate_compute_tls_certs,
                endpoint_storage: self.endpoint_storage.clone(),
            },
        )
@@ -861,6 +900,21 @@ impl LocalEnv {
        Ok(pem)
    }

+    /// Get the TLS config if set.
+    pub fn get_tls_config(&self) -> anyhow::Result<Option<TlsConfig>> {
+        match self.compute_ssl_paths() {
+            Some((cert_path, key_path)) => {
+                self.generate_compute_ssl_cert()?;
+
+                Ok(Some(TlsConfig {
+                    key_path: key_path.to_str().context("utf8")?.to_string(),
+                    cert_path: cert_path.to_str().context("utf8")?.to_string(),
+                }))
+            }
+            None => Ok(None),
+        }
+    }
+
    /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
    pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
        let base_path = base_path();
@@ -912,7 +966,8 @@ impl LocalEnv {
            pageservers,
            safekeepers,
            control_plane_api,
-            generate_local_ssl_certs,
+            generate_local_tls_certs,
+            generate_compute_tls_certs,
            control_plane_hooks_api,
            endpoint_storage,
        } = conf;
@@ -965,13 +1020,17 @@ impl LocalEnv {
            control_plane_api: control_plane_api.unwrap(),
            control_plane_hooks_api,
            branch_name_mappings: Default::default(),
-            generate_local_ssl_certs,
+            generate_local_tls_certs,
+            generate_compute_tls_certs,
            endpoint_storage,
        };

-        if generate_local_ssl_certs {
+        if generate_local_tls_certs {
            env.generate_ssl_ca_cert()?;
        }
+        if generate_compute_tls_certs {
+            env.generate_compute_ssl_cert()?;
+        }

        // create endpoints dir
        fs::create_dir_all(env.endpoints_path())?;
@@ -1074,7 +1133,7 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
 }

 fn generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
-    // openssl req -x509 -newkey ed25519 -nodes -subj "/CN=Neon Local CA" -days 36500 \
+    // openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=Neon Local CA" -days 36500 \
    // -out rootCA.crt -keyout rootCA.key
    let keygen_output = Command::new("openssl")
        .args([
@@ -1104,7 +1163,7 @@ fn generate_ssl_cert(
    let mut csr_path = cert_path.to_path_buf();
    csr_path.set_extension(".csr");

-    // openssl req -new -nodes -newkey ed25519 -keyout server.key -out server.csr \
+    // openssl req -new -nodes -newkey rsa:2048 -keyout server.key -out server.csr \
    // -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
    let keygen_output = Command::new("openssl")
        .args(["req", "-new", "-nodes"])
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -233,15 +233,15 @@ impl PageServerNode {
        let mut identity_file = std::fs::OpenOptions::new()
            .create_new(true)
            .write(true)
-            .open(&identity_file_path)
-            .with_context(|| format!("open identity toml for write: {identity_file_path:?}"))?;
+            .open(identity_file_path)
+            .with_context(|| format!("open identity toml for write: {config_file_path:?}"))?;
        let identity_toml = self.pageserver_make_identity_toml(node_id);
        identity_file
            .write_all(identity_toml.to_string().as_bytes())
            .context("write identity toml")?;
        drop(identity_toml);

-        if self.env.generate_local_ssl_certs {
+        if self.env.generate_local_tls_certs {
            self.env.generate_ssl_cert(
                datadir.join("server.crt").as_path(),
                datadir.join("server.key").as_path(),
@@ -560,12 +560,12 @@ impl PageServerNode {
                .remove("sampling_ratio")
                .map(serde_json::from_str)
                .transpose()
-                .context("Failed to parse 'sampling_ratio'")?,
+                .context("Failed to parse 'sampling_ratio'")?,
            relsize_snapshot_cache_capacity: settings
                .remove("relsize snapshot cache capacity")
                .map(|x| x.parse::<usize>())
                .transpose()
-                .context("Failed to parse 'relsize_snapshot_cache_capacity' as integer")?,
+                .context("Failed to parse 'relsize_snapshot_cache_capacity' as integer")?,
            basebackup_cache_enabled: settings
                .remove("basebackup_cache_enabled")
                .map(|x| x.parse::<bool>())
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -102,7 +102,7 @@ impl SafekeeperNode {
    /// Initializes a safekeeper node by creating all necessary files,
    /// e.g. SSL certificates and JWT token file.
    pub fn initialize(&self) -> anyhow::Result<()> {
-        if self.env.generate_local_ssl_certs {
+        if self.env.generate_local_tls_certs {
            self.env.generate_ssl_cert(
                &self.datadir_path().join("server.crt"),
                &self.datadir_path().join("server.key"),
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -353,7 +353,7 @@ impl StorageController {
            }
        }

-        if self.env.generate_local_ssl_certs {
+        if self.env.generate_local_tls_certs {
            self.env.generate_ssl_cert(
                &instance_dir.join("server.crt"),
                &instance_dir.join("server.key"),
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -303,13 +303,6 @@ enum Command {
        #[arg(long, required = true, value_delimiter = ',')]
        new_sk_set: Vec<NodeId>,
    },
-    /// Abort ongoing safekeeper migration.
-    TimelineSafekeeperMigrateAbort {
-        #[arg(long)]
-        tenant_id: TenantId,
-        #[arg(long)]
-        timeline_id: TimelineId,
-    },
 }

 #[derive(Parser)]
@@ -1403,17 +1396,6 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
-        Command::TimelineSafekeeperMigrateAbort {
-            tenant_id,
-            timeline_id,
-        } => {
-            let path =
-                format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort");
-
-            storcon_client
-                .dispatch::<(), ()>(Method::POST, path, None)
-                .await?;
-        }
    }

    Ok(())
--- a/docs/rfcs/2025-07-07-node-deletion-api-improvement.md
+++ b/docs/rfcs/2025-07-07-node-deletion-api-improvement.md
@@ -1,246 +0,0 @@
-# Node deletion API improvement
-
-Created on 2025-07-07
-Implemented on _TBD_
-
-## Summary
-
-This RFC describes improvements to the storage controller API for gracefully deleting pageserver
-nodes.
-
-## Motivation
-
-The basic node deletion API introduced in [#8226](https://github.com/neondatabase/neon/issues/8333)
-has several limitations:
-
- Deleted nodes can re-add themselves if they restart (e.g., a flaky node that keeps restarting and
-we cannot reach via SSH to stop the pageserver). This issue has been resolved by tombstone
-mechanism in [#12036](https://github.com/neondatabase/neon/issues/12036)
- Process of node deletion is not graceful, i.e. it just imitates a node failure
-
-In this context, "graceful" node deletion means that users do not experience any disruption or
-negative effects, provided the system remains in a healthy state (i.e., the remaining pageservers
-can handle the workload and all requirements are met). To achieve this, the system must perform
-live migration of all tenant shards from the node being deleted while the node is still running
-and continue processing all incoming requests. The node is removed only after all tenant shards
-have been safely migrated.
-
-Although live migrations can be achieved with the drain functionality, it leads to incorrect shard
-placement, such as not matching availability zones. This results in unnecessary work to optimize
-the placement that was just recently performed.
-
-If we delete a node before its tenant shards are fully moved, the new node won't have all the
-needed data (e.g. heatmaps) ready. This means user requests to the new node will be much slower at
-first. If there are many tenant shards, this slowdown affects a huge amount of users.
-
-Graceful node deletion is more complicated and can introduce new issues. It takes longer because
-live migration of each tenant shard can last several minutes. Using non-blocking accessors may
-also cause deletion to wait if other processes are holding inner state lock. It also gets trickier
-because we need to handle other requests, like drain and fill, at the same time.
-
-## Impacted components (e.g. pageserver, safekeeper, console, etc)
-
- storage controller
- pageserver (indirectly)
-
-## Proposed implementation
-
-### Tombstones
-
-To resolve the problem of deleted nodes re-adding themselves, a tombstone mechanism was introduced
-as part of the node stored information. Each node has a separate `NodeLifecycle` field with two
-possible states: `Active` and `Deleted`. When node deletion completes, the database row is not
-deleted but instead has its `NodeLifecycle` column switched to `Deleted`. Nodes with `Deleted`
-lifecycle are treated as if the row is absent for most handlers, with several exceptions: reattach
-and register functionality must be aware of tombstones. Additionally, new debug handlers are
-available for listing and deleting tombstones via the `/debug/v1/tombstone` path.
-
-### Gracefulness
-
-The problem of making node deletion graceful is complex and involves several challenges:
-
- **Cancellable**: The operation must be cancellable to allow administrators to abort the process
-if needed, e.g. if run by mistake.
- **Non-blocking**: We don't want to block deployment operations like draining/filling on the node
-deletion process. We need clear policies for handling concurrent operations: what happens when a
-drain/fill request arrives while deletion is in progress, and what happens when a delete request
-arrives while drain/fill is in progress.
- **Persistent**: If the storage controller restarts during this long-running operation, we must
-preserve progress and automatically resume the deletion process after the storage controller
-restarts.
- **Migrated correctly**: We cannot simply use the existing drain mechanism for nodes scheduled
-for deletion, as this would move shards to irrelevant locations. The drain process expects the
-node to return, so it only moves shards to backup locations, not to their preferred AZs. It also
-leaves secondary locations unmoved. This could result in unnecessary load on the storage
-controller and inefficient resource utilization.
- **Force option**: Administrators need the ability to force immediate, non-graceful deletion when
-time constraints or emergency situations require it, bypassing the normal graceful migration
-process.
-
-See below for a detailed breakdown of the proposed changes and mechanisms.
-
-#### Node lifecycle
-
-New `NodeLifecycle` enum and a matching database field with these values:
- `Active`: The normal state. All operations are allowed.
- `ScheduledForDeletion`: The node is marked to be deleted soon. Deletion may be in progress or
-will happen later, but the node will eventually be removed. All operations are allowed.
- `Deleted`: The node is fully deleted. No operations are allowed, and the node cannot be brought
-back. The only action left is to remove its record from the database. Any attempt to register a
-node in this state will fail.
-
-This state persists across storage controller restarts.
-
-**State transition**
-```
-        +--------------------+
-    +---|       Active       |<---------------------+
-    |   +--------------------+                      |
-    |                     ^                         |
-    | start_node_delete   | cancel_node_delete      |
-    v                     |                         |
-  +----------------------------------+              |
-  |       ScheduledForDeletion       |              |
-  +----------------------------------+              |
-       |                                            |
-       |                              node_register |
-       |                                            |
-       | delete_node (at the finish)                |
-       |                                            |
-       v                                            |
-  +---------+         tombstone_delete        +----------+
-  | Deleted |-------------------------------->|  no row  |
-  +---------+                                 +----------+
-```
-
-#### NodeSchedulingPolicy::Deleting
-
-A `Deleting` variant to the `NodeSchedulingPolicy` enum. This means the deletion function is
-running for the node right now. Only one node can have the `Deleting` policy at a time.
-
-The `NodeSchedulingPolicy::Deleting` state is persisted in the database. However, after a storage
-controller restart, any node previously marked as `Deleting` will have its scheduling policy reset
-to `Pause`. The policy will only transition back to `Deleting` when the deletion operation is
-actively started again, as triggered by the node's `NodeLifecycle::ScheduledForDeletion` state.
-
-`NodeSchedulingPolicy` transition details:
-1. When `node_delete` begins, set the policy to `NodeSchedulingPolicy::Deleting`.
-2. If `node_delete` is cancelled (for example, due to a concurrent drain operation), revert the
-policy to its previous value. The policy is persisted in storcon DB.
-3. After `node_delete` completes, the final value of the scheduling policy is irrelevant, since
-`NodeLifecycle::Deleted` prevents any further access to this field.
-
-The deletion process cannot be initiated for nodes currently undergoing deployment-related
-operations (`Draining`, `Filling`, or `PauseForRestart` policies). Deletion will only be triggered
-once the node transitions to either the `Active` or `Pause` state.
-
-#### OperationTracker
-
-A replacement for `Option<OperationHandler> ongoing_operation`, the `OperationTracker` is a
-dedicated service state object responsible for managing all long-running node operations (drain,
-fill, delete) with robust concurrency control.
-
-Key responsibilities:
- Orchestrates the execution of operations
- Supports cancellation of currently running operations
- Enforces operation constraints, e.g. allowing only single drain/fill operation at a time
- Persists deletion state, enabling recovery of pending deletions across restarts
- Ensures thread safety across concurrent requests
-
-#### Attached tenant shard processing
-
-When deleting a node, handle each attached tenant shard as follows:
-
-1. Pick the best node to become the new attached (the candidate).
-2. If the candidate already has this shard as a secondary:
-    - Create a new secondary for the shard on another suitable node.
-   Otherwise:
-    - Create a secondary for the shard on the candidate node.
-3. Wait until all secondaries are ready and pre-warmed.
-4. Promote the candidate's secondary to attached.
-5. Remove the secondary from the node being deleted.
-
-This process safely moves all attached shards before deleting the node.
-
-#### Secondary tenant shard processing
-
-When deleting a node, handle each secondary tenant shard as follows:
-
-1. Choose the best node to become the new secondary.
-2. Create a secondary for the shard on that node.
-3. Wait until the new secondary is ready.
-4. Remove the secondary from the node being deleted.
-
-This ensures all secondary shards are safely moved before deleting the node.
-
-### Reliability, failure modes and corner cases
-
-In case of a storage controller failure and following restart, the system behavior depends on the
-`NodeLifecycle` state:
-
- If `NodeLifecycle` is `Active`: No action is taken for this node.
- If `NodeLifecycle` is `Deleted`: The node will not be re-added.
- If `NodeLifecycle` is `ScheduledForDeletion`: A deletion background task will be launched for
-this node.
-
-In case of a pageserver node failure during deletion, the behavior depends on the `force` flag:
- If `force` is set: The node deletion will proceed regardless of the node's availability.
- If `force` is not set: The deletion will be retried a limited number of times. If the node
-remains unavailable, the deletion process will pause and automatically resume when the node
-becomes healthy again.
-
-### Operations concurrency
-
-The following sections describe the behavior when different types of requests arrive at the storage
-controller and how they interact with ongoing operations.
-
-#### Delete request
-
-Handler: `PUT /control/v1/node/:node_id/delete`
-
-1. If node lifecycle is `NodeLifecycle::ScheduledForDeletion`:
-    - Return `200 OK`: there is already an ongoing deletion request for this node
-2. Update & persist lifecycle to `NodeLifecycle::ScheduledForDeletion`
-3. Persist current scheduling policy
-4. If there is no active operation (drain/fill/delete):
-    - Run deletion process for this node
-
-#### Cancel delete request
-
-Handler: `DELETE /control/v1/node/:node_id/delete`
-
-1. If node lifecycle is not `NodeLifecycle::ScheduledForDeletion`:
-    - Return `404 Not Found`: there is no current deletion request for this node
-2. If the active operation is deleting this node, cancel it
-3. Update & persist lifecycle to `NodeLifecycle::Active`
-4. Restore the last scheduling policy from persistence
-
-#### Drain/fill request
-
-1. If there are already ongoing drain/fill processes:
-    - Return `409 Conflict`: queueing of drain/fill processes is not supported
-2. If there is an ongoing delete process:
-    - Cancel it and wait until it is cancelled
-3. Run the drain/fill process
-4. After the drain/fill process is cancelled or finished:
-    - Try to find another candidate to delete and run the deletion process for that node
-
-#### Drain/fill cancel request
-
-1. If the active operation is not the related process:
-    - Return `400 Bad Request`: cancellation request is incorrect, operations are not the same
-2. Cancel the active operation
-3. Try to find another candidate to delete and run the deletion process for that node
-
-## Definition of Done
-
- [x] Fix flaky node scenario and introduce related debug handlers
- [ ] Node deletion intent is persistent - a node will be eventually deleted after a deletion
-request regardless of draining/filling requests and restarts
- [ ] Node deletion can be graceful - deletion completes only after moving all tenant shards to
-recommended locations
- [ ] Deploying does not break due to long deletions - drain/fill operations override deletion
-process and deletion resumes after drain/fill completes
- [ ] `force` flag is implemented and provides fast, failure-tolerant node removal (e.g., when a
-pageserver node does not respond)
- [ ] Legacy delete handler code is removed from storage_controller, test_runner, and storcon_cli
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -1,9 +1,10 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.

+use std::fmt::Display;
+
 use chrono::{DateTime, Utc};
 use jsonwebtoken::jwk::JwkSet;
 use serde::{Deserialize, Serialize, Serializer};
-use std::fmt::Display;

 use crate::privilege::Privilege;
 use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};
@@ -26,7 +27,6 @@ pub struct ComputeConfig {
    pub spec: Option<ComputeSpec>,

    /// The compute_ctl configuration
-    #[allow(dead_code)]
    pub compute_ctl_config: ComputeCtlConfig,
 }

@@ -48,7 +48,7 @@ pub struct ExtensionInstallResponse {
 /// Status of the LFC prewarm process. The same state machine is reused for
 /// both autoprewarm (prewarm after compute/Postgres start using the previously
 /// stored LFC state) and explicit prewarming via API.
-#[derive(Serialize, Default, Debug, Clone)]
+#[derive(Serialize, Default, Debug, Clone, PartialEq)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcPrewarmState {
    /// Default value when compute boots up.
@@ -58,14 +58,7 @@ pub enum LfcPrewarmState {
    Prewarming,
    /// We found requested LFC state in the endpoint storage and
    /// completed prewarming successfully.
-    Completed {
-        total: i32,
-        prewarmed: i32,
-        skipped: i32,
-        state_download_time_ms: u32,
-        uncompress_time_ms: u32,
-        prewarm_time_ms: u32,
-    },
+    Completed,
    /// Unexpected error happened during prewarming. Note, `Not Found 404`
    /// response from the endpoint storage is explicitly excluded here
    /// because it can normally happen on the first compute start,
@@ -74,15 +67,11 @@ pub enum LfcPrewarmState {
    /// We tried to fetch the corresponding LFC state from the endpoint storage,
    /// but received `Not Found 404`. This should normally happen only during the
    /// first endpoint start after creation with `autoprewarm: true`.
-    /// This may also happen if LFC is turned off or not initialized
    ///
    /// During the orchestrated prewarm via API, when a caller explicitly
    /// provides the LFC state key to prewarm from, it's the caller responsibility
    /// to handle this status as an error state in this case.
    Skipped,
-    /// LFC prewarm was cancelled. Some pages in LFC cache may be prewarmed if query
-    /// has started working before cancellation
-    Cancelled,
 }

 impl Display for LfcPrewarmState {
@@ -90,44 +79,32 @@ impl Display for LfcPrewarmState {
        match self {
            LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
            LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
-            LfcPrewarmState::Completed { .. } => f.write_str("Completed"),
+            LfcPrewarmState::Completed => f.write_str("Completed"),
            LfcPrewarmState::Skipped => f.write_str("Skipped"),
            LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
-            LfcPrewarmState::Cancelled => f.write_str("Cancelled"),
        }
    }
 }

-#[derive(Serialize, Default, Debug, Clone)]
+#[derive(Serialize, Default, Debug, Clone, PartialEq)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcOffloadState {
    #[default]
    NotOffloaded,
    Offloading,
-    Completed {
-        state_query_time_ms: u32,
-        compress_time_ms: u32,
-        state_upload_time_ms: u32,
-    },
+    Completed,
    Failed {
        error: String,
    },
-    /// LFC state was empty so it wasn't offloaded
-    Skipped,
 }

-#[derive(Serialize, Debug, Clone)]
+#[derive(Serialize, Debug, Clone, PartialEq)]
 #[serde(tag = "status", rename_all = "snake_case")]
+/// Response of /promote
 pub enum PromoteState {
    NotPromoted,
-    Completed {
-        lsn_wait_time_ms: u32,
-        pg_promote_time_ms: u32,
-        reconfigure_time_ms: u32,
-    },
-    Failed {
-        error: String,
-    },
+    Completed,
+    Failed { error: String },
 }

 #[derive(Deserialize, Default, Debug)]
@@ -177,6 +154,8 @@ pub enum ComputeStatus {
    Empty,
    // Compute configuration was requested.
    ConfigurationPending,
+    // Postgres, pgbouncer, and local_proxy are currently being reloaded.
+    Reloading,
    // Compute node has spec and initial startup and
    // configuration is in progress.
    Init,
@@ -211,6 +190,7 @@ impl Display for ComputeStatus {
        match self {
            ComputeStatus::Empty => f.write_str("empty"),
            ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
+            ComputeStatus::Reloading => f.write_str("reloading"),
            ComputeStatus::RefreshConfiguration => f.write_str("refresh-configuration"),
            ComputeStatus::RefreshConfigurationPending => {
                f.write_str("refresh-configuration-pending")
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -5,12 +5,8 @@ edition = "2024"
 license.workspace = true

 [features]
-default = ["io-align-512"]
 # See pageserver/Cargo.toml
 testing = ["dep:nix"]
-# Direct IO alignment options (mutually exclusive)
-io-align-512 = []
-io-align-4k = []

 [dependencies]
 serde.workspace = true
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -703,11 +703,6 @@ pub mod defaults {

    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

-    #[cfg(feature = "io-align-4k")]
-    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 4096;
-    #[cfg(all(feature = "io-align-512", not(feature = "io-align-4k")))]
-    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
-    #[cfg(not(any(feature = "io-align-512", feature = "io-align-4k")))]
    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

    pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -19,8 +19,7 @@ camino = { workspace = true, features = ["serde1"] }
 humantime-serde.workspace = true
 hyper = { workspace = true, features = ["client"] }
 futures.workspace = true
-reqwest = { workspace = true, features = ["multipart", "stream"] }
-chrono = { version = "0.4", default-features = false, features = ["clock"] }
+reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
@@ -42,10 +41,6 @@ http-types.workspace = true
 http-body-util.workspace = true
 itertools.workspace = true
 sync_wrapper = { workspace = true, features = ["futures"] }
-gcp_auth = "0.12.3"
-url.workspace = true
-http.workspace = true
-uuid.workspace = true

 byteorder = "1.4"
 rand.workspace = true
--- a/libs/remote_storage/src/config.rs
+++ b/libs/remote_storage/src/config.rs
@@ -41,7 +41,6 @@ impl RemoteStorageKind {
            RemoteStorageKind::LocalFs { .. } => None,
            RemoteStorageKind::AwsS3(config) => Some(&config.bucket_name),
            RemoteStorageKind::AzureContainer(config) => Some(&config.container_name),
-            RemoteStorageKind::GCS(config) => Some(&config.bucket_name),
        }
    }
 }
@@ -52,7 +51,6 @@ impl RemoteStorageConfig {
        match &self.storage {
            RemoteStorageKind::LocalFs { .. } => DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT,
            RemoteStorageKind::AwsS3(c) => c.concurrency_limit.into(),
-            RemoteStorageKind::GCS(c) => c.concurrency_limit.into(),
            RemoteStorageKind::AzureContainer(c) => c.concurrency_limit.into(),
        }
    }
@@ -87,9 +85,6 @@ pub enum RemoteStorageKind {
    /// Azure Blob based storage, storing all files in the container
    /// specified by the config
    AzureContainer(AzureConfig),
-    /// Google Cloud based storage, storing all files in the GCS bucket
-    /// specified by the config
-    GCS(GCSConfig),
 }

 #[derive(Deserialize)]
@@ -181,32 +176,6 @@ impl Debug for S3Config {
    }
 }

-#[derive(Clone, PartialEq, Eq, Deserialize, Serialize)]
-pub struct GCSConfig {
-    /// Name of the bucket to connect to.
-    pub bucket_name: String,
-    /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
-    pub prefix_in_bucket: Option<String>,
-    #[serde(default = "default_remote_storage_s3_concurrency_limit")]
-    pub concurrency_limit: NonZeroUsize,
-    #[serde(default = "default_max_keys_per_list_response")]
-    pub max_keys_per_list_response: Option<i32>,
-}
-
-impl Debug for GCSConfig {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("GCSConfig")
-            .field("bucket_name", &self.bucket_name)
-            .field("prefix_in_bucket", &self.prefix_in_bucket)
-            .field("concurrency_limit", &self.concurrency_limit)
-            .field(
-                "max_keys_per_list_response",
-                &self.max_keys_per_list_response,
-            )
-            .finish()
-    }
-}
-
 /// Azure  bucket coordinates and access credentials to manage the bucket contents (read and write).
 #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct AzureConfig {
@@ -335,30 +304,6 @@ timeout = '5s'";
        );
    }

-    #[test]
-    fn test_gcs_parsing() {
-        let toml = "\
-    bucket_name = 'foo-bar'
-    prefix_in_bucket = 'pageserver/'
-    ";
-
-        let config = parse(toml).unwrap();
-
-        assert_eq!(
-            config,
-            RemoteStorageConfig {
-                storage: RemoteStorageKind::GCS(GCSConfig {
-                    bucket_name: "foo-bar".into(),
-                    prefix_in_bucket: Some("pageserver/".into()),
-                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
-                    concurrency_limit: std::num::NonZero::new(100).unwrap(),
-                }),
-                timeout: Duration::from_secs(120),
-                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT
-            }
-        );
-    }
-
    #[test]
    fn test_s3_parsing() {
        let toml = "\
--- a/libs/remote_storage/src/gcs_bucket.rs
+++ b/libs/remote_storage/src/gcs_bucket.rs
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -12,7 +12,6 @@
 mod azure_blob;
 mod config;
 mod error;
-mod gcs_bucket;
 mod local_fs;
 mod metrics;
 mod s3_bucket;
@@ -44,11 +43,10 @@ use tokio_util::sync::CancellationToken;
 use tracing::info;

 pub use self::azure_blob::AzureBlobStorage;
-pub use self::gcs_bucket::GCSBucket;
 pub use self::local_fs::LocalFs;
 pub use self::s3_bucket::S3Bucket;
 pub use self::simulate_failures::UnreliableWrapper;
-pub use crate::config::{AzureConfig, GCSConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};
+pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};

 /// Default concurrency limit for S3 operations
 ///
@@ -83,12 +81,8 @@ pub const MAX_KEYS_PER_DELETE_S3: usize = 1000;
 /// <https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch>
 pub const MAX_KEYS_PER_DELETE_AZURE: usize = 256;

-pub const MAX_KEYS_PER_DELETE_GCS: usize = 1000;
-
 const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';

-const GCS_SCOPES: &[&str] = &["https://www.googleapis.com/auth/cloud-platform"];
-
 /// Path on the remote storage, relative to some inner prefix.
 /// The prefix is an implementation detail, that allows representing local paths
 /// as the remote ones, stripping the local storage prefix away.
@@ -188,7 +182,6 @@ pub struct VersionListing {
    pub versions: Vec<Version>,
 }

-#[derive(Debug)]
 pub struct Version {
    pub key: RemotePath,
    pub last_modified: SystemTime,
@@ -210,47 +203,6 @@ pub enum VersionKind {
    Version(VersionId),
 }

-// I was going to do an `enum GenericVersion` but this feels cleaner.
-#[derive(Default)]
-pub struct GCSVersionListing {
-    pub versions: Vec<GCSVersion>,
-}
-
-#[derive(Debug)]
-pub struct GCSVersion {
-    pub key: RemotePath,
-    pub last_modified: SystemTime,
-    pub id: VersionId,
-    pub time_deleted: Option<SystemTime>,
-}
-
-impl From<GCSVersionListing> for VersionListing {
-    fn from(gcs_listing: GCSVersionListing) -> Self {
-        let version_listing = gcs_listing
-            .versions
-            .into_iter()
-            .map(
-                |GCSVersion {
-                     key,
-                     last_modified,
-                     id,
-                     ..
-                 }| {
-                    Version {
-                        key,
-                        last_modified,
-                        kind: VersionKind::Version(VersionId(id.0)),
-                    }
-                },
-            )
-            .collect::<Vec<Version>>();
-
-        VersionListing {
-            versions: version_listing,
-        }
-    }
-}
-
 /// Options for downloads. The default value is a plain GET.
 pub struct DownloadOpts {
    /// If given, returns [`DownloadError::Unmodified`] if the object still has
@@ -529,7 +481,6 @@ pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
    AwsS3(Arc<S3Bucket>),
    AzureBlob(Arc<AzureBlobStorage>),
    Unreliable(Other),
-    GCS(Arc<GCSBucket>),
 }

 impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
@@ -546,7 +497,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
            Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
            Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
-            Self::GCS(s) => s.list(prefix, mode, max_keys, cancel).await,
        }
    }

@@ -564,7 +514,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
            Self::AzureBlob(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
            Self::Unreliable(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
-            Self::GCS(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
        }
    }

@@ -581,7 +530,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
            Self::AzureBlob(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
            Self::Unreliable(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
-            Self::GCS(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
        }
    }

@@ -596,7 +544,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.head_object(key, cancel).await,
            Self::AzureBlob(s) => s.head_object(key, cancel).await,
            Self::Unreliable(s) => s.head_object(key, cancel).await,
-            Self::GCS(s) => s.head_object(key, cancel).await,
        }
    }

@@ -614,7 +561,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
            Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
            Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
-            Self::GCS(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
        }
    }

@@ -630,7 +576,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.download(from, opts, cancel).await,
            Self::AzureBlob(s) => s.download(from, opts, cancel).await,
            Self::Unreliable(s) => s.download(from, opts, cancel).await,
-            Self::GCS(s) => s.download(from, opts, cancel).await,
        }
    }

@@ -645,7 +590,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.delete(path, cancel).await,
            Self::AzureBlob(s) => s.delete(path, cancel).await,
            Self::Unreliable(s) => s.delete(path, cancel).await,
-            Self::GCS(s) => s.delete(path, cancel).await,
        }
    }

@@ -660,7 +604,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
            Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
            Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
-            Self::GCS(s) => s.delete_objects(paths, cancel).await,
        }
    }

@@ -671,7 +614,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.max_keys_per_delete(),
            Self::AzureBlob(s) => s.max_keys_per_delete(),
            Self::Unreliable(s) => s.max_keys_per_delete(),
-            Self::GCS(s) => s.max_keys_per_delete(),
        }
    }

@@ -686,7 +628,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.delete_prefix(prefix, cancel).await,
            Self::AzureBlob(s) => s.delete_prefix(prefix, cancel).await,
            Self::Unreliable(s) => s.delete_prefix(prefix, cancel).await,
-            Self::GCS(s) => s.delete_prefix(prefix, cancel).await,
        }
    }

@@ -702,7 +643,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            Self::AwsS3(s) => s.copy(from, to, cancel).await,
            Self::AzureBlob(s) => s.copy(from, to, cancel).await,
            Self::Unreliable(s) => s.copy(from, to, cancel).await,
-            Self::GCS(s) => s.copy(from, to, cancel).await,
        }
    }

@@ -732,10 +672,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
                    .await
            }
-            Self::GCS(s) => {
-                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
-                    .await
-            }
        }
    }
 }
@@ -751,18 +687,11 @@ impl GenericRemoteStorage {
    }

    pub async fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
-        info!("RemoteStorageConfig: {:?}", storage_config);
-
        let timeout = storage_config.timeout;

-        // If someone overrides timeout to be small without adjusting small_timeout, then adjust it automatically
+        // If someone overrides timeout to be small without adjusting small_timeout, then adjust it automatically
        let small_timeout = std::cmp::min(storage_config.small_timeout, timeout);

-        info!(
-            "RemoteStorageConfig's storage attribute: {:?}",
-            storage_config.storage
-        );
-
        Ok(match &storage_config.storage {
            RemoteStorageKind::LocalFs { local_path: path } => {
                info!("Using fs root '{path}' as a remote storage");
@@ -800,16 +729,6 @@ impl GenericRemoteStorage {
                    small_timeout,
                )?))
            }
-            RemoteStorageKind::GCS(gcs_config) => {
-                let google_application_credentials =
-                    std::env::var("GOOGLE_APPLICATION_CREDENTIALS")
-                        .unwrap_or_else(|_| "<none>".into());
-                info!(
-                    "Using gcs bucket '{}' as a remote storage, prefix in bucket: '{:?}', GOOGLE_APPLICATION_CREDENTIALS: {google_application_credentials }",
-                    gcs_config.bucket_name, gcs_config.prefix_in_bucket
-                );
-                Self::GCS(Arc::new(GCSBucket::new(gcs_config, timeout).await?))
-            }
        })
    }

@@ -845,7 +764,6 @@ impl GenericRemoteStorage {
            Self::AwsS3(s) => Some(s.bucket_name()),
            Self::AzureBlob(s) => Some(s.container_name()),
            Self::Unreliable(_s) => None,
-            Self::GCS(s) => Some(s.bucket_name()),
        }
    }
 }
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -63,7 +63,6 @@ impl UnreliableWrapper {
            GenericRemoteStorage::Unreliable(_s) => {
                panic!("Can't wrap unreliable wrapper unreliably")
            }
-            GenericRemoteStorage::GCS(s) => GenericRemoteStorage::GCS(s),
        };
        let actual_attempt_failure_probability = cmp::min(attempt_failure_probability, 100);
        UnreliableWrapper {
--- a/libs/remote_storage/tests/test_real_gcs.rs
+++ b/libs/remote_storage/tests/test_real_gcs.rs
@@ -1,277 +0,0 @@
-#![allow(dead_code)]
-#![allow(unused)]
-
-mod common;
-
-use crate::common::{download_to_vec, upload_stream};
-use anyhow::Context;
-use camino::Utf8Path;
-use futures::StreamExt;
-use futures::stream::Stream;
-use remote_storage::{
-    DownloadKind, DownloadOpts, GCSConfig, GenericRemoteStorage, ListingMode, RemotePath,
-    RemoteStorageConfig, RemoteStorageKind, StorageMetadata,
-};
-use std::collections::HashMap;
-#[path = "common/tests.rs"]
-use std::collections::HashSet;
-use std::fmt::{Debug, Display};
-use std::io::Cursor;
-use std::ops::Bound;
-use std::pin::pin;
-use std::sync::Arc;
-use std::time::Duration;
-use std::time::SystemTime;
-use test_context::{AsyncTestContext, test_context};
-use tokio_util::sync::CancellationToken;
-use utils::backoff;
-
-// A minimal working GCS client I can pass around in async context
-
-const BASE_PREFIX: &str = "test";
-
-async fn create_gcs_client() -> anyhow::Result<Arc<GenericRemoteStorage>> {
-    let bucket_name = std::env::var("GCS_TEST_BUCKET").expect("GCS_TEST_BUCKET must be set");
-    let gcs_config = GCSConfig {
-        bucket_name,
-        prefix_in_bucket: Some("testing-path/".into()),
-        max_keys_per_list_response: Some(100),
-        concurrency_limit: std::num::NonZero::new(100).unwrap(),
-    };
-
-    let remote_storage_config = RemoteStorageConfig {
-        storage: RemoteStorageKind::GCS(gcs_config),
-        timeout: Duration::from_secs(120),
-        small_timeout: std::time::Duration::from_secs(120),
-    };
-    Ok(Arc::new(
-        GenericRemoteStorage::from_config(&remote_storage_config)
-            .await
-            .context("remote storage init")?,
-    ))
-}
-
-struct EnabledGCS {
-    client: Arc<GenericRemoteStorage>,
-    base_prefix: &'static str,
-}
-
-impl EnabledGCS {
-    async fn setup() -> Self {
-        let client = create_gcs_client()
-            .await
-            .context("gcs client creation")
-            .expect("gcs client creation failed");
-        EnabledGCS {
-            client,
-            base_prefix: BASE_PREFIX,
-        }
-    }
-}
-
-impl AsyncTestContext for EnabledGCS {
-    async fn setup() -> Self {
-        Self::setup().await
-    }
-}
-
-#[test_context(EnabledGCS)]
-#[tokio::test]
-async fn gcs_get_object_bytes_range_header(ctx: &mut EnabledGCS) -> anyhow::Result<()> {
-    let cancel = CancellationToken::new();
-    let path = RemotePath::new(Utf8Path::new(
-        format!("{}/000000010000028000000086", ctx.base_prefix).as_str(),
-    ))
-    .with_context(|| "RemotePath conversion")?;
-
-    let (data, len) = upload_stream("hello, world".as_bytes().into());
-
-    ctx.client.upload(data, len, &path, None, &cancel).await?;
-
-    let opts = DownloadOpts {
-        byte_start: Bound::Included(7),
-        ..Default::default()
-    };
-    let dl_object = download_to_vec(ctx.client.download(&path, &opts, &cancel).await?).await?;
-    let s = String::from_utf8(dl_object).unwrap();
-    assert_eq!(5, s.len());
-    Ok(())
-}
-#[test_context(EnabledGCS)]
-#[tokio::test]
-async fn gcs_test_suite(ctx: &mut EnabledGCS) -> anyhow::Result<()> {
-    // ------------------------------------------------
-    // --- `time_travel_recover`, showcasing `upload`, `delete_objects`, `copy`
-    // ------------------------------------------------
-
-    // Our test depends on discrepancies in the clock between S3 and the environment the tests
-    // run in. Therefore, wait a little bit before and after. The alternative would be
-    // to take the time from S3 response headers.
-    const WAIT_TIME: Duration = Duration::from_millis(3_000);
-
-    async fn retry<T, O, F, E>(op: O) -> Result<T, E>
-    where
-        E: Display + Debug + 'static,
-        O: FnMut() -> F,
-        F: Future<Output = Result<T, E>>,
-    {
-        let warn_threshold = 3;
-        let max_retries = 10;
-        backoff::retry(
-            op,
-            |_e| false,
-            warn_threshold,
-            max_retries,
-            "test retry",
-            &CancellationToken::new(),
-        )
-        .await
-        .expect("never cancelled")
-    }
-
-    async fn time_point() -> SystemTime {
-        tokio::time::sleep(WAIT_TIME).await;
-        let ret = SystemTime::now();
-        tokio::time::sleep(WAIT_TIME).await;
-        ret
-    }
-
-    async fn list_files(
-        client: &Arc<GenericRemoteStorage>,
-        cancel: &CancellationToken,
-    ) -> anyhow::Result<HashSet<RemotePath>> {
-        Ok(
-            retry(|| client.list(None, ListingMode::NoDelimiter, None, cancel))
-                .await
-                .context("list root files failure")?
-                .keys
-                .into_iter()
-                .map(|o| o.key)
-                .collect::<HashSet<_>>(),
-        )
-    }
-
-    let cancel = CancellationToken::new();
-
-    let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
-        .with_context(|| "RemotePath conversion")?;
-
-    let path2 = RemotePath::new(Utf8Path::new(format!("{}/path2", ctx.base_prefix).as_str()))
-        .with_context(|| "RemotePath conversion")?;
-
-    let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
-        .with_context(|| "RemotePath conversion")?;
-
-    // ---------------- t0 ---------------
-    // Upload 'path1'
-    retry(|| {
-        let (data, len) = upload_stream("remote blob data1".as_bytes().into());
-        ctx.client.upload(data, len, &path1, None, &cancel)
-    })
-    .await?;
-    let t0_files = list_files(&ctx.client, &cancel).await?;
-    let t0 = time_point().await;
-
-    // Show 'path1'
-    println!("at t0: {t0_files:?}");
-
-    // Upload 'path2'
-    let old_data = "remote blob data2";
-    retry(|| {
-        let (data, len) = upload_stream(old_data.as_bytes().into());
-        ctx.client.upload(data, len, &path2, None, &cancel)
-    })
-    .await?;
-
-    // ---------------- t1 ---------------
-    // Show 'path1' and 'path2'
-    let t1_files = list_files(&ctx.client, &cancel).await?;
-    let t1 = time_point().await;
-    println!("at t1: {t1_files:?}");
-
-    {
-        let opts = DownloadOpts::default();
-        let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
-        let last_modified = dl.last_modified;
-        let half_wt = WAIT_TIME.mul_f32(0.5);
-        let t0_hwt = t0 + half_wt;
-        let t1_hwt = t1 - half_wt;
-        if !(t0_hwt..=t1_hwt).contains(&last_modified) {
-            panic!(
-                "last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
-                This likely means a large lock discrepancy between S3 and the local clock."
-            );
-        }
-    }
-
-    // Upload 'path3'
-    retry(|| {
-        let (data, len) = upload_stream("remote blob data3".as_bytes().into());
-        ctx.client.upload(data, len, &path3, None, &cancel)
-    })
-    .await?;
-
-    // Overwrite 'path2'
-    let new_data = "new remote blob data2";
-    retry(|| {
-        let (data, len) = upload_stream(new_data.as_bytes().into());
-        ctx.client.upload(data, len, &path2, None, &cancel)
-    })
-    .await?;
-
-    // Delete 'path1'
-    retry(|| ctx.client.delete(&path1, &cancel)).await?;
-
-    // Show 'path2' and `path3`
-    let t2_files = list_files(&ctx.client, &cancel).await?;
-    let t2 = time_point().await;
-    println!("at t2: {t2_files:?}");
-
-    // No changes after recovery to t2 (no-op)
-    let t_final = time_point().await;
-    ctx.client
-        .time_travel_recover(None, t2, t_final, &cancel, None)
-        .await?;
-    let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
-    println!("after recovery to t2: {t2_files_recovered:?}");
-
-    assert_eq!(t2_files, t2_files_recovered);
-    let path2_recovered_t2 = download_to_vec(
-        ctx.client
-            .download(&path2, &DownloadOpts::default(), &cancel)
-            .await?,
-    )
-    .await?;
-    assert_eq!(path2_recovered_t2, new_data.as_bytes());
-
-    // after recovery to t1: path1 is back, path2 has the old content
-    let t_final = time_point().await;
-    ctx.client
-        .time_travel_recover(None, t1, t_final, &cancel, None)
-        .await?;
-    let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
-    println!("after recovery to t1: {t1_files_recovered:?}");
-    assert_eq!(t1_files, t1_files_recovered);
-    let path2_recovered_t1 = download_to_vec(
-        ctx.client
-            .download(&path2, &DownloadOpts::default(), &cancel)
-            .await?,
-    )
-    .await?;
-    assert_eq!(path2_recovered_t1, old_data.as_bytes());
-
-    // after recovery to t0: everything is gone except for path1
-    let t_final = time_point().await;
-    ctx.client
-        .time_travel_recover(None, t0, t_final, &cancel, None)
-        .await?;
-    let t0_files_recovered = list_files(&ctx.client, &cancel).await?;
-    println!("after recovery to t0: {t0_files_recovered:?}");
-    assert_eq!(t0_files, t0_files_recovered);
-
-    // cleanup
-    let paths = &[path1, path2, path3];
-    retry(|| ctx.client.delete_objects(paths, &cancel)).await?;
-
-    Ok(())
-}
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -341,34 +341,6 @@ extern "C-unwind" fn log_internal(
    }
 }

-/* BEGIN_HADRON */
-extern "C" fn reset_safekeeper_statuses_for_metrics(wp: *mut WalProposer, num_safekeepers: u32) {
-    unsafe {
-        let callback_data = (*(*wp).config).callback_data;
-        let api = callback_data as *mut Box<dyn ApiImpl>;
-        if api.is_null() {
-            return;
-        }
-        (*api).reset_safekeeper_statuses_for_metrics(&mut (*wp), num_safekeepers);
-    }
-}
-
-extern "C" fn update_safekeeper_status_for_metrics(
-    wp: *mut WalProposer,
-    sk_index: u32,
-    status: u8,
-) {
-    unsafe {
-        let callback_data = (*(*wp).config).callback_data;
-        let api = callback_data as *mut Box<dyn ApiImpl>;
-        if api.is_null() {
-            return;
-        }
-        (*api).update_safekeeper_status_for_metrics(&mut (*wp), sk_index, status);
-    }
-}
-/* END_HADRON */
-
 #[derive(Debug, PartialEq)]
 pub enum Level {
    Debug5,
@@ -442,10 +414,6 @@ pub(crate) fn create_api() -> walproposer_api {
        finish_sync_safekeepers: Some(finish_sync_safekeepers),
        process_safekeeper_feedback: Some(process_safekeeper_feedback),
        log_internal: Some(log_internal),
-        /* BEGIN_HADRON */
-        reset_safekeeper_statuses_for_metrics: Some(reset_safekeeper_statuses_for_metrics),
-        update_safekeeper_status_for_metrics: Some(update_safekeeper_status_for_metrics),
-        /* END_HADRON */
    }
 }

@@ -483,8 +451,6 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
        replica_promote: false,
        min_ps_feedback: empty_feedback,
        wal_rate_limiter: empty_wal_rate_limiter,
-        num_safekeepers: 0,
-        safekeeper_status: [0; 32],
    }
 }

--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -159,21 +159,6 @@ pub trait ApiImpl {
    fn after_election(&self, _wp: &mut WalProposer) {
        todo!()
    }
-
-    /* BEGIN_HADRON */
-    fn reset_safekeeper_statuses_for_metrics(&self, _wp: &mut WalProposer, _num_safekeepers: u32) {
-        // Do nothing for testing purposes.
-    }
-
-    fn update_safekeeper_status_for_metrics(
-        &self,
-        _wp: &mut WalProposer,
-        _sk_index: u32,
-        _status: u8,
-    ) {
-        // Do nothing for testing purposes.
-    }
-    /* END_HADRON */
 }

 #[derive(Debug)]
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -10,10 +10,6 @@ default = []
 # which adds some runtime cost to run tests on outage conditions
 testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]

-# Direct IO alignment options (propagated to pageserver_api)
-io-align-512 = ["pageserver_api/io-align-512"]
-io-align-4k = ["pageserver_api/io-align-4k"]
-
 fuzz-read-path = ["testing"]

 # Enables benchmarking only APIs
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -353,10 +353,6 @@ fn start_pageserver(
        launch_ts.to_string(),
        BUILD_TAG,
    );
-    info!(
-        "IO buffer alignment: {} bytes",
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT
-    );
    set_build_info_metric(GIT_VERSION, BUILD_TAG);
    set_launch_timestamp_metric(launch_ts);
    #[cfg(target_os = "linux")]
--- a/pageserver/src/feature_resolver.rs
+++ b/pageserver/src/feature_resolver.rs
@@ -110,12 +110,6 @@ impl FeatureResolver {
                                PostHogFlagFilterPropertyValue::String("local".to_string()),
                            );
                        }
-                        RemoteStorageKind::GCS { .. } => {
-                            properties.insert(
-                                "region".to_string(),
-                                PostHogFlagFilterPropertyValue::String("local".to_string()),
-                            );
-                        }
                    }
                }
                // TODO: move this to a background task so that we don't block startup in case of slow disk
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -158,7 +158,6 @@ pub(super) async fn upload_timeline_layer<'a>(
        GenericRemoteStorage::LocalFs(_) => {}
        GenericRemoteStorage::AwsS3(_) => {}
        GenericRemoteStorage::Unreliable(_) => {}
-        GenericRemoteStorage::GCS(_) => {}
    };
    /* END_HADRON */
    let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);
--- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs
@@ -73,7 +73,7 @@ pub trait Buffer: std::ops::Deref<Target = [u8]> {
 }

 /// The minimum alignment and size requirement for disk offsets and memory buffer size for direct IO.
-const DIO_CHUNK_SIZE: usize = crate::virtual_file::get_io_buffer_alignment();
+const DIO_CHUNK_SIZE: usize = 512;

 /// If multiple chunks need to be read, merge adjacent chunk reads into batches of max size `MAX_CHUNK_BATCH_SIZE`.
 /// (The unit is the number of chunks.)
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -852,7 +852,7 @@ impl VirtualFileInner {
        // Because the allocator might return _more_ aligned addresses than requested,
        // there is a chance that testing would not catch violations of a runtime requirement stricter than 512.
        {
-            let requirement = get_io_buffer_alignment();
+            let requirement = 512;
            let remainder = addr % requirement;
            assert!(
                remainder == 0,
@@ -866,7 +866,7 @@ impl VirtualFileInner {
        // So enforce just that and not anything more restrictive.
        // Even the shallowest testing will expose more restrictive requirements if those ever arise.
        {
-            let requirement = get_io_buffer_alignment() as u64;
+            let requirement = 512;
            let remainder = offset % requirement;
            assert!(
                remainder == 0,
@@ -879,7 +879,7 @@ impl VirtualFileInner {
        // The requirement in Linux 6.1 is bdev_logical_block_size().
        // On our production systems, that is 512.
        {
-            let requirement = get_io_buffer_alignment();
+            let requirement = 512;
            let remainder = size % requirement;
            assert!(
                remainder == 0,
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -49,7 +49,6 @@
 #include "neon.h"
 #include "neon_lwlsncache.h"
 #include "neon_perf_counters.h"
-#include "neon_utils.h"
 #include "pagestore_client.h"
 #include "communicator.h"

@@ -674,19 +673,8 @@ lfc_get_state(size_t max_entries)
 			{
 				if (GET_STATE(entry, j) != UNAVAILABLE)
 				{
-					/* Validate the buffer tag before including it */
-					BufferTag test_tag = entry->key;
-					test_tag.blockNum += j;
-
-					if (BufferTagIsValid(&test_tag))
-					{
-						BITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);
-						n_pages += 1;
-					}
-					else
-					{
-						elog(ERROR, "LFC: Skipping invalid buffer tag during cache state capture: blockNum=%u", test_tag.blockNum);
-					}
+					BITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);
+					n_pages += 1;
 				}
 			}
 			if (++i == n_entries)
@@ -695,7 +683,7 @@ lfc_get_state(size_t max_entries)
 		Assert(i == n_entries);
 		fcs->n_pages = n_pages;
 		Assert(pg_popcount((char*)bitmap, ((n_entries << lfc_chunk_size_log) + 7)/8) == n_pages);
-		elog(LOG, "LFC: save state of %d chunks %d pages (validated)", (int)n_entries, (int)n_pages);
+		elog(LOG, "LFC: save state of %d chunks %d pages", (int)n_entries, (int)n_pages);
 	}

 	LWLockRelease(lfc_lock);
@@ -714,7 +702,6 @@ lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
 	size_t n_entries;
 	size_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);
 	size_t fcs_size;
-	uint32_t max_prefetch_pages;
 	dsm_segment *seg;
 	BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];

@@ -759,11 +746,6 @@ lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
 	n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
 	Assert(n_entries != 0);

-	max_prefetch_pages = n_entries << fcs_chunk_size_log;
-	if (fcs->n_pages > max_prefetch_pages) {
-		elog(ERROR, "LFC: Number of pages in file cache state (%d) is more than the limit (%d)", fcs->n_pages, max_prefetch_pages);
-	}
-
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);

 	/* Do not prewarm more entries than LFC limit */
@@ -916,11 +898,6 @@ lfc_prewarm_main(Datum main_arg)
 				{
 					tag = fcs->chunks[snd_idx >> fcs_chunk_size_log];
 					tag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);
-
-					if (!BufferTagIsValid(&tag)) {
-						elog(ERROR, "LFC: Invalid buffer tag: %u", tag.blockNum);
-					}
-
 					if (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))
 					{
 						(void)communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
--- a/pgxn/neon/neon_perf_counters.c
+++ b/pgxn/neon/neon_perf_counters.c
@@ -391,12 +391,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
 	neon_per_backend_counters totals = {0};
 	metric_t   *metrics;

-	/* BEGIN_HADRON */
-	WalproposerShmemState *wp_shmem;
-	uint32 num_safekeepers;
-	uint32 num_active_safekeepers;
-	/* END_HADRON */
-
 	/* We put all the tuples into a tuplestore in one go. */
 	InitMaterializedSRF(fcinfo, 0);

@@ -443,32 +437,11 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
 		// Not ideal but piggyback our databricks counters into the neon perf counters view
 		// so that we don't need to introduce neon--1.x+1.sql to add a new view.
 		{
-		// Keeping this code in its own block to work around the C90 "don't mix declarations and code" rule when we define
-		// the `databricks_metrics` array in the next block. Yes, we are seriously dealing with C90 rules in 2025.
-
-		// Read safekeeper status from wal proposer shared memory first.
-		// Note that we are taking a mutex when reading from walproposer shared memory so that the total safekeeper count is
-		// consistent with the active wal acceptors count. Assuming that we don't query this view too often the mutex should
-		// not be a huge deal.
-		wp_shmem = GetWalpropShmemState();
-		SpinLockAcquire(&wp_shmem->mutex);
-		num_safekeepers = wp_shmem->num_safekeepers;
-		num_active_safekeepers = 0;
-		for (int i = 0; i < num_safekeepers; i++) {
-			if (wp_shmem->safekeeper_status[i] == 1) {
-				num_active_safekeepers++;
-			}
-		}
-		SpinLockRelease(&wp_shmem->mutex);
-	}
-	{
 			metric_t databricks_metrics[] = {
 				{"sql_index_corruption_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->index_corruption_count)},
 				{"sql_data_corruption_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->data_corruption_count)},
 				{"sql_internal_error_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->internal_error_count)},
 				{"ps_corruption_detected", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->ps_corruption_detected)},
-				{"num_active_safekeepers", false, 0.0, (double) num_active_safekeepers},
-				{"num_configured_safekeepers", false, 0.0, (double) num_safekeepers},
 				{NULL, false, 0, 0},
 			};
 			for (int i = 0; databricks_metrics[i].name != NULL; i++)
--- a/pgxn/neon/neon_utils.c
+++ b/pgxn/neon/neon_utils.c
@@ -183,22 +183,3 @@ alloc_curl_handle(void)
 }

 #endif
-
-/*
- * Check if a BufferTag is valid by verifying all its fields are not invalid.
- */
-bool
-BufferTagIsValid(const BufferTag *tag)
-{
-	#if PG_MAJORVERSION_NUM >= 16
-	return (tag->spcOid != InvalidOid) &&
-		(tag->relNumber != InvalidRelFileNumber) &&
-		(tag->forkNum != InvalidForkNumber) &&
-		(tag->blockNum != InvalidBlockNumber);
-	#else
-	return (tag->rnode.spcNode != InvalidOid) &&
-		(tag->rnode.relNode != InvalidOid) &&
-		(tag->forkNum != InvalidForkNumber) &&
-		(tag->blockNum != InvalidBlockNumber);
-	#endif
-}
--- a/pgxn/neon/neon_utils.h
+++ b/pgxn/neon/neon_utils.h
@@ -2,7 +2,6 @@
 #define __NEON_UTILS_H__

 #include "lib/stringinfo.h"
-#include "storage/buf_internals.h"

 #ifndef WALPROPOSER_LIB
 #include <curl/curl.h>
@@ -17,9 +16,6 @@ void		pq_sendint32_le(StringInfo buf, uint32 i);
 void		pq_sendint64_le(StringInfo buf, uint64 i);
 void        disable_core_dump(void);

-/* Buffer tag validation function */
-bool		BufferTagIsValid(const BufferTag *tag);
-
 #ifndef WALPROPOSER_LIB

 CURL *		alloc_curl_handle(void);
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -154,9 +154,7 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api)
 		wp->safekeeper[wp->n_safekeepers].state = SS_OFFLINE;
 		wp->safekeeper[wp->n_safekeepers].active_state = SS_ACTIVE_SEND;
 		wp->safekeeper[wp->n_safekeepers].wp = wp;
-		/* BEGIN_HADRON */
-		wp->safekeeper[wp->n_safekeepers].index = wp->n_safekeepers;
-		/* END_HADRON */
+
 		{
 			Safekeeper *sk = &wp->safekeeper[wp->n_safekeepers];
 			int			written = 0;
@@ -185,10 +183,6 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api)
 	if (wp->safekeepers_generation > INVALID_GENERATION && wp->config->proto_version < 3)
 		wp_log(FATAL, "enabling generations requires protocol version 3");
 	wp_log(LOG, "using safekeeper protocol version %d", wp->config->proto_version);
-	
-	/* BEGIN_HADRON */
-	wp->api.reset_safekeeper_statuses_for_metrics(wp, wp->n_safekeepers);
-	/* END_HADRON */

 	/* Fill the greeting package */
 	wp->greetRequest.pam.tag = 'g';
@@ -361,10 +355,6 @@ ShutdownConnection(Safekeeper *sk)
 	sk->state = SS_OFFLINE;
 	sk->streamingAt = InvalidXLogRecPtr;

-	/* BEGIN_HADRON */
-	sk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 0);
-	/* END_HADRON */
-
 	MembershipConfigurationFree(&sk->greetResponse.mconf);
 	if (sk->voteResponse.termHistory.entries)
 		pfree(sk->voteResponse.termHistory.entries);
@@ -1540,10 +1530,6 @@ StartStreaming(Safekeeper *sk)
 	sk->active_state = SS_ACTIVE_SEND;
 	sk->streamingAt = sk->startStreamingAt;

-	/* BEGIN_HADRON */
-	sk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 1);
-	/* END_HADRON */
-
 	/*
 	 * Donors can only be in SS_ACTIVE state, so we potentially update the
 	 * donor when we switch one to SS_ACTIVE.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Conrad Ludgate	cc66f78d01	update readme	2025-07-31 11:51:44 +01:00
Conrad Ludgate	f9e6802974	s/ssl/tls	2025-07-30 14:03:22 +01:00
Conrad Ludgate	74afc9d96f	refactor pgbouncer tuning	2025-07-30 12:34:36 +01:00
Conrad Ludgate	86fe3150f0	add basic tls test	2025-07-30 12:34:31 +01:00
Conrad Ludgate	52be0146d3	fix runtime	2025-07-30 12:32:23 +01:00
Conrad Ludgate	a3f2a2cae5	add fast path for TLS renewal configuration	2025-07-30 12:29:41 +01:00
Conrad Ludgate	a24a0032ad	update certificate files in the watch task	2025-07-30 11:47:34 +01:00
Conrad Ludgate	70cb02742a	pass in the tls_config as a param to watch_certs_for_changes, also wait for it to complete before configuring pgbouncer/local_proxy	2025-07-30 11:47:07 +01:00
Conrad Ludgate	a845295cb3	refactor TLS processing. Only use blocking-IO, split out the loading of certificates from the updating of certificates	2025-07-30 10:29:03 +01:00
Conrad Ludgate	e288cd2198	fix concurrent reconfigure while TLS configuration is taking place	2025-07-30 10:14:20 +01:00
Conrad Ludgate	ffa9e595b8	introduce separate reload commands	2025-07-30 10:14:17 +01:00
Conrad Ludgate	e7b1f63f68	add logs for TLS	2025-07-30 10:08:04 +01:00