Compare commits


1 commit

Author: Alex Chi Z
SHA1: 9fb22c1060
Message: add test case
Signed-off-by: Alex Chi Z <chi@neon.tech>
Date: 2024-03-22 10:46:35 -04:00
112 changed files with 1306 additions and 2809 deletions

View File

@@ -1121,16 +1121,10 @@ jobs:
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main \
-f deployPgSniRouter=false \
-f deployProxy=false \
-f deployStorage=true \
-f deployStorageBroker=true \
-f branch=main \
-f dockerTag=${{needs.tag.outputs.build-tag}} \
-f deployPreprodRegion=true
# TODO: move deployPreprodRegion to release (`"$GITHUB_REF_NAME" == "release"` block), once Staging support different compute tag prefixes for different regions
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=true
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main \
-f deployPgSniRouter=false \
-f deployProxy=false \
@@ -1139,15 +1133,6 @@ jobs:
-f branch=main \
-f dockerTag=${{needs.tag.outputs.build-tag}}
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main \
-f deployPgSniRouter=true \
-f deployProxy=true \
-f deployStorage=false \
-f deployStorageBroker=false \
-f branch=main \
-f dockerTag=${{needs.tag.outputs.build-tag}} \
-f deployPreprodRegion=true
gh workflow --repo neondatabase/aws run deploy-proxy-prod.yml --ref main \
-f deployPgSniRouter=true \
-f deployProxy=true \

View File

@@ -62,14 +62,14 @@ jobs:
trigger-e2e-tests:
needs: [ tag ]
runs-on: ubuntu-latest
runs-on: [ self-hosted, gen3, small ]
env:
TAG: ${{ needs.tag.outputs.build-tag }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
steps:
- name: Check if ECR images are present
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
@@ -79,55 +79,41 @@ jobs:
fi
done
- name: Set e2e-platforms
id: e2e-platforms
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Default set of platforms to run e2e tests on
platforms='["docker", "k8s"]'
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
# If the workflow run is not a pull request, add k8s-neonvm to the list.
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
case "$f" in
vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
;;
*)
# no-op
;;
esac
done
else
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
fi
echo "e2e-platforms=${platforms}" | tee -a $GITHUB_OUTPUT
- name: Set PR's status to pending and request a remote CI test
env:
E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
REMOTE_REPO="${GITHUB_REPOSITORY_OWNER}/cloud"
# For pull requests, GH Actions sets the "github.sha" variable to point at a fake merge commit
# but we need to use a real sha of a latest commit in the PR's branch for the e2e job,
# to place a job run status update later.
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
# For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
gh api "/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}" \
--method POST \
--raw-field "state=pending" \
--raw-field "description=[$REMOTE_REPO] Remote CI job is about to start" \
--raw-field "context=neon-cloud-e2e"
REMOTE_REPO="${{ github.repository_owner }}/cloud"
gh workflow --repo ${REMOTE_REPO} \
run testing.yml \
--ref "main" \
--raw-field "ci_job_name=neon-cloud-e2e" \
--raw-field "commit_hash=$COMMIT_SHA" \
--raw-field "remote_repo=${GITHUB_REPOSITORY}" \
--raw-field "storage_image_tag=${TAG}" \
--raw-field "compute_image_tag=${TAG}" \
--raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
--raw-field "e2e-platforms=${E2E_PLATFORMS}"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${TAG}\",
\"compute_image_tag\": \"${TAG}\",
\"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
}
}"

Cargo.lock (generated)
View File

@@ -276,6 +276,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"aws-config",
"aws-sdk-secretsmanager",
"bytes",
"camino",
"clap",
@@ -346,9 +347,9 @@ dependencies = [
[[package]]
name = "aws-credential-types"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa8587ae17c8e967e4b05a62d495be2fb7701bec52a97f7acfe8a29f938384c8"
checksum = "33cc49dcdd31c8b6e79850a179af4c367669150c7ac0135f176c61bec81a70f7"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -358,9 +359,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b13dc54b4b49f8288532334bba8f87386a40571c47c37b1304979b556dc613c8"
checksum = "eb031bff99877c26c28895766f7bb8484a05e24547e370768d6cc9db514662aa"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -380,29 +381,6 @@ dependencies = [
"uuid",
]
[[package]]
name = "aws-sdk-iam"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ae76026bfb1b80a6aed0bb400c1139cd9c0563e26bce1986cd021c6a968c7b"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"http 0.2.9",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-s3"
version = "1.14.0"
@@ -432,6 +410,29 @@ dependencies = [
"url",
]
[[package]]
name = "aws-sdk-secretsmanager"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a0b64e61e7d632d9df90a2e0f32630c68c24960cab1d27d848718180af883d3"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand 2.0.0",
"http 0.2.9",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-sso"
version = "1.12.0"
@@ -501,9 +502,9 @@ dependencies = [
[[package]]
name = "aws-sigv4"
version = "1.2.0"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11d6f29688a4be9895c0ba8bef861ad0c0dac5c15e9618b9b7a6c233990fc263"
checksum = "c371c6b0ac54d4605eb6f016624fb5c7c2925d315fdf600ac1bf21b19d5f1742"
dependencies = [
"aws-credential-types",
"aws-smithy-eventstream",
@@ -516,7 +517,7 @@ dependencies = [
"hex",
"hmac",
"http 0.2.9",
"http 1.1.0",
"http 1.0.0",
"once_cell",
"p256",
"percent-encoding",
@@ -530,9 +531,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26ea8fa03025b2face2b3038a63525a10891e3d8829901d502e5384a0d8cd46"
checksum = "72ee2d09cce0ef3ae526679b522835d63e75fb427aca5413cd371e490d52dcc6"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -573,9 +574,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http"
version = "0.60.7"
version = "0.60.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f10fa66956f01540051b0aa7ad54574640f748f9839e843442d99b970d3aff9"
checksum = "dab56aea3cd9e1101a0a999447fb346afb680ab1406cebc44b32346e25b4117d"
dependencies = [
"aws-smithy-eventstream",
"aws-smithy-runtime-api",
@@ -594,18 +595,18 @@ dependencies = [
[[package]]
name = "aws-smithy-json"
version = "0.60.7"
version = "0.60.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6"
checksum = "fd3898ca6518f9215f62678870064398f00031912390efd03f1f6ef56d83aa8e"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-query"
version = "0.60.7"
version = "0.60.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb"
checksum = "bda4b1dfc9810e35fba8a620e900522cd1bd4f9578c446e82f49d1ce41d2e9f9"
dependencies = [
"aws-smithy-types",
"urlencoding",
@@ -613,9 +614,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec81002d883e5a7fd2bb063d6fb51c4999eb55d404f4fff3dd878bf4733b9f01"
checksum = "fafdab38f40ad7816e7da5dec279400dd505160780083759f01441af1bbb10ea"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -638,15 +639,14 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
version = "1.2.0"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9acb931e0adaf5132de878f1398d83f8677f90ba70f01f65ff87f6d7244be1c5"
checksum = "c18276dd28852f34b3bf501f4f3719781f4999a51c7bff1a5c6dc8c4529adc29"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
"bytes",
"http 0.2.9",
"http 1.1.0",
"pin-project-lite",
"tokio",
"tracing",
@@ -655,9 +655,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abe14dceea1e70101d38fbf2a99e6a34159477c0fb95e68e05c66bd7ae4c3729"
checksum = "bb3e134004170d3303718baa2a4eb4ca64ee0a1c0a7041dca31b38be0fb414f3"
dependencies = [
"base64-simd",
"bytes",
@@ -678,18 +678,18 @@ dependencies = [
[[package]]
name = "aws-smithy-xml"
version = "0.60.7"
version = "0.60.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "872c68cf019c0e4afc5de7753c4f7288ce4b71663212771bf5e4542eb9346ca9"
checksum = "8604a11b25e9ecaf32f9aa56b9fe253c5e2f606a3477f0071e96d3155a5ed218"
dependencies = [
"xmlparser",
]
[[package]]
name = "aws-types"
version = "1.1.8"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dbf2f3da841a8930f159163175cf6a3d16ddde517c1b0fba7aa776822800f40"
checksum = "789bbe008e65636fe1b6dbbb374c40c8960d1232b96af5ff4aec349f9c4accf4"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
@@ -2396,9 +2396,9 @@ dependencies = [
[[package]]
name = "http"
version = "1.1.0"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea"
dependencies = [
"bytes",
"fnv",
@@ -2498,7 +2498,7 @@ dependencies = [
"hyper",
"log",
"rustls 0.21.9",
"rustls-native-certs 0.6.2",
"rustls-native-certs",
"tokio",
"tokio-rustls 0.24.0",
]
@@ -3581,7 +3581,6 @@ dependencies = [
"strum_macros",
"svg_fmt",
"sync_wrapper",
"sysinfo",
"tenant_size_model",
"thiserror",
"tokio",
@@ -4200,10 +4199,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"aws-config",
"aws-sdk-iam",
"aws-sigv4",
"aws-types",
"base64 0.13.1",
"bstr",
"bytes",
@@ -4214,7 +4209,6 @@ dependencies = [
"consumption_metrics",
"dashmap",
"env_logger",
"fallible-iterator",
"futures",
"git-version",
"hashbrown 0.13.2",
@@ -4222,7 +4216,6 @@ dependencies = [
"hex",
"hmac",
"hostname",
"http 1.1.0",
"humantime",
"hyper",
"hyper-tungstenite",
@@ -4438,9 +4431,9 @@ dependencies = [
[[package]]
name = "redis"
version = "0.25.2"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d64e978fd98a0e6b105d066ba4889a7301fca65aeac850a877d8797343feeb"
checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd"
dependencies = [
"async-trait",
"bytes",
@@ -4449,15 +4442,15 @@ dependencies = [
"itoa",
"percent-encoding",
"pin-project-lite",
"rustls 0.22.2",
"rustls-native-certs 0.7.0",
"rustls-pemfile 2.1.1",
"rustls-pki-types",
"rustls 0.21.9",
"rustls-native-certs",
"rustls-pemfile 1.0.2",
"rustls-webpki 0.101.7",
"ryu",
"sha1_smol",
"socket2 0.5.5",
"socket2 0.4.9",
"tokio",
"tokio-rustls 0.25.0",
"tokio-rustls 0.24.0",
"tokio-util",
"url",
]
@@ -4886,19 +4879,6 @@ dependencies = [
"security-framework",
]
[[package]]
name = "rustls-native-certs"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792"
dependencies = [
"openssl-probe",
"rustls-pemfile 2.1.1",
"rustls-pki-types",
"schannel",
"security-framework",
]
[[package]]
name = "rustls-pemfile"
version = "1.0.2"
@@ -6166,7 +6146,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost",
"rustls-native-certs 0.6.2",
"rustls-native-certs",
"rustls-pemfile 1.0.2",
"tokio",
"tokio-rustls 0.24.0",
@@ -7051,6 +7031,7 @@ dependencies = [
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-runtime-api",
"aws-smithy-types",
"axum",
"base64 0.21.1",

View File

@@ -52,12 +52,10 @@ async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.14"
aws-sdk-iam = "1.15.0"
aws-sdk-secretsmanager = { version = "1.14.0" }
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.4"
aws-credential-types = "1.1.4"
aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
aws-types = "1.1.7"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
@@ -78,7 +76,6 @@ either = "1.8"
enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fallible-iterator = "0.2"
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
@@ -91,7 +88,6 @@ hex = "0.4"
hex-literal = "0.4"
hmac = "0.12.1"
hostname = "0.3.1"
http = {version = "1.1.0", features = ["std"]}
http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
@@ -125,7 +121,7 @@ procfs = "0.14"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
redis = { version = "0.24.0", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }

View File

@@ -135,7 +135,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.77.0
ENV RUSTC_VERSION=1.76.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
@@ -149,7 +149,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
cargo install --git https://github.com/paritytech/cachepot && \
cargo install rustfilt && \
cargo install cargo-hakari && \
cargo install cargo-deny --locked && \
cargo install cargo-deny && \
cargo install cargo-hack && \
cargo install cargo-nextest && \
rm -rf /home/nonroot/.cargo/registry && \

View File

@@ -17,7 +17,6 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
.write(true)
.create(true)
.append(false)
.truncate(false)
.open(path)?;
let buf = io::BufReader::new(&file);
let mut count: usize = 0;
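
The hunk above makes the truncation choice explicit when opening for write. A minimal standalone sketch of the same pattern, using only std (newer clippy releases lint when .create(true) is combined with .write(true) and no explicit truncate/append decision):

use std::fs::{File, OpenOptions};
use std::io::Result;
use std::path::Path;

// Open for read-modify-write without clobbering existing contents.
// truncate(false) states the no-truncation choice explicitly, which also
// satisfies the suspicious-open-options style lint.
fn open_preserving(path: &Path) -> Result<File> {
    OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)     // create the file if it does not exist
        .truncate(false)  // never discard existing contents
        .open(path)
}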

View File

@@ -302,9 +302,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
RoleAction::Create => {
// This branch only runs when roles are created through the console, so it is
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
// from neon_superuser.
// from neon_superuser. (NOTE: REPLICATION has been removed from here for now).
let mut query: String = format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
name.pg_quote()
);
info!("running role create query: '{}'", &query);
@@ -806,8 +806,19 @@ $$;"#,
"",
"",
"",
"",
// Add new migrations below.
r#"
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END
$$;"#,
];
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
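
The empty strings in the migrations list above are placeholder slots that keep the indices of already-applied migrations stable. A hedged sketch of how such a list can be applied with the postgres crate; the neon_migration.migration_id tracking table is hypothetical here (only the schema creation is visible in this diff):

// Run each not-yet-applied migration in order, skipping empty placeholder
// slots, and record progress in a hypothetical tracking table.
fn run_migrations(
    client: &mut postgres::Client,
    migrations: &[&str],
    already_applied: usize,
) -> anyhow::Result<()> {
    for (idx, sql) in migrations.iter().enumerate().skip(already_applied) {
        if !sql.is_empty() {
            client.simple_query(sql)?;
        }
        // hypothetical bookkeeping: remember the highest applied migration
        client.simple_query(&format!(
            "UPDATE neon_migration.migration_id SET id = {}",
            idx + 1
        ))?;
    }
    Ok(())
}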

View File

@@ -16,6 +16,7 @@ testing = []
[dependencies]
anyhow.workspace = true
aws-config.workspace = true
aws-sdk-secretsmanager.workspace = true
bytes.workspace = true
camino.workspace = true
clap.workspace = true

View File

@@ -139,7 +139,7 @@ impl HeartbeaterTask {
.with_client_retries(
|client| async move { client.get_utilization().await },
&jwt_token,
3,
2,
3,
Duration::from_secs(1),
&cancel,
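
with_client_retries is only partially visible in this hunk, so the following is a sketch under assumed semantics: the two integer arguments plausibly separate a warn threshold from the retry cap, with a fixed backoff between attempts.

// Generic retry helper; names and parameter meanings are assumptions, not
// the real with_client_retries implementation.
async fn retry<T, E, Fut>(
    mut op: impl FnMut() -> Fut,
    warn_threshold: u32,
    max_retries: u32,
    backoff: std::time::Duration,
) -> Result<T, E>
where
    E: std::fmt::Display,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt = 0;
    loop {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if attempt < max_retries => {
                if attempt >= warn_threshold {
                    eprintln!("retrying after error: {e}");
                }
                attempt += 1;
                tokio::time::sleep(backoff).await;
            }
            Err(e) => return Err(e),
        }
    }
}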

View File

@@ -3,6 +3,7 @@ use attachment_service::http::make_router;
use attachment_service::metrics::preinitialize_metrics;
use attachment_service::persistence::Persistence;
use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT};
use aws_config::{BehaviorVersion, Region};
use camino::Utf8PathBuf;
use clap::Parser;
use diesel::Connection;
@@ -54,31 +55,11 @@ struct Cli {
#[arg(long)]
database_url: Option<String>,
/// Flag to enable dev mode, which permits running without auth
#[arg(long, default_value = "false")]
dev: bool,
/// Grace period before marking unresponsive pageserver offline
#[arg(long)]
max_unavailable_interval: Option<humantime::Duration>,
}
enum StrictMode {
/// In strict mode, we will require that all secrets are loaded, i.e. security features
/// may not be implicitly turned off by omitting secrets in the environment.
Strict,
/// In dev mode, secrets are optional, and omitting a particular secret will implicitly
/// disable the auth related to it (e.g. no pageserver jwt key -> send unauthenticated
/// requests, no public key -> don't authenticate incoming requests).
Dev,
}
impl Default for StrictMode {
fn default() -> Self {
Self::Strict
}
}
/// Secrets may either be provided on the command line (for testing), or loaded from AWS SecretManager: this
/// type encapsulates the logic to decide which and do the loading.
struct Secrets {
@@ -89,6 +70,13 @@ struct Secrets {
}
impl Secrets {
const DATABASE_URL_SECRET: &'static str = "rds-neon-storage-controller-url";
const PAGESERVER_JWT_TOKEN_SECRET: &'static str =
"neon-storage-controller-pageserver-jwt-token";
const CONTROL_PLANE_JWT_TOKEN_SECRET: &'static str =
"neon-storage-controller-control-plane-jwt-token";
const PUBLIC_KEY_SECRET: &'static str = "neon-storage-controller-public-key";
const DATABASE_URL_ENV: &'static str = "DATABASE_URL";
const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN";
const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN";
@@ -99,41 +87,111 @@ impl Secrets {
/// - Environment variables if DATABASE_URL is set.
/// - AWS Secrets Manager secrets
async fn load(args: &Cli) -> anyhow::Result<Self> {
let Some(database_url) =
Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV).await
else {
anyhow::bail!(
"Database URL is not set (set `--database-url`, or `DATABASE_URL` environment)"
)
};
let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV).await {
Some(v) => Some(JwtAuth::from_key(v).context("Loading public key")?),
None => None,
};
let this = Self {
database_url,
public_key,
jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV).await,
control_plane_jwt_token: Self::load_secret(
&args.control_plane_jwt_token,
Self::CONTROL_PLANE_JWT_TOKEN_ENV,
)
.await,
};
Ok(this)
match &args.database_url {
Some(url) => Self::load_cli(url, args),
None => match std::env::var(Self::DATABASE_URL_ENV) {
Ok(database_url) => Self::load_env(database_url),
Err(_) => Self::load_aws_sm().await,
},
}
}
async fn load_secret(cli: &Option<String>, env_name: &str) -> Option<String> {
if let Some(v) = cli {
Some(v.clone())
} else if let Ok(v) = std::env::var(env_name) {
Some(v)
} else {
None
fn load_env(database_url: String) -> anyhow::Result<Self> {
let public_key = match std::env::var(Self::PUBLIC_KEY_ENV) {
Ok(public_key) => Some(JwtAuth::from_key(public_key).context("Loading public key")?),
Err(_) => None,
};
Ok(Self {
database_url,
public_key,
jwt_token: std::env::var(Self::PAGESERVER_JWT_TOKEN_ENV).ok(),
control_plane_jwt_token: std::env::var(Self::CONTROL_PLANE_JWT_TOKEN_ENV).ok(),
})
}
async fn load_aws_sm() -> anyhow::Result<Self> {
let Ok(region) = std::env::var("AWS_REGION") else {
anyhow::bail!("AWS_REGION is not set, cannot load secrets automatically: either set this, or use CLI args to supply secrets");
};
let config = aws_config::defaults(BehaviorVersion::v2023_11_09())
.region(Region::new(region.clone()))
.load()
.await;
let asm = aws_sdk_secretsmanager::Client::new(&config);
let Some(database_url) = asm
.get_secret_value()
.secret_id(Self::DATABASE_URL_SECRET)
.send()
.await?
.secret_string()
.map(str::to_string)
else {
anyhow::bail!(
"Database URL secret not found at {region}/{}",
Self::DATABASE_URL_SECRET
)
};
let jwt_token = asm
.get_secret_value()
.secret_id(Self::PAGESERVER_JWT_TOKEN_SECRET)
.send()
.await?
.secret_string()
.map(str::to_string);
if jwt_token.is_none() {
tracing::warn!("No pageserver JWT token set: this will only work if authentication is disabled on the pageserver");
}
let control_plane_jwt_token = asm
.get_secret_value()
.secret_id(Self::CONTROL_PLANE_JWT_TOKEN_SECRET)
.send()
.await?
.secret_string()
.map(str::to_string);
if control_plane_jwt_token.is_none() {
tracing::warn!("No control plane JWT token set: this will only work if authentication is disabled in the control plane");
}
let public_key = asm
.get_secret_value()
.secret_id(Self::PUBLIC_KEY_SECRET)
.send()
.await?
.secret_string()
.map(str::to_string);
let public_key = match public_key {
Some(key) => Some(JwtAuth::from_key(key)?),
None => {
tracing::warn!(
"No public key set: inccoming HTTP requests will not be authenticated"
);
None
}
};
Ok(Self {
database_url,
public_key,
jwt_token,
control_plane_jwt_token,
})
}
fn load_cli(database_url: &str, args: &Cli) -> anyhow::Result<Self> {
let public_key = match &args.public_key {
None => None,
Some(key) => Some(JwtAuth::from_key(key.clone()).context("Loading public key")?),
};
Ok(Self {
database_url: database_url.to_owned(),
public_key,
jwt_token: args.jwt_token.clone(),
control_plane_jwt_token: args.control_plane_jwt_token.clone(),
})
}
}
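
Each secret in load_aws_sm is fetched with the same four-call chain; factored out as a sketch, assuming the aws_sdk_secretsmanager client already constructed above (the API calls mirror the ones in this diff):

// Fetch one optional secret from AWS Secrets Manager.
async fn fetch_secret(
    asm: &aws_sdk_secretsmanager::Client,
    secret_id: &str,
) -> anyhow::Result<Option<String>> {
    Ok(asm
        .get_secret_value()
        .secret_id(secret_id)
        .send()
        .await?
        .secret_string()
        .map(str::to_string))
}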
@@ -189,42 +247,8 @@ async fn async_main() -> anyhow::Result<()> {
args.listen
);
let strict_mode = if args.dev {
StrictMode::Dev
} else {
StrictMode::Strict
};
let secrets = Secrets::load(&args).await?;
// Validate required secrets and arguments are provided in strict mode
match strict_mode {
StrictMode::Strict
if (secrets.public_key.is_none()
|| secrets.jwt_token.is_none()
|| secrets.control_plane_jwt_token.is_none()) =>
{
// Production systems should always have secrets configured: if public_key was not set
// then we would implicitly disable auth.
anyhow::bail!(
"Insecure config! One or more secrets is not set. This is only permitted in `--dev` mode"
);
}
StrictMode::Strict if args.compute_hook_url.is_none() => {
// Production systems should always have a compute hook set, to prevent falling
// back to trying to use neon_local.
anyhow::bail!(
"`--compute-hook-url` is not set: this is only permitted in `--dev` mode"
);
}
StrictMode::Strict => {
tracing::info!("Starting in strict mode: configuration is OK.")
}
StrictMode::Dev => {
tracing::warn!("Starting in dev mode: this may be an insecure configuration.")
}
}
let config = Config {
jwt_token: secrets.jwt_token,
control_plane_jwt_token: secrets.control_plane_jwt_token,

View File

@@ -1523,8 +1523,6 @@ impl Service {
&self,
create_req: TenantCreateRequest,
) -> Result<TenantCreateResponse, ApiError> {
let tenant_id = create_req.new_tenant_id.tenant_id;
// Exclude any concurrent attempts to create/access the same tenant ID
let _tenant_lock = self
.tenant_op_locks
@@ -1533,12 +1531,7 @@ impl Service {
let (response, waiters) = self.do_tenant_create(create_req).await?;
if let Err(e) = self.await_waiters(waiters, SHORT_RECONCILE_TIMEOUT).await {
// Avoid deadlock: reconcile may fail while notifying compute, if the cloud control plane refuses to
// accept compute notifications while it is in the process of creating. Reconciliation will
// be retried in the background.
tracing::warn!(%tenant_id, "Reconcile not done yet while creating tenant ({e})");
}
self.await_waiters(waiters, SHORT_RECONCILE_TIMEOUT).await?;
Ok(response)
}
@@ -1617,25 +1610,13 @@ impl Service {
splitting: SplitState::default(),
})
.collect();
match self
.persistence
self.persistence
.insert_tenant_shards(persist_tenant_shards)
.await
{
Ok(_) => {}
Err(DatabaseError::Query(diesel::result::Error::DatabaseError(
DatabaseErrorKind::UniqueViolation,
_,
))) => {
// Unique key violation: this is probably a retry. Because the shard count is part of the unique key,
// if we see a unique key violation it means that the creation request's shard count matches the previous
// creation's shard count.
tracing::info!("Tenant shards already present in database, proceeding with idempotent creation...");
}
// Any other database error is unexpected and a bug.
Err(e) => return Err(ApiError::InternalServerError(anyhow::anyhow!(e))),
};
.map_err(|e| {
// TODO: distinguish primary key constraint (idempotent, OK), from other errors
ApiError::InternalServerError(anyhow::anyhow!(e))
})?;
let (waiters, response_shards) = {
let mut locked = self.inner.write().unwrap();

View File

@@ -294,7 +294,7 @@ where
// is in state 'taken' but the thread that would unlock it is
// not there.
// 2. A rust object that represented some external resource in the
// parent now got implicitly copied by the fork, even though
// parent now got implicitly copied by the the fork, even though
// the object's type is not `Copy`. The parent program may use
// non-copyability as way to enforce unique ownership of an
// external resource in the typesystem. The fork breaks that

View File

@@ -12,7 +12,7 @@
//!
//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
//! started, we launch `compute_ctl`. It synchronizes the safekeepers, downloads
//! the basebackup from the pageserver to initialize the data directory, and
//! the basebackup from the pageserver to initialize the the data directory, and
//! finally launches the PostgreSQL process. It watches the PostgreSQL process
//! until it exits.
//!

View File

@@ -279,7 +279,6 @@ impl StorageController {
&self.listen,
"-p",
self.path.as_ref(),
"--dev",
"--database-url",
&database_url,
"--max-unavailable-interval",

View File

@@ -40,7 +40,7 @@ macro_rules! register_hll {
}};
($N:literal, $NAME:expr, $HELP:expr $(,)?) => {{
$crate::register_hll!($N, $crate::opts!($NAME, $HELP))
$crate::register_hll!($N, $crate::opts!($NAME, $HELP), $LABELS_NAMES)
}};
}

View File

@@ -1,6 +1,5 @@
use anyhow::*;
use clap::{value_parser, Arg, ArgMatches, Command};
use postgres::Client;
use std::{path::PathBuf, str::FromStr};
use wal_craft::*;
@@ -9,8 +8,8 @@ fn main() -> Result<()> {
.init();
let arg_matches = cli().get_matches();
let wal_craft = |arg_matches: &ArgMatches, client: &mut Client| {
let intermediate_lsns = match arg_matches
let wal_craft = |arg_matches: &ArgMatches, client| {
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches
.get_one::<String>("type")
.map(|s| s.as_str())
.context("'type' is required")?
@@ -26,7 +25,6 @@ fn main() -> Result<()> {
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
a => panic!("Unknown --type argument: {a}"),
};
let end_of_wal_lsn = client.pg_current_wal_insert_lsn()?;
for lsn in intermediate_lsns {
println!("intermediate_lsn = {lsn}");
}

View File

@@ -5,6 +5,7 @@ use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
use postgres_ffi::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
use std::cmp::Ordering;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, Instant};
@@ -231,52 +232,59 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> anyhow
pub trait Crafter {
const NAME: &'static str;
/// Generates WAL using the client `client`. Returns a vector of some valid
/// "interesting" intermediate LSNs which one may start reading from.
/// test_end_of_wal uses this to check various starting points.
///
/// Note that postgres is generally keen about writing some WAL. While we
/// try to disable it (autovacuum, big wal_writer_delay, etc) it is always
/// possible, e.g. xl_running_xacts are dumped each 15s. So checks about
/// stable WAL end would be flaky unless postgres is shut down. For this
/// reason returning potential end of WAL here is pointless. Most of the
/// time this doesn't happen though, so it is reasonable to create needed
/// WAL structure and immediately kill postgres like test_end_of_wal does.
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>>;
/// Generates WAL using the client `client`. Returns a pair of:
/// * A vector of some valid "interesting" intermediate LSNs which one may start reading from.
/// May include or exclude Lsn(0) and the end-of-wal.
/// * The expected end-of-wal LSN.
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)>;
}
/// Wraps some WAL craft function, providing current LSN to it before the
/// insertion and flushing WAL afterwards. Also pushes initial LSN to the
/// result.
fn craft_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> anyhow::Result<Vec<PgLsn>>,
) -> anyhow::Result<Vec<PgLsn>> {
f: impl Fn(&mut C, PgLsn) -> anyhow::Result<(Vec<PgLsn>, Option<PgLsn>)>,
) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
ensure_server_config(client)?;
let initial_lsn = client.pg_current_wal_insert_lsn()?;
info!("LSN initial = {}", initial_lsn);
let mut intermediate_lsns = f(client, initial_lsn)?;
let (mut intermediate_lsns, last_lsn) = f(client, initial_lsn)?;
let last_lsn = match last_lsn {
None => client.pg_current_wal_insert_lsn()?,
Some(last_lsn) => {
let insert_lsn = client.pg_current_wal_insert_lsn()?;
match last_lsn.cmp(&insert_lsn) {
Ordering::Less => bail!(
"Some records were inserted after the crafted WAL: {} vs {}",
last_lsn,
insert_lsn
),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
}
}
};
if !intermediate_lsns.starts_with(&[initial_lsn]) {
intermediate_lsns.insert(0, initial_lsn);
}
// Some records may be not flushed, e.g. non-transactional logical messages.
//
// Note: this is broken if pg_current_wal_insert_lsn is at page boundary
// because pg_current_wal_insert_lsn skips page headers.
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
Ok(intermediate_lsns)
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
Ordering::Less => bail!("Some records were flushed after the crafted WAL"),
Ordering::Equal => {}
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
}
Ok((intermediate_lsns, last_lsn))
}
pub struct Simple;
impl Crafter for Simple {
const NAME: &'static str = "simple";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok(Vec::new())
Ok((Vec::new(), None))
})
}
}
@@ -284,36 +292,29 @@ impl Crafter for Simple {
pub struct LastWalRecordXlogSwitch;
impl Crafter for LastWalRecordXlogSwitch {
const NAME: &'static str = "last_wal_record_xlog_switch";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {
// Do not use craft_internal because here we end up with flush_lsn exactly on
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
client.execute("CREATE table t(x int)", &[])?;
let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
// pg_switch_wal returns end of last record of the switched segment,
// i.e. end of SWITCH itself.
let xlog_switch_record_end: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let before_xlog_switch_u64 = u64::from(before_xlog_switch);
let next_segment = PgLsn::from(
before_xlog_switch_u64 - (before_xlog_switch_u64 % WAL_SEGMENT_SIZE as u64)
+ WAL_SEGMENT_SIZE as u64,
);
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
xlog_switch_record_end <= next_segment,
"XLOG_SWITCH record ended after the expected segment boundary: {} > {}",
xlog_switch_record_end,
after_xlog_switch <= next_segment,
"XLOG_SWITCH message ended after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
Ok(vec![before_xlog_switch, xlog_switch_record_end])
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
}
}
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
/// Craft xlog SWITCH record ending at page boundary.
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
@@ -360,29 +361,28 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
// Emit the XLOG_SWITCH
let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
let xlog_switch_record_end: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
xlog_switch_record_end < next_segment,
"XLOG_SWITCH record ended on or after the expected segment boundary: {} > {}",
xlog_switch_record_end,
after_xlog_switch < next_segment,
"XLOG_SWITCH message ended on or after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
ensure!(
u64::from(xlog_switch_record_end) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
"XLOG_SWITCH message ended not on page boundary: {}, offset = {}",
xlog_switch_record_end,
u64::from(xlog_switch_record_end) as usize % XLOG_BLCKSZ
after_xlog_switch,
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ
);
Ok(vec![before_xlog_switch, xlog_switch_record_end])
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
}
}
/// Write ~16MB logical message; it should cross WAL segment.
fn craft_seg_size_logical_message(
fn craft_single_logical_message(
client: &mut impl postgres::GenericClient,
transactional: bool,
) -> anyhow::Result<Vec<PgLsn>> {
) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, initial_lsn| {
ensure!(
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
@@ -405,24 +405,34 @@ fn craft_seg_size_logical_message(
"Logical message crossed two segments"
);
Ok(vec![message_lsn])
if transactional {
// Transactional logical messages are part of a transaction, so the one above is
// followed by a small COMMIT record.
let after_message_lsn = client.pg_current_wal_insert_lsn()?;
ensure!(
message_lsn < after_message_lsn,
"No record found after the emitted message"
);
Ok((vec![message_lsn], Some(after_message_lsn)))
} else {
Ok((Vec::new(), Some(message_lsn)))
}
})
}
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {
// Transactional message crossing WAL segment will be followed by small
// commit record.
craft_seg_size_logical_message(client, true)
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, true)
}
}
pub struct LastWalRecordCrossingSegment;
impl Crafter for LastWalRecordCrossingSegment {
const NAME: &'static str = "last_wal_record_crossing_segment";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<Vec<PgLsn>> {
craft_seg_size_logical_message(client, false)
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, false)
}
}
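
For reference, the next_segment arithmetic in the old LastWalRecordXlogSwitch::craft rounds an LSN up to the next WAL segment boundary. A standalone sketch (16 MiB is the usual WAL segment size):

// Round `lsn` up to the next WAL segment boundary; an LSN already sitting on
// a boundary moves to the following segment, matching the removed code.
fn next_segment_boundary(lsn: u64, wal_segment_size: u64) -> u64 {
    lsn - (lsn % wal_segment_size) + wal_segment_size
}

#[cfg(test)]
mod boundary_tests {
    use super::next_segment_boundary;

    #[test]
    fn rounds_up_mid_segment() {
        // with 16 MiB segments, a mid-segment LSN rounds to the next boundary
        assert_eq!(next_segment_boundary(0x0180_0000, 0x0100_0000), 0x0200_0000);
    }
}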

View File

@@ -11,15 +11,13 @@ use utils::const_assert;
use utils::lsn::Lsn;
fn init_logging() {
let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(format!(
"crate=info,postgres_ffi::{PG_MAJORVERSION}::xlog_utils=trace"
)))
let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(
format!("crate=info,postgres_ffi::{PG_MAJORVERSION}::xlog_utils=trace"),
))
.is_test(true)
.try_init();
}
/// Test that find_end_of_wal returns the same results as pg_dump on various
/// WALs created by Crafter.
fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
use crate::*;
@@ -40,13 +38,13 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
}
cfg.initdb().unwrap();
let srv = cfg.start_server().unwrap();
let intermediate_lsns = C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap();
let (intermediate_lsns, expected_end_of_wal_partial) =
C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap();
let intermediate_lsns: Vec<Lsn> = intermediate_lsns
.iter()
.map(|&lsn| u64::from(lsn).into())
.collect();
// Kill postgres. Note that it might have inserted to WAL something after
// 'craft' did its job.
let expected_end_of_wal: Lsn = u64::from(expected_end_of_wal_partial).into();
srv.kill();
// Check find_end_of_wal on the initial WAL
@@ -58,7 +56,7 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
.filter(|fname| IsXLogFileName(fname))
.max()
.unwrap();
let expected_end_of_wal = find_pg_waldump_end_of_wal(&cfg, &last_segment);
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal);
for start_lsn in intermediate_lsns
.iter()
.chain(std::iter::once(&expected_end_of_wal))
@@ -93,7 +91,11 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
}
}
fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn {
fn check_pg_waldump_end_of_wal(
cfg: &crate::Conf,
last_segment: &str,
expected_end_of_wal: Lsn,
) {
// Get the actual end of WAL by pg_waldump
let waldump_output = cfg
.pg_waldump("000000010000000000000001", last_segment)
@@ -111,8 +113,11 @@ fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn {
}
};
let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();
info!("waldump erred on {}", waldump_wal_end);
waldump_wal_end
info!(
"waldump erred on {}, expected wal end at {}",
waldump_wal_end, expected_end_of_wal
);
assert_eq!(waldump_wal_end, expected_end_of_wal);
}
fn check_end_of_wal(
@@ -205,9 +210,9 @@ pub fn test_update_next_xid() {
#[test]
pub fn test_encode_logical_message() {
let expected = [
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255, 38,
0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114, 101, 102,
105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255,
38, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114,
101, 102, 105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
];
let actual = encode_logical_message("prefix", "message");
assert_eq!(expected, actual[..]);

View File

@@ -198,7 +198,6 @@ impl LocalFs {
fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&temp_file_path)
.await
.with_context(|| {

View File

@@ -247,7 +247,7 @@ fn scenario_4() {
//
// This is in total 5000 + 1000 + 5000 + 1000 = 12000
//
// (If we used the method from the previous scenario, and
// (If we used the the method from the previous scenario, and
// kept only snapshot at the branch point, we'd need to keep
// all the WAL between 10000-18000 on the main branch, so
// the total size would be 5000 + 1000 + 8000 = 14000. The

View File

@@ -63,7 +63,6 @@ impl UnwrittenLockFile {
pub fn create_exclusive(lock_file_path: &Utf8Path) -> anyhow::Result<UnwrittenLockFile> {
let lock_file = fs::OpenOptions::new()
.create(true) // O_CREAT
.truncate(true)
.write(true)
.open(lock_file_path)
.context("open lock file")?;

View File

@@ -69,7 +69,7 @@ pub struct Config {
/// should be removed once we have a better solution there.
sys_buffer_bytes: u64,
/// Minimum fraction of total system memory reserved *before* the cgroup threshold; in
/// Minimum fraction of total system memory reserved *before* the the cgroup threshold; in
/// other words, providing a ceiling for the highest value of the threshold by enforcing that
/// there's at least `cgroup_min_overhead_fraction` of the total memory remaining beyond the
/// threshold.

View File

@@ -59,7 +59,6 @@ signal-hook.workspace = true
smallvec = { workspace = true, features = ["write"] }
svg_fmt.workspace = true
sync_wrapper.workspace = true
sysinfo.workspace = true
tokio-tar.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }

View File

@@ -600,37 +600,32 @@ fn start_pageserver(
None,
"consumption metrics collection",
true,
{
let tenant_manager = tenant_manager.clone();
async move {
// first wait until background jobs are cleared to launch.
//
// this is because we only process active tenants and timelines, and the
// Timeline::get_current_logical_size will spawn the logical size calculation,
// which will not be rate-limited.
let cancel = task_mgr::shutdown_token();
async move {
// first wait until background jobs are cleared to launch.
//
// this is because we only process active tenants and timelines, and the
// Timeline::get_current_logical_size will spawn the logical size calculation,
// which will not be rate-limited.
let cancel = task_mgr::shutdown_token();
tokio::select! {
_ = cancel.cancelled() => { return Ok(()); },
_ = background_jobs_barrier.wait() => {}
};
tokio::select! {
_ = cancel.cancelled() => { return Ok(()); },
_ = background_jobs_barrier.wait() => {}
};
pageserver::consumption_metrics::collect_metrics(
tenant_manager,
metric_collection_endpoint,
&conf.metric_collection_bucket,
conf.metric_collection_interval,
conf.cached_metric_collection_interval,
conf.synthetic_size_calculation_interval,
conf.id,
local_disk_storage,
cancel,
metrics_ctx,
)
.instrument(info_span!("metrics_collection"))
.await?;
Ok(())
}
pageserver::consumption_metrics::collect_metrics(
metric_collection_endpoint,
conf.metric_collection_interval,
conf.cached_metric_collection_interval,
conf.synthetic_size_calculation_interval,
conf.id,
local_disk_storage,
cancel,
metrics_ctx,
)
.instrument(info_span!("metrics_collection"))
.await?;
Ok(())
},
);
}

View File

@@ -95,8 +95,6 @@ pub mod defaults {
pub const DEFAULT_VALIDATE_VECTORED_GET: bool = true;
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
///
/// Default built-in configuration file.
///
@@ -158,8 +156,6 @@ pub mod defaults {
#heatmap_upload_concurrency = {DEFAULT_HEATMAP_UPLOAD_CONCURRENCY}
#secondary_download_concurrency = {DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY}
#ephemeral_bytes_per_memory_kb = {DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB}
[remote_storage]
"#
@@ -238,7 +234,6 @@ pub struct PageServerConf {
// How often to send unchanged cached metrics to the metrics endpoint.
pub cached_metric_collection_interval: Duration,
pub metric_collection_endpoint: Option<Url>,
pub metric_collection_bucket: Option<RemoteStorageConfig>,
pub synthetic_size_calculation_interval: Duration,
pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
@@ -283,13 +278,6 @@ pub struct PageServerConf {
pub max_vectored_read_bytes: MaxVectoredReadBytes,
pub validate_vectored_get: bool,
/// How many bytes of ephemeral layer content will we allow per kilobyte of RAM. When this
/// is exceeded, we start proactively closing ephemeral layers to limit the total amount
/// of ephemeral data.
///
/// Setting this to zero disables limits on total ephemeral layer size.
pub ephemeral_bytes_per_memory_kb: usize,
}
/// We do not want to store this in a PageServerConf because the latter may be logged
@@ -385,7 +373,6 @@ struct PageServerConfigBuilder {
cached_metric_collection_interval: BuilderValue<Duration>,
metric_collection_endpoint: BuilderValue<Option<Url>>,
synthetic_size_calculation_interval: BuilderValue<Duration>,
metric_collection_bucket: BuilderValue<Option<RemoteStorageConfig>>,
disk_usage_based_eviction: BuilderValue<Option<DiskUsageEvictionTaskConfig>>,
@@ -411,8 +398,6 @@ struct PageServerConfigBuilder {
max_vectored_read_bytes: BuilderValue<MaxVectoredReadBytes>,
validate_vectored_get: BuilderValue<bool>,
ephemeral_bytes_per_memory_kb: BuilderValue<usize>,
}
impl PageServerConfigBuilder {
@@ -470,8 +455,6 @@ impl PageServerConfigBuilder {
.expect("cannot parse default synthetic size calculation interval")),
metric_collection_endpoint: Set(DEFAULT_METRIC_COLLECTION_ENDPOINT),
metric_collection_bucket: Set(None),
disk_usage_based_eviction: Set(None),
test_remote_failures: Set(0),
@@ -499,7 +482,6 @@ impl PageServerConfigBuilder {
NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
)),
validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
}
}
}
@@ -604,13 +586,6 @@ impl PageServerConfigBuilder {
self.metric_collection_endpoint = BuilderValue::Set(metric_collection_endpoint)
}
pub fn metric_collection_bucket(
&mut self,
metric_collection_bucket: Option<RemoteStorageConfig>,
) {
self.metric_collection_bucket = BuilderValue::Set(metric_collection_bucket)
}
pub fn synthetic_size_calculation_interval(
&mut self,
synthetic_size_calculation_interval: Duration,
@@ -679,10 +654,6 @@ impl PageServerConfigBuilder {
self.validate_vectored_get = BuilderValue::Set(value);
}
pub fn get_ephemeral_bytes_per_memory_kb(&mut self, value: usize) {
self.ephemeral_bytes_per_memory_kb = BuilderValue::Set(value);
}
pub fn build(self) -> anyhow::Result<PageServerConf> {
let default = Self::default_values();
@@ -723,7 +694,6 @@ impl PageServerConfigBuilder {
metric_collection_interval,
cached_metric_collection_interval,
metric_collection_endpoint,
metric_collection_bucket,
synthetic_size_calculation_interval,
disk_usage_based_eviction,
test_remote_failures,
@@ -738,7 +708,6 @@ impl PageServerConfigBuilder {
get_vectored_impl,
max_vectored_read_bytes,
validate_vectored_get,
ephemeral_bytes_per_memory_kb,
}
CUSTOM LOGIC
{
@@ -973,9 +942,6 @@ impl PageServerConf {
let endpoint = parse_toml_string(key, item)?.parse().context("failed to parse metric_collection_endpoint")?;
builder.metric_collection_endpoint(Some(endpoint));
},
"metric_collection_bucket" => {
builder.metric_collection_bucket(RemoteStorageConfig::from_toml(item)?)
}
"synthetic_size_calculation_interval" =>
builder.synthetic_size_calculation_interval(parse_toml_duration(key, item)?),
"test_remote_failures" => builder.test_remote_failures(parse_toml_u64(key, item)?),
@@ -1029,9 +995,6 @@ impl PageServerConf {
"validate_vectored_get" => {
builder.get_validate_vectored_get(parse_toml_bool("validate_vectored_get", item)?)
}
"ephemeral_bytes_per_memory_kb" => {
builder.get_ephemeral_bytes_per_memory_kb(parse_toml_u64("ephemeral_bytes_per_memory_kb", item)? as usize)
}
_ => bail!("unrecognized pageserver option '{key}'"),
}
}
@@ -1094,7 +1057,6 @@ impl PageServerConf {
metric_collection_interval: Duration::from_secs(60),
cached_metric_collection_interval: Duration::from_secs(60 * 60),
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
metric_collection_bucket: None,
synthetic_size_calculation_interval: Duration::from_secs(60),
disk_usage_based_eviction: None,
test_remote_failures: 0,
@@ -1113,7 +1075,6 @@ impl PageServerConf {
.expect("Invalid default constant"),
),
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
}
}
}
@@ -1328,7 +1289,6 @@ background_task_maximum_delay = '334 s'
defaults::DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL
)?,
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
metric_collection_bucket: None,
synthetic_size_calculation_interval: humantime::parse_duration(
defaults::DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL
)?,
@@ -1351,7 +1311,6 @@ background_task_maximum_delay = '334 s'
.expect("Invalid default constant")
),
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB
},
"Correct defaults should be used when no config values are provided"
);
@@ -1404,7 +1363,6 @@ background_task_maximum_delay = '334 s'
metric_collection_interval: Duration::from_secs(222),
cached_metric_collection_interval: Duration::from_secs(22200),
metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
metric_collection_bucket: None,
synthetic_size_calculation_interval: Duration::from_secs(333),
disk_usage_based_eviction: None,
test_remote_failures: 0,
@@ -1423,7 +1381,6 @@ background_task_maximum_delay = '334 s'
.expect("Invalid default constant")
),
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB
},
"Should be able to parse all basic config values correctly"
);
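
All of the removed config fields flow through the BuilderValue pattern; a minimal sketch of that pattern as reconstructed from the Set(...) calls visible in this diff (the real pageserver implementation may differ):

// Each builder field is either explicitly Set during config parsing or falls
// back to a default when the config is built.
enum BuilderValue<T> {
    Set(T),
    NotSet,
}

impl<T> BuilderValue<T> {
    // take the configured value, or the supplied default
    fn or_default(self, default: T) -> T {
        match self {
            BuilderValue::Set(v) => v,
            BuilderValue::NotSet => default,
        }
    }
}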

View File

@@ -3,13 +3,10 @@
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::{
mgr::TenantManager, LogicalSizeCalculationCause, PageReconstructError, Tenant,
};
use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError, Tenant};
use camino::Utf8PathBuf;
use consumption_metrics::EventType;
use pageserver_api::models::TenantState;
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
use reqwest::Url;
use std::collections::HashMap;
use std::sync::Arc;
@@ -43,9 +40,7 @@ type Cache = HashMap<MetricsKey, (EventType, u64)>;
/// Main thread that serves metrics collection
#[allow(clippy::too_many_arguments)]
pub async fn collect_metrics(
tenant_manager: Arc<TenantManager>,
metric_collection_endpoint: &Url,
metric_collection_bucket: &Option<RemoteStorageConfig>,
metric_collection_interval: Duration,
_cached_metric_collection_interval: Duration,
synthetic_size_calculation_interval: Duration,
@@ -70,19 +65,15 @@ pub async fn collect_metrics(
None,
"synthetic size calculation",
false,
{
let tenant_manager = tenant_manager.clone();
async move {
calculate_synthetic_size_worker(
tenant_manager,
synthetic_size_calculation_interval,
&cancel,
&worker_ctx,
)
.instrument(info_span!("synthetic_size_worker"))
.await?;
Ok(())
}
async move {
calculate_synthetic_size_worker(
synthetic_size_calculation_interval,
&cancel,
&worker_ctx,
)
.instrument(info_span!("synthetic_size_worker"))
.await?;
Ok(())
},
);
@@ -103,27 +94,13 @@ pub async fn collect_metrics(
.build()
.expect("Failed to create http client with timeout");
let bucket_client = if let Some(bucket_config) = metric_collection_bucket {
match GenericRemoteStorage::from_config(bucket_config) {
Ok(client) => Some(client),
Err(e) => {
// Non-fatal error: if we were given an invalid config, we will proceed
// with sending metrics over the network, but not to S3.
tracing::warn!("Invalid configuration for metric_collection_bucket: {e}");
None
}
}
} else {
None
};
let node_id = node_id.to_string();
loop {
let started_at = Instant::now();
// these are point in time, with variable "now"
let metrics = metrics::collect_all_metrics(&tenant_manager, &cached_metrics, &ctx).await;
let metrics = metrics::collect_all_metrics(&cached_metrics, &ctx).await;
let metrics = Arc::new(metrics);
@@ -141,18 +118,10 @@ pub async fn collect_metrics(
tracing::error!("failed to persist metrics to {path:?}: {e:#}");
}
}
if let Some(bucket_client) = &bucket_client {
let res =
upload::upload_metrics_bucket(bucket_client, &cancel, &node_id, &metrics).await;
if let Err(e) = res {
tracing::error!("failed to upload to S3: {e:#}");
}
}
};
let upload = async {
let res = upload::upload_metrics_http(
let res = upload::upload_metrics(
&client,
metric_collection_endpoint,
&cancel,
@@ -163,7 +132,7 @@ pub async fn collect_metrics(
.await;
if let Err(e) = res {
// serialization error which should never happen
tracing::error!("failed to upload via HTTP due to {e:#}");
tracing::error!("failed to upload due to {e:#}");
}
};
@@ -278,7 +247,6 @@ async fn reschedule(
/// Calculate synthetic size for each active tenant
async fn calculate_synthetic_size_worker(
tenant_manager: Arc<TenantManager>,
synthetic_size_calculation_interval: Duration,
cancel: &CancellationToken,
ctx: &RequestContext,
@@ -291,7 +259,7 @@ async fn calculate_synthetic_size_worker(
loop {
let started_at = Instant::now();
let tenants = match tenant_manager.list_tenants() {
let tenants = match mgr::list_tenants().await {
Ok(tenants) => tenants,
Err(e) => {
warn!("cannot get tenant list: {e:#}");
@@ -310,14 +278,10 @@ async fn calculate_synthetic_size_worker(
continue;
}
let Ok(tenant) = tenant_manager.get_attached_tenant_shard(tenant_shard_id) else {
let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) else {
continue;
};
if !tenant.is_active() {
continue;
}
// there is never any reason to exit calculate_synthetic_size_worker following any
// return value -- we don't need to care about shutdown because no tenant is found when
// pageserver is shut down.
@@ -355,7 +319,9 @@ async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &Re
};
// this error can be returned if timeline is shutting down, but it does not
// mean the synthetic size worker should terminate.
// mean the synthetic size worker should terminate. we do not need any checks
// in this function because `mgr::get_tenant` will error out after shutdown has
// progressed to shutting down tenants.
let shutting_down = matches!(
e.downcast_ref::<PageReconstructError>(),
Some(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))

View File

@@ -1,4 +1,3 @@
use crate::tenant::mgr::TenantManager;
use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
use chrono::{DateTime, Utc};
use consumption_metrics::EventType;
@@ -182,7 +181,6 @@ impl MetricsKey {
}
pub(super) async fn collect_all_metrics(
tenant_manager: &Arc<TenantManager>,
cached_metrics: &Cache,
ctx: &RequestContext,
) -> Vec<RawMetric> {
@@ -190,7 +188,7 @@ pub(super) async fn collect_all_metrics(
let started_at = std::time::Instant::now();
let tenants = match tenant_manager.list_tenants() {
let tenants = match crate::tenant::mgr::list_tenants().await {
Ok(tenants) => tenants,
Err(err) => {
tracing::error!("failed to list tenants: {:?}", err);
@@ -202,8 +200,7 @@ pub(super) async fn collect_all_metrics(
if state != TenantState::Active || !id.is_zero() {
None
} else {
tenant_manager
.get_attached_tenant_shard(id)
crate::tenant::mgr::get_tenant(id, true)
.ok()
.map(|tenant| (id.tenant_id, tenant))
}

View File

@@ -1,9 +1,4 @@
use std::time::SystemTime;
use chrono::{DateTime, Utc};
use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE};
use remote_storage::{GenericRemoteStorage, RemotePath};
use tokio::io::AsyncWriteExt;
use tokio_util::sync::CancellationToken;
use tracing::Instrument;
@@ -18,9 +13,8 @@ struct Ids {
pub(super) timeline_id: Option<TimelineId>,
}
/// Serialize and write metrics to an HTTP endpoint
#[tracing::instrument(skip_all, fields(metrics_total = %metrics.len()))]
pub(super) async fn upload_metrics_http(
pub(super) async fn upload_metrics(
client: &reqwest::Client,
metric_collection_endpoint: &reqwest::Url,
cancel: &CancellationToken,
@@ -80,60 +74,6 @@ pub(super) async fn upload_metrics_http(
Ok(())
}
/// Serialize and write metrics to a remote storage object
#[tracing::instrument(skip_all, fields(metrics_total = %metrics.len()))]
pub(super) async fn upload_metrics_bucket(
client: &GenericRemoteStorage,
cancel: &CancellationToken,
node_id: &str,
metrics: &[RawMetric],
) -> anyhow::Result<()> {
if metrics.is_empty() {
// Skip uploads if we have no metrics, so that readers don't have to handle the edge case
// of an empty object.
return Ok(());
}
// Compose object path
let datetime: DateTime<Utc> = SystemTime::now().into();
let ts_prefix = datetime.format("year=%Y/month=%m/day=%d/%H:%M:%SZ");
let path = RemotePath::from_string(&format!("{ts_prefix}_{node_id}.ndjson.gz"))?;
// Set up a gzip writer into a buffer
let mut compressed_bytes: Vec<u8> = Vec::new();
let compressed_writer = std::io::Cursor::new(&mut compressed_bytes);
let mut gzip_writer = async_compression::tokio::write::GzipEncoder::new(compressed_writer);
// Serialize and write into compressed buffer
let started_at = std::time::Instant::now();
for res in serialize_in_chunks(CHUNK_SIZE, metrics, node_id) {
let (_chunk, body) = res?;
gzip_writer.write_all(&body).await?;
}
gzip_writer.flush().await?;
gzip_writer.shutdown().await?;
let compressed_length = compressed_bytes.len();
// Write to remote storage
client
.upload_storage_object(
futures::stream::once(futures::future::ready(Ok(compressed_bytes.into()))),
compressed_length,
&path,
cancel,
)
.await?;
let elapsed = started_at.elapsed();
tracing::info!(
compressed_length,
elapsed_ms = elapsed.as_millis(),
"write metrics bucket at {path}",
);
Ok(())
}
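The function above compresses everything in memory before a single object upload. Here is a minimal sketch of just the gzip-into-buffer step, mirroring the `GzipEncoder`/`Cursor` calls used above; it assumes the `tokio` and `async-compression` crates (with the tokio and gzip features), and the NDJSON payload is illustrative:

```rust
use async_compression::tokio::write::GzipEncoder;
use tokio::io::AsyncWriteExt;

async fn gzip_ndjson(lines: &[&str]) -> std::io::Result<Vec<u8>> {
    let mut compressed: Vec<u8> = Vec::new();
    let mut gzip_writer = GzipEncoder::new(std::io::Cursor::new(&mut compressed));
    for line in lines {
        gzip_writer.write_all(line.as_bytes()).await?;
        gzip_writer.write_all(b"\n").await?;
    }
    // flush + shutdown finalize the gzip trailer; a missing shutdown leaves
    // a truncated stream that decompressors will reject.
    gzip_writer.flush().await?;
    gzip_writer.shutdown().await?;
    drop(gzip_writer); // release the borrow of `compressed`
    Ok(compressed)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let bytes = gzip_ndjson(&[r#"{"metric":"written_size","value":42}"#]).await?;
    println!("compressed to {} bytes", bytes.len());
    Ok(())
}
```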
// The return type is quite ugly, but we gain testability in isolation
fn serialize_in_chunks<'a, F>(
chunk_size: usize,

View File

@@ -61,6 +61,7 @@ use crate::{
metrics::disk_usage_based_eviction::METRICS,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
tenant::{
self,
mgr::TenantManager,
remote_timeline_client::LayerFileMetadata,
secondary::SecondaryTenant,
@@ -813,8 +814,8 @@ async fn collect_eviction_candidates(
const LOG_DURATION_THRESHOLD: std::time::Duration = std::time::Duration::from_secs(10);
// get a snapshot of the list of tenants
let tenants = tenant_manager
.list_tenants()
let tenants = tenant::mgr::list_tenants()
.await
.context("get list of tenants")?;
// TODO: avoid listing every layer in every tenant: this loop can block the executor,
@@ -826,12 +827,8 @@ async fn collect_eviction_candidates(
if cancel.is_cancelled() {
return Ok(EvictionCandidates::Cancelled);
}
let tenant = match tenant_manager.get_attached_tenant_shard(tenant_id) {
Ok(tenant) if tenant.is_active() => tenant,
Ok(_) => {
debug!(tenant_id=%tenant_id.tenant_id, shard_id=%tenant_id.shard_slug(), "Tenant shard is not active");
continue;
}
let tenant = match tenant::mgr::get_tenant(tenant_id, true) {
Ok(tenant) => tenant,
Err(e) => {
// this can happen if the tenant has a lifecycle transition after we fetched it
debug!("failed to get tenant: {e:#}");

View File

@@ -1038,7 +1038,7 @@ paths:
format: hex
responses:
"201":
description: Timeline was created, or already existed with matching parameters
description: TimelineInfo
content:
application/json:
schema:
@@ -1068,17 +1068,11 @@ paths:
schema:
$ref: "#/components/schemas/Error"
"409":
description: Timeline already exists, with different parameters. Creation cannot proceed.
description: Timeline already exists, creation skipped
content:
application/json:
schema:
$ref: "#/components/schemas/ConflictError"
"429":
description: A creation request was sent for the same Timeline Id while a creation was already in progress. Back off and retry.
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"500":
description: Generic operation error
content:

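For the `429` case being removed above, the spec told callers to back off and retry. A hedged sketch of what that client-side contract could look like, assuming the `reqwest`, `tokio`, and `serde_json` crates; the URL, body, and retry budget are illustrative, not part of the real API:

```rust
use std::time::Duration;

/// Illustrative client-side handling: retry on 429 (creation in progress),
/// surface everything else (201, 409, ...) to the caller to interpret.
async fn create_timeline_with_retry(
    client: &reqwest::Client,
    url: &str,
    body: &serde_json::Value,
) -> Result<reqwest::Response, reqwest::Error> {
    let mut backoff = Duration::from_millis(250);
    loop {
        let resp = client.post(url).json(body).send().await?;
        if resp.status() == reqwest::StatusCode::TOO_MANY_REQUESTS {
            // Another creation for the same timeline id is in flight:
            // back off and retry, doubling up to a cap.
            tokio::time::sleep(backoff).await;
            backoff = (backoff * 2).min(Duration::from_secs(10));
            continue;
        }
        return Ok(resp);
    }
}
```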
View File

@@ -49,8 +49,8 @@ use crate::task_mgr::TaskKind;
use crate::tenant::config::{LocationConf, TenantConfOpt};
use crate::tenant::mgr::GetActiveTenantError;
use crate::tenant::mgr::{
GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError,
TenantSlotUpsertError, TenantStateError,
GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError,
TenantSlotError, TenantSlotUpsertError, TenantStateError,
};
use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
use crate::tenant::remote_timeline_client;
@@ -249,11 +249,16 @@ impl From<GetTenantError> for ApiError {
fn from(tse: GetTenantError) -> ApiError {
match tse {
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
GetTenantError::Broken(reason) => {
ApiError::InternalServerError(anyhow!("tenant is broken: {}", reason))
}
GetTenantError::NotActive(_) => {
// Why is this not `ApiError::NotFound`?
// Because we must be careful to never return 404 for a tenant if it does
// in fact exist locally. If we did, the caller could draw the conclusion
// that it can attach the tenant to another PS and we'd be in split-brain.
//
// (We can produce this variant only in `mgr::get_tenant(..., active=true)` calls).
ApiError::ResourceUnavailable("Tenant not yet active".into())
}
GetTenantError::MapState(e) => ApiError::ResourceUnavailable(format!("{e}").into()),
@@ -264,9 +269,6 @@ impl From<GetTenantError> for ApiError {
impl From<GetActiveTenantError> for ApiError {
fn from(e: GetActiveTenantError) -> ApiError {
match e {
GetActiveTenantError::Broken(reason) => {
ApiError::InternalServerError(anyhow!("tenant is broken: {}", reason))
}
GetActiveTenantError::WillNotBecomeActive(_) => ApiError::Conflict(format!("{}", e)),
GetActiveTenantError::Cancelled => ApiError::ShuttingDown,
GetActiveTenantError::NotFound(gte) => gte.into(),
@@ -277,6 +279,19 @@ impl From<GetActiveTenantError> for ApiError {
}
}
impl From<SetNewTenantConfigError> for ApiError {
fn from(e: SetNewTenantConfigError) -> ApiError {
match e {
SetNewTenantConfigError::GetTenant(tid) => {
ApiError::NotFound(anyhow!("tenant {}", tid).into())
}
e @ (SetNewTenantConfigError::Persist(_) | SetNewTenantConfigError::Other(_)) => {
ApiError::InternalServerError(anyhow::Error::new(e))
}
}
}
}
impl From<crate::tenant::DeleteTimelineError> for ApiError {
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
use crate::tenant::DeleteTimelineError::*;
@@ -480,7 +495,7 @@ async fn timeline_create_handler(
async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
.get_attached_tenant_shard(tenant_shard_id, false)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
@@ -520,13 +535,10 @@ async fn timeline_create_handler(
HttpErrorBody::from_msg("Tenant shutting down".to_string()),
)
}
Err(e @ tenant::CreateTimelineError::Conflict) => {
json_response(StatusCode::CONFLICT, HttpErrorBody::from_msg(e.to_string()))
}
Err(e @ tenant::CreateTimelineError::AlreadyCreating) => json_response(
StatusCode::TOO_MANY_REQUESTS,
HttpErrorBody::from_msg(e.to_string()),
),
Err(
e @ tenant::CreateTimelineError::Conflict
| e @ tenant::CreateTimelineError::AlreadyCreating,
) => json_response(StatusCode::CONFLICT, HttpErrorBody::from_msg(e.to_string())),
Err(tenant::CreateTimelineError::AncestorLsn(err)) => json_response(
StatusCode::NOT_ACCEPTABLE,
HttpErrorBody::from_msg(format!("{err:#}")),
@@ -569,7 +581,7 @@ async fn timeline_list_handler(
let response_data = async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
.get_attached_tenant_shard(tenant_shard_id, false)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
@@ -607,7 +619,6 @@ async fn timeline_preserve_initdb_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
// Part of the process for disaster recovery from safekeeper-stored WAL:
// If we don't recover into a new timeline but want to keep the timeline ID,
@@ -615,9 +626,7 @@ async fn timeline_preserve_initdb_handler(
// location where timeline recreation can find it.
async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
let timeline = tenant
.get_timeline(timeline_id, false)
@@ -659,7 +668,7 @@ async fn timeline_detail_handler(
let timeline_info = async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
.get_attached_tenant_shard(tenant_shard_id, false)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
@@ -846,7 +855,7 @@ async fn timeline_delete_handler(
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)
.get_attached_tenant_shard(tenant_shard_id, false)
.map_err(|e| {
match e {
// GetTenantError has a built-in conversion to ApiError, but in this context we don't
@@ -964,11 +973,10 @@ async fn tenant_list_handler(
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let state = get_state(&request);
let response_data = state
.tenant_manager
.list_tenants()
let response_data = mgr::list_tenants()
.instrument(info_span!("tenant_list"))
.await
.map_err(|_| {
ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
})?
@@ -991,12 +999,9 @@ async fn tenant_status(
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
let tenant_info = async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
// Calculate total physical size of all timelines
let mut current_physical_size = 0;
@@ -1069,7 +1074,9 @@ async fn tenant_size_handler(
let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
let headers = request.headers();
let state = get_state(&request);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
if !tenant_shard_id.is_zero() {
return Err(ApiError::BadRequest(anyhow!(
@@ -1077,12 +1084,6 @@ async fn tenant_size_handler(
)));
}
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
// this can be long operation
let inputs = tenant
.gather_size_inputs(
@@ -1151,15 +1152,10 @@ async fn tenant_shard_split_handler(
let state = get_state(&request);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
let new_shards = state
.tenant_manager
.shard_split(
tenant,
tenant_shard_id,
ShardCount::new(req.new_shard_count),
req.new_stripe_size,
&ctx,
@@ -1377,11 +1373,8 @@ async fn get_tenant_config_handler(
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
let response = HashMap::from([
(
@@ -1409,31 +1402,15 @@ async fn update_tenant_config_handler(
let tenant_id = request_data.tenant_id;
check_permission(&request, Some(tenant_id))?;
let new_tenant_conf =
let tenant_conf =
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
let state = get_state(&request);
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let tenant = state
state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(
new_tenant_conf.clone(),
tenant.get_generation(),
&ShardParameters::default(),
);
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
.await
.map_err(ApiError::InternalServerError)?;
tenant.set_new_tenant_config(new_tenant_conf);
.set_new_tenant_config(tenant_conf, tenant_id)
.instrument(info_span!("tenant_config", %tenant_id))
.await?;
json_response(StatusCode::OK, ())
}
@@ -1657,12 +1634,10 @@ async fn handle_tenant_break(
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
let state = get_state(&r);
state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?
.set_broken("broken from test".to_owned())
.await;
let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
tenant.set_broken("broken from test".to_owned()).await;
json_response(StatusCode::OK, ())
}
@@ -1906,7 +1881,7 @@ async fn active_timeline_of_active_tenant(
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Result<Arc<Timeline>, ApiError> {
let tenant = tenant_manager.get_attached_tenant_shard(tenant_shard_id)?;
let tenant = tenant_manager.get_attached_tenant_shard(tenant_shard_id, false)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;

View File

@@ -435,7 +435,7 @@ pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(||
static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_remote_physical_size",
"The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
"The size of the layer files present in the remote storage that are listed in the the remote index_part.json.",
// Corollary: If any files are missing from the index part, they won't be included here.
&["tenant_id", "shard_id", "timeline_id"]
)
@@ -699,14 +699,6 @@ pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
.expect("Failed to register pageserver_startup_is_loading")
});
pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
register_uint_gauge!(
"pageserver_timeline_ephemeral_bytes",
"Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
)
.expect("Failed to register metric")
});
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
/// like how long it took to load.
///

View File

@@ -760,7 +760,6 @@ impl PageServerHandler {
let mut copyin_reader = pin!(StreamReader::new(self.copyin_stream(pgb, &tenant.cancel)));
timeline
.import_basebackup_from_tar(
tenant.clone(),
&mut copyin_reader,
base_lsn,
self.broker_client.clone(),

View File

@@ -1411,7 +1411,7 @@ impl Tenant {
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_timeline(
self: &Arc<Tenant>,
&self,
new_timeline_id: TimelineId,
ancestor_timeline_id: Option<TimelineId>,
mut ancestor_start_lsn: Option<Lsn>,
@@ -1559,7 +1559,7 @@ impl Tenant {
})?;
}
loaded_timeline.activate(self.clone(), broker_client, None, ctx);
loaded_timeline.activate(broker_client, None, ctx);
Ok(loaded_timeline)
}
@@ -1731,12 +1731,7 @@ impl Tenant {
let mut activated_timelines = 0;
for timeline in timelines_to_activate {
timeline.activate(
self.clone(),
broker_client.clone(),
background_jobs_can_start,
ctx,
);
timeline.activate(broker_client.clone(), background_jobs_can_start, ctx);
activated_timelines += 1;
}
@@ -2068,12 +2063,7 @@ impl Tenant {
TenantState::Active { .. } => {
return Ok(());
}
TenantState::Broken { reason, .. } => {
// This is fatal, and reported distinctly from the general case of "will never be active" because
// it's logically a 500 to external API users (broken is always a bug).
return Err(GetActiveTenantError::Broken(reason));
}
TenantState::Stopping { .. } => {
TenantState::Broken { .. } | TenantState::Stopping { .. } => {
// There's no chance the tenant can transition back into ::Active
return Err(GetActiveTenantError::WillNotBecomeActive(current_state));
}
@@ -2151,7 +2141,7 @@ impl Tenant {
// Shut down the timeline's remote client: this means that the indices we write
// for child shards will not be invalidated by the parent shard deleting layers.
tl_client.shutdown().await;
tl_client.shutdown().await?;
// Download methods can still be used after shutdown, as they don't flow through the remote client's
// queue. In principle the RemoteTimelineClient could provide this without downloading it, but this

View File

@@ -111,7 +111,6 @@ async fn create_local_delete_mark(
let _ = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&marker_path)
.with_context(|| format!("could not create delete marker file {marker_path:?}"))?;

View File

@@ -4,7 +4,7 @@
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use itertools::Itertools;
use pageserver_api::key::Key;
use pageserver_api::models::LocationConfigMode;
use pageserver_api::models::{LocationConfigMode, ShardParameters};
use pageserver_api::shard::{
ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId,
};
@@ -16,7 +16,6 @@ use std::collections::{BTreeMap, HashMap};
use std::ops::Deref;
use std::sync::Arc;
use std::time::{Duration, Instant};
use sysinfo::SystemExt;
use tokio::fs;
use utils::timeout::{timeout_cancellable, TimeoutCancellableError};
@@ -40,10 +39,10 @@ use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::{
AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
TenantConfOpt,
};
use crate::tenant::delete::DeleteTenantFlow;
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
use crate::tenant::storage_layer::inmemory_layer;
use crate::tenant::{AttachedTenantConf, SpawnMode, Tenant, TenantState};
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TEMP_FILE_SUFFIX};
@@ -544,18 +543,6 @@ pub async fn init_tenant_mgr(
let ctx = RequestContext::todo_child(TaskKind::Startup, DownloadBehavior::Warn);
// Initialize dynamic limits that depend on system resources
let system_memory =
sysinfo::System::new_with_specifics(sysinfo::RefreshKind::new().with_memory())
.total_memory();
let max_ephemeral_layer_bytes =
conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024);
tracing::info!("Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory");
inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store(
max_ephemeral_layer_bytes,
std::sync::atomic::Ordering::Relaxed,
);
// Scan local filesystem for attached tenants
let tenant_configs = init_load_tenant_configs(conf).await?;
@@ -888,6 +875,16 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
// caller will log how long we took
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum SetNewTenantConfigError {
#[error(transparent)]
GetTenant(#[from] GetTenantError),
#[error(transparent)]
Persist(anyhow::Error),
#[error(transparent)]
Other(anyhow::Error),
}
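The new error enum relies on thiserror's `transparent` + `#[from]` combination. A compilable miniature of that shape, with simplified stand-in inner errors (requires the `thiserror` and `anyhow` crates):

```rust
use thiserror::Error;

#[derive(Debug, Error)]
#[error("tenant not found")]
struct GetTenantError;

#[derive(Debug, Error)]
enum SetNewTenantConfigError {
    // `transparent` forwards Display and source() to the inner error.
    #[error(transparent)]
    GetTenant(#[from] GetTenantError),
    #[error(transparent)]
    Persist(anyhow::Error),
}

fn get_tenant() -> Result<(), GetTenantError> {
    Err(GetTenantError)
}

fn set_config() -> Result<(), SetNewTenantConfigError> {
    // `?` converts GetTenantError automatically via the #[from] impl.
    get_tenant()?;
    Ok(())
}

fn main() {
    // The message comes straight from the wrapped error, with no extra wrapping.
    assert_eq!(set_config().unwrap_err().to_string(), "tenant not found");
    let _ = SetNewTenantConfigError::Persist(anyhow::anyhow!("disk full"));
}
```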
#[derive(thiserror::Error, Debug)]
pub(crate) enum UpsertLocationError {
#[error("Bad config request: {0}")]
@@ -913,21 +910,32 @@ impl TenantManager {
self.conf
}
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently
/// undergoing a state change (i.e. slot is InProgress).
///
/// The returned Tenant is not guaranteed to be active: check its status after obtaining it, or
/// use [`Tenant::wait_to_become_active`] before using it if you will do I/O on it.
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or does not match the query.
/// `active_only = true` restricts the query to tenants that are ready for operations, erroring on all other kinds of tenants.
pub(crate) fn get_attached_tenant_shard(
&self,
tenant_shard_id: TenantShardId,
active_only: bool,
) -> Result<Arc<Tenant>, GetTenantError> {
let locked = self.tenants.read().unwrap();
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
match peek_slot {
Some(TenantSlot::Attached(tenant)) => Ok(Arc::clone(tenant)),
Some(TenantSlot::Attached(tenant)) => match tenant.current_state() {
TenantState::Broken {
reason,
backtrace: _,
} if active_only => Err(GetTenantError::Broken(reason)),
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_shard_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
None | Some(TenantSlot::Secondary(_)) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
@@ -1420,8 +1428,7 @@ impl TenantManager {
.wait_to_become_active(activation_timeout)
.await
.map_err(|e| match e {
GetActiveTenantError::WillNotBecomeActive(_)
| GetActiveTenantError::Broken(_) => {
GetActiveTenantError::WillNotBecomeActive(_) => {
DeleteTenantError::InvalidState(tenant.current_state())
}
GetActiveTenantError::Cancelled => DeleteTenantError::Cancelled,
@@ -1448,30 +1455,29 @@ impl TenantManager {
result
}
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), new_shard_count=%new_shard_count.literal()))]
pub(crate) async fn shard_split(
&self,
tenant: Arc<Tenant>,
tenant_shard_id: TenantShardId,
new_shard_count: ShardCount,
new_stripe_size: Option<ShardStripeSize>,
ctx: &RequestContext,
) -> anyhow::Result<Vec<TenantShardId>> {
let tenant_shard_id = *tenant.get_tenant_shard_id();
let r = self
.do_shard_split(tenant, new_shard_count, new_stripe_size, ctx)
.do_shard_split(tenant_shard_id, new_shard_count, new_stripe_size, ctx)
.await;
if r.is_err() {
// Shard splitting might have left the original shard in a partially shut down state (it
// stops the shard's remote timeline client). Reset it to ensure we leave things in
// a working state.
if self.get(tenant_shard_id).is_some() {
tracing::warn!("Resetting after shard split failure");
tracing::warn!("Resetting {tenant_shard_id} after shard split failure");
if let Err(e) = self.reset_tenant(tenant_shard_id, false, ctx).await {
// Log this error because our return value will still be the original error, not this one. This is
// a severe error: if this happens, we might be leaving behind a tenant that is not fully functional
// (e.g. has uploads disabled). We can't do anything else: if reset fails then shutting the tenant down or
// setting it broken probably won't help either.
tracing::error!("Failed to reset: {e}");
tracing::error!("Failed to reset {tenant_shard_id}: {e}");
}
}
}
@@ -1481,12 +1487,12 @@ impl TenantManager {
pub(crate) async fn do_shard_split(
&self,
tenant: Arc<Tenant>,
tenant_shard_id: TenantShardId,
new_shard_count: ShardCount,
new_stripe_size: Option<ShardStripeSize>,
ctx: &RequestContext,
) -> anyhow::Result<Vec<TenantShardId>> {
let tenant_shard_id = *tenant.get_tenant_shard_id();
let tenant = get_tenant(tenant_shard_id, true)?;
// Validate the incoming request
if new_shard_count.count() <= tenant_shard_id.shard_count.count() {
@@ -1532,6 +1538,7 @@ impl TenantManager {
// If [`Tenant::split_prepare`] fails, we must reload the tenant, because it might
// have been left in a partially-shut-down state.
tracing::warn!("Failed to prepare for split: {e}, reloading Tenant before returning");
self.reset_tenant(tenant_shard_id, false, ctx).await?;
return Err(e);
}
@@ -1929,23 +1936,38 @@ impl TenantManager {
removal_result
}
pub(crate) fn list_tenants(
pub(crate) async fn set_new_tenant_config(
&self,
) -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError> {
let tenants = TENANTS.read().unwrap();
let m = match &*tenants {
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,
};
Ok(m.iter()
.filter_map(|(id, tenant)| match tenant {
TenantSlot::Attached(tenant) => {
Some((*id, tenant.current_state(), tenant.generation()))
}
TenantSlot::Secondary(_) => None,
TenantSlot::InProgress(_) => None,
})
.collect())
new_tenant_conf: TenantConfOpt,
tenant_id: TenantId,
) -> Result<(), SetNewTenantConfigError> {
// Legacy API: does not support sharding
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_shard_id, true)?;
if !tenant.tenant_shard_id().shard_count.is_unsharded() {
// Note that we use ShardParameters::default below.
return Err(SetNewTenantConfigError::Other(anyhow::anyhow!(
"This API may only be used on single-sharded tenants, use the /location_config API for sharded tenants"
)));
}
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(
new_tenant_conf.clone(),
tenant.generation,
&ShardParameters::default(),
);
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &location_conf)
.await
.map_err(SetNewTenantConfigError::Persist)?;
tenant.set_new_tenant_config(new_tenant_conf);
Ok(())
}
}
@@ -1958,12 +1980,51 @@ pub(crate) enum GetTenantError {
#[error("Tenant {0} is not active")]
NotActive(TenantShardId),
/// Broken is logically a subset of NotActive, but a distinct error is useful as
/// NotActive is usually a retryable state for API purposes, whereas Broken
/// is a stuck error state
#[error("Tenant is broken: {0}")]
Broken(String),
// Initializing or shutting down: cannot authoritatively say whether we have this tenant
#[error("Tenant map is not available: {0}")]
MapState(#[from] TenantMapError),
}
/// Gets the tenant from the in-memory data, erroring if it's absent or does not match the query.
/// `active_only = true` restricts the query to tenants that are ready for operations, erroring on all other kinds of tenants.
///
/// This method is cancel-safe.
pub(crate) fn get_tenant(
tenant_shard_id: TenantShardId,
active_only: bool,
) -> Result<Arc<Tenant>, GetTenantError> {
let locked = TENANTS.read().unwrap();
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
match peek_slot {
Some(TenantSlot::Attached(tenant)) => match tenant.current_state() {
TenantState::Broken {
reason,
backtrace: _,
} if active_only => Err(GetTenantError::Broken(reason)),
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_shard_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
None | Some(TenantSlot::Secondary(_)) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
}
}
}
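The `active_only` contract above can be modeled in isolation: Broken and non-Active states only turn into errors when the caller asked for an operable tenant. A simplified, runnable stand-in with the types reduced to the essentials:

```rust
#[derive(Clone, Debug, PartialEq)]
enum TenantState {
    Active,
    Stopping,
    Broken(String),
}

#[derive(Debug, PartialEq)]
enum GetTenantError {
    NotActive,
    Broken(String),
}

// Mirrors the match above: Broken is only surfaced as its own error when the
// caller demanded an operable tenant; otherwise any attached tenant is returned.
fn get_tenant(state: &TenantState, active_only: bool) -> Result<TenantState, GetTenantError> {
    match state {
        TenantState::Broken(reason) if active_only => Err(GetTenantError::Broken(reason.clone())),
        TenantState::Active => Ok(state.clone()),
        _ if active_only => Err(GetTenantError::NotActive),
        _ => Ok(state.clone()),
    }
}

fn main() {
    assert_eq!(get_tenant(&TenantState::Stopping, false), Ok(TenantState::Stopping));
    assert_eq!(get_tenant(&TenantState::Stopping, true), Err(GetTenantError::NotActive));
    let broken = TenantState::Broken("bad index".into());
    assert_eq!(get_tenant(&broken, true), Err(GetTenantError::Broken("bad index".into())));
}
```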
#[derive(thiserror::Error, Debug)]
pub(crate) enum GetActiveTenantError {
/// We may time out either while TenantSlot is InProgress, or while the Tenant
@@ -1987,12 +2048,6 @@ pub(crate) enum GetActiveTenantError {
/// Tenant exists, but is in a state that cannot become active (e.g. Stopping, Broken)
#[error("will not become active. Current state: {0}")]
WillNotBecomeActive(TenantState),
/// Broken is logically a subset of WillNotBecomeActive, but a distinct error is useful as
/// WillNotBecomeActive is a permitted error under some circumstances, whereas Broken should
/// never happen.
#[error("Tenant is broken: {0}")]
Broken(String),
}
/// Get a [`Tenant`] in its active state. If the tenant_id is currently in [`TenantSlot::InProgress`]
@@ -2212,6 +2267,27 @@ pub(crate) enum TenantMapListError {
Initializing,
}
///
/// Get list of tenants, for the mgmt API
///
pub(crate) async fn list_tenants(
) -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError> {
let tenants = TENANTS.read().unwrap();
let m = match &*tenants {
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,
};
Ok(m.iter()
.filter_map(|(id, tenant)| match tenant {
TenantSlot::Attached(tenant) => {
Some((*id, tenant.current_state(), tenant.generation()))
}
TenantSlot::Secondary(_) => None,
TenantSlot::InProgress(_) => None,
})
.collect())
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum TenantMapInsertError {
#[error(transparent)]

View File

@@ -217,7 +217,7 @@ use crate::task_mgr::shutdown_token;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::remote_timeline_client::download::download_retry;
use crate::tenant::storage_layer::AsLayerDesc;
use crate::tenant::upload_queue::{Delete, UploadQueueStoppedDeletable};
use crate::tenant::upload_queue::Delete;
use crate::tenant::TIMELINES_SEGMENT_NAME;
use crate::{
config::PageServerConf,
@@ -266,6 +266,15 @@ pub enum MaybeDeletedIndexPart {
Deleted(IndexPart),
}
/// Errors that can arise when calling [`RemoteTimelineClient::stop`].
#[derive(Debug, thiserror::Error)]
pub enum StopError {
/// Returned if the upload queue was never initialized.
/// See [`RemoteTimelineClient::init_upload_queue`] and [`RemoteTimelineClient::init_upload_queue_for_empty_remote`].
#[error("queue is not initialized")]
QueueUninitialized,
}
#[derive(Debug, thiserror::Error)]
pub enum PersistIndexPartWithDeletedFlagError {
#[error("another task is already setting the deleted_flag, started at {0:?}")]
@@ -390,10 +399,15 @@ impl RemoteTimelineClient {
"bug: it is responsibility of the caller to provide index part from MaybeDeletedIndexPart::Deleted"
))?;
{
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_with_current_remote_index_part(index_part)?;
self.update_remote_physical_size_gauge(Some(index_part));
}
// this also locks the upload queue; without dropping the guard above, it would deadlock
self.stop().expect("initialized line above");
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_with_current_remote_index_part(index_part)?;
self.update_remote_physical_size_gauge(Some(index_part));
self.stop_impl(&mut upload_queue);
upload_queue
.stopped_mut()
@@ -407,8 +421,7 @@ impl RemoteTimelineClient {
match &mut *self.upload_queue.lock().unwrap() {
UploadQueue::Uninitialized => None,
UploadQueue::Initialized(q) => q.get_last_remote_consistent_lsn_projected(),
UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => None,
UploadQueue::Stopped(UploadQueueStopped::Deletable(q)) => q
UploadQueue::Stopped(q) => q
.upload_queue_for_deletion
.get_last_remote_consistent_lsn_projected(),
}
@@ -418,8 +431,7 @@ impl RemoteTimelineClient {
match &mut *self.upload_queue.lock().unwrap() {
UploadQueue::Uninitialized => None,
UploadQueue::Initialized(q) => Some(q.get_last_remote_consistent_lsn_visible()),
UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => None,
UploadQueue::Stopped(UploadQueueStopped::Deletable(q)) => Some(
UploadQueue::Stopped(q) => Some(
q.upload_queue_for_deletion
.get_last_remote_consistent_lsn_visible(),
),
@@ -886,7 +898,7 @@ impl RemoteTimelineClient {
/// Wait for all previously scheduled operations to complete, and then stop.
///
/// Not cancellation safe
pub(crate) async fn shutdown(self: &Arc<Self>) {
pub(crate) async fn shutdown(self: &Arc<Self>) -> Result<(), StopError> {
// On cancellation the queue is left in an awkward state of refusing new operations, while
// a proper stop is yet to be called. On cancel, the original task or some later task must call
// `stop` or `shutdown`.
@@ -897,12 +909,8 @@ impl RemoteTimelineClient {
let fut = {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = match &mut *guard {
UploadQueue::Stopped(_) => return,
UploadQueue::Uninitialized => {
// transition into Stopped state
self.stop_impl(&mut guard);
return;
}
UploadQueue::Stopped(_) => return Ok(()),
UploadQueue::Uninitialized => return Err(StopError::QueueUninitialized),
UploadQueue::Initialized(ref mut init) => init,
};
@@ -934,7 +942,7 @@ impl RemoteTimelineClient {
}
}
self.stop();
self.stop()
}
/// Set the deleted_at field in the remote index file.
@@ -1316,7 +1324,12 @@ impl RemoteTimelineClient {
// upload finishes or times out soon enough.
if cancel.is_cancelled() {
info!("upload task cancelled by shutdown request");
self.stop();
match self.stop() {
Ok(()) => {}
Err(StopError::QueueUninitialized) => {
unreachable!("we never launch an upload task if the queue is uninitialized, and once it is initialized, we never go back")
}
}
return;
}
@@ -1571,23 +1584,17 @@ impl RemoteTimelineClient {
/// In-progress operations will still be running after this function returns.
/// Use `task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id))`
/// to wait for them to complete, after calling this function.
pub(crate) fn stop(&self) {
pub(crate) fn stop(&self) -> Result<(), StopError> {
// Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue
// into stopped state, thereby dropping all of the queued *ops* which haven't become *tasks* yet.
// The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business.
let mut guard = self.upload_queue.lock().unwrap();
self.stop_impl(&mut guard);
}
fn stop_impl(&self, guard: &mut std::sync::MutexGuard<UploadQueue>) {
match &mut **guard {
UploadQueue::Uninitialized => {
info!("UploadQueue is in state Uninitialized, nothing to do");
**guard = UploadQueue::Stopped(UploadQueueStopped::Uninitialized);
}
match &mut *guard {
UploadQueue::Uninitialized => Err(StopError::QueueUninitialized),
UploadQueue::Stopped(_) => {
// nothing to do
info!("another concurrent task already shut down the queue");
Ok(())
}
UploadQueue::Initialized(initialized) => {
info!("shutting down upload queue");
@@ -1620,13 +1627,11 @@ impl RemoteTimelineClient {
};
let upload_queue = std::mem::replace(
&mut **guard,
UploadQueue::Stopped(UploadQueueStopped::Deletable(
UploadQueueStoppedDeletable {
upload_queue_for_deletion,
deleted_at: SetDeletedFlagProgress::NotRunning,
},
)),
&mut *guard,
UploadQueue::Stopped(UploadQueueStopped {
upload_queue_for_deletion,
deleted_at: SetDeletedFlagProgress::NotRunning,
}),
);
if let UploadQueue::Initialized(qi) = upload_queue {
qi
@@ -1655,6 +1660,10 @@ impl RemoteTimelineClient {
// which is exactly what we want to happen.
drop(op);
}
// We're done.
drop(guard);
Ok(())
}
}
}
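The stop path above is deliberately idempotent: whichever task takes the mutex first performs the transition, and later callers just observe Stopped and return. A toy version of that state machine, with stand-in types and none of the upload machinery:

```rust
use std::sync::Mutex;

enum UploadQueue {
    Uninitialized,
    Initialized(Vec<String>), // names of queued ops
    Stopped,
}

struct Client {
    queue: Mutex<UploadQueue>,
}

impl Client {
    fn stop(&self) -> Result<(), &'static str> {
        // Whichever task takes the lock first performs the transition.
        let mut guard = self.queue.lock().unwrap();
        match &*guard {
            UploadQueue::Uninitialized => return Err("queue is not initialized"),
            UploadQueue::Stopped => return Ok(()), // already shut down by another task
            UploadQueue::Initialized(_) => {}
        }
        // Replacing the queue drops any queued ops that never became tasks.
        *guard = UploadQueue::Stopped;
        Ok(())
    }
}

fn main() {
    let client = Client {
        queue: Mutex::new(UploadQueue::Initialized(vec!["upload layer".into()])),
    };
    assert!(client.stop().is_ok());
    assert!(client.stop().is_ok()); // idempotent: second call is a no-op
}
```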

View File

@@ -11,11 +11,11 @@ use crate::{
disk_usage_eviction_task::{
finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
},
is_temporary,
metrics::SECONDARY_MODE,
tenant::{
config::SecondaryLocationConfig,
debug_assert_current_span_has_tenant_and_timeline_id,
ephemeral_file::is_ephemeral_file,
remote_timeline_client::{
index::LayerFileMetadata, is_temp_download_file, FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
@@ -964,7 +964,7 @@ async fn init_timeline_state(
continue;
} else if crate::is_temporary(&file_path)
|| is_temp_download_file(&file_path)
|| is_ephemeral_file(file_name)
|| is_temporary(&file_path)
{
// Temporary files are frequently left behind from restarting during downloads
tracing::info!("Cleaning up temporary file {file_path}");

View File

@@ -9,7 +9,6 @@ use crate::{
metrics::SECONDARY_MODE,
tenant::{
config::AttachmentMode,
mgr::GetTenantError,
mgr::TenantManager,
remote_timeline_client::remote_heatmap_path,
span::debug_assert_current_span_has_tenant_id,
@@ -293,11 +292,8 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
"Starting heatmap write on command");
let tenant = self
.tenant_manager
.get_attached_tenant_shard(*tenant_shard_id)
.get_attached_tenant_shard(*tenant_shard_id, true)
.map_err(|e| anyhow::anyhow!(e))?;
if !tenant.is_active() {
return Err(GetTenantError::NotActive(*tenant_shard_id).into());
}
Ok(UploadPending {
// Ignore our state for last digest: this forces an upload even if nothing has changed

View File

@@ -3,7 +3,7 @@
pub mod delta_layer;
mod filename;
pub mod image_layer;
pub(crate) mod inmemory_layer;
mod inmemory_layer;
pub(crate) mod layer;
mod layer_desc;

View File

@@ -23,12 +23,8 @@ use tracing::*;
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap};
// avoid binding to Write (conflicts with std::io::Write)
// while being able to use std::fmt::Write's methods
use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
use std::cmp::Ordering;
use std::fmt::Write as _;
use std::ops::Range;
use std::sync::atomic::Ordering as AtomicOrdering;
use std::sync::atomic::{AtomicU64, AtomicUsize};
use tokio::sync::{RwLock, RwLockWriteGuard};
use super::{
@@ -74,8 +70,6 @@ pub struct InMemoryLayerInner {
/// Each serialized Value is preceded by a 'u32' length field.
/// PerSeg::page_versions map stores offsets into this file.
file: EphemeralFile,
resource_units: GlobalResourceUnits,
}
impl std::fmt::Debug for InMemoryLayerInner {
@@ -84,121 +78,6 @@ impl std::fmt::Debug for InMemoryLayerInner {
}
}
/// State shared by all in-memory (ephemeral) layers. Updated infrequently during background ticks in Timeline,
/// to minimize contention.
///
/// This global state is used to implement behaviors that require a global view of the system, e.g.
/// rolling layers proactively to limit the total amount of dirty data.
pub(crate) struct GlobalResources {
// Limit on how high dirty_bytes may grow before we start freezing layers to reduce it.
// Zero means unlimited.
pub(crate) max_dirty_bytes: AtomicU64,
// How many bytes are in all EphemeralFile objects
dirty_bytes: AtomicU64,
// How many layers are contributing to dirty_bytes
dirty_layers: AtomicUsize,
}
// Per-timeline RAII struct for its contribution to [`GlobalResources`]
struct GlobalResourceUnits {
// How many dirty bytes have I added to the global dirty_bytes: this guard object is responsible
// for decrementing the global counter by this many bytes when dropped.
dirty_bytes: u64,
}
impl GlobalResourceUnits {
// Hint for the layer append path to update us when the layer size differs from the last
// call to publish_size by this much. If we don't reach this threshold, we'll still get
// updated when the Timeline "ticks" in the background.
const MAX_SIZE_DRIFT: u64 = 10 * 1024 * 1024;
fn new() -> Self {
GLOBAL_RESOURCES
.dirty_layers
.fetch_add(1, AtomicOrdering::Relaxed);
Self { dirty_bytes: 0 }
}
/// Do not call this frequently: all timelines will write to these same global atomics,
/// so this is a relatively expensive operation. Wait at least a few seconds between calls.
///
/// Returns the effective layer size limit that should be applied, if any, to keep
/// the total number of dirty bytes below the configured maximum.
fn publish_size(&mut self, size: u64) -> Option<u64> {
let new_global_dirty_bytes = match size.cmp(&self.dirty_bytes) {
Ordering::Equal => GLOBAL_RESOURCES.dirty_bytes.load(AtomicOrdering::Relaxed),
Ordering::Greater => {
let delta = size - self.dirty_bytes;
let old = GLOBAL_RESOURCES
.dirty_bytes
.fetch_add(delta, AtomicOrdering::Relaxed);
old + delta
}
Ordering::Less => {
let delta = self.dirty_bytes - size;
let old = GLOBAL_RESOURCES
.dirty_bytes
.fetch_sub(delta, AtomicOrdering::Relaxed);
old - delta
}
};
// This is a sloppy update: concurrent updates to the counter will race, and the
// metric's value might not be the exact latest value of GLOBAL_RESOURCES::dirty_bytes.
// That's okay: as long as the metric contains some recent value, it doesn't have to always
// be literally the last update.
TIMELINE_EPHEMERAL_BYTES.set(new_global_dirty_bytes);
self.dirty_bytes = size;
let max_dirty_bytes = GLOBAL_RESOURCES
.max_dirty_bytes
.load(AtomicOrdering::Relaxed);
if max_dirty_bytes > 0 && new_global_dirty_bytes > max_dirty_bytes {
// Set the layer file limit to the average layer size: this implies that all above-average
// sized layers will be eligible for freezing. They will be frozen in the order they
// next enter publish_size.
Some(
new_global_dirty_bytes
/ GLOBAL_RESOURCES.dirty_layers.load(AtomicOrdering::Relaxed) as u64,
)
} else {
None
}
}
// Call publish_size if the input size differs from last published size by more than
// the drift limit
fn maybe_publish_size(&mut self, size: u64) {
let publish = match size.cmp(&self.dirty_bytes) {
Ordering::Equal => false,
Ordering::Greater => size - self.dirty_bytes > Self::MAX_SIZE_DRIFT,
Ordering::Less => self.dirty_bytes - size > Self::MAX_SIZE_DRIFT,
};
if publish {
self.publish_size(size);
}
}
}
impl Drop for GlobalResourceUnits {
fn drop(&mut self) {
GLOBAL_RESOURCES
.dirty_layers
.fetch_sub(1, AtomicOrdering::Relaxed);
// Subtract our contribution to the global total dirty bytes
self.publish_size(0);
}
}
pub(crate) static GLOBAL_RESOURCES: GlobalResources = GlobalResources {
max_dirty_bytes: AtomicU64::new(0),
dirty_bytes: AtomicU64::new(0),
dirty_layers: AtomicUsize::new(0),
};
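The `GlobalResourceUnits` guard implements lock-free accounting: each holder publishes deltas into shared atomics and retracts its contribution on drop. A runnable, process-local miniature of the same RAII pattern (simplified stand-ins, no metrics):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

static GLOBAL_DIRTY_BYTES: AtomicU64 = AtomicU64::new(0);

struct ResourceUnits {
    dirty_bytes: u64,
}

impl ResourceUnits {
    fn new() -> Self {
        Self { dirty_bytes: 0 }
    }

    fn publish_size(&mut self, size: u64) {
        // Apply only the delta so concurrent holders never stomp each other.
        if size >= self.dirty_bytes {
            GLOBAL_DIRTY_BYTES.fetch_add(size - self.dirty_bytes, Ordering::Relaxed);
        } else {
            GLOBAL_DIRTY_BYTES.fetch_sub(self.dirty_bytes - size, Ordering::Relaxed);
        }
        self.dirty_bytes = size;
    }
}

impl Drop for ResourceUnits {
    fn drop(&mut self) {
        // Retract this holder's contribution from the global total.
        self.publish_size(0);
    }
}

fn main() {
    let mut a = ResourceUnits::new();
    a.publish_size(100);
    let mut b = ResourceUnits::new();
    b.publish_size(50);
    assert_eq!(GLOBAL_DIRTY_BYTES.load(Ordering::Relaxed), 150);
    drop(a);
    assert_eq!(GLOBAL_DIRTY_BYTES.load(Ordering::Relaxed), 50);
}
```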
impl InMemoryLayer {
pub(crate) fn get_timeline_id(&self) -> TimelineId {
self.timeline_id
@@ -214,10 +93,6 @@ impl InMemoryLayer {
}
}
pub(crate) fn try_len(&self) -> Option<u64> {
self.inner.try_read().map(|i| i.file.len()).ok()
}
pub(crate) fn assert_writable(&self) {
assert!(self.end_lsn.get().is_none());
}
@@ -453,7 +328,6 @@ impl InMemoryLayer {
inner: RwLock::new(InMemoryLayerInner {
index: HashMap::new(),
file,
resource_units: GlobalResourceUnits::new(),
}),
})
}
@@ -504,18 +378,9 @@ impl InMemoryLayer {
warn!("Key {} at {} already exists", key, lsn);
}
let size = locked_inner.file.len();
locked_inner.resource_units.maybe_publish_size(size);
Ok(())
}
pub(crate) async fn tick(&self) -> Option<u64> {
let mut inner = self.inner.write().await;
let size = inner.file.len();
inner.resource_units.publish_size(size)
}
pub(crate) async fn put_tombstones(&self, _key_ranges: &[(Range<Key>, Lsn)]) -> Result<()> {
// TODO: Currently, we just leak the storage for any deleted keys
Ok(())

View File

@@ -19,7 +19,7 @@ use pageserver_api::{
keyspace::KeySpaceAccum,
models::{
CompactionAlgorithm, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState,
EvictionPolicy, LayerMapInfo, TimelineState,
},
reltag::BlockNumber,
shard::{ShardIdentity, TenantShardId},
@@ -54,7 +54,6 @@ use std::{
ops::ControlFlow,
};
use crate::deletion_queue::DeletionQueueClient;
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
@@ -65,6 +64,7 @@ use crate::{
disk_usage_eviction_task::DiskUsageEvictionInfo,
pgdatadir_mapping::CollectKeySpaceError,
};
use crate::{deletion_queue::DeletionQueueClient, tenant::remote_timeline_client::StopError};
use crate::{
disk_usage_eviction_task::finite_f32,
tenant::storage_layer::{
@@ -1142,79 +1142,6 @@ impl Timeline {
self.flush_frozen_layers_and_wait().await
}
/// If there is no writer, and conditions for rolling the latest layer are met, then freeze it.
///
/// This is for use in background housekeeping, to provide guarantees of layers closing eventually
/// even if there are no ongoing writes to drive that.
async fn maybe_freeze_ephemeral_layer(&self) {
let Ok(_write_guard) = self.write_lock.try_lock() else {
// If the write lock is held, there is an active WAL receiver: rolling open layers
// is its responsibility while it holds this lock.
return;
};
let Ok(layers_guard) = self.layers.try_read() else {
// Don't block if the layer lock is busy
return;
};
let Some(open_layer) = &layers_guard.layer_map().open_layer else {
// No open layer, no work to do.
return;
};
let Some(current_size) = open_layer.try_len() else {
// Unexpected: since we hold the write guard, nobody else should be writing to this layer, so
// taking the read lock to get the size should always succeed.
tracing::warn!("Lock conflict while reading size of open layer");
return;
};
let current_lsn = self.get_last_record_lsn();
let checkpoint_distance_override = open_layer.tick().await;
if let Some(size_override) = checkpoint_distance_override {
if current_size > size_override {
// This is not harmful, but it only happens in relatively rare cases where
// time-based checkpoints are not happening fast enough to keep the amount of
// ephemeral data within configured limits. It's a sign of stress on the system.
tracing::info!("Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure");
}
}
let checkpoint_distance =
checkpoint_distance_override.unwrap_or(self.get_checkpoint_distance());
if self.should_roll(
current_size,
current_size,
checkpoint_distance,
self.get_last_record_lsn(),
self.last_freeze_at.load(),
*self.last_freeze_ts.read().unwrap(),
) {
match open_layer.info() {
InMemoryLayerInfo::Frozen { lsn_start, lsn_end } => {
// We may reach this point if the layer was already frozen but not yet flushed: flushing
// happens asynchronously in the background.
tracing::debug!(
"Not freezing open layer, it's already frozen ({lsn_start}..{lsn_end})"
);
}
InMemoryLayerInfo::Open { .. } => {
// Upgrade to a write lock and freeze the layer
drop(layers_guard);
let mut layers_guard = self.layers.write().await;
layers_guard
.try_freeze_in_memory_layer(current_lsn, &self.last_freeze_at)
.await;
}
}
self.flush_frozen_layers();
}
}
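`maybe_freeze_ephemeral_layer` leans on try-lock so background housekeeping never blocks a foreground writer. The same pattern in miniature, using a std `Mutex` as a stand-in for the real write lock:

```rust
use std::sync::Mutex;

// Background housekeeping must never block a foreground writer, so it bails
// out when the lock is busy instead of waiting. Returns whether it ran.
fn background_tick(write_lock: &Mutex<()>) -> bool {
    let Ok(_guard) = write_lock.try_lock() else {
        // An active writer holds the lock; rolling layers is its job now.
        return false;
    };
    // ... do housekeeping while holding the guard ...
    true
}

fn main() {
    let lock = Mutex::new(());
    assert!(background_tick(&lock));
    let _held = lock.lock().unwrap();
    assert!(!background_tick(&lock)); // lock busy: tick skipped, not blocked
}
```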
/// Outermost timeline compaction operation; downloads needed layers.
pub(crate) async fn compact(
self: &Arc<Self>,
@@ -1237,11 +1164,6 @@ impl Timeline {
(guard, permit)
};
// Prior to compaction, check if an open ephemeral layer should be closed: this provides
// background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping
// an ephemeral layer open forever when idle.
self.maybe_freeze_ephemeral_layer().await;
// this wait probably never needs any "long time spent" logging, because we already nag if
// the compaction task goes over its period (20s), which is quite often in production.
let (_guard, _permit) = tokio::select! {
@@ -1274,7 +1196,6 @@ impl Timeline {
pub(crate) fn activate(
self: &Arc<Self>,
parent: Arc<crate::tenant::Tenant>,
broker_client: BrokerClientChannel,
background_jobs_can_start: Option<&completion::Barrier>,
ctx: &RequestContext,
@@ -1285,7 +1206,7 @@ impl Timeline {
}
self.launch_wal_receiver(ctx, broker_client);
self.set_state(TimelineState::Active);
self.launch_eviction_task(parent, background_jobs_can_start);
self.launch_eviction_task(background_jobs_can_start);
}
/// Graceful shutdown, may do a lot of I/O as we flush any open layers to disk and then
@@ -1320,7 +1241,11 @@ impl Timeline {
// what is problematic is the shutting down of RemoteTimelineClient, because
// obviously it does not make sense to stop while we wait for it, but what
// about corner cases like s3 suddenly hanging up?
client.shutdown().await;
if let Err(e) = client.shutdown().await {
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
// we have some extra WAL replay to do next time the timeline starts.
warn!("failed to flush to remote storage: {e:#}");
}
}
}
Err(e) => {
@@ -1357,7 +1282,12 @@ impl Timeline {
// Shut down remote timeline client: this gracefully moves its metadata into its Stopping state in
// case our caller wants to use that for a deletion
if let Some(remote_client) = self.remote_client.as_ref() {
remote_client.stop();
match remote_client.stop() {
Ok(()) => {}
Err(StopError::QueueUninitialized) => {
// Shutting down during initialization is legal
}
}
}
tracing::debug!("Waiting for tasks...");
@@ -1513,53 +1443,6 @@ impl Timeline {
Err(EvictionError::Timeout) => Ok(Some(false)),
}
}
fn should_roll(
&self,
layer_size: u64,
projected_layer_size: u64,
checkpoint_distance: u64,
projected_lsn: Lsn,
last_freeze_at: Lsn,
last_freeze_ts: Instant,
) -> bool {
let distance = projected_lsn.widening_sub(last_freeze_at);
// Rolling the open layer can be triggered by:
// 1. The distance from the last LSN we rolled at. This bounds the amount of WAL that
// the safekeepers need to store. For sharded tenants, we multiply by shard count to
// account for how writes are distributed across shards: we expect each node to consume
// 1/count of the LSN on average.
// 2. The size of the currently open layer.
// 3. The time since the last roll. It helps safekeepers regard the pageserver as caught
// up and suspend activity.
if distance >= checkpoint_distance as i128 * self.shard_identity.count.count() as i128 {
info!(
"Will roll layer at {} with layer size {} due to LSN distance ({})",
projected_lsn, layer_size, distance
);
true
} else if projected_layer_size >= checkpoint_distance {
info!(
"Will roll layer at {} with layer size {} due to layer size ({})",
projected_lsn, layer_size, projected_layer_size
);
true
} else if distance > 0 && last_freeze_ts.elapsed() >= self.get_checkpoint_timeout() {
info!(
"Will roll layer at {} with layer size {} due to time since last flush ({:?})",
projected_lsn,
layer_size,
last_freeze_ts.elapsed()
);
true
} else {
false
}
}
}
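The three roll triggers in `should_roll` (LSN distance, projected size, time since the last freeze) distill to a short predicate. A simplified sketch that drops the shard-count multiplier and the i128 widening; `checkpoint_distance` and `checkpoint_timeout` stand in for the tenant config values:

```rust
use std::time::{Duration, Instant};

fn should_roll(
    projected_size: u64,
    lsn_distance: u64,
    checkpoint_distance: u64,
    last_roll: Instant,
    checkpoint_timeout: Duration,
) -> bool {
    // Roll on WAL distance, on layer size, or on elapsed time (the last only
    // if something was actually written since the previous roll).
    lsn_distance >= checkpoint_distance
        || projected_size >= checkpoint_distance
        || (lsn_distance > 0 && last_roll.elapsed() >= checkpoint_timeout)
}

fn main() {
    let now = Instant::now();
    // Size trigger fires even if the LSN distance is still small.
    assert!(should_roll(512 << 20, 1, 256 << 20, now, Duration::from_secs(600)));
    // Nothing written and nothing timed out: keep the layer open.
    assert!(!should_roll(0, 0, 256 << 20, now, Duration::from_secs(600)));
}
```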
/// Number of times we will compute partition within a checkpoint distance.
@@ -2713,10 +2596,6 @@ impl Timeline {
// Get all the data needed to reconstruct the page version from this layer.
// But if we have an older cached page image, no need to go past that.
let lsn_floor = max(cached_lsn + 1, start_lsn);
let open_layer = open_layer.clone();
drop(guard);
result = match open_layer
.get_value_reconstruct_data(
key,
@@ -2734,7 +2613,10 @@ impl Timeline {
traversal_path.push((
result,
cont_lsn,
Box::new(move || open_layer.traversal_id()),
Box::new({
let open_layer = Arc::clone(open_layer);
move || open_layer.traversal_id()
}),
));
continue 'outer;
}
@@ -2744,10 +2626,6 @@ impl Timeline {
if cont_lsn > start_lsn {
//info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display());
let lsn_floor = max(cached_lsn + 1, start_lsn);
let frozen_layer = frozen_layer.clone();
drop(guard);
result = match frozen_layer
.get_value_reconstruct_data(
key,
@@ -2765,7 +2643,10 @@ impl Timeline {
traversal_path.push((
result,
cont_lsn,
Box::new(move || frozen_layer.traversal_id()),
Box::new({
let frozen_layer = Arc::clone(frozen_layer);
move || frozen_layer.traversal_id()
}),
));
continue 'outer;
}
@@ -2773,8 +2654,6 @@ impl Timeline {
if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn) {
let layer = guard.get_from_desc(&layer);
drop(guard);
// Get all the data needed to reconstruct the page version from this layer.
// But if we have an older cached page image, no need to go past that.
let lsn_floor = max(cached_lsn + 1, lsn_floor);
@@ -4581,6 +4460,49 @@ impl<'a> TimelineWriter<'a> {
res
}
/// "Tick" the timeline writer: it will roll the open layer if required
/// and do nothing else.
pub(crate) async fn tick(&mut self) -> anyhow::Result<()> {
self.open_layer_if_present().await?;
let last_record_lsn = self.get_last_record_lsn();
let action = self.get_open_layer_action(last_record_lsn, 0);
if action == OpenLayerAction::Roll {
self.roll_layer(last_record_lsn).await?;
}
Ok(())
}
/// Populate the timeline writer state only if an in-memory layer
/// is already open.
async fn open_layer_if_present(&mut self) -> anyhow::Result<()> {
assert!(self.write_guard.is_none());
let open_layer = {
let guard = self.layers.read().await;
let layers = guard.layer_map();
match layers.open_layer {
Some(ref open_layer) => open_layer.clone(),
None => {
return Ok(());
}
}
};
let initial_size = open_layer.size().await?;
let last_freeze_at = self.last_freeze_at.load();
let last_freeze_ts = *self.last_freeze_ts.read().unwrap();
self.write_guard.replace(TimelineWriterState::new(
open_layer,
initial_size,
last_freeze_at,
last_freeze_ts,
));
Ok(())
}
async fn handle_open_layer_action(
&mut self,
at: Lsn,
@@ -4652,14 +4574,43 @@ impl<'a> TimelineWriter<'a> {
return OpenLayerAction::None;
}
if self.tl.should_roll(
state.current_size,
state.current_size + new_value_size,
self.get_checkpoint_distance(),
lsn,
state.cached_last_freeze_at,
state.cached_last_freeze_ts,
) {
let distance = lsn.widening_sub(state.cached_last_freeze_at);
let proposed_open_layer_size = state.current_size + new_value_size;
// Rolling the open layer can be triggered by:
// 1. The distance from the last LSN we rolled at. This bounds the amount of WAL that
// the safekeepers need to store. For sharded tenants, we multiply by shard count to
// account for how writes are distributed across shards: we expect each node to consume
// 1/count of the LSN on average.
// 2. The size of the currently open layer.
// 3. The time since the last roll. It helps safekeepers regard the pageserver as caught
// up and suspend activity.
if distance
>= self.get_checkpoint_distance() as i128 * self.shard_identity.count.count() as i128
{
info!(
"Will roll layer at {} with layer size {} due to LSN distance ({})",
lsn, state.current_size, distance
);
OpenLayerAction::Roll
} else if proposed_open_layer_size >= self.get_checkpoint_distance() {
info!(
"Will roll layer at {} with layer size {} due to layer size ({})",
lsn, state.current_size, proposed_open_layer_size
);
OpenLayerAction::Roll
} else if distance > 0
&& state.cached_last_freeze_ts.elapsed() >= self.get_checkpoint_timeout()
{
info!(
"Will roll layer at {} with layer size {} due to time since last flush ({:?})",
lsn,
state.current_size,
state.cached_last_freeze_ts.elapsed()
);
OpenLayerAction::Roll
} else {
OpenLayerAction::None

View File

@@ -16,7 +16,9 @@ use crate::{
tenant::{
debug_assert_current_span_has_tenant_and_timeline_id,
metadata::TimelineMetadata,
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
remote_timeline_client::{
self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
},
CreateTimelineCause, DeleteTimelineError, Tenant,
},
};
@@ -48,7 +50,19 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
// Prevent new uploads from starting.
if let Some(remote_client) = timeline.remote_client.as_ref() {
remote_client.stop();
let res = remote_client.stop();
match res {
Ok(()) => {}
Err(e) => match e {
remote_timeline_client::StopError::QueueUninitialized => {
// This case shouldn't happen currently because the
// load and attach code bails out if _any_ of the timelines fails to fetch its IndexPart.
// That is, before we declare the Tenant as Active.
// But we only allow calls to delete_timeline on Active tenants.
return Err(DeleteTimelineError::Other(anyhow::anyhow!("upload queue is uninitialized, likely the timeline was in Broken state prior to this call because it failed to fetch IndexPart during load or attach, check the logs")));
}
},
}
}
// Stop & wait for the remaining timeline tasks, including upload tasks.

View File

@@ -51,7 +51,6 @@ pub struct EvictionTaskTenantState {
impl Timeline {
pub(super) fn launch_eviction_task(
self: &Arc<Self>,
parent: Arc<Tenant>,
background_tasks_can_start: Option<&completion::Barrier>,
) {
let self_clone = Arc::clone(self);
@@ -73,14 +72,14 @@ impl Timeline {
_ = completion::Barrier::maybe_wait(background_tasks_can_start) => {}
};
self_clone.eviction_task(parent, cancel).await;
self_clone.eviction_task(cancel).await;
Ok(())
},
);
}
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
async fn eviction_task(self: Arc<Self>, tenant: Arc<Tenant>, cancel: CancellationToken) {
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
use crate::tenant::tasks::random_init_delay;
// acquire the gate guard only once within a useful span
@@ -104,7 +103,7 @@ impl Timeline {
loop {
let policy = self.get_eviction_policy();
let cf = self
.eviction_iteration(&tenant, &policy, &cancel, &guard, &ctx)
.eviction_iteration(&policy, &cancel, &guard, &ctx)
.await;
match cf {
@@ -124,7 +123,6 @@ impl Timeline {
#[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
async fn eviction_iteration(
self: &Arc<Self>,
tenant: &Tenant,
policy: &EvictionPolicy,
cancel: &CancellationToken,
gate: &GateGuard,
@@ -139,7 +137,7 @@ impl Timeline {
}
EvictionPolicy::LayerAccessThreshold(p) => {
match self
.eviction_iteration_threshold(tenant, p, cancel, gate, ctx)
.eviction_iteration_threshold(p, cancel, gate, ctx)
.await
{
ControlFlow::Break(()) => return ControlFlow::Break(()),
@@ -148,11 +146,7 @@ impl Timeline {
(p.period, p.threshold)
}
EvictionPolicy::OnlyImitiate(p) => {
if self
.imitiate_only(tenant, p, cancel, gate, ctx)
.await
.is_break()
{
if self.imitiate_only(p, cancel, gate, ctx).await.is_break() {
return ControlFlow::Break(());
}
(p.period, p.threshold)
@@ -181,7 +175,6 @@ impl Timeline {
async fn eviction_iteration_threshold(
self: &Arc<Self>,
tenant: &Tenant,
p: &EvictionPolicyLayerAccessThreshold,
cancel: &CancellationToken,
gate: &GateGuard,
@@ -200,10 +193,7 @@ impl Timeline {
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
};
match self
.imitate_layer_accesses(tenant, p, cancel, gate, ctx)
.await
{
match self.imitate_layer_accesses(p, cancel, gate, ctx).await {
ControlFlow::Break(()) => return ControlFlow::Break(()),
ControlFlow::Continue(()) => (),
}
@@ -325,7 +315,6 @@ impl Timeline {
/// disk usage based eviction task.
async fn imitiate_only(
self: &Arc<Self>,
tenant: &Tenant,
p: &EvictionPolicyLayerAccessThreshold,
cancel: &CancellationToken,
gate: &GateGuard,
@@ -342,8 +331,7 @@ impl Timeline {
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
};
self.imitate_layer_accesses(tenant, p, cancel, gate, ctx)
.await
self.imitate_layer_accesses(p, cancel, gate, ctx).await
}
/// If we evict layers but keep cached values derived from those layers, then
@@ -373,7 +361,6 @@ impl Timeline {
#[instrument(skip_all)]
async fn imitate_layer_accesses(
&self,
tenant: &Tenant,
p: &EvictionPolicyLayerAccessThreshold,
cancel: &CancellationToken,
gate: &GateGuard,
@@ -409,11 +396,17 @@ impl Timeline {
// Make one of the tenant's timelines draw the short straw and run the calculation.
// The others wait until the calculation is done so that they take into account the
// imitated accesses that the winner made.
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
Ok(t) => t,
Err(_) => {
return ControlFlow::Break(());
}
};
let mut state = tenant.eviction_task_tenant_state.lock().await;
match state.last_layer_access_imitation {
Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }
_ => {
self.imitate_synthetic_size_calculation_worker(tenant, cancel, ctx)
self.imitate_synthetic_size_calculation_worker(&tenant, cancel, ctx)
.await;
state.last_layer_access_imitation = Some(tokio::time::Instant::now());
}
@@ -487,7 +480,7 @@ impl Timeline {
#[instrument(skip_all)]
async fn imitate_synthetic_size_calculation_worker(
&self,
tenant: &Tenant,
tenant: &Arc<Tenant>,
cancel: &CancellationToken,
ctx: &RequestContext,
) {
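The "short straw" coordination described in the comment above is just per-tenant state behind an async mutex. A minimal standalone sketch (simplified types; names assumed):

use std::time::Duration;
use tokio::sync::Mutex;
use tokio::time::Instant;

struct EvictionTaskTenantState {
    last_layer_access_imitation: Option<Instant>,
}

// The first caller past the lock runs the imitation; the others block on the
// mutex, then see a fresh timestamp and skip the work.
async fn maybe_imitate(state: &Mutex<EvictionTaskTenantState>, period: Duration) {
    let mut state = state.lock().await;
    match state.last_layer_access_imitation {
        Some(ts) if ts.elapsed() < period => { /* winner already ran it */ }
        _ => {
            // ... expensive access-imitation work happens here ...
            state.last_layer_access_imitation = Some(Instant::now());
        }
    }
}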

View File

@@ -86,7 +86,6 @@ impl<'t> UninitializedTimeline<'t> {
/// Prepares timeline data by loading it from the basebackup archive.
pub(crate) async fn import_basebackup_from_tar(
self,
tenant: Arc<Tenant>,
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
base_lsn: Lsn,
broker_client: storage_broker::BrokerClientChannel,
@@ -115,7 +114,7 @@ impl<'t> UninitializedTimeline<'t> {
// All the data has been imported. Insert the Timeline into the tenant's timelines map
let tl = self.finish_creation()?;
tl.activate(tenant, broker_client, None, ctx);
tl.activate(broker_client, None, ctx);
Ok(tl)
}

View File

@@ -33,9 +33,11 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
use pageserver_api::shard::TenantShardId;
use std::future::Future;
use std::num::NonZeroU64;
use std::ops::ControlFlow;
use std::sync::Arc;
use std::time::Duration;
use storage_broker::BrokerClientChannel;
use tokio::select;
use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -89,27 +91,31 @@ impl WalReceiver {
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
debug!("WAL receiver manager started, connecting to broker");
let cancel = task_mgr::shutdown_token();
let mut connection_manager_state = ConnectionManagerState::new(
timeline,
conf,
);
while !cancel.is_cancelled() {
let loop_step_result = connection_manager_loop_step(
&mut broker_client,
&mut connection_manager_state,
&walreceiver_ctx,
&cancel,
&loop_status,
).await;
match loop_step_result {
Ok(()) => continue,
Err(_cancelled) => {
trace!("Connection manager loop ended, shutting down");
loop {
select! {
_ = task_mgr::shutdown_watcher() => {
trace!("WAL receiver shutdown requested, shutting down");
break;
}
},
loop_step_result = connection_manager_loop_step(
&mut broker_client,
&mut connection_manager_state,
&walreceiver_ctx,
&loop_status,
) => match loop_step_result {
ControlFlow::Continue(()) => continue,
ControlFlow::Break(()) => {
trace!("Connection manager loop ended, shutting down");
break;
}
},
}
}
connection_manager_state.shutdown().await;
*loop_status.write().unwrap() = None;
Ok(())
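The new shape of the manager loop is the usual token-driven pattern: the step function owns its own select!s and reports cancellation through its return value, while the outer loop only checks the token. A stripped-down sketch:

use tokio_util::sync::CancellationToken;

struct Cancelled;

async fn loop_step(cancel: &CancellationToken) -> Result<(), Cancelled> {
    tokio::select! {
        _ = cancel.cancelled() => Err(Cancelled),
        _ = do_work() => Ok(()),
    }
}

async fn do_work() { /* one iteration of useful work */ }

async fn run(cancel: CancellationToken) {
    while !cancel.is_cancelled() {
        match loop_step(&cancel).await {
            Ok(()) => continue,      // step completed; go around again
            Err(Cancelled) => break, // cancellation observed inside the step
        }
    }
    // shutdown / cleanup runs exactly once, after the loop
}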
@@ -191,9 +197,6 @@ impl<E: Clone> TaskHandle<E> {
}
}
/// # Cancel-Safety
///
/// Cancellation-safe.
async fn next_task_event(&mut self) -> TaskEvent<E> {
match self.events_receiver.changed().await {
Ok(()) => TaskEvent::Update((self.events_receiver.borrow()).clone()),

View File

@@ -17,7 +17,7 @@ use crate::metrics::{
WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED,
WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES,
};
use crate::task_mgr::TaskKind;
use crate::task_mgr::{shutdown_token, TaskKind};
use crate::tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline};
use anyhow::Context;
use chrono::{NaiveDateTime, Utc};
@@ -27,7 +27,7 @@ use storage_broker::proto::SafekeeperTimelineInfo;
use storage_broker::proto::SubscribeSafekeeperInfoRequest;
use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
use storage_broker::{BrokerClientChannel, Code, Streaming};
use tokio_util::sync::CancellationToken;
use tokio::select;
use tracing::*;
use postgres_connection::PgConnectionConfig;
@@ -45,33 +45,27 @@ use super::{
TaskEvent, TaskHandle,
};
pub(crate) struct Cancelled;
/// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker.
/// Based on the updates, decides whether to start, keep, or stop a WAL receiver task.
/// If the storage broker subscription is cancelled, exits.
///
/// # Cancel-Safety
///
/// Not cancellation-safe. Use `cancel` token to request cancellation.
pub(super) async fn connection_manager_loop_step(
broker_client: &mut BrokerClientChannel,
connection_manager_state: &mut ConnectionManagerState,
ctx: &RequestContext,
cancel: &CancellationToken,
manager_status: &std::sync::RwLock<Option<ConnectionManagerStatus>>,
) -> Result<(), Cancelled> {
match tokio::select! {
_ = cancel.cancelled() => { return Err(Cancelled); },
st = connection_manager_state.timeline.wait_to_become_active(ctx) => { st }
} {
) -> ControlFlow<(), ()> {
match connection_manager_state
.timeline
.wait_to_become_active(ctx)
.await
{
Ok(()) => {}
Err(new_state) => {
debug!(
?new_state,
"state changed, stopping wal connection manager loop"
);
return Err(Cancelled);
return ControlFlow::Break(());
}
}
@@ -92,7 +86,7 @@ pub(super) async fn connection_manager_loop_step(
// Subscribe to the broker updates. Stream shares underlying TCP connection
// with other streams on this client (other connection managers). When
// object goes out of scope, stream finishes in drop() automatically.
let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id).await;
debug!("Subscribed for broker timeline updates");
loop {
@@ -100,7 +94,6 @@ pub(super) async fn connection_manager_loop_step(
// These things are happening concurrently:
//
// - cancellation request
// - keep receiving WAL on the current connection
// - if the shared state says we need to change connection, disconnect and return
// - this runs in a separate task and we receive updates via a watch channel
@@ -108,11 +101,7 @@ pub(super) async fn connection_manager_loop_step(
// - receive updates from broker
// - this might change the current desired connection
// - timeline state changes to something that does not allow walreceiver to run concurrently
// NB: make sure each of the select expressions are cancellation-safe
// (no need for arms to be cancellation-safe).
tokio::select! {
_ = cancel.cancelled() => { return Err(Cancelled); }
select! {
Some(wal_connection_update) = async {
match connection_manager_state.wal_connection.as_mut() {
Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),
@@ -144,7 +133,7 @@ pub(super) async fn connection_manager_loop_step(
},
// Got a new update from the broker
broker_update = broker_subscription.message() /* TODO: review cancellation-safety */ => {
broker_update = broker_subscription.message() => {
match broker_update {
Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
Err(status) => {
@@ -158,17 +147,16 @@ pub(super) async fn connection_manager_loop_step(
warn!("broker subscription failed: {status}");
}
}
return Ok(());
return ControlFlow::Continue(());
}
Ok(None) => {
error!("broker subscription stream ended"); // can't happen
return Ok(());
return ControlFlow::Continue(());
}
}
},
new_event = async {
// Reminder: this match arm needs to be cancellation-safe.
loop {
if connection_manager_state.timeline.current_state() == TimelineState::Loading {
warn!("wal connection manager should only be launched after timeline has become active");
@@ -194,11 +182,11 @@ pub(super) async fn connection_manager_loop_step(
}
} => match new_event {
ControlFlow::Continue(()) => {
return Ok(());
return ControlFlow::Continue(());
}
ControlFlow::Break(()) => {
debug!("Timeline is no longer active, stopping wal connection manager loop");
return Err(Cancelled);
return ControlFlow::Break(());
}
},
@@ -230,15 +218,16 @@ pub(super) async fn connection_manager_loop_step(
async fn subscribe_for_timeline_updates(
broker_client: &mut BrokerClientChannel,
id: TenantTimelineId,
cancel: &CancellationToken,
) -> Result<Streaming<SafekeeperTimelineInfo>, Cancelled> {
) -> Streaming<SafekeeperTimelineInfo> {
let mut attempt = 0;
let cancel = shutdown_token();
loop {
exponential_backoff(
attempt,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
cancel,
&cancel,
)
.await;
attempt += 1;
@@ -252,14 +241,9 @@ async fn subscribe_for_timeline_updates(
subscription_key: Some(key),
};
match {
tokio::select! {
r = broker_client.subscribe_safekeeper_info(request) => { r }
_ = cancel.cancelled() => { return Err(Cancelled); }
}
} {
match broker_client.subscribe_safekeeper_info(request).await {
Ok(resp) => {
return Ok(resp.into_inner());
return resp.into_inner();
}
Err(e) => {
// Safekeeper nodes can stop pushing timeline updates to the broker, when no new writes happen and
@@ -502,10 +486,6 @@ impl ConnectionManagerState {
/// Drops the current connection (if any) and updates retry timeout for the next
/// connection attempt to the same safekeeper.
///
/// # Cancel-Safety
///
/// Not cancellation-safe.
async fn drop_old_connection(&mut self, needs_shutdown: bool) {
let wal_connection = match self.wal_connection.take() {
Some(wal_connection) => wal_connection,
@@ -513,14 +493,7 @@ impl ConnectionManagerState {
};
if needs_shutdown {
wal_connection
.connection_task
.shutdown()
// This here is why this function isn't cancellation-safe.
// If we got cancelled here, then self.wal_connection is already None and we lose track of the task.
// Even if our caller diligently calls Self::shutdown(), it will find a self.wal_connection=None
// and thus be ineffective.
.await;
wal_connection.connection_task.shutdown().await;
}
let retry = self
@@ -865,9 +838,6 @@ impl ConnectionManagerState {
}
}
/// # Cancel-Safety
///
/// Not cancellation-safe.
pub(super) async fn shutdown(mut self) {
if let Some(wal_connection) = self.wal_connection.take() {
wal_connection.connection_task.shutdown().await;
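The removed comments describe a real hazard worth keeping in mind. A minimal illustration (hypothetical types) of why a take() before an .await makes a function non-cancellation-safe:

struct ConnectionTask;
impl ConnectionTask {
    async fn shutdown(self) { /* ask the task to stop and wait for it */ }
}

async fn drop_old_connection(slot: &mut Option<ConnectionTask>) {
    if let Some(task) = slot.take() {
        // If this future is dropped while suspended at the await below,
        // `slot` is already None: the task is lost, and a later, diligent
        // shutdown() call on the owner finds nothing to stop.
        task.shutdown().await;
    }
}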

View File

@@ -389,6 +389,17 @@ pub(super) async fn handle_walreceiver_connection(
}
}
{
// This is a hack. It piggybacks on the keepalive messages sent by the
// safekeeper in order to enforce `checkpoint_timeout` on the currently
// open layer. This hack doesn't provide a bound on the total size of
// in-memory layers on a pageserver. See https://github.com/neondatabase/neon/issues/6916.
let mut writer = timeline.writer().await;
if let Err(err) = writer.tick().await {
warn!("Timeline writer tick failed: {err}");
}
}
if let Some(last_lsn) = status_update {
let timeline_remote_consistent_lsn = timeline
.get_remote_consistent_lsn_visible()

View File

@@ -121,16 +121,11 @@ pub(super) enum SetDeletedFlagProgress {
Successful(NaiveDateTime),
}
pub(super) struct UploadQueueStoppedDeletable {
pub(super) struct UploadQueueStopped {
pub(super) upload_queue_for_deletion: UploadQueueInitialized,
pub(super) deleted_at: SetDeletedFlagProgress,
}
pub(super) enum UploadQueueStopped {
Deletable(UploadQueueStoppedDeletable),
Uninitialized,
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotInitialized {
#[error("queue is in state Uninitialized")]
@@ -254,15 +249,12 @@ impl UploadQueue {
}
}
pub(crate) fn stopped_mut(&mut self) -> anyhow::Result<&mut UploadQueueStoppedDeletable> {
pub(crate) fn stopped_mut(&mut self) -> anyhow::Result<&mut UploadQueueStopped> {
match self {
UploadQueue::Initialized(_) | UploadQueue::Uninitialized => {
anyhow::bail!("queue is in state {}", self.as_str())
}
UploadQueue::Stopped(UploadQueueStopped::Uninitialized) => {
anyhow::bail!("queue is in state Stopped(Uninitialized)")
}
UploadQueue::Stopped(UploadQueueStopped::Deletable(deletable)) => Ok(deletable),
UploadQueue::Stopped(stopped) => Ok(stopped),
}
}
}
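A flattened diff hides the nesting here; for orientation, the richer (pre-change) shape of the state machine, reconstructed from this hunk with the payload types stubbed out:

struct UploadQueueInitialized;      // fields elided
struct UploadQueueStoppedDeletable; // fields elided

enum UploadQueue {
    Uninitialized,
    Initialized(UploadQueueInitialized),
    Stopped(UploadQueueStopped),
}

enum UploadQueueStopped {
    // stop() ran on a fully initialized queue; deletion may proceed
    Deletable(UploadQueueStoppedDeletable),
    // stop() ran before the queue ever initialized (e.g. IndexPart fetch failed)
    Uninitialized,
}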

View File

@@ -782,7 +782,7 @@ where
}
}
// NB: don't use `buf.is_empty()` here; it is from the
// `impl Deref for Slice { Target = [u8] }`; the &[u8]
// `impl Deref for Slice { Target = [u8] }`; the the &[u8]
// returned by it only covers the initialized portion of `buf`.
// Whereas we're interested in ensuring that we filled the entire
// buffer that the user passed in.

View File

@@ -312,7 +312,7 @@ pg_cluster_size(PG_FUNCTION_ARGS)
{
int64 size;
size = GetNeonCurrentClusterSize();
size = GetZenithCurrentClusterSize();
if (size == 0)
PG_RETURN_NULL();

View File

@@ -26,8 +26,6 @@ extern void pg_init_libpagestore(void);
extern void pg_init_walproposer(void);
extern uint64 BackpressureThrottlingTime(void);
extern void SetNeonCurrentClusterSize(uint64 size);
extern uint64 GetNeonCurrentClusterSize(void);
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
extern void PGDLLEXPORT WalProposerSync(int argc, char *argv[]);

View File

@@ -1831,7 +1831,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
reln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&
!IsAutoVacuumWorkerProcess())
{
uint64 current_size = GetNeonCurrentClusterSize();
uint64 current_size = GetZenithCurrentClusterSize();
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
ereport(ERROR,
@@ -1912,7 +1912,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
reln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&
!IsAutoVacuumWorkerProcess())
{
uint64 current_size = GetNeonCurrentClusterSize();
uint64 current_size = GetZenithCurrentClusterSize();
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
ereport(ERROR,

View File

@@ -287,7 +287,6 @@ typedef struct WalproposerShmemState
slock_t mutex;
term_t mineLastElectedTerm;
pg_atomic_uint64 backpressureThrottlingTime;
pg_atomic_uint64 currentClusterSize;
/* last feedback from each shard */
PageserverFeedback shard_ps_feedback[MAX_SHARDS];

View File

@@ -282,7 +282,6 @@ WalproposerShmemInit(void)
memset(walprop_shared, 0, WalproposerShmemSize());
SpinLockInit(&walprop_shared->mutex);
pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
pg_atomic_init_u64(&walprop_shared->currentClusterSize, 0);
}
LWLockRelease(AddinShmemInitLock);
@@ -1973,7 +1972,7 @@ walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk)
/* Only one main shard sends non-zero currentClusterSize */
if (sk->appendResponse.ps_feedback.currentClusterSize > 0)
SetNeonCurrentClusterSize(sk->appendResponse.ps_feedback.currentClusterSize);
SetZenithCurrentClusterSize(sk->appendResponse.ps_feedback.currentClusterSize);
if (min_feedback.disk_consistent_lsn != standby_apply_lsn)
{
@@ -2095,18 +2094,6 @@ GetLogRepRestartLSN(WalProposer *wp)
return lrRestartLsn;
}
void SetNeonCurrentClusterSize(uint64 size)
{
pg_atomic_write_u64(&walprop_shared->currentClusterSize, size);
}
uint64 GetNeonCurrentClusterSize(void)
{
return pg_atomic_read_u64(&walprop_shared->currentClusterSize);
}
uint64 GetNeonCurrentClusterSize(void);
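The removed accessors are the usual publish/read pair over an atomic in shared memory: one writer (the safekeeper feedback handler) publishes the size, many readers (the smgr extend paths) poll it to enforce max_cluster_size. The same shape in Rust, for comparison (a single-process stand-in for the shared-memory atomics):

use std::sync::atomic::{AtomicU64, Ordering};

static CURRENT_CLUSTER_SIZE: AtomicU64 = AtomicU64::new(0);

fn set_current_cluster_size(size: u64) {
    CURRENT_CLUSTER_SIZE.store(size, Ordering::Relaxed);
}

fn get_current_cluster_size() -> u64 {
    CURRENT_CLUSTER_SIZE.load(Ordering::Relaxed)
}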
static const walproposer_api walprop_pg = {
.get_shmem_state = walprop_pg_get_shmem_state,
.start_streaming = walprop_pg_start_streaming,

View File

@@ -11,10 +11,6 @@ testing = []
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
aws-config.workspace = true
aws-sdk-iam.workspace = true
aws-sigv4.workspace = true
aws-types.workspace = true
base64.workspace = true
bstr.workspace = true
bytes = { workspace = true, features = ["serde"] }
@@ -31,7 +27,6 @@ hashlink.workspace = true
hex.workspace = true
hmac.workspace = true
hostname.workspace = true
http.workspace = true
humantime.workspace = true
hyper-tungstenite.workspace = true
hyper.workspace = true
@@ -97,7 +92,6 @@ workspace_hack.workspace = true
[dev-dependencies]
camino-tempfile.workspace = true
fallible-iterator.workspace = true
rcgen.workspace = true
rstest.workspace = true
tokio-postgres-rustls.workspace = true

View File

@@ -12,8 +12,6 @@ use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
use crate::console::{AuthSecret, NodeInfo};
use crate::context::RequestMonitoring;
use crate::intern::EndpointIdInt;
use crate::metrics::{AUTH_RATE_LIMIT_HITS, ENDPOINTS_AUTH_RATE_LIMITED};
use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::proxy::NeonOptions;
use crate::stream::Stream;
@@ -30,7 +28,7 @@ use crate::{
use crate::{scram, EndpointCacheKey, EndpointId, RoleName};
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn};
use tracing::info;
/// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality
pub enum MaybeOwned<'a, T> {
@@ -176,52 +174,6 @@ impl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {
}
}
impl AuthenticationConfig {
pub fn check_rate_limit(
&self,
ctx: &mut RequestMonitoring,
secret: AuthSecret,
endpoint: &EndpointId,
is_cleartext: bool,
) -> auth::Result<AuthSecret> {
// we have validated the endpoint exists, so let's intern it.
let endpoint_int = EndpointIdInt::from(endpoint);
// only charge the full hash cost for the password-hack or websocket flows,
// in other words, when the proxy itself needs to run the hashing.
let password_weight = if is_cleartext {
match &secret {
#[cfg(any(test, feature = "testing"))]
AuthSecret::Md5(_) => 1,
AuthSecret::Scram(s) => s.iterations + 1,
}
} else {
// validating scram takes just 1 hmac_sha_256 operation.
1
};
let limit_not_exceeded = self
.rate_limiter
.check((endpoint_int, ctx.peer_addr), password_weight);
if !limit_not_exceeded {
warn!(
enabled = self.rate_limiter_enabled,
"rate limiting authentication"
);
AUTH_RATE_LIMIT_HITS.inc();
ENDPOINTS_AUTH_RATE_LIMITED.measure(endpoint);
if self.rate_limiter_enabled {
return Err(auth::AuthError::too_many_connections());
}
}
Ok(secret)
}
}
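The weighting logic being removed here is worth spelling out, since the rate-bucket numbers depend on it. A sketch using the figures from the comments (4096 SCRAM iterations assumed):

// Cost of one authentication attempt, in "hash units".
fn password_weight(is_cleartext: bool, scram_iterations: u32) -> u32 {
    if is_cleartext {
        scram_iterations + 1 // proxy must run the full PBKDF2 itself
    } else {
        1 // verifying a SCRAM exchange is a single HMAC-SHA-256
    }
}

fn main() {
    // With a 300 * 4096 units/second bucket (DEFAULT_AUTH_SET), this admits
    // roughly 300 cleartext checks/s, or ~1.2M cheap SCRAM verifications/s.
    assert_eq!(password_weight(true, 4096), 4097);
    assert_eq!(password_weight(false, 4096), 1);
}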
/// True to its name, this function encapsulates our current auth trade-offs.
/// Here, we choose the appropriate auth flow based on circumstances.
///
@@ -262,24 +214,14 @@ async fn auth_quirks(
Some(secret) => secret,
None => api.get_role_secret(ctx, &info).await?,
};
let (cached_entry, secret) = cached_secret.take_value();
let secret = match secret {
Some(secret) => config.check_rate_limit(
ctx,
secret,
&info.endpoint,
unauthenticated_password.is_some() || allow_cleartext,
)?,
None => {
// If we don't have an authentication secret, we mock one to
// prevent malicious probing (possible due to missing protocol steps).
// This mocked secret will never lead to successful authentication.
info!("authentication info not found, mocking it");
AuthSecret::Scram(scram::ServerSecret::mock(rand::random()))
}
};
let secret = cached_secret.value.clone().unwrap_or_else(|| {
// If we don't have an authentication secret, we mock one to
// prevent malicious probing (possible due to missing protocol steps).
// This mocked secret will never lead to successful authentication.
info!("authentication info not found, mocking it");
AuthSecret::Scram(scram::ServerSecret::mock(&info.user, rand::random()))
});
match authenticate_with_secret(
ctx,
secret,
@@ -295,7 +237,7 @@ async fn auth_quirks(
Err(e) => {
if e.is_auth_failed() {
// The password could have been changed, so we invalidate the cache.
cached_entry.invalidate();
cached_secret.invalidate();
}
Err(e)
}
@@ -466,232 +408,3 @@ impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, &()> {
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use once_cell::sync::Lazy;
use postgres_protocol::{
authentication::sasl::{ChannelBinding, ScramSha256},
message::{backend::Message as PgMessage, frontend},
};
use provider::AuthSecret;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};
use crate::{
auth::{ComputeUserInfoMaybeEndpoint, IpPattern},
config::AuthenticationConfig,
console::{
self,
provider::{self, CachedAllowedIps, CachedRoleSecret},
CachedNodeInfo,
},
context::RequestMonitoring,
proxy::NeonOptions,
rate_limiter::{AuthRateLimiter, RateBucketInfo},
scram::ServerSecret,
stream::{PqStream, Stream},
};
use super::auth_quirks;
struct Auth {
ips: Vec<IpPattern>,
secret: AuthSecret,
}
impl console::Api for Auth {
async fn get_role_secret(
&self,
_ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
}
async fn get_allowed_ips_and_secret(
&self,
_ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
{
Ok((
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
))
}
async fn wake_compute(
&self,
_ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
unimplemented!()
}
}
static CONFIG: Lazy<AuthenticationConfig> = Lazy::new(|| AuthenticationConfig {
scram_protocol_timeout: std::time::Duration::from_secs(5),
rate_limiter_enabled: true,
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
});
async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
loop {
r.read_buf(&mut *b).await.unwrap();
if let Some(m) = PgMessage::parse(&mut *b).unwrap() {
break m;
}
}
}
#[tokio::test]
async fn auth_quirks_scram() {
let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server));
let mut ctx = RequestMonitoring::test();
let api = Auth {
ips: vec![],
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};
let user_info = ComputeUserInfoMaybeEndpoint {
user: "conrad".into(),
endpoint_id: Some("endpoint".into()),
options: NeonOptions::default(),
};
let handle = tokio::spawn(async move {
let mut scram = ScramSha256::new(b"my-secret-password", ChannelBinding::unsupported());
let mut read = BytesMut::new();
// server should offer scram
match read_message(&mut client, &mut read).await {
PgMessage::AuthenticationSasl(a) => {
let options: Vec<&str> = a.mechanisms().collect().unwrap();
assert_eq!(options, ["SCRAM-SHA-256"]);
}
_ => panic!("wrong message"),
}
// client sends client-first-message
let mut write = BytesMut::new();
frontend::sasl_initial_response("SCRAM-SHA-256", scram.message(), &mut write).unwrap();
client.write_all(&write).await.unwrap();
// server response with server-first-message
match read_message(&mut client, &mut read).await {
PgMessage::AuthenticationSaslContinue(a) => {
scram.update(a.data()).await.unwrap();
}
_ => panic!("wrong message"),
}
// client response with client-final-message
write.clear();
frontend::sasl_response(scram.message(), &mut write).unwrap();
client.write_all(&write).await.unwrap();
// server response with server-final-message
match read_message(&mut client, &mut read).await {
PgMessage::AuthenticationSaslFinal(a) => {
scram.finish(a.data()).unwrap();
}
_ => panic!("wrong message"),
}
});
let _creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, false, &CONFIG)
.await
.unwrap();
handle.await.unwrap();
}
#[tokio::test]
async fn auth_quirks_cleartext() {
let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server));
let mut ctx = RequestMonitoring::test();
let api = Auth {
ips: vec![],
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};
let user_info = ComputeUserInfoMaybeEndpoint {
user: "conrad".into(),
endpoint_id: Some("endpoint".into()),
options: NeonOptions::default(),
};
let handle = tokio::spawn(async move {
let mut read = BytesMut::new();
let mut write = BytesMut::new();
// server should offer cleartext
match read_message(&mut client, &mut read).await {
PgMessage::AuthenticationCleartextPassword => {}
_ => panic!("wrong message"),
}
// client responds with password
write.clear();
frontend::password_message(b"my-secret-password", &mut write).unwrap();
client.write_all(&write).await.unwrap();
});
let _creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, true, &CONFIG)
.await
.unwrap();
handle.await.unwrap();
}
#[tokio::test]
async fn auth_quirks_password_hack() {
let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server));
let mut ctx = RequestMonitoring::test();
let api = Auth {
ips: vec![],
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};
let user_info = ComputeUserInfoMaybeEndpoint {
user: "conrad".into(),
endpoint_id: None,
options: NeonOptions::default(),
};
let handle = tokio::spawn(async move {
let mut read = BytesMut::new();
// server should offer cleartext
match read_message(&mut client, &mut read).await {
PgMessage::AuthenticationCleartextPassword => {}
_ => panic!("wrong message"),
}
// client responds with password
let mut write = BytesMut::new();
frontend::password_message(b"endpoint=my-endpoint;my-secret-password", &mut write)
.unwrap();
client.write_all(&write).await.unwrap();
});
let creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, true, &CONFIG)
.await
.unwrap();
assert_eq!(creds.info.endpoint, "my-endpoint");
handle.await.unwrap();
}
}

View File

@@ -1,10 +1,3 @@
use aws_config::environment::EnvironmentVariableCredentialsProvider;
use aws_config::imds::credentials::ImdsCredentialsProvider;
use aws_config::meta::credentials::CredentialsProviderChain;
use aws_config::meta::region::RegionProviderChain;
use aws_config::profile::ProfileFileCredentialsProvider;
use aws_config::provider_config::ProviderConfig;
use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
use futures::future::Either;
use proxy::auth;
use proxy::auth::backend::MaybeOwned;
@@ -17,15 +10,11 @@ use proxy::config::ProjectInfoCacheOptions;
use proxy::console;
use proxy::context::parquet::ParquetUploadArgs;
use proxy::http;
use proxy::metrics::NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT;
use proxy::rate_limiter::AuthRateLimiter;
use proxy::rate_limiter::EndpointRateLimiter;
use proxy::rate_limiter::RateBucketInfo;
use proxy::rate_limiter::RateLimiterConfig;
use proxy::redis::cancellation_publisher::RedisPublisherClient;
use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use proxy::redis::elasticache;
use proxy::redis::notifications;
use proxy::redis::publisher::RedisPublisherClient;
use proxy::serverless::GlobalConnPoolOptions;
use proxy::usage_metrics;
@@ -142,16 +131,10 @@ struct ProxyCliArgs {
///
/// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
/// Can be given multiple times for different bucket sizes.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
endpoint_rps_limit: Vec<RateBucketInfo>,
/// Whether the auth rate limiter actually takes effect (for testing)
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
auth_rate_limit_enabled: bool,
/// Authentication rate limiter max number of hashes per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
auth_rate_limit: Vec<RateBucketInfo>,
/// Redis rate limiter max number of requests per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
redis_rps_limit: Vec<RateBucketInfo>,
/// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
#[clap(long, default_value_t = 100)]
@@ -167,24 +150,9 @@ struct ProxyCliArgs {
/// disable ip check for http requests. If it is too time consuming, it could be turned off.
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
disable_ip_check_for_http: bool,
/// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections)
/// redis url for notifications.
#[clap(long)]
redis_notifications: Option<String>,
/// redis host for streaming connections (might be different from the notifications host)
#[clap(long)]
redis_host: Option<String>,
/// redis port for streaming connections (might be different from the notifications host)
#[clap(long)]
redis_port: Option<u16>,
/// redis cluster name, used in aws elasticache
#[clap(long)]
redis_cluster_name: Option<String>,
/// redis user_id, used in aws elasticache
#[clap(long)]
redis_user_id: Option<String>,
/// aws region to retrieve credentials
#[clap(long, default_value_t = String::new())]
aws_region: String,
/// cache for `project_info` (use `size=0` to disable)
#[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
project_info_cache: String,
@@ -248,61 +216,6 @@ async fn main() -> anyhow::Result<()> {
let config = build_config(&args)?;
info!("Authentication backend: {}", config.auth_backend);
info!("Using region: {}", config.aws_region);
let region_provider = RegionProviderChain::default_provider().or_else(&*config.aws_region); // Replace with your Redis region if needed
let provider_conf =
ProviderConfig::without_region().with_region(region_provider.region().await);
let aws_credentials_provider = {
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
// uses "AWS_PROFILE" / `aws sso login --profile <profile>`
.or_else(
"profile-sso",
ProfileFileCredentialsProvider::builder()
.configure(&provider_conf)
.build(),
)
// uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
// needed to access remote extensions bucket
.or_else(
"token",
WebIdentityTokenCredentialsProvider::builder()
.configure(&provider_conf)
.build(),
)
// uses imds v2
.or_else("imds", ImdsCredentialsProvider::builder().build())
};
let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new(
elasticache::AWSIRSAConfig::new(
config.aws_region.clone(),
args.redis_cluster_name,
args.redis_user_id,
),
aws_credentials_provider,
));
let redis_notifications_client =
match (args.redis_notifications, (args.redis_host, args.redis_port)) {
(Some(url), _) => {
info!("Starting redis notifications listener ({url})");
Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url))
}
(None, (Some(host), Some(port))) => Some(
ConnectionWithCredentialsProvider::new_with_credentials_provider(
host,
port,
elasticache_credentials_provider.clone(),
),
),
(None, (None, None)) => {
warn!("Redis is disabled");
None
}
_ => {
bail!("redis-host and redis-port must be specified together");
}
};
// Check that we can bind to address before further initialization
let http_address: SocketAddr = args.http.parse()?;
@@ -320,22 +233,17 @@ async fn main() -> anyhow::Result<()> {
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
let cancel_map = CancelMap::default();
// let redis_notifications_client = redis_notifications_client.map(|x| Box::leak(Box::new(x)));
let redis_publisher = match &redis_notifications_client {
Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
redis_publisher.clone(),
let redis_publisher = match &args.redis_notifications {
Some(url) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
url,
args.region.clone(),
&config.redis_rps_limit,
)?))),
None => None,
};
let cancellation_handler = Arc::new(CancellationHandler::<
Option<Arc<tokio::sync::Mutex<RedisPublisherClient>>>,
>::new(
let cancellation_handler = Arc::new(CancellationHandler::new(
cancel_map.clone(),
redis_publisher,
NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT,
));
// client facing tasks. these will exit on error or on cancellation
@@ -382,16 +290,17 @@ async fn main() -> anyhow::Result<()> {
if let auth::BackendType::Console(api, _) = &config.auth_backend {
if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
if let Some(redis_notifications_client) = redis_notifications_client {
let cache = api.caches.project_info.clone();
let cache = api.caches.project_info.clone();
if let Some(url) = args.redis_notifications {
info!("Starting redis notifications listener ({url})");
maintenance_tasks.spawn(notifications::task_main(
redis_notifications_client.clone(),
url.to_owned(),
cache.clone(),
cancel_map.clone(),
args.region.clone(),
));
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
}
@@ -517,8 +426,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
};
let authentication_config = AuthenticationConfig {
scram_protocol_timeout: args.scram_protocol_timeout,
rate_limiter_enabled: args.auth_rate_limit_enabled,
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
};
let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
@@ -538,8 +445,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
endpoint_rps_limit,
redis_rps_limit,
handshake_timeout: args.handshake_timeout,
// TODO: add this argument
region: args.region.clone(),
aws_region: args.aws_region.clone(),
}));
Ok(config)

View File

@@ -43,16 +43,6 @@ impl<C: Cache, V> Cached<C, V> {
Self { token: None, value }
}
pub fn take_value(self) -> (Cached<C, ()>, V) {
(
Cached {
token: self.token,
value: (),
},
self.value,
)
}
/// Drop this entry from a cache if it's still there.
pub fn invalidate(self) -> V {
if let Some((cache, info)) = &self.token {

View File

@@ -373,7 +373,10 @@ mod tests {
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = None;
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
@@ -392,7 +395,10 @@ mod tests {
// Shouldn't add more than 2 roles.
let user3: RoleName = "user3".into();
let secret3 = Some(AuthSecret::Scram(ServerSecret::mock([3; 32])));
let secret3 = Some(AuthSecret::Scram(ServerSecret::mock(
user3.as_str(),
[3; 32],
)));
cache.insert_role_secret(&project_id, &endpoint_id, &user3, secret3.clone());
assert!(cache.get_role_secret(&endpoint_id, &user3).is_none());
@@ -425,8 +431,14 @@ mod tests {
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock(
user2.as_str(),
[2; 32],
)));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
@@ -474,8 +486,14 @@ mod tests {
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock(
user2.as_str(),
[2; 32],
)));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),

View File

@@ -1,3 +1,4 @@
use async_trait::async_trait;
use dashmap::DashMap;
use pq_proto::CancelKeyData;
use std::{net::SocketAddr, sync::Arc};
@@ -9,26 +10,18 @@ use tracing::info;
use uuid::Uuid;
use crate::{
error::ReportableError,
metrics::NUM_CANCELLATION_REQUESTS,
redis::cancellation_publisher::{
CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
},
error::ReportableError, metrics::NUM_CANCELLATION_REQUESTS,
redis::publisher::RedisPublisherClient,
};
pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
pub type CancellationHandlerMainInternal = Option<Arc<Mutex<RedisPublisherClient>>>;
/// Enables serving `CancelRequest`s.
///
/// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.
pub struct CancellationHandler<P> {
/// If there is a `RedisPublisherClient` available, it will be used to publish the cancellation key to other proxy instances.
pub struct CancellationHandler {
map: CancelMap,
client: P,
/// This field used for the monitoring purposes.
/// Represents the source of the cancellation request.
from: &'static str,
redis_client: Option<Arc<Mutex<RedisPublisherClient>>>,
}
#[derive(Debug, Error)]
@@ -51,9 +44,49 @@ impl ReportableError for CancelError {
}
}
impl<P: CancellationPublisher> CancellationHandler<P> {
impl CancellationHandler {
pub fn new(map: CancelMap, redis_client: Option<Arc<Mutex<RedisPublisherClient>>>) -> Self {
Self { map, redis_client }
}
/// Cancel a running query for the corresponding connection.
pub async fn cancel_session(
&self,
key: CancelKeyData,
session_id: Uuid,
) -> Result<(), CancelError> {
let from = "from_client";
// NB: we should immediately release the lock after cloning the token.
let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
tracing::warn!("query cancellation key not found: {key}");
if let Some(redis_client) = &self.redis_client {
NUM_CANCELLATION_REQUESTS
.with_label_values(&[from, "not_found"])
.inc();
info!("publishing cancellation key to Redis");
match redis_client.lock().await.try_publish(key, session_id).await {
Ok(()) => {
info!("cancellation key successfuly published to Redis");
}
Err(e) => {
tracing::error!("failed to publish a message: {e}");
return Err(CancelError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
e.to_string(),
)));
}
}
}
return Ok(());
};
NUM_CANCELLATION_REQUESTS
.with_label_values(&[from, "found"])
.inc();
info!("cancelling query per user's request using key {key}");
cancel_closure.try_cancel_query().await
}
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
pub fn get_session(self: Arc<Self>) -> Session<P> {
pub fn get_session(self: Arc<Self>) -> Session {
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
// expose it and we don't want to do another roundtrip to query
// for it. The client will be able to notice that this is not the
@@ -79,39 +112,9 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
cancellation_handler: self,
}
}
/// Try to cancel a running query for the corresponding connection.
/// If the cancellation key is not found, it will be published to Redis.
pub async fn cancel_session(
&self,
key: CancelKeyData,
session_id: Uuid,
) -> Result<(), CancelError> {
// NB: we should immediately release the lock after cloning the token.
let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
tracing::warn!("query cancellation key not found: {key}");
NUM_CANCELLATION_REQUESTS
.with_label_values(&[self.from, "not_found"])
.inc();
match self.client.try_publish(key, session_id).await {
Ok(()) => {} // do nothing
Err(e) => {
return Err(CancelError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
e.to_string(),
)));
}
}
return Ok(());
};
NUM_CANCELLATION_REQUESTS
.with_label_values(&[self.from, "found"])
.inc();
info!("cancelling query per user's request using key {key}");
cancel_closure.try_cancel_query().await
}
#[cfg(test)]
fn contains(&self, session: &Session<P>) -> bool {
fn contains(&self, session: &Session) -> bool {
self.map.contains_key(&session.key)
}
@@ -121,19 +124,31 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
}
}
impl CancellationHandler<()> {
pub fn new(map: CancelMap, from: &'static str) -> Self {
Self {
map,
client: (),
from,
}
}
#[async_trait]
pub trait NotificationsCancellationHandler {
async fn cancel_session_no_publish(&self, key: CancelKeyData) -> Result<(), CancelError>;
}
impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: &'static str) -> Self {
Self { map, client, from }
#[async_trait]
impl NotificationsCancellationHandler for CancellationHandler {
async fn cancel_session_no_publish(&self, key: CancelKeyData) -> Result<(), CancelError> {
let from = "from_redis";
let cancel_closure = self.map.get(&key).and_then(|x| x.clone());
match cancel_closure {
Some(cancel_closure) => {
NUM_CANCELLATION_REQUESTS
.with_label_values(&[from, "found"])
.inc();
cancel_closure.try_cancel_query().await
}
None => {
NUM_CANCELLATION_REQUESTS
.with_label_values(&[from, "not_found"])
.inc();
tracing::warn!("query cancellation key not found: {key}");
Ok(())
}
}
}
}
@@ -163,14 +178,14 @@ impl CancelClosure {
}
/// Helper for registering query cancellation tokens.
pub struct Session<P> {
pub struct Session {
/// The user-facing key identifying this session.
key: CancelKeyData,
/// The [`CancelMap`] this session belongs to.
cancellation_handler: Arc<CancellationHandler<P>>,
cancellation_handler: Arc<CancellationHandler>,
}
impl<P> Session<P> {
impl Session {
/// Store the cancel token for the given session.
/// This enables query cancellation in `crate::proxy::prepare_client_connection`.
pub fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData {
@@ -183,7 +198,7 @@ impl<P> Session<P> {
}
}
impl<P> Drop for Session<P> {
impl Drop for Session {
fn drop(&mut self) {
self.cancellation_handler.map.remove(&self.key);
info!("dropped query cancellation key {}", &self.key);
@@ -192,16 +207,14 @@ impl<P> Drop for Session<P> {
#[cfg(test)]
mod tests {
use crate::metrics::NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS;
use super::*;
#[tokio::test]
async fn check_session_drop() -> anyhow::Result<()> {
let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
CancelMap::default(),
NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS,
));
let cancellation_handler = Arc::new(CancellationHandler {
map: CancelMap::default(),
redis_client: None,
});
let session = cancellation_handler.clone().get_session();
assert!(cancellation_handler.contains(&session));
@@ -211,19 +224,4 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn cancel_session_noop_regression() {
let handler = CancellationHandler::<()>::new(Default::default(), "local");
handler
.cancel_session(
CancelKeyData {
backend_pid: 0,
cancel_key: 0,
},
Uuid::new_v4(),
)
.await
.unwrap();
}
}

View File

@@ -82,13 +82,14 @@ pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
/// A config for establishing a connection to compute node.
/// Eventually, `tokio_postgres` will be replaced with something better.
/// Newtype allows us to implement methods on top of it.
#[derive(Clone, Default)]
#[derive(Clone)]
#[repr(transparent)]
pub struct ConnCfg(Box<tokio_postgres::Config>);
/// Creation and initialization routines.
impl ConnCfg {
pub fn new() -> Self {
Self::default()
Self(Default::default())
}
/// Reuse password or auth keys from the other config.
@@ -164,6 +165,12 @@ impl std::ops::DerefMut for ConnCfg {
}
}
impl Default for ConnCfg {
fn default() -> Self {
Self::new()
}
}
impl ConnCfg {
/// Establish a raw TCP connection to the compute node.
async fn connect_raw(&self, timeout: Duration) -> io::Result<(SocketAddr, TcpStream, &str)> {

View File

@@ -1,8 +1,4 @@
use crate::{
auth,
rate_limiter::{AuthRateLimiter, RateBucketInfo},
serverless::GlobalConnPoolOptions,
};
use crate::{auth, rate_limiter::RateBucketInfo, serverless::GlobalConnPoolOptions};
use anyhow::{bail, ensure, Context, Ok};
use itertools::Itertools;
use rustls::{
@@ -32,7 +28,6 @@ pub struct ProxyConfig {
pub redis_rps_limit: Vec<RateBucketInfo>,
pub region: String,
pub handshake_timeout: Duration,
pub aws_region: String,
}
#[derive(Debug)]
@@ -54,8 +49,6 @@ pub struct HttpConfig {
pub struct AuthenticationConfig {
pub scram_protocol_timeout: tokio::time::Duration,
pub rate_limiter_enabled: bool,
pub rate_limiter: AuthRateLimiter,
}
impl TlsConfig {

View File

@@ -6,7 +6,7 @@ pub mod messages;
/// Wrappers for console APIs and their mocks.
pub mod provider;
pub(crate) use provider::{errors, Api, AuthSecret, CachedNodeInfo, NodeInfo};
pub use provider::{errors, Api, AuthSecret, CachedNodeInfo, NodeInfo};
/// Various cache-related types.
pub mod caches {

View File

@@ -14,6 +14,7 @@ use crate::{
context::RequestMonitoring,
scram, EndpointCacheKey, ProjectId,
};
use async_trait::async_trait;
use dashmap::DashMap;
use std::{sync::Arc, time::Duration};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
@@ -325,7 +326,8 @@ pub type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPatt
/// This will allocate per each call, but the http requests alone
/// already require a few allocations, so it should be fine.
pub(crate) trait Api {
#[async_trait]
pub trait Api {
/// Get the client's auth secret for authentication.
/// Returns an `Option` because the user-not-found situation is special.
/// We still have to mock the scram exchange to avoid leaking the fact that the user doesn't exist.
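Several hunks in this file and the two below reintroduce #[async_trait]. A minimal sketch of what the attribute buys: async methods in traits that stay object-safe, by boxing the returned future. (Illustrative names, not the proxy's actual API.)

use async_trait::async_trait;

#[async_trait]
trait Api {
    // desugars to fn(..) -> Pin<Box<dyn Future<Output = Option<String>> + Send + '_>>
    async fn get_role_secret(&self, user: &str) -> Option<String>;
}

struct MockApi;

#[async_trait]
impl Api for MockApi {
    async fn get_role_secret(&self, _user: &str) -> Option<String> {
        None
    }
}

// Because the future is boxed, trait objects work:
fn boxed() -> Box<dyn Api + Send + Sync> {
    Box::new(MockApi)
}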
@@ -361,6 +363,7 @@ pub enum ConsoleBackend {
Test(Box<dyn crate::auth::backend::TestBackend>),
}
#[async_trait]
impl Api for ConsoleBackend {
async fn get_role_secret(
&self,

View File

@@ -8,6 +8,7 @@ use crate::console::provider::{CachedAllowedIps, CachedRoleSecret};
use crate::context::RequestMonitoring;
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached};
use async_trait::async_trait;
use futures::TryFutureExt;
use std::{str::FromStr, sync::Arc};
use thiserror::Error;
@@ -143,6 +144,7 @@ async fn get_execute_postgres_query(
Ok(Some(entry))
}
#[async_trait]
impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_role_secret(

View File

@@ -14,6 +14,7 @@ use crate::{
context::RequestMonitoring,
metrics::{ALLOWED_IPS_BY_CACHE_OUTCOME, ALLOWED_IPS_NUMBER},
};
use async_trait::async_trait;
use futures::TryFutureExt;
use std::sync::Arc;
use tokio::time::Instant;
@@ -55,7 +56,7 @@ impl Api {
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
let request_id = ctx.session_id.to_string();
let request_id = uuid::Uuid::new_v4().to_string();
let application_name = ctx.console_application_name();
async {
let request = self
@@ -112,7 +113,7 @@ impl Api {
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<NodeInfo, WakeComputeError> {
let request_id = ctx.session_id.to_string();
let request_id = uuid::Uuid::new_v4().to_string();
let application_name = ctx.console_application_name();
async {
let mut request_builder = self
@@ -167,6 +168,7 @@ impl Api {
}
}
#[async_trait]
impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_role_secret(

View File

@@ -4,10 +4,7 @@ use ::metrics::{
register_int_gauge_vec, Histogram, HistogramVec, HyperLogLogVec, IntCounterPairVec,
IntCounterVec, IntGauge, IntGaugeVec,
};
use metrics::{
register_hll, register_int_counter, register_int_counter_pair, HyperLogLog, IntCounter,
IntCounterPair,
};
use metrics::{register_int_counter, register_int_counter_pair, IntCounter, IntCounterPair};
use once_cell::sync::Lazy;
use tokio::time::{self, Instant};
@@ -164,9 +161,6 @@ pub static NUM_CANCELLATION_REQUESTS: Lazy<IntCounterVec> = Lazy::new(|| {
.unwrap()
});
pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT: &str = "from_client";
pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS: &str = "from_redis";
pub enum Waiting {
Cplane,
Client,
@@ -361,20 +355,3 @@ pub static TLS_HANDSHAKE_FAILURES: Lazy<IntCounter> = Lazy::new(|| {
)
.unwrap()
});
pub static ENDPOINTS_AUTH_RATE_LIMITED: Lazy<HyperLogLog<32>> = Lazy::new(|| {
register_hll!(
32,
"proxy_endpoints_auth_rate_limits",
"Number of endpoints affected by authentication rate limits",
)
.unwrap()
});
pub static AUTH_RATE_LIMIT_HITS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"proxy_requests_auth_rate_limits_total",
"Number of connection requests affected by authentication rate limits",
)
.unwrap()
});

View File

@@ -10,7 +10,7 @@ pub mod wake_compute;
use crate::{
auth,
cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal},
cancellation::{self, CancellationHandler},
compute,
config::{ProxyConfig, TlsConfig},
context::RequestMonitoring,
@@ -62,7 +62,7 @@ pub async fn task_main(
listener: tokio::net::TcpListener,
cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
cancellation_handler: Arc<CancellationHandlerMain>,
cancellation_handler: Arc<CancellationHandler>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("proxy has shut down");
@@ -233,12 +233,12 @@ impl ReportableError for ClientRequestError {
pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
config: &'static ProxyConfig,
ctx: &mut RequestMonitoring,
cancellation_handler: Arc<CancellationHandlerMain>,
cancellation_handler: Arc<CancellationHandler>,
stream: S,
mode: ClientMode,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
conn_gauge: IntCounterPairGuard,
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
info!("handling interactive connection from client");
let proto = ctx.protocol;
@@ -280,7 +280,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
// check rate limit
if let Some(ep) = user_info.get_endpoint() {
if !endpoint_rate_limiter.check(ep, 1) {
if !endpoint_rate_limiter.check(ep) {
return stream
.throw_error(auth::AuthError::too_many_connections())
.await?;
@@ -338,9 +338,9 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
/// Finish client connection initialization: confirm auth success, send params, etc.
#[tracing::instrument(skip_all)]
async fn prepare_client_connection<P>(
async fn prepare_client_connection(
node: &compute::PostgresConnection,
session: &cancellation::Session<P>,
session: &cancellation::Session,
stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> Result<(), std::io::Error> {
// Register compute's query cancellation token and produce a new, unique one.

View File

@@ -55,17 +55,17 @@ pub async fn proxy_pass(
Ok(())
}
pub struct ProxyPassthrough<P, S> {
pub struct ProxyPassthrough<S> {
pub client: Stream<S>,
pub compute: PostgresConnection,
pub aux: MetricsAuxInfo,
pub req: IntCounterPairGuard,
pub conn: IntCounterPairGuard,
pub cancel: cancellation::Session<P>,
pub cancel: cancellation::Session,
}
impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
pub async fn proxy_pass(self) -> anyhow::Result<()> {
let res = proxy_pass(self.client, self.compute.stream, self.aux).await;
self.compute.cancel_closure.try_cancel_query().await?;

View File

@@ -142,8 +142,8 @@ impl Scram {
Ok(Scram(secret))
}
fn mock() -> Self {
Scram(scram::ServerSecret::mock(rand::random()))
fn mock(user: &str) -> Self {
Scram(scram::ServerSecret::mock(user, rand::random()))
}
}
@@ -330,7 +330,11 @@ async fn scram_auth_mock() -> anyhow::Result<()> {
let (client_config, server_config) =
generate_tls_config("generic-project-name.localhost", "localhost")?;
let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), Scram::mock()));
let proxy = tokio::spawn(dummy_proxy(
client,
Some(server_config),
Scram::mock("user"),
));
use rand::{distributions::Alphanumeric, Rng};
let password: String = rand::thread_rng()

View File

@@ -4,4 +4,4 @@ mod limiter;
pub use aimd::Aimd;
pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
pub use limiter::Limiter;
pub use limiter::{AuthRateLimiter, EndpointRateLimiter, RateBucketInfo, RedisRateLimiter};
pub use limiter::{EndpointRateLimiter, RateBucketInfo, RedisRateLimiter};

View File

@@ -1,8 +1,6 @@
use std::{
borrow::Cow,
collections::hash_map::RandomState,
hash::{BuildHasher, Hash},
net::IpAddr,
hash::BuildHasher,
sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
@@ -17,7 +15,7 @@ use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
use tokio::time::{timeout, Duration, Instant};
use tracing::info;
use crate::{intern::EndpointIdInt, EndpointId};
use crate::EndpointId;
use super::{
limit_algorithm::{LimitAlgorithm, Sample},
@@ -51,11 +49,11 @@ impl RedisRateLimiter {
.data
.iter_mut()
.zip(self.info)
.all(|(bucket, info)| bucket.should_allow_request(info, now, 1));
.all(|(bucket, info)| bucket.should_allow_request(info, now));
if should_allow_request {
// only increment the bucket counts if the request will actually be accepted
self.data.iter_mut().for_each(|b| b.inc(1));
self.data.iter_mut().for_each(RateBucket::inc);
}
should_allow_request
@@ -73,14 +71,9 @@ impl RedisRateLimiter {
// saw SNI, before doing TLS handshake. User-side error messages in that case
// do not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
// I went with a more expensive way that yields user-friendlier error messages.
pub type EndpointRateLimiter = BucketRateLimiter<EndpointId, StdRng, RandomState>;
// This can't be just per IP because that would limit some PaaS that share IP addresses
pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, IpAddr), StdRng, RandomState>;
pub struct BucketRateLimiter<Key, Rand = StdRng, Hasher = RandomState> {
map: DashMap<Key, Vec<RateBucket>, Hasher>,
info: Cow<'static, [RateBucketInfo]>,
pub struct EndpointRateLimiter<Rand = StdRng, Hasher = RandomState> {
map: DashMap<EndpointId, Vec<RateBucket>, Hasher>,
info: &'static [RateBucketInfo],
access_count: AtomicUsize,
rand: Mutex<Rand>,
}
@@ -92,9 +85,9 @@ struct RateBucket {
}
impl RateBucket {
fn should_allow_request(&mut self, info: &RateBucketInfo, now: Instant, n: u32) -> bool {
fn should_allow_request(&mut self, info: &RateBucketInfo, now: Instant) -> bool {
if now - self.start < info.interval {
self.count + n <= info.max_rpi
self.count < info.max_rpi
} else {
// bucket expired, reset
self.count = 0;
@@ -104,8 +97,8 @@ impl RateBucket {
}
}
fn inc(&mut self, n: u32) {
self.count += n;
fn inc(&mut self) {
self.count += 1;
}
}
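For reference, the weighted variant on the removed side of this hunk as a standalone sketch (one fixed window; the real limiter zips several buckets of different intervals):

use std::time::{Duration, Instant};

struct RateBucket {
    start: Instant,
    count: u32,
}

impl RateBucket {
    // A request of cost `n` is admitted only if the whole cost still fits
    // into the current window; an expired window resets the count.
    fn should_allow_request(&mut self, max_rpi: u32, interval: Duration, now: Instant, n: u32) -> bool {
        if now - self.start < interval {
            self.count + n <= max_rpi
        } else {
            self.count = 0;
            self.start = now;
            true
        }
    }

    fn inc(&mut self, n: u32) {
        self.count += n;
    }
}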
@@ -118,7 +111,7 @@ pub struct RateBucketInfo {
impl std::fmt::Display for RateBucketInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let rps = (self.max_rpi as u64) * 1000 / self.interval.as_millis() as u64;
let rps = self.max_rpi * 1000 / self.interval.as_millis() as u32;
write!(f, "{rps}@{}", humantime::format_duration(self.interval))
}
}
@@ -143,25 +136,12 @@ impl std::str::FromStr for RateBucketInfo {
}
impl RateBucketInfo {
pub const DEFAULT_ENDPOINT_SET: [Self; 3] = [
pub const DEFAULT_SET: [Self; 3] = [
Self::new(300, Duration::from_secs(1)),
Self::new(200, Duration::from_secs(60)),
Self::new(100, Duration::from_secs(600)),
];
/// All of these are per endpoint-ip pair.
/// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
///
/// First bucket: 300mcpus total per endpoint-ip pair
/// * 1228800 requests per second with 1 hash round. (endpoint rate limiter will catch this first)
/// * 300 requests per second with 4096 hash rounds.
/// * 2 requests per second with 600000 hash rounds.
pub const DEFAULT_AUTH_SET: [Self; 3] = [
Self::new(300 * 4096, Duration::from_secs(1)),
Self::new(200 * 4096, Duration::from_secs(60)),
Self::new(100 * 4096, Duration::from_secs(600)),
];
pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {
info.sort_unstable_by_key(|info| info.interval);
let invalid = info
@@ -170,7 +150,7 @@ impl RateBucketInfo {
.find(|(a, b)| a.max_rpi > b.max_rpi);
if let Some((a, b)) = invalid {
bail!(
"invalid bucket RPS limits. {b} allows fewer requests per bucket than {a} ({} vs {})",
"invalid endpoint RPS limits. {b} allows fewer requests per bucket than {a} ({} vs {})",
b.max_rpi,
a.max_rpi,
);
@@ -182,24 +162,19 @@ impl RateBucketInfo {
pub const fn new(max_rps: u32, interval: Duration) -> Self {
Self {
interval,
max_rpi: ((max_rps as u64) * (interval.as_millis() as u64) / 1000) as u32,
max_rpi: max_rps * interval.as_millis() as u32 / 1000,
}
}
}
impl<K: Hash + Eq> BucketRateLimiter<K> {
pub fn new(info: impl Into<Cow<'static, [RateBucketInfo]>>) -> Self {
impl EndpointRateLimiter {
pub fn new(info: &'static [RateBucketInfo]) -> Self {
Self::new_with_rand_and_hasher(info, StdRng::from_entropy(), RandomState::new())
}
}
impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
fn new_with_rand_and_hasher(
info: impl Into<Cow<'static, [RateBucketInfo]>>,
rand: R,
hasher: S,
) -> Self {
let info = info.into();
impl<R: Rng, S: BuildHasher + Clone> EndpointRateLimiter<R, S> {
fn new_with_rand_and_hasher(info: &'static [RateBucketInfo], rand: R, hasher: S) -> Self {
info!(buckets = ?info, "endpoint rate limiter");
Self {
info,
@@ -210,7 +185,7 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
}
/// Check that the number of connections to the endpoint is below `max_rps` rps.
pub fn check(&self, key: K, n: u32) -> bool {
pub fn check(&self, endpoint: EndpointId) -> bool {
// do a partial GC every 2k requests. This cleans up ~ 1/64th of the map.
// worst case memory usage is about:
// = 2 * 2048 * 64 * (48B + 72B)
@@ -220,7 +195,7 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
}
let now = Instant::now();
let mut entry = self.map.entry(key).or_insert_with(|| {
let mut entry = self.map.entry(endpoint).or_insert_with(|| {
vec![
RateBucket {
start: now,
@@ -232,12 +207,12 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
let should_allow_request = entry
.iter_mut()
.zip(&*self.info)
.all(|(bucket, info)| bucket.should_allow_request(info, now, n));
.zip(self.info)
.all(|(bucket, info)| bucket.should_allow_request(info, now));
if should_allow_request {
// only increment the bucket counts if the request will actually be accepted
entry.iter_mut().for_each(|b| b.inc(n));
entry.iter_mut().for_each(RateBucket::inc);
}
should_allow_request
@@ -248,7 +223,7 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
/// But that way deletion does not acquire a mutex on each entry access.
pub fn do_gc(&self) {
info!(
"cleaning up bucket rate limiter, current size = {}",
"cleaning up endpoint rate limiter, current size = {}",
self.map.len()
);
let n = self.map.shards().len();
@@ -559,7 +534,7 @@ mod tests {
use rustc_hash::FxHasher;
use tokio::time;
use super::{BucketRateLimiter, EndpointRateLimiter, Limiter, Outcome};
use super::{EndpointRateLimiter, Limiter, Outcome};
use crate::{
rate_limiter::{RateBucketInfo, RateLimitAlgorithm},
EndpointId,
@@ -697,12 +672,12 @@ mod tests {
#[test]
fn default_rate_buckets() {
let mut defaults = RateBucketInfo::DEFAULT_ENDPOINT_SET;
let mut defaults = RateBucketInfo::DEFAULT_SET;
RateBucketInfo::validate(&mut defaults[..]).unwrap();
}
#[test]
#[should_panic = "invalid bucket RPS limits. 10@10s allows fewer requests per bucket than 300@1s (100 vs 300)"]
#[should_panic = "invalid endpoint RPS limits. 10@10s allows fewer requests per bucket than 300@1s (100 vs 300)"]
fn rate_buckets_validate() {
let mut rates: Vec<RateBucketInfo> = ["300@1s", "10@10s"]
.into_iter()
@@ -718,42 +693,42 @@ mod tests {
.map(|s| s.parse().unwrap())
.collect();
RateBucketInfo::validate(&mut rates).unwrap();
let limiter = EndpointRateLimiter::new(rates);
let limiter = EndpointRateLimiter::new(Vec::leak(rates));
let endpoint = EndpointId::from("ep-my-endpoint-1234");
time::pause();
for _ in 0..100 {
assert!(limiter.check(endpoint.clone(), 1));
assert!(limiter.check(endpoint.clone()));
}
// more connections fail
assert!(!limiter.check(endpoint.clone(), 1));
assert!(!limiter.check(endpoint.clone()));
// fail even after 500ms as it's in the same bucket
time::advance(time::Duration::from_millis(500)).await;
assert!(!limiter.check(endpoint.clone(), 1));
assert!(!limiter.check(endpoint.clone()));
// after a full 1s, 100 requests are allowed again
time::advance(time::Duration::from_millis(500)).await;
for _ in 1..6 {
for _ in 0..50 {
assert!(limiter.check(endpoint.clone(), 2));
for _ in 0..100 {
assert!(limiter.check(endpoint.clone()));
}
time::advance(time::Duration::from_millis(1000)).await;
}
// more connections after 600 will exceed the 20rps@30s limit
assert!(!limiter.check(endpoint.clone(), 1));
assert!(!limiter.check(endpoint.clone()));
// will still fail before the 30 second limit
time::advance(time::Duration::from_millis(30_000 - 6_000 - 1)).await;
assert!(!limiter.check(endpoint.clone(), 1));
assert!(!limiter.check(endpoint.clone()));
// after the full 30 seconds, 100 requests are allowed again
time::advance(time::Duration::from_millis(1)).await;
for _ in 0..100 {
assert!(limiter.check(endpoint.clone(), 1));
assert!(limiter.check(endpoint.clone()));
}
}
@@ -763,41 +738,14 @@ mod tests {
let rand = rand::rngs::StdRng::from_seed([1; 32]);
let hasher = BuildHasherDefault::<FxHasher>::default();
let limiter = BucketRateLimiter::new_with_rand_and_hasher(
&RateBucketInfo::DEFAULT_ENDPOINT_SET,
let limiter = EndpointRateLimiter::new_with_rand_and_hasher(
&RateBucketInfo::DEFAULT_SET,
rand,
hasher,
);
for i in 0..1_000_000 {
limiter.check(i, 1);
limiter.check(format!("{i}").into());
}
assert!(limiter.map.len() < 150_000);
}
#[test]
fn test_default_auth_set() {
// these values used to exceed u32::MAX
assert_eq!(
RateBucketInfo::DEFAULT_AUTH_SET,
[
RateBucketInfo {
interval: Duration::from_secs(1),
max_rpi: 300 * 4096,
},
RateBucketInfo {
interval: Duration::from_secs(60),
max_rpi: 200 * 4096 * 60,
},
RateBucketInfo {
interval: Duration::from_secs(600),
max_rpi: 100 * 4096 * 600,
}
]
);
for x in RateBucketInfo::DEFAULT_AUTH_SET {
let y = x.to_string().parse().unwrap();
assert_eq!(x, y);
}
}
}
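
Aside: `RateBucketInfo::new` is where the two sides of this diff differ in arithmetic width, and the removed `test_default_auth_set` notes that the auth defaults "used to exceed u32::MAX". A self-contained sketch of why the widening to `u64` matters (the constant mirrors the third `DEFAULT_AUTH_SET` bucket; this is not the crate's public API):

```rust
use std::time::Duration;

// Overflow-safe variant, as in the widened side of the diff: multiply in
// u64, divide, then narrow back down to u32.
const fn max_rpi(max_rps: u32, interval: Duration) -> u32 {
    ((max_rps as u64) * (interval.as_millis() as u64) / 1000) as u32
}

fn main() {
    // 100 * 4096 rps over 600s: the intermediate product is
    // 409_600 * 600_000 ≈ 2.5e11, far beyond u32::MAX (~4.3e9), so doing
    // the multiplication in u32 would overflow before the division.
    let rpi = max_rpi(100 * 4096, Duration::from_secs(600));
    assert_eq!(rpi, 100 * 4096 * 600);
    println!("max_rpi = {rpi}");
}
```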

View File

@@ -1,4 +1,2 @@
pub mod cancellation_publisher;
pub mod connection_with_credentials_provider;
pub mod elasticache;
pub mod notifications;
pub mod publisher;

View File

@@ -1,161 +0,0 @@
use std::sync::Arc;
use pq_proto::CancelKeyData;
use redis::AsyncCommands;
use tokio::sync::Mutex;
use uuid::Uuid;
use crate::rate_limiter::{RateBucketInfo, RedisRateLimiter};
use super::{
connection_with_credentials_provider::ConnectionWithCredentialsProvider,
notifications::{CancelSession, Notification, PROXY_CHANNEL_NAME},
};
pub trait CancellationPublisherMut: Send + Sync + 'static {
#[allow(async_fn_in_trait)]
async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()>;
}
pub trait CancellationPublisher: Send + Sync + 'static {
#[allow(async_fn_in_trait)]
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()>;
}
impl CancellationPublisher for () {
async fn try_publish(
&self,
_cancel_key_data: CancelKeyData,
_session_id: Uuid,
) -> anyhow::Result<()> {
Ok(())
}
}
impl<P: CancellationPublisher> CancellationPublisherMut for P {
async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
<P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id).await
}
}
impl<P: CancellationPublisher> CancellationPublisher for Option<P> {
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
if let Some(p) = self {
p.try_publish(cancel_key_data, session_id).await
} else {
Ok(())
}
}
}
impl<P: CancellationPublisherMut> CancellationPublisher for Arc<Mutex<P>> {
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
self.lock()
.await
.try_publish(cancel_key_data, session_id)
.await
}
}
pub struct RedisPublisherClient {
client: ConnectionWithCredentialsProvider,
region_id: String,
limiter: RedisRateLimiter,
}
impl RedisPublisherClient {
pub fn new(
client: ConnectionWithCredentialsProvider,
region_id: String,
info: &'static [RateBucketInfo],
) -> anyhow::Result<Self> {
Ok(Self {
client,
region_id,
limiter: RedisRateLimiter::new(info),
})
}
async fn publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
let payload = serde_json::to_string(&Notification::Cancel(CancelSession {
region_id: Some(self.region_id.clone()),
cancel_key_data,
session_id,
}))?;
self.client.publish(PROXY_CHANNEL_NAME, payload).await?;
Ok(())
}
pub async fn try_connect(&mut self) -> anyhow::Result<()> {
match self.client.connect().await {
Ok(()) => {}
Err(e) => {
tracing::error!("failed to connect to redis: {e}");
return Err(e);
}
}
Ok(())
}
async fn try_publish_internal(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
if !self.limiter.check() {
tracing::info!("Rate limit exceeded. Skipping cancellation message");
return Err(anyhow::anyhow!("Rate limit exceeded"));
}
match self.publish(cancel_key_data, session_id).await {
Ok(()) => return Ok(()),
Err(e) => {
tracing::error!("failed to publish a message: {e}");
}
}
tracing::info!("Publisher is disconnected. Reconnectiong...");
self.try_connect().await?;
self.publish(cancel_key_data, session_id).await
}
}
impl CancellationPublisherMut for RedisPublisherClient {
async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
tracing::info!("publishing cancellation key to Redis");
match self.try_publish_internal(cancel_key_data, session_id).await {
Ok(()) => {
tracing::info!("cancellation key successfuly published to Redis");
Ok(())
}
Err(e) => {
tracing::error!("failed to publish a message: {e}");
Err(e)
}
}
}
}
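
The main idea in this module is the stack of blanket impls: `()` and `Option<P>` make publishing optional (a missing publisher is a successful no-op), and `Arc<Mutex<P>>` bridges the `&mut self` trait to shared `&self` callers. A self-contained sketch of that composition, with simplified stand-in types rather than the proxy's real ones:

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

// Stand-in for the `&mut self` publisher trait; names are illustrative.
trait PublisherMut {
    #[allow(async_fn_in_trait)]
    async fn try_publish(&mut self, key: u64) -> anyhow::Result<()>;
}

struct CountingPublisher {
    sent: u64,
}

impl PublisherMut for CountingPublisher {
    async fn try_publish(&mut self, _key: u64) -> anyhow::Result<()> {
        self.sent += 1;
        Ok(())
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // An unconfigured publisher (`None`) publishes as a successful no-op,
    // mirroring the `impl CancellationPublisher for Option<P>` above.
    let unconfigured: Option<Arc<Mutex<CountingPublisher>>> = None;
    if let Some(p) = &unconfigured {
        p.lock().await.try_publish(1).await?;
    }

    // `Arc<Mutex<P>>` adapts a `&mut self` publisher to shared callers,
    // mirroring the `impl CancellationPublisher for Arc<Mutex<P>>` above.
    let shared = Arc::new(Mutex::new(CountingPublisher { sent: 0 }));
    shared.lock().await.try_publish(7).await?;
    assert_eq!(shared.lock().await.sent, 1);
    Ok(())
}
```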

View File

@@ -1,225 +0,0 @@
use std::{sync::Arc, time::Duration};
use futures::FutureExt;
use redis::{
aio::{ConnectionLike, MultiplexedConnection},
ConnectionInfo, IntoConnectionInfo, RedisConnectionInfo, RedisResult,
};
use tokio::task::JoinHandle;
use tracing::{error, info};
use super::elasticache::CredentialsProvider;
enum Credentials {
Static(ConnectionInfo),
Dynamic(Arc<CredentialsProvider>, redis::ConnectionAddr),
}
impl Clone for Credentials {
fn clone(&self) -> Self {
match self {
Credentials::Static(info) => Credentials::Static(info.clone()),
Credentials::Dynamic(provider, addr) => {
Credentials::Dynamic(Arc::clone(provider), addr.clone())
}
}
}
}
/// A wrapper around `redis::MultiplexedConnection` that automatically refreshes the token.
/// Provides PubSub connection without credentials refresh.
pub struct ConnectionWithCredentialsProvider {
credentials: Credentials,
con: Option<MultiplexedConnection>,
refresh_token_task: Option<JoinHandle<()>>,
mutex: tokio::sync::Mutex<()>,
}
impl Clone for ConnectionWithCredentialsProvider {
fn clone(&self) -> Self {
Self {
credentials: self.credentials.clone(),
con: None,
refresh_token_task: None,
mutex: tokio::sync::Mutex::new(()),
}
}
}
impl ConnectionWithCredentialsProvider {
pub fn new_with_credentials_provider(
host: String,
port: u16,
credentials_provider: Arc<CredentialsProvider>,
) -> Self {
Self {
credentials: Credentials::Dynamic(
credentials_provider,
redis::ConnectionAddr::TcpTls {
host,
port,
insecure: false,
tls_params: None,
},
),
con: None,
refresh_token_task: None,
mutex: tokio::sync::Mutex::new(()),
}
}
pub fn new_with_static_credentials<T: IntoConnectionInfo>(params: T) -> Self {
Self {
credentials: Credentials::Static(params.into_connection_info().unwrap()),
con: None,
refresh_token_task: None,
mutex: tokio::sync::Mutex::new(()),
}
}
pub async fn connect(&mut self) -> anyhow::Result<()> {
let _guard = self.mutex.lock().await;
if let Some(con) = self.con.as_mut() {
match redis::cmd("PING").query_async(con).await {
Ok(()) => {
return Ok(());
}
Err(e) => {
error!("Error during PING: {e:?}");
}
}
} else {
info!("Connection is not established");
}
info!("Establishing a new connection...");
self.con = None;
if let Some(f) = self.refresh_token_task.take() {
f.abort()
}
let con = self
.get_client()
.await?
.get_multiplexed_tokio_connection()
.await?;
if let Credentials::Dynamic(credentials_provider, _) = &self.credentials {
let credentials_provider = credentials_provider.clone();
let con2 = con.clone();
let f = tokio::spawn(async move {
let _ = Self::keep_connection(con2, credentials_provider).await;
});
self.refresh_token_task = Some(f);
}
self.con = Some(con);
Ok(())
}
async fn get_connection_info(&self) -> anyhow::Result<ConnectionInfo> {
match &self.credentials {
Credentials::Static(info) => Ok(info.clone()),
Credentials::Dynamic(provider, addr) => {
let (username, password) = provider.provide_credentials().await?;
Ok(ConnectionInfo {
addr: addr.clone(),
redis: RedisConnectionInfo {
db: 0,
username: Some(username),
password: Some(password.clone()),
},
})
}
}
}
async fn get_client(&self) -> anyhow::Result<redis::Client> {
let client = redis::Client::open(self.get_connection_info().await?)?;
Ok(client)
}
// PubSub does not support credentials refresh.
// Requires manual reconnection every 12h.
pub async fn get_async_pubsub(&self) -> anyhow::Result<redis::aio::PubSub> {
Ok(self.get_client().await?.get_async_pubsub().await?)
}
// The connection lives for 12h.
// It can be prolonged with sending `AUTH` commands with the refreshed token.
// https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html#auth-iam-limits
async fn keep_connection(
mut con: MultiplexedConnection,
credentials_provider: Arc<CredentialsProvider>,
) -> anyhow::Result<()> {
loop {
// The connection lives for 12h; as a sanity check, we refresh it every hour.
tokio::time::sleep(Duration::from_secs(60 * 60)).await;
match Self::refresh_token(&mut con, credentials_provider.clone()).await {
Ok(()) => {
info!("Token refreshed");
}
Err(e) => {
error!("Error during token refresh: {e:?}");
}
}
}
}
async fn refresh_token(
con: &mut MultiplexedConnection,
credentials_provider: Arc<CredentialsProvider>,
) -> anyhow::Result<()> {
let (user, password) = credentials_provider.provide_credentials().await?;
redis::cmd("AUTH")
.arg(user)
.arg(password)
.query_async(con)
.await?;
Ok(())
}
/// Sends an already encoded (packed) command into the TCP socket and
/// reads the single response from it.
pub async fn send_packed_command(&mut self, cmd: &redis::Cmd) -> RedisResult<redis::Value> {
// Clone connection to avoid having to lock the ArcSwap in write mode
let con = self.con.as_mut().ok_or(redis::RedisError::from((
redis::ErrorKind::IoError,
"Connection not established",
)))?;
con.send_packed_command(cmd).await
}
/// Sends multiple already encoded (packed) command into the TCP socket
/// and reads `count` responses from it. This is used to implement
/// pipelining.
pub async fn send_packed_commands(
&mut self,
cmd: &redis::Pipeline,
offset: usize,
count: usize,
) -> RedisResult<Vec<redis::Value>> {
// Clone shared connection future to avoid having to lock the ArcSwap in write mode
let con = self.con.as_mut().ok_or(redis::RedisError::from((
redis::ErrorKind::IoError,
"Connection not established",
)))?;
con.send_packed_commands(cmd, offset, count).await
}
}
impl ConnectionLike for ConnectionWithCredentialsProvider {
fn req_packed_command<'a>(
&'a mut self,
cmd: &'a redis::Cmd,
) -> redis::RedisFuture<'a, redis::Value> {
(async move { self.send_packed_command(cmd).await }).boxed()
}
fn req_packed_commands<'a>(
&'a mut self,
cmd: &'a redis::Pipeline,
offset: usize,
count: usize,
) -> redis::RedisFuture<'a, Vec<redis::Value>> {
(async move { self.send_packed_commands(cmd, offset, count).await }).boxed()
}
fn get_db(&self) -> i64 {
0
}
}
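
For orientation, a minimal usage sketch of this wrapper, assuming a local, unauthenticated Redis (with static credentials no refresh task is spawned, and `connect()` doubles as the recovery path after a failed PING):

```rust
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Static credentials: the URL carries everything; no token refresh.
    let mut conn = ConnectionWithCredentialsProvider::new_with_static_credentials(
        "redis://127.0.0.1:6379",
    );
    conn.connect().await?;

    // The wrapper implements `redis::aio::ConnectionLike`, so ordinary
    // commands can be issued against it directly.
    let pong: String = redis::cmd("PING").query_async(&mut conn).await?;
    assert_eq!(pong, "PONG");
    Ok(())
}
```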

View File

@@ -1,110 +0,0 @@
use std::time::{Duration, SystemTime};
use aws_config::meta::credentials::CredentialsProviderChain;
use aws_sdk_iam::config::ProvideCredentials;
use aws_sigv4::http_request::{
self, SignableBody, SignableRequest, SignatureLocation, SigningSettings,
};
use tracing::info;
#[derive(Debug)]
pub struct AWSIRSAConfig {
region: String,
service_name: String,
cluster_name: String,
user_id: String,
token_ttl: Duration,
action: String,
}
impl AWSIRSAConfig {
pub fn new(region: String, cluster_name: Option<String>, user_id: Option<String>) -> Self {
AWSIRSAConfig {
region,
service_name: "elasticache".to_string(),
cluster_name: cluster_name.unwrap_or_default(),
user_id: user_id.unwrap_or_default(),
// "The IAM authentication token is valid for 15 minutes"
// https://docs.aws.amazon.com/memorydb/latest/devguide/auth-iam.html#auth-iam-limits
token_ttl: Duration::from_secs(15 * 60),
action: "connect".to_string(),
}
}
}
/// Credentials provider for AWS elasticache authentication.
///
/// Official documentation:
/// <https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/auth-iam.html>
///
/// Useful resources:
/// <https://aws.amazon.com/blogs/database/simplify-managing-access-to-amazon-elasticache-for-redis-clusters-with-iam/>
pub struct CredentialsProvider {
config: AWSIRSAConfig,
credentials_provider: CredentialsProviderChain,
}
impl CredentialsProvider {
pub fn new(config: AWSIRSAConfig, credentials_provider: CredentialsProviderChain) -> Self {
CredentialsProvider {
config,
credentials_provider,
}
}
pub async fn provide_credentials(&self) -> anyhow::Result<(String, String)> {
let aws_credentials = self
.credentials_provider
.provide_credentials()
.await?
.into();
info!("AWS credentials successfully obtained");
info!("Connecting to Redis with configuration: {:?}", self.config);
let mut settings = SigningSettings::default();
settings.signature_location = SignatureLocation::QueryParams;
settings.expires_in = Some(self.config.token_ttl);
let signing_params = aws_sigv4::sign::v4::SigningParams::builder()
.identity(&aws_credentials)
.region(&self.config.region)
.name(&self.config.service_name)
.time(SystemTime::now())
.settings(settings)
.build()?
.into();
let auth_params = [
("Action", &self.config.action),
("User", &self.config.user_id),
];
let auth_params = url::form_urlencoded::Serializer::new(String::new())
.extend_pairs(auth_params)
.finish();
let auth_uri = http::Uri::builder()
.scheme("http")
.authority(self.config.cluster_name.as_bytes())
.path_and_query(format!("/?{auth_params}"))
.build()?;
info!("{}", auth_uri);
// Convert the HTTP request into a signable request
let signable_request = SignableRequest::new(
"GET",
auth_uri.to_string(),
std::iter::empty(),
SignableBody::Bytes(&[]),
)?;
// Sign and then apply the signature to the request
let (si, _) = http_request::sign(signable_request, &signing_params)?.into_parts();
let mut signable_request = http::Request::builder()
.method("GET")
.uri(auth_uri)
.body(())?;
si.apply_to_request_http1x(&mut signable_request);
Ok((
self.config.user_id.clone(),
signable_request
.uri()
.to_string()
.replacen("http://", "", 1),
))
}
}

View File

@@ -6,12 +6,11 @@ use redis::aio::PubSub;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::{
cache::project_info::ProjectInfoCache,
cancellation::{CancelMap, CancellationHandler},
cancellation::{CancelMap, CancellationHandler, NotificationsCancellationHandler},
intern::{ProjectIdInt, RoleNameInt},
metrics::{NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS, REDIS_BROKEN_MESSAGES},
metrics::REDIS_BROKEN_MESSAGES,
};
const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
@@ -19,13 +18,23 @@ pub(crate) const PROXY_CHANNEL_NAME: &str = "neondb-proxy-to-proxy-updates";
const RECONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
const INVALIDATION_LAG: std::time::Duration = std::time::Duration::from_secs(20);
async fn try_connect(client: &ConnectionWithCredentialsProvider) -> anyhow::Result<PubSub> {
let mut conn = client.get_async_pubsub().await?;
tracing::info!("subscribing to a channel `{CPLANE_CHANNEL_NAME}`");
conn.subscribe(CPLANE_CHANNEL_NAME).await?;
tracing::info!("subscribing to a channel `{PROXY_CHANNEL_NAME}`");
conn.subscribe(PROXY_CHANNEL_NAME).await?;
Ok(conn)
struct RedisConsumerClient {
client: redis::Client,
}
impl RedisConsumerClient {
pub fn new(url: &str) -> anyhow::Result<Self> {
let client = redis::Client::open(url)?;
Ok(Self { client })
}
async fn try_connect(&self) -> anyhow::Result<PubSub> {
let mut conn = self.client.get_async_connection().await?.into_pubsub();
tracing::info!("subscribing to a channel `{CPLANE_CHANNEL_NAME}`");
conn.subscribe(CPLANE_CHANNEL_NAME).await?;
tracing::info!("subscribing to a channel `{PROXY_CHANNEL_NAME}`");
conn.subscribe(PROXY_CHANNEL_NAME).await?;
Ok(conn)
}
}
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
@@ -71,18 +80,21 @@ where
serde_json::from_str(&s).map_err(<D::Error as serde::de::Error>::custom)
}
struct MessageHandler<C: ProjectInfoCache + Send + Sync + 'static> {
struct MessageHandler<
C: ProjectInfoCache + Send + Sync + 'static,
H: NotificationsCancellationHandler + Send + Sync + 'static,
> {
cache: Arc<C>,
cancellation_handler: Arc<CancellationHandler<()>>,
cancellation_handler: Arc<H>,
region_id: String,
}
impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
pub fn new(
cache: Arc<C>,
cancellation_handler: Arc<CancellationHandler<()>>,
region_id: String,
) -> Self {
impl<
C: ProjectInfoCache + Send + Sync + 'static,
H: NotificationsCancellationHandler + Send + Sync + 'static,
> MessageHandler<C, H>
{
pub fn new(cache: Arc<C>, cancellation_handler: Arc<H>, region_id: String) -> Self {
Self {
cache,
cancellation_handler,
@@ -127,7 +139,7 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
// This instance of cancellation_handler doesn't have a RedisPublisherClient so it can't publish the message.
match self
.cancellation_handler
.cancel_session(cancel_session.cancel_key_data, uuid::Uuid::nil())
.cancel_session_no_publish(cancel_session.cancel_key_data)
.await
{
Ok(()) => {}
@@ -170,7 +182,7 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
/// Handle console's invalidation messages.
#[tracing::instrument(name = "console_notifications", skip_all)]
pub async fn task_main<C>(
redis: ConnectionWithCredentialsProvider,
url: String,
cache: Arc<C>,
cancel_map: CancelMap,
region_id: String,
@@ -181,15 +193,13 @@ where
cache.enable_ttl();
let handler = MessageHandler::new(
cache,
Arc::new(CancellationHandler::<()>::new(
cancel_map,
NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS,
)),
Arc::new(CancellationHandler::new(cancel_map, None)),
region_id,
);
loop {
let mut conn = match try_connect(&redis).await {
let redis = RedisConsumerClient::new(&url)?;
let conn = match redis.try_connect().await {
Ok(conn) => {
handler.disable_ttl();
conn
@@ -202,7 +212,7 @@ where
continue;
}
};
let mut stream = conn.on_message();
let mut stream = conn.into_on_message();
while let Some(msg) = stream.next().await {
match handler.handle_message(msg).await {
Ok(()) => {}

View File

@@ -0,0 +1,80 @@
use pq_proto::CancelKeyData;
use redis::AsyncCommands;
use uuid::Uuid;
use crate::rate_limiter::{RateBucketInfo, RedisRateLimiter};
use super::notifications::{CancelSession, Notification, PROXY_CHANNEL_NAME};
pub struct RedisPublisherClient {
client: redis::Client,
publisher: Option<redis::aio::Connection>,
region_id: String,
limiter: RedisRateLimiter,
}
impl RedisPublisherClient {
pub fn new(
url: &str,
region_id: String,
info: &'static [RateBucketInfo],
) -> anyhow::Result<Self> {
let client = redis::Client::open(url)?;
Ok(Self {
client,
publisher: None,
region_id,
limiter: RedisRateLimiter::new(info),
})
}
pub async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
if !self.limiter.check() {
tracing::info!("Rate limit exceeded. Skipping cancellation message");
return Err(anyhow::anyhow!("Rate limit exceeded"));
}
match self.publish(cancel_key_data, session_id).await {
Ok(()) => return Ok(()),
Err(e) => {
tracing::error!("failed to publish a message: {e}");
self.publisher = None;
}
}
tracing::info!("Publisher is disconnected. Reconnectiong...");
self.try_connect().await?;
self.publish(cancel_key_data, session_id).await
}
async fn publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
) -> anyhow::Result<()> {
let conn = self
.publisher
.as_mut()
.ok_or_else(|| anyhow::anyhow!("not connected"))?;
let payload = serde_json::to_string(&Notification::Cancel(CancelSession {
region_id: Some(self.region_id.clone()),
cancel_key_data,
session_id,
}))?;
conn.publish(PROXY_CHANNEL_NAME, payload).await?;
Ok(())
}
pub async fn try_connect(&mut self) -> anyhow::Result<()> {
match self.client.get_async_connection().await {
Ok(conn) => {
self.publisher = Some(conn);
}
Err(e) => {
tracing::error!("failed to connect to redis: {e}");
return Err(e.into());
}
}
Ok(())
}
}

View File

@@ -3,7 +3,9 @@
use std::convert::Infallible;
use hmac::{Hmac, Mac};
use sha2::Sha256;
use sha2::digest::FixedOutput;
use sha2::{Digest, Sha256};
use subtle::{Choice, ConstantTimeEq};
use tokio::task::yield_now;
use super::messages::{
@@ -11,7 +13,6 @@ use super::messages::{
};
use super::secret::ServerSecret;
use super::signature::SignatureBuilder;
use super::ScramKey;
use crate::config;
use crate::sasl::{self, ChannelBinding, Error as SaslError};
@@ -103,7 +104,7 @@ async fn pbkdf2(str: &[u8], salt: &[u8], iterations: u32) -> [u8; 32] {
}
// copied from <https://github.com/neondatabase/rust-postgres/blob/20031d7a9ee1addeae6e0968e3899ae6bf01cee2/postgres-protocol/src/authentication/sasl.rs#L236-L248>
async fn derive_client_key(password: &[u8], salt: &[u8], iterations: u32) -> ScramKey {
async fn derive_keys(password: &[u8], salt: &[u8], iterations: u32) -> ([u8; 32], [u8; 32]) {
let salted_password = pbkdf2(password, salt, iterations).await;
let make_key = |name| {
@@ -115,7 +116,7 @@ async fn derive_client_key(password: &[u8], salt: &[u8], iterations: u32) -> Scr
<[u8; 32]>::from(key.into_bytes())
};
make_key(b"Client Key").into()
(make_key(b"Client Key"), make_key(b"Server Key"))
}
pub async fn exchange(
@@ -123,12 +124,21 @@ pub async fn exchange(
password: &[u8],
) -> sasl::Result<sasl::Outcome<super::ScramKey>> {
let salt = base64::decode(&secret.salt_base64)?;
let client_key = derive_client_key(password, &salt, secret.iterations).await;
let (client_key, server_key) = derive_keys(password, &salt, secret.iterations).await;
let stored_key: [u8; 32] = Sha256::default()
.chain_update(client_key)
.finalize_fixed()
.into();
if secret.is_password_invalid(&client_key).into() {
Ok(sasl::Outcome::Failure("password doesn't match"))
// constant time to not leak partial key match
let valid = stored_key.ct_eq(&secret.stored_key.as_bytes())
| server_key.ct_eq(&secret.server_key.as_bytes())
| Choice::from(secret.doomed as u8);
if valid.into() {
Ok(sasl::Outcome::Success(super::ScramKey::from(client_key)))
} else {
Ok(sasl::Outcome::Success(client_key))
Ok(sasl::Outcome::Failure("password doesn't match"))
}
}
@@ -210,7 +220,7 @@ impl SaslSentInner {
.derive_client_key(&client_final_message.proof);
// Auth fails either if keys don't match or it's pre-determined to fail.
if secret.is_password_invalid(&client_key).into() {
if client_key.sha256() != secret.stored_key || secret.doomed {
return Ok(sasl::Step::Failure("password doesn't match"));
}
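
The `|` on `subtle::Choice` values above (rather than `||` on plain `bool`s) is what keeps the check constant-time: both key comparisons always execute, whichever one matches. A small sketch of the same pattern:

```rust
use subtle::{Choice, ConstantTimeEq};

fn main() {
    let stored_key = [7u8; 32];
    let candidate = [7u8; 32];
    let doomed = false;

    // `BitOr` on `Choice` never short-circuits, so the amount of work is
    // identical whether or not the first operand already decided the
    // outcome; `||` on bools would stop early and leak a timing signal.
    let valid = stored_key.ct_eq(&candidate) | Choice::from(doomed as u8);
    assert!(bool::from(valid));
}
```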

View File

@@ -1,31 +1,17 @@
//! Tools for client/server/stored key management.
use subtle::ConstantTimeEq;
/// Faithfully taken from PostgreSQL.
pub const SCRAM_KEY_LEN: usize = 32;
/// One of the keys derived from the user's password.
/// We use the same structure for all keys, i.e.
/// `ClientKey`, `StoredKey`, and `ServerKey`.
#[derive(Clone, Default, Eq, Debug)]
#[derive(Clone, Default, PartialEq, Eq, Debug)]
#[repr(transparent)]
pub struct ScramKey {
bytes: [u8; SCRAM_KEY_LEN],
}
impl PartialEq for ScramKey {
fn eq(&self, other: &Self) -> bool {
self.ct_eq(other).into()
}
}
impl ConstantTimeEq for ScramKey {
fn ct_eq(&self, other: &Self) -> subtle::Choice {
self.bytes.ct_eq(&other.bytes)
}
}
impl ScramKey {
pub fn sha256(&self) -> Self {
super::sha256([self.as_ref()]).into()

View File

@@ -206,28 +206,6 @@ mod tests {
}
}
#[test]
fn parse_client_first_message_with_invalid_gs2_authz() {
assert!(ClientFirstMessage::parse("n,authzid,n=user,r=nonce").is_none())
}
#[test]
fn parse_client_first_message_with_extra_params() {
let msg = ClientFirstMessage::parse("n,,n=user,r=nonce,a=foo,b=bar,c=baz").unwrap();
assert_eq!(msg.bare, "n=user,r=nonce,a=foo,b=bar,c=baz");
assert_eq!(msg.username, "user");
assert_eq!(msg.nonce, "nonce");
assert_eq!(msg.cbind_flag, ChannelBinding::NotSupportedClient);
}
#[test]
fn parse_client_first_message_with_extra_params_invalid() {
// must be of the form `<ascii letter>=<...>`
assert!(ClientFirstMessage::parse("n,,n=user,r=nonce,abc=foo").is_none());
assert!(ClientFirstMessage::parse("n,,n=user,r=nonce,1=foo").is_none());
assert!(ClientFirstMessage::parse("n,,n=user,r=nonce,a").is_none());
}
#[test]
fn parse_client_final_message() {
let input = [

View File

@@ -1,7 +1,5 @@
//! Tools for SCRAM server secret management.
use subtle::{Choice, ConstantTimeEq};
use super::base64_decode_array;
use super::key::ScramKey;
@@ -42,21 +40,16 @@ impl ServerSecret {
Some(secret)
}
pub fn is_password_invalid(&self, client_key: &ScramKey) -> Choice {
// constant time to not leak partial key match
client_key.sha256().ct_ne(&self.stored_key) | Choice::from(self.doomed as u8)
}
/// To avoid revealing information to an attacker, we use a
/// mocked server secret even if the user doesn't exist.
/// See `auth-scram.c : mock_scram_secret` for details.
pub fn mock(nonce: [u8; 32]) -> Self {
pub fn mock(user: &str, nonce: [u8; 32]) -> Self {
// Refer to `auth-scram.c : scram_mock_salt`.
let mocked_salt = super::sha256([user.as_bytes(), &nonce]);
Self {
// this doesn't reveal much information as we're going to use
// iteration count 1 for our generated passwords going forward.
// PG16 users can set iteration count=1 already today.
iterations: 1,
salt_base64: base64::encode(nonce),
iterations: 4096,
salt_base64: base64::encode(mocked_salt),
stored_key: ScramKey::default(),
server_key: ScramKey::default(),
doomed: true,

View File

@@ -21,12 +21,11 @@ pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use tokio_util::task::TaskTracker;
use tracing::instrument::Instrumented;
use crate::cancellation::CancellationHandlerMain;
use crate::config::ProxyConfig;
use crate::context::RequestMonitoring;
use crate::protocol2::{ProxyProtocolAccept, WithClientIp, WithConnectionGuard};
use crate::rate_limiter::EndpointRateLimiter;
use crate::serverless::backend::PoolingBackend;
use crate::{cancellation::CancellationHandler, config::ProxyConfig};
use hyper::{
server::conn::{AddrIncoming, AddrStream},
Body, Method, Request, Response,
@@ -48,7 +47,7 @@ pub async fn task_main(
ws_listener: TcpListener,
cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
cancellation_handler: Arc<CancellationHandlerMain>,
cancellation_handler: Arc<CancellationHandler>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("websocket server has shut down");
@@ -238,7 +237,7 @@ async fn request_handler(
config: &'static ProxyConfig,
backend: Arc<PoolingBackend>,
ws_connections: TaskTracker,
cancellation_handler: Arc<CancellationHandlerMain>,
cancellation_handler: Arc<CancellationHandler>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
// used to cancel in-flight HTTP requests. not used to cancel websockets

View File

@@ -42,12 +42,7 @@ impl PoolingBackend {
};
let secret = match cached_secret.value.clone() {
Some(secret) => self.config.authentication_config.check_rate_limit(
ctx,
secret,
&user_info.endpoint,
true,
)?,
Some(secret) => secret,
None => {
// If we don't have an authentication secret, for the http flow we can just return an error.
info!("authentication info not found");

View File

@@ -1,5 +1,5 @@
use crate::{
cancellation::CancellationHandlerMain,
cancellation::CancellationHandler,
config::ProxyConfig,
context::RequestMonitoring,
error::{io_error, ReportableError},
@@ -134,7 +134,7 @@ pub async fn serve_websocket(
config: &'static ProxyConfig,
mut ctx: RequestMonitoring,
websocket: HyperWebsocket,
cancellation_handler: Arc<CancellationHandlerMain>,
cancellation_handler: Arc<CancellationHandler>,
hostname: Option<String>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {

View File

@@ -1,5 +1,5 @@
[toolchain]
channel = "1.77.0"
channel = "1.76.0"
profile = "default"
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
# https://rust-lang.github.io/rustup/concepts/profiles.html

View File

@@ -225,7 +225,6 @@ async fn write_segment(
assert!(from <= to);
assert!(to <= wal_seg_size);
#[allow(clippy::suspicious_open_options)]
let mut file = OpenOptions::new()
.create(true)
.write(true)

View File

@@ -221,7 +221,6 @@ impl PhysicalStorage {
// half initialized segment, first bake it under tmp filename and
// then rename.
let tmp_path = self.timeline_dir.join("waltmp");
#[allow(clippy::suspicious_open_options)]
let mut file = OpenOptions::new()
.create(true)
.write(true)

View File

@@ -244,7 +244,6 @@ impl SimulationApi {
mutex: 0,
mineLastElectedTerm: 0,
backpressureThrottlingTime: pg_atomic_uint64 { value: 0 },
currentClusterSize: pg_atomic_uint64 { value: 0 },
shard_ps_feedback: [empty_feedback; 128],
num_shards: 0,
min_ps_feedback: empty_feedback,

View File

@@ -1155,17 +1155,13 @@ class NeonEnv:
After this method returns, there should be no child processes running.
"""
self.endpoints.stop_all()
# Stop storage controller before pageservers: we don't want it to spuriously
# detect a pageserver "failure" during test teardown
self.storage_controller.stop(immediate=immediate)
for sk in self.safekeepers:
sk.stop(immediate=immediate)
for pageserver in self.pageservers:
if ps_assert_metric_no_errors:
pageserver.assert_no_metric_errors()
pageserver.stop(immediate=immediate)
self.storage_controller.stop(immediate=immediate)
self.broker.stop(immediate=immediate)
@property
@@ -2126,8 +2122,6 @@ class NeonStorageController(MetricsGetter):
shard_params = {"count": shard_count}
if shard_stripe_size is not None:
shard_params["stripe_size"] = shard_stripe_size
else:
shard_params["stripe_size"] = 32768
body["shard_parameters"] = shard_params
@@ -2141,7 +2135,6 @@ class NeonStorageController(MetricsGetter):
json=body,
headers=self.headers(TokenScope.PAGE_SERVER_API),
)
response.raise_for_status()
log.info(f"tenant_create success: {response.json()}")
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:

View File

@@ -86,9 +86,6 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
# This is especially pronounced in tests that set small checkpoint
# distances.
".*Flushed oversized open layer with size.*",
# During teardown, we stop the storage controller before the pageservers, so pageservers
# can experience connection errors doing background deletion queue work.
".*WARN deletion backend: calling control plane generation validation API failed.*Connection refused.*",
)
@@ -99,8 +96,6 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [
".*Call to node.*management API.*failed.*ReceiveBody.*",
# Many tests will start up with a node offline
".*startup_reconcile: Could not scan node.*",
# Tests run in dev mode
".*Starting in dev mode.*",
]

View File

@@ -62,7 +62,9 @@ def wait_for_upload(
)
time.sleep(1)
raise Exception(
f"timed out while waiting for {tenant}/{timeline} remote_consistent_lsn to reach {lsn}, was {current_lsn}"
"timed out while waiting for remote_consistent_lsn to reach {}, was {}".format(
lsn, current_lsn
)
)

View File

@@ -1,6 +1,5 @@
from contextlib import closing
import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import NeonCompare, PgCompare
from fixtures.pageserver.utils import wait_tenant_status_404
@@ -18,7 +17,6 @@ from fixtures.types import Lsn
# 3. Disk space used
# 4. Peak memory usage
#
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/7124")
def test_bulk_insert(neon_with_baseline: PgCompare):
env = neon_with_baseline

View File

@@ -105,7 +105,7 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
# The neon_local tool generates one key pair at a hardcoded path by default.
# As a preparation for our test, move the public key of the key pair into a
# directory at the same location as the hardcoded path by:
# 1. moving the file at `configured_pub_key_path` to a temporary location
# 1. moving the the file at `configured_pub_key_path` to a temporary location
# 2. creating a new directory at `configured_pub_key_path`
# 3. moving the file from the temporary location into the newly created directory
configured_pub_key_path = Path(env.repo_dir) / "auth_public_key.pem"

View File

@@ -267,10 +267,9 @@ def test_forward_compatibility(
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
ep = env.endpoints.create_start("main")
connstr = ep.connstr()
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
connstr = ep.connstr()
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
)
@@ -287,9 +286,6 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
timeline_id = env.initial_timeline
pg_version = env.pg_version
# Stop endpoint while we recreate timeline
ep.stop()
try:
pageserver_http.timeline_preserve_initdb_archive(tenant_id, timeline_id)
except PageserverApiException as e:
@@ -314,9 +310,6 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
existing_initdb_timeline_id=timeline_id,
)
# Timeline exists again: restart the endpoint
ep.start()
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
)

View File

@@ -85,7 +85,6 @@ def test_hot_standby(neon_simple_env: NeonEnv):
if slow_down_send:
sk_http.configure_failpoints(("sk-send-wal-replica-sleep", "off"))
def test_2_replicas_start(neon_simple_env: NeonEnv):
env = neon_simple_env
@@ -94,11 +93,6 @@ def test_2_replicas_start(neon_simple_env: NeonEnv):
endpoint_id="primary",
) as primary:
time.sleep(1)
with env.endpoints.new_replica_start(
origin=primary, endpoint_id="secondary1"
) as secondary1:
with env.endpoints.new_replica_start(
origin=primary, endpoint_id="secondary2"
) as secondary2:
wait_replica_caughtup(primary, secondary1)
wait_replica_caughtup(primary, secondary2)
with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary1") as secondary1:
with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary2") as secondary2:
pass

View File

@@ -1,275 +0,0 @@
import asyncio
import os
from typing import Tuple
import psutil
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
tenant_get_shards,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until
TIMELINE_COUNT = 10
ENTRIES_PER_TIMELINE = 10_000
CHECKPOINT_TIMEOUT_SECONDS = 60
async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> Tuple[TenantId, TimelineId, Lsn]:
tenant, timeline = env.neon_cli.create_tenant(conf=tenant_conf)
with env.endpoints.create_start("main", tenant_id=tenant) as ep:
conn = await ep.connect_async()
try:
await conn.execute("CREATE TABLE IF NOT EXISTS t(key serial primary key, value text)")
await conn.execute(
f"INSERT INTO t SELECT i, CONCAT('payload_', i) FROM generate_series(0,{entries}) as i"
)
finally:
await conn.close(timeout=10)
last_flush_lsn = Lsn(ep.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
return tenant, timeline, last_flush_lsn
async def workload(
env: NeonEnv, tenant_conf, timelines: int, entries: int
) -> list[Tuple[TenantId, TimelineId, Lsn]]:
workers = [asyncio.create_task(run_worker(env, tenant_conf, entries)) for _ in range(timelines)]
return await asyncio.gather(*workers)
def wait_until_pageserver_is_caught_up(
env: NeonEnv, last_flush_lsns: list[Tuple[TenantId, TimelineId, Lsn]]
):
for tenant, timeline, last_flush_lsn in last_flush_lsns:
shards = tenant_get_shards(env, tenant)
for tenant_shard_id, pageserver in shards:
waited = wait_for_last_record_lsn(
pageserver.http_client(), tenant_shard_id, timeline, last_flush_lsn
)
assert waited >= last_flush_lsn
def wait_until_pageserver_has_uploaded(
env: NeonEnv, last_flush_lsns: list[Tuple[TenantId, TimelineId, Lsn]]
):
for tenant, timeline, last_flush_lsn in last_flush_lsns:
shards = tenant_get_shards(env, tenant)
for tenant_shard_id, pageserver in shards:
wait_for_upload(pageserver.http_client(), tenant_shard_id, timeline, last_flush_lsn)
def wait_for_wal_ingest_metric(pageserver_http: PageserverHttpClient) -> float:
def query():
value = pageserver_http.get_metric_value("pageserver_wal_ingest_records_received_total")
assert value is not None
return value
# The metric gets initialised on the first update.
# Retry a few times, but return 0 if it's stable.
try:
return float(wait_until(3, 0.5, query))
except Exception:
return 0
def get_dirty_bytes(env):
v = env.pageserver.http_client().get_metric_value("pageserver_timeline_ephemeral_bytes") or 0
log.info(f"dirty_bytes: {v}")
return v
def assert_dirty_bytes(env, v):
assert get_dirty_bytes(env) == v
def assert_dirty_bytes_nonzero(env):
assert get_dirty_bytes(env) > 0
@pytest.mark.parametrize("immediate_shutdown", [True, False])
def test_pageserver_small_inmemory_layers(
neon_env_builder: NeonEnvBuilder, immediate_shutdown: bool
):
"""
Test that open layers get flushed after the `checkpoint_timeout` config
and do not require WAL reingest upon restart.
The workload creates a number of timelines and writes some data to each,
but not enough to trigger flushes via the `checkpoint_distance` config.
"""
tenant_conf = {
# Large `checkpoint_distance` effectively disables size
# based checkpointing.
"checkpoint_distance": f"{2 * 1024 ** 3}",
"checkpoint_timeout": f"{CHECKPOINT_TIMEOUT_SECONDS}s",
"compaction_period": "1s",
}
env = neon_env_builder.init_configs()
env.start()
last_flush_lsns = asyncio.run(workload(env, tenant_conf, TIMELINE_COUNT, ENTRIES_PER_TIMELINE))
wait_until_pageserver_is_caught_up(env, last_flush_lsns)
# We didn't write enough data to trigger a size-based checkpoint: we should see dirty data.
wait_until(10, 1, lambda: assert_dirty_bytes_nonzero(env)) # type: ignore
ps_http_client = env.pageserver.http_client()
total_wal_ingested_before_restart = wait_for_wal_ingest_metric(ps_http_client)
# Within ~ the checkpoint interval, all the ephemeral layers should be frozen and flushed,
# such that there are zero bytes of ephemeral layer left on the pageserver
log.info("Waiting for background checkpoints...")
wait_until(CHECKPOINT_TIMEOUT_SECONDS * 2, 1, lambda: assert_dirty_bytes(env, 0)) # type: ignore
# Zero ephemeral layer bytes does not imply that all the frozen layers were uploaded: they
# must be uploaded to remain visible to the pageserver after restart.
wait_until_pageserver_has_uploaded(env, last_flush_lsns)
env.pageserver.restart(immediate=immediate_shutdown)
wait_until_pageserver_is_caught_up(env, last_flush_lsns)
# Catching up with WAL ingest should have resulted in zero bytes of ephemeral layers, since
# we froze, flushed and uploaded everything before restarting. There can be no more WAL writes
# because we shut down compute endpoints before flushing.
assert get_dirty_bytes(env) == 0
total_wal_ingested_after_restart = wait_for_wal_ingest_metric(ps_http_client)
log.info(f"WAL ingested before restart: {total_wal_ingested_before_restart}")
log.info(f"WAL ingested after restart: {total_wal_ingested_after_restart}")
assert total_wal_ingested_after_restart == 0
def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder):
"""
Test that `checkpoint_timeout` is enforced even if there is no safekeeper input.
"""
tenant_conf = {
# Large `checkpoint_distance` effectively disables size
# based checkpointing.
"checkpoint_distance": f"{2 * 1024 ** 3}",
"checkpoint_timeout": f"{CHECKPOINT_TIMEOUT_SECONDS}s",
"compaction_period": "1s",
}
env = neon_env_builder.init_configs()
env.start()
last_flush_lsns = asyncio.run(workload(env, tenant_conf, TIMELINE_COUNT, ENTRIES_PER_TIMELINE))
wait_until_pageserver_is_caught_up(env, last_flush_lsns)
# We didn't write enough data to trigger a size-based checkpoint: we should see dirty data.
wait_until(10, 1, lambda: assert_dirty_bytes_nonzero(env)) # type: ignore
# Stop the safekeepers, so that we cannot have any more WAL receiver connections
for sk in env.safekeepers:
sk.stop()
# We should have got here fast enough that we didn't hit the background interval yet,
# and the teardown of SK connections shouldn't prompt any layer freezing.
assert get_dirty_bytes(env) > 0
# Within ~ the checkpoint interval, all the ephemeral layers should be frozen and flushed,
# such that there are zero bytes of ephemeral layer left on the pageserver
log.info("Waiting for background checkpoints...")
wait_until(CHECKPOINT_TIMEOUT_SECONDS * 2, 1, lambda: assert_dirty_bytes(env, 0)) # type: ignore
@pytest.mark.skipif(
# We have to use at least ~100MB of data to hit the lowest limit we can configure, which is
# prohibitively slow in debug mode
os.getenv("BUILD_TYPE") == "debug",
reason="Avoid running bulkier ingest tests in debug mode",
)
def test_total_size_limit(neon_env_builder: NeonEnvBuilder):
"""
Test that checkpoints are done based on total ephemeral layer size, even if no one timeline is
individually exceeding checkpoint thresholds.
"""
system_memory = psutil.virtual_memory().total
# The smallest total size limit we can configure is 1/1024th of the system memory (e.g. 128MB on
# a system with 128GB of RAM). We will then write enough data to violate this limit.
max_dirty_data = 128 * 1024 * 1024
ephemeral_bytes_per_memory_kb = (max_dirty_data * 1024) // system_memory
assert ephemeral_bytes_per_memory_kb > 0
neon_env_builder.pageserver_config_override = f"""
ephemeral_bytes_per_memory_kb={ephemeral_bytes_per_memory_kb}
"""
compaction_period_s = 10
tenant_conf = {
# Large space + time thresholds: effectively disable these limits
"checkpoint_distance": f"{1024 ** 4}",
"checkpoint_timeout": "3600s",
"compaction_period": f"{compaction_period_s}s",
}
env = neon_env_builder.init_configs()
env.start()
timeline_count = 10
# This is about 2MiB of data per timeline
entries_per_timeline = 100_000
last_flush_lsns = asyncio.run(workload(env, tenant_conf, timeline_count, entries_per_timeline))
wait_until_pageserver_is_caught_up(env, last_flush_lsns)
total_bytes_ingested = 0
for tenant, timeline, last_flush_lsn in last_flush_lsns:
http_client = env.pageserver.http_client()
initdb_lsn = Lsn(http_client.timeline_detail(tenant, timeline)["initdb_lsn"])
total_bytes_ingested += last_flush_lsn - initdb_lsn
log.info(f"Ingested {total_bytes_ingested} bytes since initdb (vs max dirty {max_dirty_data})")
assert total_bytes_ingested > max_dirty_data
# Expected end state: the total physical size of all the tenants is in excess of the max dirty
# data, but the total amount of dirty data is less than the limit: this demonstrates that we
# have exceeded the threshold but then rolled layers in response
def get_total_historic_layers():
total_ephemeral_layers = 0
total_historic_bytes = 0
for tenant, timeline, _last_flush_lsn in last_flush_lsns:
http_client = env.pageserver.http_client()
initdb_lsn = Lsn(http_client.timeline_detail(tenant, timeline)["initdb_lsn"])
layer_map = http_client.layer_map_info(tenant, timeline)
total_historic_bytes += sum(
layer.layer_file_size
for layer in layer_map.historic_layers
if layer.layer_file_size is not None and Lsn(layer.lsn_start) > initdb_lsn
)
total_ephemeral_layers += len(layer_map.in_memory_layers)
log.info(
f"Total historic layer bytes: {total_historic_bytes} ({total_ephemeral_layers} ephemeral layers)"
)
return total_historic_bytes
def assert_bytes_rolled():
assert total_bytes_ingested - get_total_historic_layers() <= max_dirty_data
# Wait until enough layers have rolled that the amount of dirty data is under the threshold.
# We do this indirectly via layer maps, rather than the dirty bytes metric, to avoid false-passing
# if that metric isn't updated quickly enough to reflect the dirty bytes exceeding the limit.
wait_until(compaction_period_s * 2, 1, assert_bytes_rolled)
# The end state should also have the reported metric under the limit
def assert_dirty_data_limited():
dirty_bytes = get_dirty_bytes(env)
assert dirty_bytes < max_dirty_data
wait_until(compaction_period_s * 2, 1, lambda: assert_dirty_data_limited()) # type: ignore

View File

@@ -1,6 +1,4 @@
import gzip
import json
import os
import time
from dataclasses import dataclass
from pathlib import Path
@@ -12,11 +10,7 @@ from fixtures.neon_fixtures import (
NeonEnvBuilder,
wait_for_last_flush_lsn,
)
from fixtures.remote_storage import (
LocalFsStorage,
RemoteStorageKind,
remote_storage_to_toml_inline_table,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from pytest_httpserver import HTTPServer
from werkzeug.wrappers.request import Request
@@ -46,9 +40,6 @@ def test_metric_collection(
uploads.put((events, is_last == "true"))
return Response(status=200)
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
assert neon_env_builder.pageserver_remote_storage is not None
# Require collecting metrics frequently, since we change
# the timeline and want something to be logged about it.
#
@@ -57,11 +48,12 @@ def test_metric_collection(
neon_env_builder.pageserver_config_override = f"""
metric_collection_interval="1s"
metric_collection_endpoint="{metric_collection_endpoint}"
metric_collection_bucket={remote_storage_to_toml_inline_table(neon_env_builder.pageserver_remote_storage)}
cached_metric_collection_interval="0s"
synthetic_size_calculation_interval="3s"
"""
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
log.info(f"test_metric_collection endpoint is {metric_collection_endpoint}")
# mock http server that returns OK for the metrics
@@ -78,7 +70,6 @@ def test_metric_collection(
# we have a fast rate of calculation, these can happen at shutdown
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
".*metrics_collection: failed to upload to S3: Failed to upload data of length .* to storage path.*",
]
)
@@ -175,20 +166,6 @@ def test_metric_collection(
httpserver.check()
# Check that at least one bucket output object is present, and that all
# can be decompressed and decoded.
bucket_dumps = {}
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
for dirpath, _dirs, files in os.walk(env.pageserver_remote_storage.root):
for file in files:
file_path = os.path.join(dirpath, file)
log.info(file_path)
if file.endswith(".gz"):
bucket_dumps[file_path] = json.load(gzip.open(file_path))
assert len(bucket_dumps) >= 1
assert all("events" in data for data in bucket_dumps.values())
def test_metric_collection_cleans_up_tempfile(
httpserver: HTTPServer,

View File

@@ -11,7 +11,6 @@ from fixtures.pageserver.utils import (
assert_prefix_empty,
poll_for_remote_storage_iterations,
tenant_delete_wait_completed,
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage
from fixtures.types import TenantId, TimelineId
@@ -90,8 +89,6 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
# this shutdown case is logged at WARN severity by the time it bubbles up to logical size calculation code
# WARN ...: initial size calculation failed: downloading failed, possibly for shutdown
".*downloading failed, possibly for shutdown",
# {tenant_id=... timeline_id=...}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1664/0/1260 blkno=0 req_lsn=0/149F0D8}: error reading relation or page version: Not found: will not become active. Current state: Stopping\n'
".*page_service.*will not become active.*",
]
)
@@ -475,10 +472,6 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
log.info("Synchronizing after initial write...")
ps_attached.http_client().tenant_heatmap_upload(tenant_id)
# Ensure that everything which appears in the heatmap is also present in S3: heatmap writers
# are allowed to upload heatmaps that reference layers which are only enqueued for upload
wait_for_upload_queue_empty(ps_attached.http_client(), tenant_id, timeline_id)
ps_secondary.http_client().tenant_secondary_download(tenant_id)
assert list_layers(ps_attached, tenant_id, timeline_id) == list_layers(
@@ -491,11 +484,6 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
workload.churn_rows(128, ps_attached.id)
ps_attached.http_client().tenant_heatmap_upload(tenant_id)
# Ensure that everything which appears in the heatmap is also present in S3: heatmap writers
# are allowed to upload heatmaps that reference layers which are only enqueued for upload
wait_for_upload_queue_empty(ps_attached.http_client(), tenant_id, timeline_id)
ps_secondary.http_client().tenant_secondary_download(tenant_id)
assert list_layers(ps_attached, tenant_id, timeline_id) == list_layers(

Some files were not shown because too many files have changed in this diff.