Mirror of https://github.com/neondatabase/neon.git (synced 2026-05-16 12:40:36 +00:00)

Compare commits: ci-run/pr-... max_vacuum (1 commit)
| Author | SHA1 | Date |
|---|---|---|
| | 9fb17dba04 | |
@@ -115,7 +115,6 @@ runs:
      export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
      export DEFAULT_PG_VERSION=${PG_VERSION#v}
      export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
      export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}

      if [ "${BUILD_TYPE}" = "remote" ]; then
        export REMOTE_ENV=1
.github/workflows/benchmarking.yml (vendored): 72 lines changed
@@ -99,14 +99,7 @@ jobs:
           # Set --sparse-ordering option of pytest-order plugin
           # to ensure tests are running in order of appears in the file.
           # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
-          extra_params:
-            -m remote_cluster
-            --sparse-ordering
-            --timeout 5400
-            --ignore test_runner/performance/test_perf_olap.py
-            --ignore test_runner/performance/test_perf_pgvector_queries.py
-            --ignore test_runner/performance/test_logical_replication.py
-            --ignore test_runner/performance/test_physical_replication.py
+          extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py --ignore test_runner/performance/test_perf_pgvector_queries.py
           env:
             BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
             VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -132,69 +125,6 @@ jobs:
       env:
         SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

-  replication-tests:
-    env:
-      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 14
-      TEST_OUTPUT: /tmp/test_output
-      BUILD_TYPE: remote
-      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
-      PLATFORM: "neon-staging"
-
-    runs-on: [ self-hosted, us-east-2, x64 ]
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
-      options: --init
-
-    steps:
-    - uses: actions/checkout@v4
-
-    - name: Download Neon artifact
-      uses: ./.github/actions/download
-      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
-        path: /tmp/neon/
-        prefix: latest
-
-    - name: Run benchmark
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance/test_logical_replication.py
-        run_in_parallel: false
-        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
-        extra_params: -m remote_cluster --timeout 5400
-      env:
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
-
-    - name: Run benchmark
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance/test_physical_replication.py
-        run_in_parallel: false
-        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
-        extra_params: -m remote_cluster --timeout 5400
-      env:
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
-
-    - name: Create Allure report
-      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report-generate
-
-    - name: Post to a Slack channel
-      if: ${{ github.event.schedule && failure() }}
-      uses: slackapi/slack-github-action@v1
-      with:
-        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic replication testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-      env:
-        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

   generate-matrices:
     if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
     # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
.github/workflows/build_and_test.yml (vendored): 3 lines changed
@@ -1336,7 +1336,6 @@ jobs:
        env:
          BUCKET: neon-github-public-dev
          PREFIX: artifacts/latest
-         COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
          # Update compatibility snapshot for the release
          for pg_version in v14 v15 v16; do
@@ -1350,7 +1349,7 @@ jobs:

          # Update Neon artifact for the release (reuse already uploaded artifact)
          for build_type in debug release; do
-           OLD_PREFIX=artifacts/${COMMIT_SHA}/${GITHUB_RUN_ID}
+           OLD_PREFIX=artifacts/${GITHUB_RUN_ID}
            FILENAME=neon-${{ runner.os }}-${{ runner.arch }}-${build_type}-artifact.tar.zst

            S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[]?.Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
.github/workflows/periodic_pagebench.yml (vendored): 17 lines changed
@@ -43,10 +43,6 @@ jobs:
      AWS_DEFAULT_REGION : "eu-central-1"
      AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
    steps:
    # we don't need the neon source code because we run everything remotely
    # however we still need the local github actions to run the allure step below
    - uses: actions/checkout@v4
-
-   - name: Show my own (github runner) external IP address - usefull for IP allowlisting
-     run: curl https://ifconfig.me
-
@@ -94,12 +90,10 @@ jobs:
          set +x
          status=$(echo $response | jq -r '.status')
          echo "Test status: $status"
-         if [[ "$status" == "failure" ]]; then
-           echo "Test failed"
-           exit 1 # Fail the job step if status is failure
-         elif [[ "$status" == "success" || "$status" == "null" ]]; then
+         if [[ "$status" == "failure" || "$status" == "success" || "$status" == "null" ]]; then
            break
-         elif [[ "$status" == "too_many_runs" ]]; then
+         fi
+         if [[ "$status" == "too_many_runs" ]]; then
            echo "Too many runs already running"
            echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
            exit 1
@@ -109,7 +103,6 @@ jobs:
        done

    - name: Retrieve Test Logs
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
        curl -k -X 'GET' \
        "${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
@@ -118,15 +111,11 @@ jobs:
        --output "test_log_${GITHUB_RUN_ID}.gz"

    - name: Unzip Test Log and Print it into this job's log
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
        gzip -d "test_log_${GITHUB_RUN_ID}.gz"
        cat "test_log_${GITHUB_RUN_ID}"

    - name: Create Allure report
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
Cargo.lock (generated): 70 lines changed
@@ -1397,9 +1397,9 @@ dependencies = [

 [[package]]
 name = "crc32c"
-version = "0.6.8"
+version = "0.6.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47"
+checksum = "89254598aa9b9fa608de44b3ae54c810f0f06d755e24c50177f1f8f31ff50ce2"
 dependencies = [
  "rustc_version",
 ]

@@ -1651,16 +1651,6 @@ dependencies = [
  "rusticata-macros",
 ]

-[[package]]
-name = "deranged"
-version = "0.3.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
-dependencies = [
- "powerfmt",
- "serde",
-]
-
 [[package]]
 name = "desim"
 version = "0.1.0"

@@ -3018,9 +3008,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "measured"
-version = "0.0.22"
+version = "0.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3051f3a030d55d680cdef6ca50e80abd1182f8da29f2344a7c9cb575721138f0"
+checksum = "652bc741286361c06de8cb4d89b21a6437f120c508c51713663589eeb9928ac5"
 dependencies = [
  "bytes",
  "crossbeam-utils",

@@ -3036,9 +3026,9 @@ dependencies = [

 [[package]]
 name = "measured-derive"
-version = "0.0.22"
+version = "0.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e6777fc80a575f9503d908c8b498782a6c3ee88a06cb416dc3941401e43b94"
+checksum = "6ea497f33e1e856a376c32ad916f69a0bd3c597db1f912a399f842b01a4a685d"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",

@@ -3048,9 +3038,9 @@ dependencies = [

 [[package]]
 name = "measured-process"
-version = "0.0.22"
+version = "0.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c4b80445aeb08e832d87bf1830049a924cdc1d6b7ef40b6b9b365bff17bf8ec"
+checksum = "b364ccb66937a814b6b2ad751d1a2f7a9d5a78c761144036825fb36bb0771000"
 dependencies = [
  "libc",
  "measured",

@@ -3285,12 +3275,6 @@ dependencies = [
  "num-traits",
 ]

-[[package]]
-name = "num-conv"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
-
 [[package]]
 name = "num-integer"
 version = "0.1.45"

@@ -3683,7 +3667,6 @@ dependencies = [
  "sysinfo",
  "tenant_size_model",
  "thiserror",
- "tikv-jemallocator",
  "tokio",
  "tokio-epoll-uring",
  "tokio-io-timeout",

@@ -4094,7 +4077,6 @@ dependencies = [
  "tokio-postgres",
  "tokio-postgres-rustls",
  "tokio-rustls 0.25.0",
- "tokio-util",
  "tracing",
  "workspace_hack",
 ]

@@ -4135,12 +4117,6 @@ dependencies = [
  "workspace_hack",
 ]

-[[package]]
-name = "powerfmt"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
-
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"

@@ -5420,9 +5396,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"

 [[package]]
 name = "serde"
-version = "1.0.203"
+version = "1.0.183"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
+checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c"
 dependencies = [
  "serde_derive",
 ]

@@ -5439,9 +5415,9 @@ dependencies = [

 [[package]]
 name = "serde_derive"
-version = "1.0.203"
+version = "1.0.183"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
+checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816"
 dependencies = [
  "proc-macro2",
  "quote",

@@ -6131,15 +6107,12 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.36"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
+checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
 dependencies = [
- "deranged",
  "itoa",
  "js-sys",
- "num-conv",
- "powerfmt",
  "serde",
  "time-core",
  "time-macros",

@@ -6147,17 +6120,16 @@ dependencies = [

 [[package]]
 name = "time-core"
-version = "0.1.2"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
+checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"

 [[package]]
 name = "time-macros"
-version = "0.2.18"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
+checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b"
 dependencies = [
- "num-conv",
  "time-core",
 ]

@@ -7455,12 +7427,13 @@ dependencies = [
 "clap",
 "clap_builder",
 "crossbeam-utils",
 "deranged",
 "either",
 "fail",
 "futures-channel",
 "futures-core",
 "futures-executor",
 "futures-io",
 "futures-sink",
 "futures-util",
 "getrandom 0.2.11",
 "hashbrown 0.14.5",

@@ -7478,9 +7451,7 @@ dependencies = [
 "num-traits",
 "once_cell",
 "parquet",
 "proc-macro2",
 "prost",
 "quote",
 "rand 0.8.5",
 "regex",
 "regex-automata 0.4.3",

@@ -7497,7 +7468,6 @@ dependencies = [
 "syn 1.0.109",
 "syn 2.0.52",
 "sync_wrapper",
-"tikv-jemalloc-sys",
 "time",
 "time-macros",
 "tokio",
@@ -111,8 +111,8 @@ lasso = "0.7"
 leaky-bucket = "1.0.1"
 libc = "0.2"
 md5 = "0.7.0"
-measured = { version = "0.0.22", features=["lasso"] }
-measured-process = { version = "0.0.22" }
+measured = { version = "0.0.21", features=["lasso"] }
+measured-process = { version = "0.0.21" }
 memoffset = "0.8"
 nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
@@ -798,11 +798,7 @@ impl ComputeNode {
         // In this case we need to connect with old `zenith_admin` name
         // and create new user. We cannot simply rename connected user,
         // but we can create a new one and grant it all privileges.
-        let mut connstr = self.connstr.clone();
-        connstr
-            .query_pairs_mut()
-            .append_pair("application_name", "apply_config");
-
+        let connstr = self.connstr.clone();
         let mut client = match Client::connect(connstr.as_str(), NoTls) {
             Err(e) => match e.code() {
                 Some(&SqlState::INVALID_PASSWORD)

@@ -871,11 +867,6 @@ impl ComputeNode {

         // Run migrations separately to not hold up cold starts
         thread::spawn(move || {
-            let mut connstr = connstr.clone();
-            connstr
-                .query_pairs_mut()
-                .append_pair("application_name", "migrations");
-
             let mut client = Client::connect(connstr.as_str(), NoTls)?;
             handle_migrations(&mut client).context("apply_config handle_migrations")
         });

@@ -1395,9 +1386,7 @@ pub fn forward_termination_signal() {
     let pg_pid = PG_PID.load(Ordering::SeqCst);
     if pg_pid != 0 {
         let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
-        // Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for
-        // ROs to get a list of running xacts faster instead of going through the CLOG.
-        // See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
-        kill(pg_pid, Signal::SIGINT).ok();
+        // use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
+        kill(pg_pid, Signal::SIGQUIT).ok();
     }
 }
@@ -11,7 +11,6 @@ pub mod logger;
 pub mod catalog;
 pub mod compute;
 pub mod extension_server;
-mod migration;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
@@ -1,100 +0,0 @@
use anyhow::{Context, Result};
use postgres::Client;
use tracing::info;

pub(crate) struct MigrationRunner<'m> {
    client: &'m mut Client,
    migrations: &'m [&'m str],
}

impl<'m> MigrationRunner<'m> {
    pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
        Self { client, migrations }
    }

    fn get_migration_id(&mut self) -> Result<i64> {
        let query = "SELECT id FROM neon_migration.migration_id";
        let row = self
            .client
            .query_one(query, &[])
            .context("run_migrations get migration_id")?;

        Ok(row.get::<&str, i64>("id"))
    }

    fn update_migration_id(&mut self) -> Result<()> {
        let setval = format!(
            "UPDATE neon_migration.migration_id SET id={}",
            self.migrations.len()
        );

        self.client
            .simple_query(&setval)
            .context("run_migrations update id")?;

        Ok(())
    }

    fn prepare_migrations(&mut self) -> Result<()> {
        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
        self.client.simple_query(query)?;

        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
        self.client.simple_query(query)?;

        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
        self.client.simple_query(query)?;

        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
        self.client.simple_query(query)?;

        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
        self.client.simple_query(query)?;

        Ok(())
    }

    pub fn run_migrations(mut self) -> Result<()> {
        self.prepare_migrations()?;

        let mut current_migration: usize = self.get_migration_id()? as usize;
        let starting_migration_id = current_migration;

        let query = "BEGIN";
        self.client
            .simple_query(query)
            .context("run_migrations begin")?;

        while current_migration < self.migrations.len() {
            let migration = self.migrations[current_migration];

            if migration.starts_with("-- SKIP") {
                info!("Skipping migration id={}", current_migration);
            } else {
                info!(
                    "Running migration id={}:\n{}\n",
                    current_migration, migration
                );
                self.client.simple_query(migration).with_context(|| {
                    format!("run_migration current_migration={}", current_migration)
                })?;
            }

            current_migration += 1;
        }

        self.update_migration_id()?;

        let query = "COMMIT";
        self.client
            .simple_query(query)
            .context("run_migrations commit")?;

        info!(
            "Ran {} migrations",
            (self.migrations.len() - starting_migration_id)
        );

        Ok(())
    }
}
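For context on the deleted runner above, a minimal usage sketch; the connection string and migration texts are illustrative placeholders, not values from this diff:

use anyhow::Result;
use postgres::{Client, NoTls};

// Within compute_tools this would be `use crate::migration::MigrationRunner;`.
fn apply_all_migrations() -> Result<()> {
    // Hypothetical connstr; the real one comes from the compute spec.
    let mut client = Client::connect("host=localhost user=cloud_admin", NoTls)?;

    // Migrations are ordered SQL strings; one whose text starts with
    // "-- SKIP" is recorded as applied but not executed.
    let migrations = [
        "CREATE TABLE IF NOT EXISTS demo (id bigint PRIMARY KEY)",
        "-- SKIP: example of a retired migration",
    ];

    // Applies every migration whose index is >= neon_migration.migration_id,
    // inside a single BEGIN/COMMIT, then records the new id.
    MigrationRunner::new(&mut client, &migrations).run_migrations()
}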
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};

 use crate::config;
 use crate::logger::inlinify;
-use crate::migration::MigrationRunner;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;

@@ -792,7 +791,69 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
         include_str!("./migrations/0008-revoke_replication_for_previously_allowed_roles.sql"),
     ];

-    MigrationRunner::new(client, &migrations).run_migrations()?;
+    let mut func = || {
+        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
+        client.simple_query(query)?;
+
+        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
+        client.simple_query(query)?;
+
+        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
+        client.simple_query(query)?;
+
+        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
+        client.simple_query(query)?;
+
+        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
+        client.simple_query(query)?;
+        Ok::<_, anyhow::Error>(())
+    };
+    func().context("handle_migrations prepare")?;
+
+    let query = "SELECT id FROM neon_migration.migration_id";
+    let row = client
+        .query_one(query, &[])
+        .context("handle_migrations get migration_id")?;
+    let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
+    let starting_migration_id = current_migration;
+
+    let query = "BEGIN";
+    client
+        .simple_query(query)
+        .context("handle_migrations begin")?;
+
+    while current_migration < migrations.len() {
+        let migration = &migrations[current_migration];
+        if migration.starts_with("-- SKIP") {
+            info!("Skipping migration id={}", current_migration);
+        } else {
+            info!(
+                "Running migration id={}:\n{}\n",
+                current_migration, migration
+            );
+            client.simple_query(migration).with_context(|| {
+                format!("handle_migrations current_migration={}", current_migration)
+            })?;
+        }
+        current_migration += 1;
+    }
+    let setval = format!(
+        "UPDATE neon_migration.migration_id SET id={}",
+        migrations.len()
+    );
+    client
+        .simple_query(&setval)
+        .context("handle_migrations update id")?;
+
+    let query = "COMMIT";
+    client
+        .simple_query(query)
+        .context("handle_migrations commit")?;
+
+    info!(
+        "Ran {} migrations",
+        (migrations.len() - starting_migration_id)
+    );

     Ok(())
 }
@@ -13,7 +13,11 @@ use std::{

 use measured::{
     label::{LabelGroupVisitor, LabelName, LabelValue, LabelVisitor},
-    metric::{counter::CounterState, name::MetricNameEncoder, Metric, MetricType, MetricVec},
+    metric::{
+        group::{Encoding, MetricValue},
+        name::MetricNameEncoder,
+        Metric, MetricType, MetricVec,
+    },
     text::TextEncoder,
     LabelGroup,
 };

@@ -140,7 +144,6 @@ impl<const N: usize> HyperLogLogState<N> {
         })
     }
 }

 impl<W: std::io::Write, const N: usize> measured::metric::MetricEncoding<TextEncoder<W>>
     for HyperLogLogState<N>
 {

@@ -179,13 +182,12 @@ impl<W: std::io::Write, const N: usize> measured::metric::MetricEncoding<TextEnc
             .into_iter()
             .enumerate()
             .try_for_each(|(hll_shard, val)| {
-                CounterState::new(val as u64).collect_into(
-                    &(),
+                enc.write_metric_value(
+                    name.by_ref(),
                     labels.by_ref().compose_with(HllShardLabel {
                         hll_shard: hll_shard as i64,
                     }),
-                    name.by_ref(),
-                    enc,
+                    MetricValue::Int(val as i64),
                 )
             })
@@ -9,7 +9,7 @@ use measured::{
     metric::{
         counter::CounterState,
         gauge::GaugeState,
-        group::Encoding,
+        group::{Encoding, MetricValue},
         name::{MetricName, MetricNameEncoder},
         MetricEncoding, MetricFamilyEncoding,
     },

@@ -171,11 +171,8 @@ fn write_gauge<Enc: Encoding>(
     labels: impl LabelGroup,
     name: impl MetricNameEncoder,
     enc: &mut Enc,
-) -> Result<(), Enc::Err>
-where
-    GaugeState: MetricEncoding<Enc>,
-{
-    GaugeState::new(x).collect_into(&(), labels, name, enc)
+) -> Result<(), Enc::Err> {
+    enc.write_metric_value(name, labels, MetricValue::Int(x))
 }

 #[derive(Default)]

@@ -547,6 +544,15 @@ impl<T: Encoding> Encoding for Inc<T> {
     fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {
         self.0.write_help(name, help)
     }
+
+    fn write_metric_value(
+        &mut self,
+        name: impl MetricNameEncoder,
+        labels: impl LabelGroup,
+        value: MetricValue,
+    ) -> Result<(), Self::Err> {
+        self.0.write_metric_value(name, labels, value)
+    }
 }

 impl<T: Encoding> MetricEncoding<Inc<T>> for MeasuredCounterPairState

@@ -573,6 +579,15 @@ impl<T: Encoding> Encoding for Dec<T> {
     fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {
         self.0.write_help(name, help)
     }
+
+    fn write_metric_value(
+        &mut self,
+        name: impl MetricNameEncoder,
+        labels: impl LabelGroup,
+        value: MetricValue,
+    ) -> Result<(), Self::Err> {
+        self.0.write_metric_value(name, labels, value)
+    }
 }

 /// Write the dec counter to the encoder
@@ -9,7 +9,6 @@ use std::{
     collections::HashMap,
     io::{BufRead, Read},
     num::{NonZeroU64, NonZeroUsize},
-    str::FromStr,
     sync::atomic::AtomicUsize,
     time::{Duration, SystemTime},
 };

@@ -438,7 +437,18 @@ pub enum CompactionAlgorithm {
     Tiered,
 }

-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Serialize,
+    Deserialize,
+    strum_macros::FromRepr,
+    strum_macros::EnumString,
+)]
+#[strum(serialize_all = "kebab-case")]
 pub enum ImageCompressionAlgorithm {
     /// Disabled for writes, and never decompress during reading.
     /// Never set this after you've enabled compression once!

@@ -458,31 +468,6 @@ impl ImageCompressionAlgorithm {
     }
 }

-impl FromStr for ImageCompressionAlgorithm {
-    type Err = anyhow::Error;
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let mut components = s.split(['(', ')']);
-        let first = components
-            .next()
-            .ok_or_else(|| anyhow::anyhow!("empty string"))?;
-        match first {
-            "disabled-no-decompress" => Ok(ImageCompressionAlgorithm::DisabledNoDecompress),
-            "disabled" => Ok(ImageCompressionAlgorithm::Disabled),
-            "zstd" => {
-                let level = if let Some(v) = components.next() {
-                    let v: i8 = v.parse()?;
-                    Some(v)
-                } else {
-                    None
-                };
-
-                Ok(ImageCompressionAlgorithm::Zstd { level })
-            }
-            _ => anyhow::bail!("invalid specifier '{first}'"),
-        }
-    }
-}
-
 #[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
 pub struct CompactionAlgorithmSettings {
     pub kind: CompactionAlgorithm,

@@ -1675,29 +1660,4 @@ mod tests {
         AuxFilePolicy::CrossValidation
     );
 }

-    #[test]
-    fn test_image_compression_algorithm_parsing() {
-        use ImageCompressionAlgorithm::*;
-        assert_eq!(
-            ImageCompressionAlgorithm::from_str("disabled").unwrap(),
-            Disabled
-        );
-        assert_eq!(
-            ImageCompressionAlgorithm::from_str("disabled-no-decompress").unwrap(),
-            DisabledNoDecompress
-        );
-        assert_eq!(
-            ImageCompressionAlgorithm::from_str("zstd").unwrap(),
-            Zstd { level: None }
-        );
-        assert_eq!(
-            ImageCompressionAlgorithm::from_str("zstd(18)").unwrap(),
-            Zstd { level: Some(18) }
-        );
-        assert_eq!(
-            ImageCompressionAlgorithm::from_str("zstd(-3)").unwrap(),
-            Zstd { level: Some(-3) }
-        );
-    }
 }
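For orientation, a small sketch of what the removed hand-written parser accepted, grounded in the impl and tests shown above; after this change, parsing instead goes through the derived strum `EnumString`, which matches kebab-case variant names:

use std::str::FromStr;

fn parse_examples() -> anyhow::Result<()> {
    // The removed impl split the input on '(' and ')', so a zstd level
    // could be supplied inline as "zstd(<level>)".
    assert_eq!(
        ImageCompressionAlgorithm::from_str("zstd(18)")?,
        ImageCompressionAlgorithm::Zstd { level: Some(18) }
    );
    // Plain specifiers map directly to the fieldless variants.
    assert_eq!(
        ImageCompressionAlgorithm::from_str("disabled")?,
        ImageCompressionAlgorithm::Disabled
    );
    Ok(())
}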
@@ -1,42 +1,59 @@
-//! See docs/rfcs/031-sharding-static.md for an overview of sharding.
-//!
-//! This module contains a variety of types used to represent the concept of sharding
-//! a Neon tenant across multiple physical shards. Since there are quite a few of these,
-//! we provide an summary here.
-//!
-//! Types used to describe shards:
-//! - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value
-//!   which identifies a tenant which is not shard-aware. This means its storage paths do not include
-//!   a shard suffix.
-//! - [`ShardNumber`] is simply the zero-based index of a shard within a tenant.
-//! - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId`
-//!   without the tenant ID. This is useful for things that are implicitly scoped to a particular
-//!   tenant, such as layer files.
-//! - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient
-//!   detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read.
-//! - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as
-//!   four hex digits. An unsharded tenant is `0000`.
-//! - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant
-//!
-//! Types used to describe the parameters for data distribution in a sharded tenant:
-//! - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across
-//!   multiple shards. Its value is given in 8kiB pages.
-//! - [`ShardLayout`] describes the data distribution scheme, and at time of writing is
-//!   always zero: this is provided for future upgrades that might introduce different
-//!   data distribution schemes.
-//!
-//! Examples:
-//! - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000
-//! - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001
-//! - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),
-//!   and their slugs are 0004, 0104, 0204, and 0304.
 use std::{ops::RangeInclusive, str::FromStr};

 use crate::{key::Key, models::ShardParameters};
 use hex::FromHex;
 use postgres_ffi::relfile_utils::INIT_FORKNUM;
 use serde::{Deserialize, Serialize};
 use utils::id::TenantId;

-#[doc(inline)]
-pub use ::utils::shard::*;
+/// See docs/rfcs/031-sharding-static.md for an overview of sharding.
+///
+/// This module contains a variety of types used to represent the concept of sharding
+/// a Neon tenant across multiple physical shards. Since there are quite a few of these,
+/// we provide an summary here.
+///
+/// Types used to describe shards:
+/// - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value
+///   which identifies a tenant which is not shard-aware. This means its storage paths do not include
+///   a shard suffix.
+/// - [`ShardNumber`] is simply the zero-based index of a shard within a tenant.
+/// - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId`
+///   without the tenant ID. This is useful for things that are implicitly scoped to a particular
+///   tenant, such as layer files.
+/// - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient
+///   detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read.
+/// - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as
+///   four hex digits. An unsharded tenant is `0000`.
+/// - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant
+///
+/// Types used to describe the parameters for data distribution in a sharded tenant:
+/// - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across
+///   multiple shards. Its value is given in 8kiB pages.
+/// - [`ShardLayout`] describes the data distribution scheme, and at time of writing is
+///   always zero: this is provided for future upgrades that might introduce different
+///   data distribution schemes.
+///
+/// Examples:
+/// - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000
+/// - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001
+/// - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),
+///   and their slugs are 0004, 0104, 0204, and 0304.
+#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
+pub struct ShardNumber(pub u8);
+
+#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
+pub struct ShardCount(u8);
+
+/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant,
+/// when we need to know which shard we're dealing with, but do not need to know the full
+/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
+/// the fully qualified TenantShardId.
+#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub struct ShardIndex {
+    pub shard_number: ShardNumber,
+    pub shard_count: ShardCount,
+}
+
+/// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],
+/// and to check whether that [`ShardNumber`] is the same as the current shard.
@@ -48,6 +65,362 @@ pub struct ShardIdentity {
    layout: ShardLayout,
}

/// Formatting helper, for generating the `shard_id` label in traces.
struct ShardSlug<'a>(&'a TenantShardId);

/// TenantShardId globally identifies a particular shard in a particular tenant.
///
/// These are written as `<TenantId>-<ShardSlug>`, for example:
///   # The second shard in a two-shard tenant
///   072f1291a5310026820b2fe4b2968934-0102
///
/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
/// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`].
///
/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
/// is both forward and backward compatible with TenantId: a legacy TenantId can be
/// decoded as a TenantShardId, and when re-encoded it will be parseable
/// as a TenantId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TenantShardId {
    pub tenant_id: TenantId,
    pub shard_number: ShardNumber,
    pub shard_count: ShardCount,
}

impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);

    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
    /// legacy format for TenantShardId that excludes the shard suffix", also known
    /// as [`TenantShardId::unsharded`].
    ///
    /// This method returns the actual number of shards, i.e. if our internal value is
    /// zero, we return 1 (unsharded tenants have 1 shard).
    pub fn count(&self) -> u8 {
        if self.0 > 0 {
            self.0
        } else {
            1
        }
    }

    /// The literal internal value: this is **not** the number of shards in the
    /// tenant, as we have a special zero value for legacy unsharded tenants. Use
    /// [`Self::count`] if you want to know the cardinality of shards.
    pub fn literal(&self) -> u8 {
        self.0
    }

    /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
    /// uses the legacy format for `TenantShardId`. See also the documentation for
    /// [`Self::count`].
    pub fn is_unsharded(&self) -> bool {
        self.0 == 0
    }

    /// `v` may be zero, or the number of shards in the tenant. `v` is what
    /// [`Self::literal`] would return.
    pub const fn new(val: u8) -> Self {
        Self(val)
    }
}

impl ShardNumber {
    pub const MAX: Self = Self(u8::MAX);
}
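Since the zero-versus-one distinction above is easy to trip over, a brief sketch of the accessor behavior exactly as defined in this file:

fn shard_count_semantics() {
    // Legacy unsharded tenant: literal value 0, but one physical shard.
    let unsharded = ShardCount::new(0);
    assert!(unsharded.is_unsharded());
    assert_eq!(unsharded.count(), 1);
    assert_eq!(unsharded.literal(), 0);

    // A sharded tenant reports its real cardinality both ways.
    let sharded = ShardCount::new(4);
    assert!(!sharded.is_unsharded());
    assert_eq!(sharded.count(), 4);
    assert_eq!(sharded.literal(), 4);
}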
impl TenantShardId {
    pub fn unsharded(tenant_id: TenantId) -> Self {
        Self {
            tenant_id,
            shard_number: ShardNumber(0),
            shard_count: ShardCount(0),
        }
    }

    /// The range of all TenantShardId that belong to a particular TenantId. This is useful when
    /// you have a BTreeMap of TenantShardId, and are querying by TenantId.
    pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {
        RangeInclusive::new(
            Self {
                tenant_id,
                shard_number: ShardNumber(0),
                shard_count: ShardCount(0),
            },
            Self {
                tenant_id,
                shard_number: ShardNumber::MAX,
                shard_count: ShardCount::MAX,
            },
        )
    }

    pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
        ShardSlug(self)
    }

    /// Convenience for code that has special behavior on the 0th shard.
    pub fn is_shard_zero(&self) -> bool {
        self.shard_number == ShardNumber(0)
    }

    /// The "unsharded" value is distinct from simply having a single shard: it represents
    /// a tenant which is not shard-aware at all, and whose storage paths will not include
    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
    }

    /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
    /// is useful when logging from code that is already in a span that includes tenant ID, to
    /// keep messages reasonably terse.
    pub fn to_index(&self) -> ShardIndex {
        ShardIndex {
            shard_number: self.shard_number,
            shard_count: self.shard_count,
        }
    }

    /// Calculate the children of this TenantShardId when splitting the overall tenant into
    /// the given number of shards.
    pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {
        let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);
        let mut child_shards = Vec::new();
        for shard_number in 0..ShardNumber(new_shard_count.0).0 {
            // Key mapping is based on a round robin mapping of key hash modulo shard count,
            // so our child shards are the ones which the same keys would map to.
            if shard_number % effective_old_shard_count == self.shard_number.0 {
                child_shards.push(TenantShardId {
                    tenant_id: self.tenant_id,
                    shard_number: ShardNumber(shard_number),
                    shard_count: new_shard_count,
                })
            }
        }

        child_shards
    }
}
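A worked example of that round-robin rule; a sketch using the types above, with the illustrative tenant ID from the doc comments:

use std::str::FromStr;
use utils::id::TenantId;

fn split_example() {
    // Shard 1 of 2 (slug 0102) being split into 4 shards: keys are assigned
    // by hash modulo shard count, so its children are the new shards whose
    // number % 2 == 1, i.e. shards 1 and 3 (slugs 0104 and 0304).
    let tenant_id = TenantId::from_str("072f1291a5310026820b2fe4b2968934").unwrap();
    let parent = TenantShardId {
        tenant_id,
        shard_number: ShardNumber(1),
        shard_count: ShardCount::new(2),
    };
    let children = parent.split(ShardCount::new(4));
    let numbers: Vec<u8> = children.iter().map(|c| c.shard_number.0).collect();
    assert_eq!(numbers, vec![1, 3]);
}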
impl<'a> std::fmt::Display for ShardSlug<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{:02x}{:02x}",
            self.0.shard_number.0, self.0.shard_count.0
        )
    }
}

impl std::fmt::Display for TenantShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.shard_count != ShardCount(0) {
            write!(f, "{}-{}", self.tenant_id, self.shard_slug())
        } else {
            // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this
            // is distinct from the normal single shard case (shard count == 1).
            self.tenant_id.fmt(f)
        }
    }
}

impl std::fmt::Debug for TenantShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Debug is the same as Display: the compact hex representation
        write!(f, "{}", self)
    }
}

impl std::str::FromStr for TenantShardId {
    type Err = hex::FromHexError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count
        if s.len() == 32 {
            // Legacy case: no shard specified
            Ok(Self {
                tenant_id: TenantId::from_str(s)?,
                shard_number: ShardNumber(0),
                shard_count: ShardCount(0),
            })
        } else if s.len() == 37 {
            let bytes = s.as_bytes();
            let tenant_id = TenantId::from_hex(&bytes[0..32])?;
            let mut shard_parts: [u8; 2] = [0u8; 2];
            hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;
            Ok(Self {
                tenant_id,
                shard_number: ShardNumber(shard_parts[0]),
                shard_count: ShardCount(shard_parts[1]),
            })
        } else {
            Err(hex::FromHexError::InvalidStringLength)
        }
    }
}

impl From<[u8; 18]> for TenantShardId {
    fn from(b: [u8; 18]) -> Self {
        let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();

        Self {
            tenant_id: TenantId::from(tenant_id_bytes),
            shard_number: ShardNumber(b[16]),
            shard_count: ShardCount(b[17]),
        }
    }
}
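A quick sketch of the two encodings accepted by the `FromStr` impl above: 32 hex characters for a legacy ID, 37 with the shard suffix:

use std::str::FromStr;

fn parse_tenant_shard_ids() {
    // Legacy form: a bare TenantId decodes as the unsharded shard.
    let legacy = TenantShardId::from_str("072f1291a5310026820b2fe4b2968934").unwrap();
    assert!(legacy.is_unsharded());

    // Sharded form: `<tenant>-<number><count>`, two hex digits each.
    let sharded = TenantShardId::from_str("072f1291a5310026820b2fe4b2968934-0102").unwrap();
    assert_eq!(sharded.shard_number, ShardNumber(1));
    assert_eq!(sharded.shard_count.literal(), 2);
    // Display round-trips the same string.
    assert_eq!(sharded.to_string(), "072f1291a5310026820b2fe4b2968934-0102");
}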
impl ShardIndex {
    pub fn new(number: ShardNumber, count: ShardCount) -> Self {
        Self {
            shard_number: number,
            shard_count: count,
        }
    }
    pub fn unsharded() -> Self {
        Self {
            shard_number: ShardNumber(0),
            shard_count: ShardCount(0),
        }
    }

    /// The "unsharded" value is distinct from simply having a single shard: it represents
    /// a tenant which is not shard-aware at all, and whose storage paths will not include
    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
    }

    /// For use in constructing remote storage paths: concatenate this with a TenantId
    /// to get a fully qualified TenantShardId.
    ///
    /// Backward compat: this function returns an empty string if Self::is_unsharded, such
    /// that the legacy pre-sharding remote key format is preserved.
    pub fn get_suffix(&self) -> String {
        if self.is_unsharded() {
            "".to_string()
        } else {
            format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
        }
    }
}

impl std::fmt::Display for ShardIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
    }
}

impl std::fmt::Debug for ShardIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Debug is the same as Display: the compact hex representation
        write!(f, "{}", self)
    }
}
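And a sketch of how `ShardIndex` renders in paths and logs, per `get_suffix` and the `Display` impl above:

fn shard_index_rendering() {
    // Unsharded: empty suffix, preserving the legacy remote key layout,
    // but the display form is still the four-hex-digit slug.
    assert_eq!(ShardIndex::unsharded().get_suffix(), "");
    assert_eq!(ShardIndex::unsharded().to_string(), "0000");

    // Shard 1 of 2: path suffix "-0102", display form "0102".
    let idx = ShardIndex::new(ShardNumber(1), ShardCount::new(2));
    assert_eq!(idx.get_suffix(), "-0102");
    assert_eq!(idx.to_string(), "0102");
}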
impl std::str::FromStr for ShardIndex {
    type Err = hex::FromHexError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Expect format: 1 byte shard number, 1 byte shard count
        if s.len() == 4 {
            let bytes = s.as_bytes();
            let mut shard_parts: [u8; 2] = [0u8; 2];
            hex::decode_to_slice(bytes, &mut shard_parts)?;
            Ok(Self {
                shard_number: ShardNumber(shard_parts[0]),
                shard_count: ShardCount(shard_parts[1]),
            })
        } else {
            Err(hex::FromHexError::InvalidStringLength)
        }
    }
}

impl From<[u8; 2]> for ShardIndex {
    fn from(b: [u8; 2]) -> Self {
        Self {
            shard_number: ShardNumber(b[0]),
            shard_count: ShardCount(b[1]),
        }
    }
}

impl Serialize for TenantShardId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(self)
        } else {
            // Note: while human encoding of [`TenantShardId`] is backward and forward
            // compatible, this binary encoding is not.
            let mut packed: [u8; 18] = [0; 18];
            packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
            packed[16] = self.shard_number.0;
            packed[17] = self.shard_count.0;

            packed.serialize(serializer)
        }
    }
}

impl<'de> Deserialize<'de> for TenantShardId {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct IdVisitor {
            is_human_readable_deserializer: bool,
        }

        impl<'de> serde::de::Visitor<'de> for IdVisitor {
            type Value = TenantShardId;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                if self.is_human_readable_deserializer {
                    formatter.write_str("value in form of hex string")
                } else {
                    formatter.write_str("value in form of integer array([u8; 18])")
                }
            }

            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                let s = serde::de::value::SeqAccessDeserializer::new(seq);
                let id: [u8; 18] = Deserialize::deserialize(s)?;
                Ok(TenantShardId::from(id))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                TenantShardId::from_str(v).map_err(E::custom)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(IdVisitor {
                is_human_readable_deserializer: true,
            })
        } else {
            deserializer.deserialize_tuple(
                18,
                IdVisitor {
                    is_human_readable_deserializer: false,
                },
            )
        }
    }
}

/// Stripe size in number of pages
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardStripeSize(pub u32);
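A sketch of the dual serde encoding defined above, assuming `serde_json` and `bincode` as representative human-readable and binary serializers (neither crate is named in this diff):

fn serde_forms() -> anyhow::Result<()> {
    let id: TenantShardId = "072f1291a5310026820b2fe4b2968934-0102".parse()?;

    // Human-readable serializers receive the hex string form via collect_str.
    assert_eq!(
        serde_json::to_string(&id)?,
        "\"072f1291a5310026820b2fe4b2968934-0102\""
    );

    // Compact binary serializers receive the packed 18-byte tuple instead.
    let packed = bincode::serialize(&id)?;
    assert_eq!(packed.len(), 18);
    Ok(())
}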
@@ -212,6 +585,77 @@ impl ShardIdentity {
    }
}

impl Serialize for ShardIndex {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(self)
        } else {
            // Binary encoding is not used in index_part.json, but is included in anticipation of
            // switching various structures (e.g. inter-process communication, remote metadata) to more
            // compact binary encodings in future.
            let mut packed: [u8; 2] = [0; 2];
            packed[0] = self.shard_number.0;
            packed[1] = self.shard_count.0;
            packed.serialize(serializer)
        }
    }
}

impl<'de> Deserialize<'de> for ShardIndex {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct IdVisitor {
            is_human_readable_deserializer: bool,
        }

        impl<'de> serde::de::Visitor<'de> for IdVisitor {
            type Value = ShardIndex;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                if self.is_human_readable_deserializer {
                    formatter.write_str("value in form of hex string")
                } else {
                    formatter.write_str("value in form of integer array([u8; 2])")
                }
            }

            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                let s = serde::de::value::SeqAccessDeserializer::new(seq);
                let id: [u8; 2] = Deserialize::deserialize(s)?;
                Ok(ShardIndex::from(id))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                ShardIndex::from_str(v).map_err(E::custom)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(IdVisitor {
                is_human_readable_deserializer: true,
            })
        } else {
            deserializer.deserialize_tuple(
                2,
                IdVisitor {
                    is_human_readable_deserializer: false,
                },
            )
        }
    }
}

/// Whether this key is always held on shard 0 (e.g. shard 0 holds all SLRU keys
/// in order to be able to serve basebackup requests without peer communication).
fn key_is_shard0(key: &Key) -> bool {

@@ -293,9 +737,7 @@ pub fn describe(

 #[cfg(test)]
 mod tests {
     use std::str::FromStr;

-    use utils::{id::TenantId, Hex};
+    use utils::Hex;

     use super::*;
@@ -13,7 +13,6 @@ rustls.workspace = true
 serde.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
-tokio-util.workspace = true
 tokio-rustls.workspace = true
 tracing.workspace = true

@@ -24,4 +23,4 @@ workspace_hack.workspace = true
 once_cell.workspace = true
 rustls-pemfile.workspace = true
 tokio-postgres.workspace = true
-tokio-postgres-rustls.workspace = true
+tokio-postgres-rustls.workspace = true
@@ -16,7 +16,6 @@ use std::{fmt, io};
 use std::{future::Future, str::FromStr};
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_rustls::TlsAcceptor;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, trace, warn};

 use pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter};

@@ -401,15 +400,21 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
     }

     /// Wrapper for run_message_loop() that shuts down socket when we are done
-    pub async fn run(
+    pub async fn run<F, S>(
         mut self,
         handler: &mut impl Handler<IO>,
-        cancel: &CancellationToken,
-    ) -> Result<(), QueryError> {
-        let ret = self.run_message_loop(handler, cancel).await;
+        shutdown_watcher: F,
+    ) -> Result<(), QueryError>
+    where
+        F: Fn() -> S + Clone,
+        S: Future,
+    {
+        let ret = self
+            .run_message_loop(handler, shutdown_watcher.clone())
+            .await;

         tokio::select! {
-            _ = cancel.cancelled() => {
+            _ = shutdown_watcher() => {
                 // do nothing; we most likely got already stopped by shutdown and will log it next.
             }
             _ = self.framed.shutdown() => {

@@ -439,17 +444,21 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
         }
     }

-    async fn run_message_loop(
+    async fn run_message_loop<F, S>(
         &mut self,
         handler: &mut impl Handler<IO>,
-        cancel: &CancellationToken,
-    ) -> Result<(), QueryError> {
+        shutdown_watcher: F,
+    ) -> Result<(), QueryError>
+    where
+        F: Fn() -> S,
+        S: Future,
+    {
         trace!("postgres backend to {:?} started", self.peer_addr);

         tokio::select!(
             biased;

-            _ = cancel.cancelled() => {
+            _ = shutdown_watcher() => {
                 // We were requested to shut down.
                 tracing::info!("shutdown request received during handshake");
                 return Err(QueryError::Shutdown)

@@ -464,7 +473,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
         let mut query_string = Bytes::new();
         while let Some(msg) = tokio::select!(
             biased;
-            _ = cancel.cancelled() => {
+            _ = shutdown_watcher() => {
                 // We were requested to shut down.
                 tracing::info!("shutdown request received in run_message_loop");
                 return Err(QueryError::Shutdown)

@@ -476,7 +485,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
         let result = self.process_message(handler, msg, &mut query_string).await;
         tokio::select!(
             biased;
-            _ = cancel.cancelled() => {
+            _ = shutdown_watcher() => {
                 // We were requested to shut down.
                 tracing::info!("shutdown request received during response flush");
@@ -3,14 +3,13 @@ use once_cell::sync::Lazy;
 use postgres_backend::{AuthType, Handler, PostgresBackend, QueryError};
 use pq_proto::{BeMessage, RowDescriptor};
 use std::io::Cursor;
-use std::sync::Arc;
+use std::{future, sync::Arc};
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio::net::{TcpListener, TcpStream};
 use tokio_postgres::config::SslMode;
 use tokio_postgres::tls::MakeTlsConnect;
 use tokio_postgres::{Config, NoTls, SimpleQueryMessage};
 use tokio_postgres_rustls::MakeRustlsConnect;
-use tokio_util::sync::CancellationToken;

 // generate client, server test streams
 async fn make_tcp_pair() -> (TcpStream, TcpStream) {

@@ -51,7 +50,7 @@ async fn simple_select() {

     tokio::spawn(async move {
         let mut handler = TestHandler {};
-        pgbackend.run(&mut handler, &CancellationToken::new()).await
+        pgbackend.run(&mut handler, future::pending::<()>).await
     });

     let conf = Config::new();

@@ -103,7 +102,7 @@ async fn simple_select_ssl() {

     tokio::spawn(async move {
         let mut handler = TestHandler {};
-        pgbackend.run(&mut handler, &CancellationToken::new()).await
+        pgbackend.run(&mut handler, future::pending::<()>).await
     });

     let client_cfg = rustls::ClientConfig::builder()
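For callers elsewhere that still hold a `CancellationToken`, a bridging sketch adapting it to the restored `shutdown_watcher` parameter; the `Fn() -> S + Clone` bound exists because `run` re-invokes the watcher at several select points (types as in this diff; `tokio_util` is assumed available in the caller's crate, since this change drops it from `postgres_backend` itself):

use postgres_backend::{Handler, PostgresBackend, QueryError};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::sync::CancellationToken;

async fn run_with_token<IO>(
    pgbackend: PostgresBackend<IO>,
    handler: &mut impl Handler<IO>,
    token: CancellationToken,
) -> Result<(), QueryError>
where
    IO: AsyncRead + AsyncWrite + Unpin,
{
    // Each invocation returns a fresh future that resolves on cancellation.
    let watcher = move || {
        let token = token.clone();
        async move { token.cancelled().await }
    };
    pgbackend.run(handler, watcher).await
}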
@@ -26,8 +26,6 @@ pub mod auth;
 // utility functions and helper traits for unified unique id generation/serialization etc.
 pub mod id;

-pub mod shard;
-
 mod hex;
 pub use hex::Hex;
@@ -1,451 +0,0 @@
//! See `pageserver_api::shard` for description on sharding.

use std::{ops::RangeInclusive, str::FromStr};

use hex::FromHex;
use serde::{Deserialize, Serialize};

use crate::id::TenantId;

#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardNumber(pub u8);

#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardCount(pub u8);

/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant,
/// when we need to know which shard we're dealing with, but do not need to know the full
/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
/// the fully qualified TenantShardId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct ShardIndex {
    pub shard_number: ShardNumber,
    pub shard_count: ShardCount,
}

/// Formatting helper, for generating the `shard_id` label in traces.
pub struct ShardSlug<'a>(&'a TenantShardId);

/// TenantShardId globally identifies a particular shard in a particular tenant.
///
/// These are written as `<TenantId>-<ShardSlug>`, for example:
///   # The second shard in a two-shard tenant
///   072f1291a5310026820b2fe4b2968934-0102
///
/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
/// an unsharded [`TenantShardId`] to be used interchangeably with a [`TenantId`].
///
/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
/// is both forward and backward compatible with TenantId: a legacy TenantId can be
/// decoded as a TenantShardId, and when re-encoded it will be parseable
/// as a TenantId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TenantShardId {
    pub tenant_id: TenantId,
    pub shard_number: ShardNumber,
    pub shard_count: ShardCount,
}

impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);

    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
    /// legacy format for TenantShardId that excludes the shard suffix", also known
    /// as [`TenantShardId::unsharded`].
    ///
    /// This method returns the actual number of shards, i.e. if our internal value is
    /// zero, we return 1 (unsharded tenants have 1 shard).
    pub fn count(&self) -> u8 {
        if self.0 > 0 {
            self.0
        } else {
            1
        }
    }

    /// The literal internal value: this is **not** the number of shards in the
    /// tenant, as we have a special zero value for legacy unsharded tenants. Use
    /// [`Self::count`] if you want to know the cardinality of shards.
    pub fn literal(&self) -> u8 {
        self.0
    }

    /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
    /// uses the legacy format for `TenantShardId`. See also the documentation for
    /// [`Self::count`].
    pub fn is_unsharded(&self) -> bool {
        self.0 == 0
    }

    /// `v` may be zero, or the number of shards in the tenant. `v` is what
    /// [`Self::literal`] would return.
    pub const fn new(val: u8) -> Self {
        Self(val)
    }
}

impl ShardNumber {
    pub const MAX: Self = Self(u8::MAX);
}

impl TenantShardId {
    pub fn unsharded(tenant_id: TenantId) -> Self {
        Self {
            tenant_id,
            shard_number: ShardNumber(0),
            shard_count: ShardCount(0),
        }
    }

    /// The range of all TenantShardId that belong to a particular TenantId. This is useful when
    /// you have a BTreeMap of TenantShardId, and are querying by TenantId.
    pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {
        RangeInclusive::new(
            Self {
                tenant_id,
                shard_number: ShardNumber(0),
                shard_count: ShardCount(0),
            },
            Self {
                tenant_id,
                shard_number: ShardNumber::MAX,
                shard_count: ShardCount::MAX,
            },
        )
    }

    pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
        ShardSlug(self)
    }

    /// Convenience for code that has special behavior on the 0th shard.
    pub fn is_shard_zero(&self) -> bool {
        self.shard_number == ShardNumber(0)
    }

    /// The "unsharded" value is distinct from simply having a single shard: it represents
    /// a tenant which is not shard-aware at all, and whose storage paths will not include
    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
    }

    /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
    /// is useful when logging from code that is already in a span that includes tenant ID, to
    /// keep messages reasonably terse.
    pub fn to_index(&self) -> ShardIndex {
        ShardIndex {
            shard_number: self.shard_number,
            shard_count: self.shard_count,
        }
    }

    /// Calculate the children of this TenantShardId when splitting the overall tenant into
    /// the given number of shards.
    pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {
        let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);
        let mut child_shards = Vec::new();
        for shard_number in 0..ShardNumber(new_shard_count.0).0 {
            // Key mapping is based on a round robin mapping of key hash modulo shard count,
            // so our child shards are the ones which the same keys would map to.
            if shard_number % effective_old_shard_count == self.shard_number.0 {
                child_shards.push(TenantShardId {
                    tenant_id: self.tenant_id,
                    shard_number: ShardNumber(shard_number),
                    shard_count: new_shard_count,
                })
            }
        }

        child_shards
    }
}

impl<'a> std::fmt::Display for ShardSlug<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{:02x}{:02x}",
            self.0.shard_number.0, self.0.shard_count.0
        )
    }
}

impl std::fmt::Display for TenantShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.shard_count != ShardCount(0) {
            write!(f, "{}-{}", self.tenant_id, self.shard_slug())
        } else {
            // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this
            // is distinct from the normal single shard case (shard count == 1).
            self.tenant_id.fmt(f)
        }
    }
}

impl std::fmt::Debug for TenantShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Debug is the same as Display: the compact hex representation
        write!(f, "{}", self)
    }
}

impl std::str::FromStr for TenantShardId {
    type Err = hex::FromHexError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count
        if s.len() == 32 {
            // Legacy case: no shard specified
            Ok(Self {
                tenant_id: TenantId::from_str(s)?,
                shard_number: ShardNumber(0),
                shard_count: ShardCount(0),
            })
        } else if s.len() == 37 {
            let bytes = s.as_bytes();
            let tenant_id = TenantId::from_hex(&bytes[0..32])?;
            let mut shard_parts: [u8; 2] = [0u8; 2];
            hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;
            Ok(Self {
                tenant_id,
                shard_number: ShardNumber(shard_parts[0]),
                shard_count: ShardCount(shard_parts[1]),
            })
        } else {
            Err(hex::FromHexError::InvalidStringLength)
        }
    }
}

impl From<[u8; 18]> for TenantShardId {
    fn from(b: [u8; 18]) -> Self {
        let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();

        Self {
            tenant_id: TenantId::from(tenant_id_bytes),
            shard_number: ShardNumber(b[16]),
            shard_count: ShardCount(b[17]),
        }
    }
}

impl ShardIndex {
    pub fn new(number: ShardNumber, count: ShardCount) -> Self {
        Self {
            shard_number: number,
            shard_count: count,
        }
    }
    pub fn unsharded() -> Self {
        Self {
            shard_number: ShardNumber(0),
            shard_count: ShardCount(0),
        }
    }

    /// The "unsharded" value is distinct from simply having a single shard: it represents
    /// a tenant which is not shard-aware at all, and whose storage paths will not include
    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
    }

    /// For use in constructing remote storage paths: concatenate this with a TenantId
    /// to get a fully qualified TenantShardId.
    ///
    /// Backward compat: this function returns an empty string if Self::is_unsharded, such
    /// that the legacy pre-sharding remote key format is preserved.
    pub fn get_suffix(&self) -> String {
        if self.is_unsharded() {
            "".to_string()
        } else {
            format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
        }
    }
}

impl std::fmt::Display for ShardIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
    }
}

impl std::fmt::Debug for ShardIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Debug is the same as Display: the compact hex representation
        write!(f, "{}", self)
    }
}

impl std::str::FromStr for ShardIndex {
    type Err = hex::FromHexError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Expect format: 1 byte shard number, 1 byte shard count
        if s.len() == 4 {
            let bytes = s.as_bytes();
            let mut shard_parts: [u8; 2] = [0u8; 2];
            hex::decode_to_slice(bytes, &mut shard_parts)?;
            Ok(Self {
                shard_number: ShardNumber(shard_parts[0]),
                shard_count: ShardCount(shard_parts[1]),
            })
        } else {
            Err(hex::FromHexError::InvalidStringLength)
        }
    }
}

impl From<[u8; 2]> for ShardIndex {
    fn from(b: [u8; 2]) -> Self {
        Self {
            shard_number: ShardNumber(b[0]),
            shard_count: ShardCount(b[1]),
        }
    }
}

impl Serialize for TenantShardId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(self)
        } else {
            // Note: while human encoding of [`TenantShardId`] is backward and forward
            // compatible, this binary encoding is not.
            let mut packed: [u8; 18] = [0; 18];
            packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
            packed[16] = self.shard_number.0;
            packed[17] = self.shard_count.0;

            packed.serialize(serializer)
        }
    }
}

impl<'de> Deserialize<'de> for TenantShardId {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct IdVisitor {
            is_human_readable_deserializer: bool,
        }

        impl<'de> serde::de::Visitor<'de> for IdVisitor {
            type Value = TenantShardId;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                if self.is_human_readable_deserializer {
                    formatter.write_str("value in form of hex string")
                } else {
                    formatter.write_str("value in form of integer array([u8; 18])")
                }
            }

            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                let s = serde::de::value::SeqAccessDeserializer::new(seq);
                let id: [u8; 18] = Deserialize::deserialize(s)?;
                Ok(TenantShardId::from(id))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                TenantShardId::from_str(v).map_err(E::custom)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(IdVisitor {
                is_human_readable_deserializer: true,
            })
        } else {
            deserializer.deserialize_tuple(
                18,
                IdVisitor {
                    is_human_readable_deserializer: false,
                },
            )
        }
    }
}

impl Serialize for ShardIndex {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(self)
        } else {
            // Binary encoding is not used in index_part.json, but is included in anticipation of
            // switching various structures (e.g. inter-process communication, remote metadata) to more
            // compact binary encodings in future.
            let mut packed: [u8; 2] = [0; 2];
            packed[0] = self.shard_number.0;
            packed[1] = self.shard_count.0;
            packed.serialize(serializer)
        }
    }
}

impl<'de> Deserialize<'de> for ShardIndex {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct IdVisitor {
            is_human_readable_deserializer: bool,
        }

        impl<'de> serde::de::Visitor<'de> for IdVisitor {
            type Value = ShardIndex;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                if self.is_human_readable_deserializer {
                    formatter.write_str("value in form of hex string")
                } else {
                    formatter.write_str("value in form of integer array([u8; 2])")
                }
            }

            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                let s = serde::de::value::SeqAccessDeserializer::new(seq);
                let id: [u8; 2] = Deserialize::deserialize(s)?;
                Ok(ShardIndex::from(id))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                ShardIndex::from_str(v).map_err(E::custom)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(IdVisitor {
                is_human_readable_deserializer: true,
            })
        } else {
            deserializer.deserialize_tuple(
                2,
                IdVisitor {
                    is_human_readable_deserializer: false,
                },
            )
        }
    }
}
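For reference, a std-only sketch of the slug encoding and split rule defined in the deleted file above; `parse_shard_suffix` and `split_children` are hypothetical helpers, with the tenant id simplified to a hex string:

// A tenant shard id is "<32 hex chars>" (legacy, unsharded) or
// "<32 hex chars>-NNCC" where NN/CC are shard number/count as two hex bytes.
fn parse_shard_suffix(s: &str) -> Result<(String, u8, u8), String> {
    match s.len() {
        32 => Ok((s.to_string(), 0, 0)), // legacy: shard_count == 0 means "unsharded"
        37 if s.as_bytes()[32] == b'-' => {
            let number = u8::from_str_radix(&s[33..35], 16).map_err(|e| e.to_string())?;
            let count = u8::from_str_radix(&s[35..37], 16).map_err(|e| e.to_string())?;
            Ok((s[..32].to_string(), number, count))
        }
        _ => Err("invalid length".to_string()),
    }
}

// Children of shard `number` (of `old_count`) after splitting to `new_count`:
// keys map by hash % shard_count, so a child inherits the parent's residue class.
fn split_children(number: u8, old_count: u8, new_count: u8) -> Vec<u8> {
    let effective_old = old_count.max(1);
    (0..new_count).filter(|n| n % effective_old == number).collect()
}

fn main() {
    let id = "072f1291a5310026820b2fe4b2968934-0102";
    assert_eq!(parse_shard_suffix(id).unwrap().1, 1); // second shard of two
    assert_eq!(split_children(1, 2, 8), vec![1, 3, 5, 7]);
}
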
@@ -62,7 +62,6 @@ sync_wrapper.workspace = true
sysinfo.workspace = true
tokio-tar.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
tokio-epoll-uring.workspace = true
tokio-io-timeout.workspace = true

@@ -47,9 +47,6 @@ use utils::{
project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);

#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

const PID_FILE_NAME: &str = "pageserver.pid";

const FEATURES: &[&str] = &[

@@ -1456,12 +1456,10 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
    }
}

pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
    register_int_counter_pair_vec!(
        "pageserver_live_connections_started",
        "Number of network connections that we started handling",
        "pageserver_live_connections_finished",
        "Number of network connections that we finished handling",
pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_live_connections",
        "Number of live network connections",
        &["pageserver_connection_kind"]
    )
    .expect("failed to define a metric")

@@ -55,7 +55,7 @@ use crate::basebackup::BasebackupError;
use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS_COUNT};
use crate::pgdatadir_mapping::Version;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;

@@ -215,9 +215,14 @@ async fn page_service_conn_main(
    auth_type: AuthType,
    connection_ctx: RequestContext,
) -> anyhow::Result<()> {
    let _guard = LIVE_CONNECTIONS
        .with_label_values(&["page_service"])
        .guard();
    // Immediately increment the gauge, then create a job to decrement it on task exit.
    // One of the pros of `defer!` is that this will *most probably*
    // get called, even in presence of panics.
    let gauge = LIVE_CONNECTIONS_COUNT.with_label_values(&["page_service"]);
    gauge.inc();
    scopeguard::defer! {
        gauge.dec();
    }

    socket
        .set_nodelay(true)

@@ -267,7 +272,7 @@ async fn page_service_conn_main(
    let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;

    match pgbackend
        .run(&mut conn_handler, &task_mgr::shutdown_token())
        .run(&mut conn_handler, task_mgr::shutdown_watcher)
        .await
    {
        Ok(()) => {

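The hunks above move between an RAII guard (`LIVE_CONNECTIONS...guard()`) and a manual `inc`/`defer! { dec }` pair. A std-only sketch of the guard idea, with an atomic standing in for the Prometheus gauge:

use std::sync::atomic::{AtomicI64, Ordering};

static LIVE: AtomicI64 = AtomicI64::new(0);

struct ConnGuard;

impl ConnGuard {
    fn new() -> Self {
        LIVE.fetch_add(1, Ordering::Relaxed); // increment on construction
        ConnGuard
    }
}

impl Drop for ConnGuard {
    fn drop(&mut self) {
        LIVE.fetch_sub(1, Ordering::Relaxed); // decrement on any exit path, including unwind
    }
}

fn handle_connection() {
    let _guard = ConnGuard::new();
    // ... serve the connection; the gauge drops back even on early return or panic ...
}

fn main() {
    handle_connection();
    assert_eq!(LIVE.load(Ordering::Relaxed), 0);
}
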
@@ -854,14 +854,13 @@ impl Timeline {
        result.add_key(DBDIR_KEY);

        // Fetch list of database dirs and iterate them
        let dbdir = self.list_dbdirs(lsn, ctx).await?;
        let mut dbs: Vec<((Oid, Oid), bool)> = dbdir.into_iter().collect();
        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
        let dbdir = DbDirectory::des(&buf)?;

        dbs.sort_unstable_by(|(k_a, _), (k_b, _)| k_a.cmp(k_b));
        for ((spcnode, dbnode), has_relmap_file) in dbs {
            if has_relmap_file {
                result.add_key(relmap_file_key(spcnode, dbnode));
            }
        let mut dbs: Vec<(Oid, Oid)> = dbdir.dbdirs.keys().cloned().collect();
        dbs.sort_unstable();
        for (spcnode, dbnode) in dbs {
            result.add_key(relmap_file_key(spcnode, dbnode));
            result.add_key(rel_dir_to_key(spcnode, dbnode));

            let mut rels: Vec<RelTag> = self

@@ -920,9 +919,6 @@ impl Timeline {
            result.add_key(AUX_FILES_KEY);
        }

        // Add extra keyspaces in the test cases. Some test cases write keys into the storage without
        // creating directory keys. These test cases will add such keyspaces into `extra_test_dense_keyspace`
        // and the keys will not be garbage-collected.
        #[cfg(test)]
        {
            let guard = self.extra_test_dense_keyspace.load();

@@ -931,48 +927,13 @@ impl Timeline {
            }
        }

        let dense_keyspace = result.to_keyspace();
        let sparse_keyspace = SparseKeySpace(KeySpace {
            ranges: vec![Key::metadata_aux_key_range(), repl_origin_key_range()],
        });

        if cfg!(debug_assertions) {
            // Verify if the sparse keyspaces are ordered and non-overlapping.

            // We do not use KeySpaceAccum for sparse_keyspace because we want to ensure each
            // category of sparse keys are split into their own image/delta files. If there
            // are overlapping keyspaces, they will be automatically merged by keyspace accum,
            // and we want the developer to keep the keyspaces separated.

            let ranges = &sparse_keyspace.0.ranges;

            // TODO: use a single overlaps_with across the codebase
            fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
                !(a.end <= b.start || b.end <= a.start)
            }
            for i in 0..ranges.len() {
                for j in 0..i {
                    if overlaps_with(&ranges[i], &ranges[j]) {
                        panic!(
                            "overlapping sparse keyspace: {}..{} and {}..{}",
                            ranges[i].start, ranges[i].end, ranges[j].start, ranges[j].end
                        );
                    }
                }
            }
            for i in 1..ranges.len() {
                assert!(
                    ranges[i - 1].end <= ranges[i].start,
                    "unordered sparse keyspace: {}..{} and {}..{}",
                    ranges[i - 1].start,
                    ranges[i - 1].end,
                    ranges[i].start,
                    ranges[i].end
                );
            }
        }

        Ok((dense_keyspace, sparse_keyspace))
        Ok((
            result.to_keyspace(),
            /* AUX sparse key space */
            SparseKeySpace(KeySpace {
                ranges: vec![repl_origin_key_range(), Key::metadata_aux_key_range()],
            }),
        ))
    }

    /// Get cached size of relation if it was not updated after the specified LSN

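A std-only sketch of the debug assertion above, checking that sparse keyspace ranges are pairwise disjoint and sorted (`check_sparse_keyspace` is a hypothetical helper over plain `u64` ranges):

use std::ops::Range;

fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
    !(a.end <= b.start || b.end <= a.start)
}

fn check_sparse_keyspace(ranges: &[Range<u64>]) {
    // every pair must be disjoint
    for i in 0..ranges.len() {
        for j in 0..i {
            assert!(!overlaps_with(&ranges[i], &ranges[j]), "overlapping: {i} vs {j}");
        }
    }
    // and the list must be ordered by start
    for w in ranges.windows(2) {
        assert!(w[0].end <= w[1].start, "unordered sparse keyspace");
    }
}

fn main() {
    check_sparse_keyspace(&[0..10, 10..20, 30..40]); // ok: adjacent ranges are allowed
}
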
@@ -365,7 +365,6 @@ pub struct Timeline {
    repartition_threshold: u64,

    last_image_layer_creation_check_at: AtomicLsn,
    last_image_layer_creation_check_instant: std::sync::Mutex<Option<Instant>>,

    /// Current logical size of the "datadir", at the last LSN.
    current_logical_size: LogicalSize,

@@ -728,9 +727,6 @@ impl From<CreateImageLayersError> for CompactionError {
    fn from(e: CreateImageLayersError) -> Self {
        match e {
            CreateImageLayersError::Cancelled => CompactionError::ShuttingDown,
            CreateImageLayersError::Other(e) => {
                CompactionError::Other(e.context("create image layers"))
            }
            _ => CompactionError::Other(e.into()),
        }
    }

@@ -2388,7 +2384,6 @@ impl Timeline {
            )),
            repartition_threshold: 0,
            last_image_layer_creation_check_at: AtomicLsn::new(0),
            last_image_layer_creation_check_instant: Mutex::new(None),

            last_received_wal: Mutex::new(None),
            rel_size_cache: RwLock::new(RelSizeCache {

@@ -4469,58 +4464,6 @@ impl Timeline {
        }
    }

    /// Predicate function which indicates whether we should check if new image layers
    /// are required. Since checking if new image layers are required is expensive in
    /// terms of CPU, we only do it in the following cases:
    /// 1. If the timeline has ingested sufficient WAL to justify the cost
    /// 2. If enough time has passed since the last check
    ///    2.1. For large tenants, we wish to perform the check more often since they
    ///         suffer from the lack of image layers
    ///    2.2. For small tenants (that can mostly fit in RAM), we use a much longer interval
    fn should_check_if_image_layers_required(self: &Arc<Timeline>, lsn: Lsn) -> bool {
        const LARGE_TENANT_THRESHOLD: u64 = 2 * 1024 * 1024 * 1024;

        let last_checks_at = self.last_image_layer_creation_check_at.load();
        let distance = lsn
            .checked_sub(last_checks_at)
            .expect("Attempt to compact with LSN going backwards");
        let min_distance =
            self.get_image_layer_creation_check_threshold() as u64 * self.get_checkpoint_distance();

        let distance_based_decision = distance.0 >= min_distance;

        let mut time_based_decision = false;
        let mut last_check_instant = self.last_image_layer_creation_check_instant.lock().unwrap();
        if let CurrentLogicalSize::Exact(logical_size) = self.current_logical_size.current_size() {
            let check_required_after = if Into::<u64>::into(&logical_size) >= LARGE_TENANT_THRESHOLD
            {
                self.get_checkpoint_timeout()
            } else {
                Duration::from_secs(3600 * 48)
            };

            time_based_decision = match *last_check_instant {
                Some(last_check) => {
                    let elapsed = last_check.elapsed();
                    elapsed >= check_required_after
                }
                None => true,
            };
        }

        // Do the expensive delta layer counting only if this timeline has ingested sufficient
        // WAL since the last check or a checkpoint timeout interval has elapsed since the last
        // check.
        let decision = distance_based_decision || time_based_decision;

        if decision {
            self.last_image_layer_creation_check_at.store(lsn);
            *last_check_instant = Some(Instant::now());
        }

        decision
    }

    #[tracing::instrument(skip_all, fields(%lsn, %mode))]
    async fn create_image_layers(
        self: &Arc<Timeline>,

@@ -4543,7 +4486,22 @@ impl Timeline {
        // image layers <100000000..100000099> and <200000000..200000199> are not completely covering it.
        let mut start = Key::MIN;

        let check_for_image_layers = self.should_check_if_image_layers_required(lsn);
        let check_for_image_layers = {
            let last_checks_at = self.last_image_layer_creation_check_at.load();
            let distance = lsn
                .checked_sub(last_checks_at)
                .expect("Attempt to compact with LSN going backwards");
            let min_distance = self.get_image_layer_creation_check_threshold() as u64
                * self.get_checkpoint_distance();

            // Skip the expensive delta layer counting if this timeline has not ingested sufficient
            // WAL since the last check.
            distance.0 >= min_distance
        };

        if check_for_image_layers {
            self.last_image_layer_creation_check_at.store(lsn);
        }

        for partition in partitioning.parts.iter() {
            let img_range = start..partition.ranges.last().unwrap().end;

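A sketch of the combined "enough WAL or enough time" decision above, with plain integers in place of `Lsn` and hard-coded stand-ins for the config getters (`CHECKPOINT_DISTANCE`, `CHECK_THRESHOLD` and the intervals are assumptions, not the real defaults):

use std::time::{Duration, Instant};

struct CheckState {
    last_check_lsn: u64,
    last_check_at: Option<Instant>,
}

fn should_check(state: &mut CheckState, lsn: u64, logical_size: u64) -> bool {
    const LARGE_TENANT_THRESHOLD: u64 = 2 * 1024 * 1024 * 1024;
    const CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024; // stand-in for the config value
    const CHECK_THRESHOLD: u64 = 2;                     // stand-in for the config value

    // 1. distance-based: has enough WAL been ingested since the last check?
    let distance_based = lsn.saturating_sub(state.last_check_lsn)
        >= CHECK_THRESHOLD * CHECKPOINT_DISTANCE;

    // 2. time-based: large tenants check often, small tenants rarely
    let interval = if logical_size >= LARGE_TENANT_THRESHOLD {
        Duration::from_secs(600)
    } else {
        Duration::from_secs(3600 * 48)
    };
    let time_based = match state.last_check_at {
        Some(t) => t.elapsed() >= interval,
        None => true,
    };

    let decision = distance_based || time_based;
    if decision {
        state.last_check_lsn = lsn;
        state.last_check_at = Some(Instant::now());
    }
    decision
}

fn main() {
    let mut st = CheckState { last_check_lsn: 0, last_check_at: None };
    assert!(should_check(&mut st, 0, 0)); // first call: no prior check recorded
    assert!(!should_check(&mut st, 1, 0)); // too little WAL, too little time
}
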
@@ -26,7 +26,7 @@ use tracing::{debug, error, info, trace, warn, Instrument};
use super::TaskStateUpdate;
use crate::{
    context::RequestContext,
    metrics::{LIVE_CONNECTIONS, WALRECEIVER_STARTED_CONNECTIONS, WAL_INGEST},
    metrics::{LIVE_CONNECTIONS_COUNT, WALRECEIVER_STARTED_CONNECTIONS, WAL_INGEST},
    task_mgr::TaskKind,
    task_mgr::WALRECEIVER_RUNTIME,
    tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline, WalReceiverInfo},

@@ -208,9 +208,14 @@ pub(super) async fn handle_walreceiver_connection(
            .instrument(tracing::info_span!("poller")),
    );

    let _guard = LIVE_CONNECTIONS
        .with_label_values(&["wal_receiver"])
        .guard();
    // Immediately increment the gauge, then create a job to decrement it on task exit.
    // One of the pros of `defer!` is that this will *most probably*
    // get called, even in presence of panics.
    let gauge = LIVE_CONNECTIONS_COUNT.with_label_values(&["wal_receiver"]);
    gauge.inc();
    scopeguard::defer! {
        gauge.dec();
    }

    let identify = identify_system(&replication_client).await?;
    info!("{identify:?}");

@@ -6,7 +6,6 @@ OBJS = \
    $(WIN32RES) \
    extension_server.o \
    file_cache.o \
    hll.o \
    libpagestore.o \
    neon.o \
    neon_utils.o \

@@ -23,7 +22,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
SHLIB_LINK = -lcurl

EXTENSION = neon
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql
PGFILEDESC = "neon - cloud storage for PostgreSQL"

EXTRA_CLEAN = \

@@ -26,6 +26,7 @@
#include "miscadmin.h"
#include "pagestore_client.h"
#include "common/hashfn.h"
#include "lib/hyperloglog.h"
#include "pgstat.h"
#include "postmaster/bgworker.h"
#include RELFILEINFO_HDR

@@ -39,8 +40,6 @@
#include "utils/dynahash.h"
#include "utils/guc.h"

#include "hll.h"

/*
 * Local file cache is used to temporarily store relation pages in the local file system.
 * All blocks of all relations are stored inside one file and addressed using shared hash map.

@@ -63,6 +62,7 @@
#define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */
#define MB ((uint64)1024*1024)

#define HYPER_LOG_LOG_BIT_WIDTH 10
#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK))

typedef struct FileCacheEntry

@@ -87,7 +87,8 @@ typedef struct FileCacheControl
    uint64 writes;
    dlist_head lru; /* double linked list for LRU replacement
                     * algorithm */
    HyperLogLogState wss_estimation; /* estimation of working set size */
    hyperLogLogState wss_estimation; /* estimation of working set size */
    uint8_t hyperloglog_hashes[(1 << HYPER_LOG_LOG_BIT_WIDTH) + 1];
} FileCacheControl;

static HTAB *lfc_hash;

@@ -237,7 +238,12 @@ lfc_shmem_startup(void)
        dlist_init(&lfc_ctl->lru);

        /* Initialize hyper-log-log structure for estimating working set size */
        initSHLL(&lfc_ctl->wss_estimation);
        initHyperLogLog(&lfc_ctl->wss_estimation, HYPER_LOG_LOG_BIT_WIDTH);

        /* We need hashes in shared memory */
        pfree(lfc_ctl->wss_estimation.hashesArr);
        memset(lfc_ctl->hyperloglog_hashes, 0, sizeof lfc_ctl->hyperloglog_hashes);
        lfc_ctl->wss_estimation.hashesArr = lfc_ctl->hyperloglog_hashes;

        /* Recreate file cache on restart */
        fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC);

@@ -539,7 +545,7 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

    /* Approximate working set */
    tag.blockNum = blkno;
    addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
    addHyperLogLog(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));

    if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
    {

@@ -980,38 +986,20 @@ local_cache_pages(PG_FUNCTION_ARGS)
    SRF_RETURN_DONE(funcctx);
}

PG_FUNCTION_INFO_V1(approximate_working_set_size_seconds);

Datum
approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
{
    if (lfc_size_limit != 0)
    {
        int32 dc;
        time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
        LWLockAcquire(lfc_lock, LW_SHARED);
        dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
        LWLockRelease(lfc_lock);
        PG_RETURN_INT32(dc);
    }
    PG_RETURN_NULL();
}

PG_FUNCTION_INFO_V1(approximate_working_set_size);

Datum
approximate_working_set_size(PG_FUNCTION_ARGS)
{
    int32 dc = -1;
    if (lfc_size_limit != 0)
    {
        int32 dc;
        bool reset = PG_GETARG_BOOL(0);
        LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
        dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
        dc = (int32) estimateHyperLogLog(&lfc_ctl->wss_estimation);
        if (reset)
            memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
            memset(lfc_ctl->hyperloglog_hashes, 0, sizeof lfc_ctl->hyperloglog_hashes);
        LWLockRelease(lfc_lock);
        PG_RETURN_INT32(dc);
    }
    PG_RETURN_NULL();
    PG_RETURN_INT32(dc);
}

pgxn/neon/hll.c
@@ -1,193 +0,0 @@
/*-------------------------------------------------------------------------
 *
 * hll.c
 *    Sliding HyperLogLog cardinality estimator
 *
 * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
 *
 * Implements https://hal.science/hal-00465313/document
 *
 * Based on Hideaki Ohno's C++ implementation. This is probably not ideally
 * suited to estimating the cardinality of very large sets; in particular, we
 * have not attempted to further optimize the implementation as described in
 * the Heule, Nunkesser and Hall paper "HyperLogLog in Practice: Algorithmic
 * Engineering of a State of The Art Cardinality Estimation Algorithm".
 *
 * A sparse representation of HyperLogLog state is used, with fixed space
 * overhead.
 *
 * The copyright terms of Ohno's original version (the MIT license) follow.
 *
 * IDENTIFICATION
 *    src/backend/lib/hyperloglog.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the 'Software'), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <math.h>

#include "postgres.h"
#include "funcapi.h"
#include "port/pg_bitutils.h"
#include "utils/timestamp.h"
#include "hll.h"


#define POW_2_32 (4294967296.0)
#define NEG_POW_2_32 (-4294967296.0)

#define ALPHA_MM ((0.7213 / (1.0 + 1.079 / HLL_N_REGISTERS)) * HLL_N_REGISTERS * HLL_N_REGISTERS)

/*
 * Worker for addHyperLogLog().
 *
 * Calculates the position of the first set bit in first b bits of x argument
 * starting from the first, reading from most significant to least significant
 * bits.
 *
 * Example (when considering first 10 bits of x):
 *
 * rho(x = 0b1000000000) returns 1
 * rho(x = 0b0010000000) returns 3
 * rho(x = 0b0000000000) returns b + 1
 *
 * "The binary address determined by the first b bits of x"
 *
 * Return value "j" used to index bit pattern to watch.
 */
static inline uint8
rho(uint32 x, uint8 b)
{
    uint8 j = 1;

    if (x == 0)
        return b + 1;

    j = 32 - pg_leftmost_one_pos32(x);

    if (j > b)
        return b + 1;

    return j;
}

/*
 * Initialize HyperLogLog track state
 */
void
initSHLL(HyperLogLogState *cState)
{
    memset(cState->regs, 0, sizeof(cState->regs));
}

/*
 * Adds element to the estimator, from caller-supplied hash.
 *
 * It is critical that the hash value passed be an actual hash value, typically
 * generated using hash_any(). The algorithm relies on a specific bit-pattern
 * observable in conjunction with stochastic averaging. There must be a
 * uniform distribution of bits in hash values for each distinct original value
 * observed.
 */
void
addSHLL(HyperLogLogState *cState, uint32 hash)
{
    uint8 count;
    uint32 index;
    size_t i;
    size_t j;

    TimestampTz now = GetCurrentTimestamp();
    /* Use the first "k" (registerWidth) bits as a zero based index */
    index = hash >> HLL_C_BITS;

    /* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
    count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);

    cState->regs[index][count] = now;
}

static uint8
getMaximum(const TimestampTz* reg, TimestampTz since)
{
    uint8 max = 0;

    for (size_t i = 0; i < HLL_C_BITS + 1; i++)
    {
        if (reg[i] >= since)
        {
            max = i;
        }
    }

    return max;
}


/*
 * Estimates cardinality, based on elements added so far
 */
double
estimateSHLL(HyperLogLogState *cState, time_t duration)
{
    double result;
    double sum = 0.0;
    size_t i;
    uint8 R[HLL_N_REGISTERS];
    /* 0 indicates an uninitialized timestamp, so to cover the whole range we start from 1 */
    TimestampTz since = duration == (time_t)-1 ? 1 : GetCurrentTimestamp() - duration * USECS_PER_SEC;

    for (i = 0; i < HLL_N_REGISTERS; i++)
    {
        R[i] = getMaximum(cState->regs[i], since);
        sum += 1.0 / pow(2.0, R[i]);
    }

    /* result set to "raw" HyperLogLog estimate (E in the HyperLogLog paper) */
    result = ALPHA_MM / sum;

    if (result <= (5.0 / 2.0) * HLL_N_REGISTERS)
    {
        /* Small range correction */
        int zero_count = 0;

        for (i = 0; i < HLL_N_REGISTERS; i++)
        {
            zero_count += R[i] == 0;
        }

        if (zero_count != 0)
            result = HLL_N_REGISTERS * log((double) HLL_N_REGISTERS /
                                           zero_count);
    }
    else if (result > (1.0 / 30.0) * POW_2_32)
    {
        /* Large range correction */
        result = NEG_POW_2_32 * log(1.0 - (result / POW_2_32));
    }

    return result;
}

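A std-only Rust sketch of the sliding estimator removed above: a timestamp per (register, rank) instead of a single max rank, and an estimate that only counts observations within the query window. The constants mirror hll.h; the array is sized to C + 2 so the maximum rank b + 1 stays in bounds, and the multiplicative scramble in main stands in for a real hash function:

use std::time::{Duration, Instant};

const B: u32 = 10;                     // register-index bits
const C: u32 = 32 - B;                 // rank is in 1..=C+1
const N_REGS: usize = 1 << B;
const SLOTS: usize = (C + 2) as usize; // slot 0 unused; ranks 1..=C+1

struct SlidingHll {
    // regs[i][r] = last time rank r was observed in register i
    regs: Vec<[Option<Instant>; SLOTS]>,
}

impl SlidingHll {
    fn new() -> Self {
        SlidingHll { regs: vec![[None; SLOTS]; N_REGS] }
    }

    fn add(&mut self, hash: u32) {
        let index = (hash >> C) as usize; // top B bits select a register
        let rest = hash << B;             // low C bits, shifted to the top
        let rank = if rest == 0 { C + 1 } else { rest.leading_zeros() + 1 };
        self.regs[index][rank.min(C + 1) as usize] = Some(Instant::now());
    }

    fn estimate(&self, window: Duration) -> f64 {
        let alpha_mm = (0.7213 / (1.0 + 1.079 / N_REGS as f64)) * (N_REGS as f64).powi(2);
        let since = Instant::now().checked_sub(window); // None: window covers everything
        let (mut sum, mut zeros) = (0.0, 0usize);
        for reg in &self.regs {
            // highest rank observed within the window
            let r = (1..SLOTS)
                .rev()
                .find(|&r| reg[r].map_or(false, |t| since.map_or(true, |s| t >= s)))
                .unwrap_or(0);
            if r == 0 { zeros += 1; }
            sum += 1.0 / 2f64.powi(r as i32);
        }
        let raw = alpha_mm / sum;
        if raw <= 2.5 * N_REGS as f64 && zeros != 0 {
            N_REGS as f64 * (N_REGS as f64 / zeros as f64).ln() // small-range correction
        } else {
            raw
        }
    }
}

fn main() {
    let mut hll = SlidingHll::new();
    for i in 0u32..10_000 {
        hll.add(i.wrapping_mul(0x9e37_79b1)); // use a proper hash in practice
    }
    println!("~distinct within 60s: {:.0}", hll.estimate(Duration::from_secs(60)));
}
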
@@ -1,86 +0,0 @@
/*-------------------------------------------------------------------------
 *
 * hll.h
 *    Sliding HyperLogLog cardinality estimator
 *
 * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
 *
 * Implements https://hal.science/hal-00465313/document
 *
 * Based on Hideaki Ohno's C++ implementation. This is probably not ideally
 * suited to estimating the cardinality of very large sets; in particular, we
 * have not attempted to further optimize the implementation as described in
 * the Heule, Nunkesser and Hall paper "HyperLogLog in Practice: Algorithmic
 * Engineering of a State of The Art Cardinality Estimation Algorithm".
 *
 * A sparse representation of HyperLogLog state is used, with fixed space
 * overhead.
 *
 * The copyright terms of Ohno's original version (the MIT license) follow.
 *
 * IDENTIFICATION
 *    src/backend/lib/hyperloglog.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the 'Software'), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef HLL_H
#define HLL_H

#define HLL_BIT_WIDTH 10
#define HLL_C_BITS (32 - HLL_BIT_WIDTH)
#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)

/*
 * HyperLogLog is an approximate technique for computing the number of distinct
 * entries in a set. Importantly, it does this by using a fixed amount of
 * memory. See the 2007 paper "HyperLogLog: the analysis of a near-optimal
 * cardinality estimation algorithm" for more.
 *
 * Instead of a single counter for every bits register, we have a timestamp
 * for every valid number of bits we can encounter. Every time we encounter
 * a certain number of bits, we update the timestamp in those registers to
 * the current timestamp.
 *
 * We can query the sketch's stored cardinality for the range of some timestamp
 * up to now: For each register, we return the highest bits bucket that has a
 * modified timestamp >= the query timestamp. This value is the number of bits
 * for this register in the normal HLL calculation.
 *
 * The memory usage is 2^B * (C + 1) * sizeof(TimestampTz), or 184kiB.
 * Usage could be halved if we decide to reduce the required time dimension
 * precision; as 32 bits in second precision should be enough for statistics.
 * However, that is not yet implemented.
 */
typedef struct HyperLogLogState
{
    TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
} HyperLogLogState;

extern void initSHLL(HyperLogLogState *cState);
extern void addSHLL(HyperLogLogState *cState, uint32 hash);
extern double estimateSHLL(HyperLogLogState *cState, time_t duration);

#endif

@@ -1,9 +0,0 @@
\echo Use "ALTER EXTENSION neon UPDATE TO '1.4'" to load this file. \quit

CREATE FUNCTION approximate_working_set_size_seconds(duration integer default null)
RETURNS integer
AS 'MODULE_PATHNAME', 'approximate_working_set_size_seconds'
LANGUAGE C PARALLEL SAFE;

GRANT EXECUTE ON FUNCTION approximate_working_set_size_seconds(integer) TO pg_monitor;

@@ -1 +0,0 @@
DROP FUNCTION IF EXISTS approximate_working_set_size_seconds(integer) CASCADE;
@@ -63,6 +63,8 @@ char *wal_acceptors_list = "";
int wal_acceptor_reconnect_timeout = 1000;
int wal_acceptor_connection_timeout = 10000;

int max_vacuum_defer_cleanup_age = 0;

/* Set to true in the walproposer bgw. */
static bool am_walproposer;
static WalproposerShmemState *walprop_shared;

@@ -218,6 +220,16 @@ nwp_register_gucs(void)
        PGC_SIGHUP,
        GUC_UNIT_MS,
        NULL, NULL, NULL);

    DefineCustomIntVariable(
        "neon.max_vacuum_defer_cleanup_age",
        "Restrict oldest xmin pinned by hot standby feedback to prevent bloating of master",
        NULL,
        &max_vacuum_defer_cleanup_age,
        0, 0, INT_MAX,
        PGC_SIGHUP,
        0,
        NULL, NULL, NULL);
}

/*

@@ -1855,6 +1867,7 @@ walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk)
    FullTransactionId xmin = hsFeedback.xmin;
    FullTransactionId catalog_xmin = hsFeedback.catalog_xmin;
    FullTransactionId next_xid = ReadNextFullTransactionId();

    /*
     * Page server is updating nextXid in checkpoint each 1024 transactions,
     * so feedback xmin can actually be larger than nextXid and

@@ -1863,8 +1876,14 @@ walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk)
     */
    if (FullTransactionIdPrecedes(next_xid, xmin))
        xmin = next_xid;
    else if (max_vacuum_defer_cleanup_age != 0 && xmin.value < next_xid.value - max_vacuum_defer_cleanup_age)
        xmin.value = next_xid.value - max_vacuum_defer_cleanup_age;

    if (FullTransactionIdPrecedes(next_xid, catalog_xmin))
        catalog_xmin = next_xid;
    else if (max_vacuum_defer_cleanup_age != 0 && catalog_xmin.value < next_xid.value - max_vacuum_defer_cleanup_age)
        catalog_xmin.value = next_xid.value - max_vacuum_defer_cleanup_age;

    agg_hs_feedback = hsFeedback;
    elog(DEBUG2, "ProcessStandbyHSFeedback(xmin=%d, catalog_xmin=%d", XidFromFullTransactionId(hsFeedback.xmin), XidFromFullTransactionId(hsFeedback.catalog_xmin));
    ProcessStandbyHSFeedback(hsFeedback.ts,

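The clamping above, restated on plain u64 XIDs (`clamp_xmin` is a hypothetical helper; `saturating_sub` guards the underflow case that the C code leaves to 64-bit XID arithmetic):

// Hot-standby feedback may pin xmin arbitrarily far in the past; with the GUC
// set, the walproposer refuses to let it lag more than `defer_age` behind next_xid.
fn clamp_xmin(xmin: u64, next_xid: u64, defer_age: u64) -> u64 {
    if next_xid < xmin {
        // feedback can run ahead of the pageserver's coarse nextXid tracking
        next_xid
    } else if defer_age != 0 && xmin < next_xid.saturating_sub(defer_age) {
        next_xid - defer_age
    } else {
        xmin
    }
}

fn main() {
    assert_eq!(clamp_xmin(1_000, 100_000, 10_000), 90_000);   // lagging too far: clamped
    assert_eq!(clamp_xmin(95_000, 100_000, 10_000), 95_000);  // within the window
    assert_eq!(clamp_xmin(200_000, 100_000, 10_000), 100_000); // ahead: capped at next_xid
}
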
@@ -7,7 +7,7 @@ OBJS = \
    neontest.o

EXTENSION = neon_test_utils
DATA = neon_test_utils--1.3.sql
DATA = neon_test_utils--1.2.sql
PGFILEDESC = "neon_test_utils - helpers for neon testing and debugging"

PG_CONFIG = pg_config

@@ -45,21 +45,3 @@ CREATE FUNCTION neon_xlogflush(lsn pg_lsn DEFAULT NULL)
RETURNS VOID
AS 'MODULE_PATHNAME', 'neon_xlogflush'
LANGUAGE C PARALLEL UNSAFE;

CREATE FUNCTION trigger_panic()
RETURNS VOID
AS 'MODULE_PATHNAME', 'trigger_panic'
LANGUAGE C PARALLEL UNSAFE;

CREATE FUNCTION trigger_segfault()
RETURNS VOID
AS 'MODULE_PATHNAME', 'trigger_segfault'
LANGUAGE C PARALLEL UNSAFE;

-- Alias for `trigger_segfault`, just because `SELECT 💣()` looks fun
CREATE OR REPLACE FUNCTION 💣() RETURNS void
LANGUAGE plpgsql AS $$
BEGIN
    PERFORM trigger_segfault();
END;
$$;

@@ -1,6 +1,6 @@
# neon_test_utils extension
comment = 'helpers for neon testing and debugging'
default_version = '1.3'
default_version = '1.2'
module_pathname = '$libdir/neon_test_utils'
relocatable = true
trusted = true

@@ -42,8 +42,6 @@ PG_FUNCTION_INFO_V1(clear_buffer_cache);
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn);
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex);
PG_FUNCTION_INFO_V1(neon_xlogflush);
PG_FUNCTION_INFO_V1(trigger_panic);
PG_FUNCTION_INFO_V1(trigger_segfault);

/*
 * Linkage to functions in neon module.

@@ -491,24 +489,3 @@ neon_xlogflush(PG_FUNCTION_ARGS)
    XLogFlush(lsn);
    PG_RETURN_VOID();
}

/*
 * Function to trigger panic.
 */
Datum
trigger_panic(PG_FUNCTION_ARGS)
{
    elog(PANIC, "neon_test_utils: panic");
    PG_RETURN_VOID();
}

/*
 * Function to trigger a segfault.
 */
Datum
trigger_segfault(PG_FUNCTION_ARGS)
{
    int *ptr = NULL;
    *ptr = 42;
    PG_RETURN_VOID();
}

poetry.lock (generated)
@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

[[package]]
name = "aiohttp"

@@ -734,13 +734,13 @@ typing-extensions = ">=4.1.0"

[[package]]
name = "certifi"
version = "2024.7.4"
version = "2023.7.22"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
files = [
    {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
    {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
    {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
    {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
]

[[package]]

@@ -3133,18 +3133,18 @@ multidict = ">=4.0"

[[package]]
name = "zipp"
version = "3.19.1"
version = "3.8.1"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.7"
files = [
    {file = "zipp-3.19.1-py3-none-any.whl", hash = "sha256:2828e64edb5386ea6a52e7ba7cdb17bb30a73a858f5eb6eb93d8d36f5ea26091"},
    {file = "zipp-3.19.1.tar.gz", hash = "sha256:35427f6d5594f4acf82d25541438348c26736fa9b3afa2754bcd63cdb99d8e8f"},
    {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"},
    {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"},
]

[package.extras]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]

[[package]]
name = "zstandard"

@@ -6,9 +6,8 @@ use anyhow::Context;
use once_cell::sync::Lazy;
use postgres_backend::{AuthType, PostgresBackend, PostgresBackendTCP, QueryError};
use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
use std::convert::Infallible;
use std::{convert::Infallible, future};
use tokio::net::{TcpListener, TcpStream};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, info_span, Instrument};

static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);

@@ -68,9 +67,7 @@ pub async fn task_main(listener: TcpListener) -> anyhow::Result<Infallible> {

async fn handle_connection(socket: TcpStream) -> Result<(), QueryError> {
    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;
    pgbackend
        .run(&mut MgmtHandler, &CancellationToken::new())
        .await
    pgbackend.run(&mut MgmtHandler, future::pending::<()>).await
}

/// A message received by `mgmt` when a compute node is ready.

@@ -3,8 +3,8 @@ use std::marker::PhantomData;
use measured::{
    label::NoLabels,
    metric::{
        gauge::GaugeState, group::Encoding, name::MetricNameEncoder, MetricEncoding,
        MetricFamilyEncoding, MetricType,
        gauge::GaugeState, group::Encoding, group::MetricValue, name::MetricNameEncoder,
        MetricEncoding, MetricFamilyEncoding, MetricType,
    },
    text::TextEncoder,
    LabelGroup, MetricGroup,

@@ -100,7 +100,7 @@ macro_rules! jemalloc_gauge {
            enc: &mut TextEncoder<W>,
        ) -> Result<(), std::io::Error> {
            if let Ok(v) = mib.read() {
                GaugeState::new(v as i64).collect_into(&(), labels, name, enc)?;
                enc.write_metric_value(name, labels, MetricValue::Int(v as i64))?;
            }
            Ok(())
        }

@@ -2,7 +2,7 @@ use std::sync::{Arc, OnceLock};

use lasso::ThreadedRodeo;
use measured::{
    label::{FixedCardinalitySet, LabelGroupSet, LabelName, LabelSet, LabelValue, StaticLabelSet},
    label::{FixedCardinalitySet, LabelName, LabelSet, LabelValue, StaticLabelSet},
    metric::{histogram::Thresholds, name::MetricName},
    Counter, CounterVec, FixedCardinalityLabel, Gauge, GaugeVec, Histogram, HistogramVec,
    LabelGroup, MetricGroup,

@@ -577,32 +577,6 @@ impl LabelGroup for ThreadPoolWorkerId {
    }
}

impl LabelGroupSet for ThreadPoolWorkers {
    type Group<'a> = ThreadPoolWorkerId;

    fn cardinality(&self) -> Option<usize> {
        Some(self.0)
    }

    fn encode_dense(&self, value: Self::Unique) -> Option<usize> {
        Some(value)
    }

    fn decode_dense(&self, value: usize) -> Self::Group<'_> {
        ThreadPoolWorkerId(value)
    }

    type Unique = usize;

    fn encode(&self, value: Self::Group<'_>) -> Option<Self::Unique> {
        Some(value.0)
    }

    fn decode(&self, value: &Self::Unique) -> Self::Group<'_> {
        ThreadPoolWorkerId(*value)
    }
}

impl LabelSet for ThreadPoolWorkers {
    type Value<'a> = ThreadPoolWorkerId;


@@ -838,9 +838,8 @@ async fn query_to_json<T: GenericClient>(
        "finished reading rows"
    );

    let columns_len = row_stream.columns().len();
    let mut fields = Vec::with_capacity(columns_len);
    let mut columns = Vec::with_capacity(columns_len);
    let mut fields = vec![];
    let mut columns = vec![];

    for c in row_stream.columns() {
        fields.push(json!({

@@ -445,19 +445,6 @@ async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
        .map(|res| ("WAL service main".to_owned(), res));
    tasks_handles.push(Box::pin(wal_service_handle));

    let timeline_housekeeping_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())
        .spawn(async move {
            const TOMBSTONE_TTL: Duration = Duration::from_secs(3600 * 24);
            loop {
                tokio::time::sleep(TOMBSTONE_TTL).await;
                GlobalTimelines::housekeeping(&TOMBSTONE_TTL);
            }
        })
        .map(|res| ("Timeline map housekeeping".to_owned(), res));
    tasks_handles.push(Box::pin(timeline_housekeeping_handle));

    if let Some(pg_listener_tenant_only) = pg_listener_tenant_only {
        let conf_ = conf.clone();
        let wal_service_handle = current_thread_rt

@@ -15,19 +15,12 @@ use std::collections::HashMap;
use std::str::FromStr;
use std::sync::atomic::Ordering;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use tracing::*;
use utils::id::{TenantId, TenantTimelineId, TimelineId};
use utils::lsn::Lsn;

struct GlobalTimelinesState {
    timelines: HashMap<TenantTimelineId, Arc<Timeline>>,

    // A tombstone indicates this timeline used to exist and has been deleted. These are used to prevent
    // on-demand timeline creation from recreating deleted timelines. This is only soft-enforced, as
    // this map is dropped on restart.
    tombstones: HashMap<TenantTimelineId, Instant>,

    conf: Option<SafeKeeperConf>,
    broker_active_set: Arc<TimelinesSet>,
    load_lock: Arc<tokio::sync::Mutex<TimelineLoadLock>>,

@@ -71,17 +64,11 @@ impl GlobalTimelinesState {
            .cloned()
            .ok_or(TimelineError::NotFound(*ttid))
    }

    fn delete(&mut self, ttid: TenantTimelineId) {
        self.timelines.remove(&ttid);
        self.tombstones.insert(ttid, Instant::now());
    }
}

static TIMELINES_STATE: Lazy<Mutex<GlobalTimelinesState>> = Lazy::new(|| {
    Mutex::new(GlobalTimelinesState {
        timelines: HashMap::new(),
        tombstones: HashMap::new(),
        conf: None,
        broker_active_set: Arc::new(TimelinesSet::default()),
        load_lock: Arc::new(tokio::sync::Mutex::new(TimelineLoadLock)),

@@ -211,17 +198,11 @@ impl GlobalTimelines {
        let tli = Arc::new(timeline);

        // TODO: prevent concurrent timeline creation/loading
        {
            let mut state = TIMELINES_STATE.lock().unwrap();

            // We may have been asked to load a timeline that was previously deleted (e.g. from `pull_timeline.rs`). We trust
            // that the human doing this manual intervention knows what they are doing, and remove its tombstone.
            if state.tombstones.remove(&ttid).is_some() {
                warn!("Un-deleted timeline {ttid}");
            }

            state.timelines.insert(ttid, tli.clone());
        }
        TIMELINES_STATE
            .lock()
            .unwrap()
            .timelines
            .insert(ttid, tli.clone());

        tli.bootstrap(&conf, broker_active_set, partial_backup_rate_limiter);

@@ -248,7 +229,7 @@ impl GlobalTimelines {

    /// Create a new timeline with the given id. If the timeline already exists, returns
    /// an existing timeline.
    pub(crate) async fn create(
    pub async fn create(
        ttid: TenantTimelineId,
        server_info: ServerInfo,
        commit_lsn: Lsn,

@@ -260,11 +241,6 @@ impl GlobalTimelines {
                // Timeline already exists, return it.
                return Ok(timeline);
            }

            if state.tombstones.contains_key(&ttid) {
                anyhow::bail!("Timeline {ttid} is deleted, refusing to recreate");
            }

            state.get_dependencies()
        };

@@ -324,19 +300,17 @@ impl GlobalTimelines {
    /// Get a timeline from the global map. If it's not present, it doesn't exist on disk,
    /// or was corrupted and couldn't be loaded on startup. Returned timeline is always valid,
    /// i.e. loaded in memory and not cancelled.
    pub(crate) fn get(ttid: TenantTimelineId) -> Result<Arc<Timeline>, TimelineError> {
        let tli_res = {
            let state = TIMELINES_STATE.lock().unwrap();
            state.get(&ttid)
        };
        match tli_res {
    pub fn get(ttid: TenantTimelineId) -> Result<Arc<Timeline>, TimelineError> {
        let res = TIMELINES_STATE.lock().unwrap().get(&ttid);

        match res {
            Ok(tli) => {
                if tli.is_cancelled() {
                    return Err(TimelineError::Cancelled(ttid));
                }
                Ok(tli)
            }
            _ => tli_res,
            _ => res,
        }
    }

@@ -365,26 +339,12 @@ impl GlobalTimelines {

    /// Cancels timeline, then deletes the corresponding data directory.
    /// If only_local, doesn't remove WAL segments in remote storage.
    pub(crate) async fn delete(
    pub async fn delete(
        ttid: &TenantTimelineId,
        only_local: bool,
    ) -> Result<TimelineDeleteForceResult> {
        let tli_res = {
            let state = TIMELINES_STATE.lock().unwrap();

            if state.tombstones.contains_key(ttid) {
                // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do.
                info!("Timeline {ttid} was already deleted");
                return Ok(TimelineDeleteForceResult {
                    dir_existed: false,
                    was_active: false,
                });
            }

            state.get(ttid)
        };

        let result = match tli_res {
        let tli_res = TIMELINES_STATE.lock().unwrap().get(ttid);
        match tli_res {
            Ok(timeline) => {
                let was_active = timeline.broker_active.load(Ordering::Relaxed);

@@ -394,6 +354,11 @@ impl GlobalTimelines {
                info!("deleting timeline {}, only_local={}", ttid, only_local);
                let dir_existed = timeline.delete(&mut shared_state, only_local).await?;

                // Remove timeline from the map.
                // FIXME: re-enable it once we fix the issue with recreation of deleted timelines
                // https://github.com/neondatabase/neon/issues/3146
                // TIMELINES_STATE.lock().unwrap().timelines.remove(ttid);

                Ok(TimelineDeleteForceResult {
                    dir_existed,
                    was_active, // TODO: we probably should remove this field

@@ -409,14 +374,7 @@ impl GlobalTimelines {
                    was_active: false,
                })
            }
        };

        // Finalize deletion, by dropping Timeline objects and storing smaller tombstones. The tombstones
        // are used to prevent still-running computes from re-creating the same timeline when they send data,
        // and to speed up repeated deletion calls by avoiding re-listing objects.
        TIMELINES_STATE.lock().unwrap().delete(*ttid);

        result
    }
    }

    /// Deactivates and deletes all timelines for the tenant. Returns map of all timelines which

@@ -462,20 +420,19 @@ impl GlobalTimelines {
            tenant_id,
        ))?;

        // FIXME: we temporarily disabled removing timelines from the map, see `delete_force`
        // let tlis_after_delete = Self::get_all_for_tenant(*tenant_id);
        // if !tlis_after_delete.is_empty() {
        //     // Some timelines were created while we were deleting them, returning error
        //     // to the caller, so it can retry later.
        //     bail!(
        //         "failed to delete all timelines for tenant {}: some timelines were created while we were deleting them",
        //         tenant_id
        //     );
        // }

        Ok(deleted)
    }

    pub fn housekeeping(tombstone_ttl: &Duration) {
        let mut state = TIMELINES_STATE.lock().unwrap();

        // We keep tombstones long enough to have a good chance of preventing rogue computes from re-creating deleted
        // timelines. If a compute kept running for longer than this TTL (or across a safekeeper restart) then they
        // may recreate a deleted timeline.
        let now = Instant::now();
        state
            .tombstones
            .retain(|_, v| now.duration_since(*v) < *tombstone_ttl);
    }
}

#[derive(Clone, Copy, Serialize)]
|
||||
|
||||
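The tombstone map above is the whole deletion-memory mechanism: `delete` swaps the `Arc<Timeline>` entry for a timestamp, and `housekeeping` expires entries older than a TTL. A minimal, self-contained sketch of the same retain-based expiry (the TTL constant here is an assumed placeholder; the real value comes from safekeeper configuration):

use std::collections::HashMap;
use std::time::{Duration, Instant};

fn main() {
    let tombstone_ttl = Duration::from_secs(3600); // assumed placeholder TTL
    let mut tombstones: HashMap<u64, Instant> = HashMap::new();
    tombstones.insert(1, Instant::now());

    // Drop tombstones older than the TTL, the same retain() shape used above.
    let now = Instant::now();
    tombstones.retain(|_, created| now.duration_since(*created) < tombstone_ttl);

    // Still within the TTL, so re-creation of timeline 1 would stay blocked.
    assert!(tombstones.contains_key(&1));
}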
@@ -4,10 +4,9 @@
//!
use anyhow::{Context, Result};
use postgres_backend::QueryError;
use std::time::Duration;
use std::{future, time::Duration};
use tokio::net::TcpStream;
use tokio_io_timeout::TimeoutReader;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::{auth::Scope, measured_stream::MeasuredStream};

@@ -101,7 +100,7 @@ async fn handle_socket(
    // libpq protocol between safekeeper and walproposer / pageserver
    // We don't use shutdown.
    pgbackend
        .run(&mut conn_handler, &CancellationToken::new())
        .run(&mut conn_handler, future::pending::<()>)
        .await
}


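Replacing a freshly created `CancellationToken` with `future::pending::<()>` preserves the semantics: a shutdown signal that can never fire. A sketch of the idea using tokio::select!, with `do_work` as a hypothetical stand-in for the connection handler (assumes the tokio crate):

use std::future;

#[tokio::main]
async fn main() {
    let do_work = async { 42 }; // hypothetical stand-in for the real handler

    // future::pending::<()>() never resolves, so the select can only finish
    // via do_work; this expresses "no shutdown signal" without allocating a token.
    let result = tokio::select! {
        v = do_work => v,
        _ = future::pending::<()>() => unreachable!("pending never resolves"),
    };
    assert_eq!(result, 42);
}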
@@ -4062,14 +4062,7 @@ impl Service {
            placement_policy: Some(PlacementPolicy::Attached(0)), // No secondaries, for convenient debug/hacking

            // There is no way to know what the tenant's config was: revert to defaults
            //
            // TODO: remove `switch_aux_file_policy` once we finish auxv2 migration
            //
            // we write to both v1+v2 storage, so that the test case can use either storage format for testing
            config: TenantConfig {
                switch_aux_file_policy: Some(models::AuxFilePolicy::CrossValidation),
                ..TenantConfig::default()
            },
            config: TenantConfig::default(),
        })
        .await?;


@@ -1,4 +1,4 @@
use futures::{StreamExt, TryStreamExt};
use futures::StreamExt;
use pageserver::tenant::storage_layer::LayerName;
use serde::{Deserialize, Serialize};

@@ -29,7 +29,7 @@ impl LargeObjectKind {
        }
    }

#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize)]
pub struct LargeObject {
    pub key: String,
    pub size: u64,
@@ -45,76 +45,53 @@ pub async fn find_large_objects(
    bucket_config: BucketConfig,
    min_size: u64,
    ignore_deltas: bool,
    concurrency: usize,
) -> anyhow::Result<LargeObjectListing> {
    let (s3_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
    let tenants = std::pin::pin!(stream_tenants(&s3_client, &target));

    let objects_stream = tenants.map_ok(|tenant_shard_id| {
        let mut tenant_root = target.tenant_root(&tenant_shard_id);
        let s3_client = s3_client.clone();
        async move {
            let mut objects = Vec::new();
            let mut total_objects_ctr = 0u64;
            // We want the objects and not just common prefixes
            tenant_root.delimiter.clear();
            let mut continuation_token = None;
            loop {
                let fetch_response =
                    list_objects_with_retries(&s3_client, &tenant_root, continuation_token.clone())
                        .await?;
                for obj in fetch_response.contents().iter().filter(|o| {
                    if let Some(obj_size) = o.size {
                        min_size as i64 <= obj_size
                    } else {
                        false
                    }
                }) {
                    let key = obj.key().expect("couldn't get key").to_owned();
                    let kind = LargeObjectKind::from_key(&key);
                    if ignore_deltas && kind == LargeObjectKind::DeltaLayer {
                        continue;
                    }
                    objects.push(LargeObject {
                        key,
                        size: obj.size.unwrap() as u64,
                        kind,
                    })
                }
                total_objects_ctr += fetch_response.contents().len() as u64;
                match fetch_response.next_continuation_token {
                    Some(new_token) => continuation_token = Some(new_token),
                    None => break,
                }
            }

            Ok((tenant_shard_id, objects, total_objects_ctr))
        }
    });
    let mut objects_stream = std::pin::pin!(objects_stream.try_buffer_unordered(concurrency));

    let (s3_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver)?;
    let mut tenants = std::pin::pin!(stream_tenants(&s3_client, &target));
    let mut objects = Vec::new();

    let mut tenant_ctr = 0u64;
    let mut object_ctr = 0u64;
    while let Some(res) = objects_stream.next().await {
        let (tenant_shard_id, objects_slice, total_objects_ctr) = res?;
        objects.extend_from_slice(&objects_slice);
    while let Some(tenant_shard_id) = tenants.next().await {
        let tenant_shard_id = tenant_shard_id?;
        let mut tenant_root = target.tenant_root(&tenant_shard_id);
        // We want the objects and not just common prefixes
        tenant_root.delimiter.clear();
        let mut continuation_token = None;
        loop {
            let fetch_response =
                list_objects_with_retries(&s3_client, &tenant_root, continuation_token.clone())
                    .await?;
            for obj in fetch_response.contents().iter().filter(|o| {
                if let Some(obj_size) = o.size {
                    min_size as i64 <= obj_size
                } else {
                    false
                }
            }) {
                let key = obj.key().expect("couldn't get key").to_owned();
                let kind = LargeObjectKind::from_key(&key);
                if ignore_deltas && kind == LargeObjectKind::DeltaLayer {
                    continue;
                }
                objects.push(LargeObject {
                    key,
                    size: obj.size.unwrap() as u64,
                    kind,
                })
            }
            object_ctr += fetch_response.contents().len() as u64;
            match fetch_response.next_continuation_token {
                Some(new_token) => continuation_token = Some(new_token),
                None => break,
            }
        }

        object_ctr += total_objects_ctr;
        tenant_ctr += 1;
        if tenant_ctr % 100 == 0 {
        if tenant_ctr % 50 == 0 {
            tracing::info!(
                "Scanned {tenant_ctr} shards. objects={object_ctr}, found={}, current={tenant_shard_id}.",
                objects.len()
                "Scanned {tenant_ctr} shards. objects={object_ctr}, found={}, current={tenant_shard_id}.", objects.len()
            );
        }
    }

    let bucket_name = target.bucket_name();
    tracing::info!(
        "Scan of {bucket_name} finished. Scanned {tenant_ctr} shards. objects={object_ctr}, found={}.",
        objects.len()
    );
    Ok(LargeObjectListing { objects })
}

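The hunk above drops the concurrent per-tenant scan, a stream of per-tenant futures drained with `try_buffer_unordered`, in favor of a plain sequential loop. For reference, a reduced sketch of the concurrent pattern being removed, run over a dummy in-memory stream (assumes the futures, tokio, and anyhow crates):

use futures::{stream, StreamExt, TryStreamExt};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Each Ok item is mapped to an async job; try_buffer_unordered keeps up to
    // 4 jobs in flight and yields results in completion order, failing fast on error.
    let results: Vec<u64> = stream::iter(vec![Ok::<u64, anyhow::Error>(1), Ok(2), Ok(3)])
        .map_ok(|n| async move { Ok::<u64, anyhow::Error>(n * 10) })
        .try_buffer_unordered(4)
        .try_collect()
        .await?;
    assert_eq!(results.iter().sum::<u64>(), 60);
    Ok(())
}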
@@ -140,7 +140,7 @@ async fn find_garbage_inner(
    node_kind: NodeKind,
) -> anyhow::Result<GarbageList> {
    // Construct clients for S3 and for Console API
    let (s3_client, target) = init_remote(bucket_config.clone(), node_kind).await?;
    let (s3_client, target) = init_remote(bucket_config.clone(), node_kind)?;
    let cloud_admin_api_client = Arc::new(CloudAdminApiClient::new(console_config));

    // Build a set of console-known tenants, for quickly eliminating known-active tenants without having
@@ -432,7 +432,7 @@ pub async fn purge_garbage(
    );

    let (s3_client, target) =
        init_remote(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?;
        init_remote(garbage_list.bucket_config.clone(), garbage_list.node_kind)?;

    // Sanity checks on the incoming list
    if garbage_list.active_tenant_count == 0 {

@@ -15,10 +15,17 @@ use std::fmt::Display;
use std::sync::Arc;
use std::time::Duration;

use anyhow::{anyhow, Context};
use aws_sdk_s3::config::Region;
use aws_sdk_s3::error::DisplayErrorContext;
use aws_sdk_s3::Client;
use anyhow::Context;
use aws_config::environment::EnvironmentVariableCredentialsProvider;
use aws_config::imds::credentials::ImdsCredentialsProvider;
use aws_config::meta::credentials::CredentialsProviderChain;
use aws_config::profile::ProfileFileCredentialsProvider;
use aws_config::retry::RetryConfig;
use aws_config::sso::SsoCredentialsProvider;
use aws_config::BehaviorVersion;
use aws_sdk_s3::config::{AsyncSleep, Region, SharedAsyncSleep};
use aws_sdk_s3::{Client, Config};
use aws_smithy_async::rt::sleep::TokioSleep;

use camino::{Utf8Path, Utf8PathBuf};
use clap::ValueEnum;
@@ -235,53 +242,85 @@ impl ConsoleConfig {
    }
}

pub fn init_logging(file_name: &str) -> Option<WorkerGuard> {
pub fn init_logging(file_name: &str) -> WorkerGuard {
    let (file_writer, guard) =
        tracing_appender::non_blocking(tracing_appender::rolling::never("./logs/", file_name));

    let file_logs = fmt::Layer::new()
        .with_target(false)
        .with_ansi(false)
        .with_writer(file_writer);
    let stderr_logs = fmt::Layer::new()
        .with_target(false)
        .with_writer(std::io::stderr);
    tracing_subscriber::registry()
        .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
        .with(file_logs)
        .with(stderr_logs)
        .init();

    let disable_file_logging = match std::env::var("PAGESERVER_DISABLE_FILE_LOGGING") {
        Ok(s) => s == "1" || s.to_lowercase() == "true",
        Err(_) => false,
    guard
}

pub fn init_s3_client(bucket_region: Region) -> Client {
    let credentials_provider = {
        // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
        let chain = CredentialsProviderChain::first_try(
            "env",
            EnvironmentVariableCredentialsProvider::new(),
        )
        // uses "AWS_PROFILE" / `aws sso login --profile <profile>`
        .or_else(
            "profile-sso",
            ProfileFileCredentialsProvider::builder().build(),
        );

        // Use SSO if we were given an account ID
        match std::env::var("SSO_ACCOUNT_ID").ok() {
            Some(sso_account) => chain.or_else(
                "sso",
                SsoCredentialsProvider::builder()
                    .account_id(sso_account)
                    .role_name("PowerUserAccess")
                    .start_url("https://neondb.awsapps.com/start")
                    .region(bucket_region.clone())
                    .build(),
            ),
            None => chain,
        }
        .or_else(
            // Finally try IMDS
            "imds",
            ImdsCredentialsProvider::builder().build(),
        )
    };

    if disable_file_logging {
        tracing_subscriber::registry()
            .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
            .with(stderr_logs)
            .init();
        None
    } else {
        let (file_writer, guard) =
            tracing_appender::non_blocking(tracing_appender::rolling::never("./logs/", file_name));
        let file_logs = fmt::Layer::new()
            .with_target(false)
            .with_ansi(false)
            .with_writer(file_writer);
        tracing_subscriber::registry()
            .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
            .with(stderr_logs)
            .with(file_logs)
            .init();
        Some(guard)
    }
}
    let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());

pub async fn init_s3_client(bucket_region: Region) -> Client {
    let config = aws_config::defaults(aws_config::BehaviorVersion::v2024_03_28())
    let mut builder = Config::builder()
        .behavior_version(
            #[allow(deprecated)] /* TODO: https://github.com/neondatabase/neon/issues/7665 */
            BehaviorVersion::v2023_11_09(),
        )
        .region(bucket_region)
        .load()
        .await;
    Client::new(&config)
        .retry_config(RetryConfig::adaptive().with_max_attempts(3))
        .sleep_impl(SharedAsyncSleep::from(sleep_impl))
        .credentials_provider(credentials_provider);

    if let Ok(endpoint) = env::var("AWS_ENDPOINT_URL") {
        builder = builder.endpoint_url(endpoint)
    }

    Client::from_conf(builder.build())
}

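The provider chain added above is standard aws-config layering: each source is tried in order until one yields credentials. A reduced sketch of the same first_try/or_else construction, leaving out the optional SSO leg that depends on SSO_ACCOUNT_ID:

use aws_config::environment::EnvironmentVariableCredentialsProvider;
use aws_config::imds::credentials::ImdsCredentialsProvider;
use aws_config::meta::credentials::CredentialsProviderChain;
use aws_config::profile::ProfileFileCredentialsProvider;

fn build_chain() -> CredentialsProviderChain {
    // Order matters: explicit env vars win, then the shared AWS profile,
    // then EC2 instance metadata as the last resort.
    CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
        .or_else("profile", ProfileFileCredentialsProvider::builder().build())
        .or_else("imds", ImdsCredentialsProvider::builder().build())
}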
async fn init_remote(
fn init_remote(
    bucket_config: BucketConfig,
    node_kind: NodeKind,
) -> anyhow::Result<(Arc<Client>, RootTarget)> {
    let bucket_region = Region::new(bucket_config.region);
    let delimiter = "/".to_string();
    let s3_client = Arc::new(init_s3_client(bucket_region).await);
    let s3_client = Arc::new(init_s3_client(bucket_region));

    let s3_root = match node_kind {
        NodeKind::Pageserver => RootTarget::Pageserver(S3Target {
@@ -306,7 +345,7 @@ async fn list_objects_with_retries(
    s3_target: &S3Target,
    continuation_token: Option<String>,
) -> anyhow::Result<aws_sdk_s3::operation::list_objects_v2::ListObjectsV2Output> {
    for trial in 0..MAX_RETRIES {
    for _ in 0..MAX_RETRIES {
        match s3_client
            .list_objects_v2()
            .bucket(&s3_target.bucket_name)
@@ -318,22 +357,16 @@ async fn list_objects_with_retries(
        {
            Ok(response) => return Ok(response),
            Err(e) => {
                if trial == MAX_RETRIES - 1 {
                    return Err(e)
                        .with_context(|| format!("Failed to list objects {MAX_RETRIES} times"));
                }
                error!(
                    "list_objects_v2 query failed: bucket_name={}, prefix={}, delimiter={}, error={}",
                    s3_target.bucket_name,
                    s3_target.prefix_in_bucket,
                    s3_target.delimiter,
                    DisplayErrorContext(e),
                    "list_objects_v2 query failed: {e}, bucket_name={}, prefix={}, delimiter={}",
                    s3_target.bucket_name, s3_target.prefix_in_bucket, s3_target.delimiter
                );
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
    }
    Err(anyhow!("unreachable unless MAX_RETRIES==0"))

    anyhow::bail!("Failed to list objects {MAX_RETRIES} times")
}

async fn download_object_with_retries(

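Both the old and new bodies of `list_objects_with_retries` are the same bounded-retry loop with a fixed one-second pause; only the exit path differs. A generic sketch of that shape, with MAX_RETRIES as an assumed constant mirroring the one used above:

use std::time::Duration;

const MAX_RETRIES: usize = 3; // assumed; stands in for the crate's constant

async fn with_retries<T, E: std::fmt::Display>(
    mut op: impl FnMut() -> Result<T, E>,
) -> anyhow::Result<T> {
    for _ in 0..MAX_RETRIES {
        match op() {
            Ok(v) => return Ok(v),
            Err(e) => {
                // Log and back off, then try again, just like the loop above.
                eprintln!("attempt failed: {e}, retrying in 1s");
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
    }
    anyhow::bail!("failed {MAX_RETRIES} times")
}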
@@ -78,8 +78,6 @@ enum Command {
        min_size: u64,
        #[arg(short, long, default_value_t = false)]
        ignore_deltas: bool,
        #[arg(long = "concurrency", short = 'j', default_value_t = 64)]
        concurrency: usize,
    },
}

@@ -196,7 +194,7 @@ async fn main() -> anyhow::Result<()> {
            concurrency,
        } => {
            let downloader =
                SnapshotDownloader::new(bucket_config, tenant_id, output_path, concurrency).await?;
                SnapshotDownloader::new(bucket_config, tenant_id, output_path, concurrency)?;
            downloader.download().await
        }
        Command::PageserverPhysicalGc {
@@ -212,15 +210,10 @@ async fn main() -> anyhow::Result<()> {
        Command::FindLargeObjects {
            min_size,
            ignore_deltas,
            concurrency,
        } => {
            let summary = find_large_objects::find_large_objects(
                bucket_config,
                min_size,
                ignore_deltas,
                concurrency,
            )
            .await?;
            let summary =
                find_large_objects::find_large_objects(bucket_config, min_size, ignore_deltas)
                    .await?;
            println!("{}", serde_json::to_string(&summary).unwrap());
            Ok(())
        }

@@ -160,7 +160,7 @@ pub async fn pageserver_physical_gc(
    min_age: Duration,
    mode: GcMode,
) -> anyhow::Result<GcSummary> {
    let (s3_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
    let (s3_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver)?;

    let tenants = if tenant_ids.is_empty() {
        futures::future::Either::Left(stream_tenants(&s3_client, &target))

@@ -199,7 +199,7 @@ pub async fn scan_metadata(
    bucket_config: BucketConfig,
    tenant_ids: Vec<TenantShardId>,
) -> anyhow::Result<MetadataSummary> {
    let (s3_client, target) = init_remote(bucket_config, NodeKind::Pageserver).await?;
    let (s3_client, target) = init_remote(bucket_config, NodeKind::Pageserver)?;

    let tenants = if tenant_ids.is_empty() {
        futures::future::Either::Left(stream_tenants(&s3_client, &target))

@@ -106,7 +106,7 @@ pub async fn scan_safekeeper_metadata(
    let timelines = client.query(&query, &[]).await?;
    info!("loaded {} timelines", timelines.len());

    let (s3_client, target) = init_remote(bucket_config, NodeKind::Safekeeper).await?;
    let (s3_client, target) = init_remote(bucket_config, NodeKind::Safekeeper)?;
    let console_config = ConsoleConfig::from_env()?;
    let cloud_admin_api_client = CloudAdminApiClient::new(console_config);


@@ -28,13 +28,13 @@ pub struct SnapshotDownloader {
}

impl SnapshotDownloader {
    pub async fn new(
    pub fn new(
        bucket_config: BucketConfig,
        tenant_id: TenantId,
        output_path: Utf8PathBuf,
        concurrency: usize,
    ) -> anyhow::Result<Self> {
        let (s3_client, s3_root) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
        let (s3_client, s3_root) = init_remote(bucket_config.clone(), NodeKind::Pageserver)?;
        Ok(Self {
            s3_client,
            s3_root,
@@ -215,8 +215,7 @@ impl SnapshotDownloader {
    }

    pub async fn download(&self) -> anyhow::Result<()> {
        let (s3_client, target) =
            init_remote(self.bucket_config.clone(), NodeKind::Pageserver).await?;
        let (s3_client, target) = init_remote(self.bucket_config.clone(), NodeKind::Pageserver)?;

        // Generate a stream of TenantShardId
        let shards = stream_tenant_shards(&s3_client, &target, self.tenant_id).await?;

@@ -1,263 +0,0 @@
from __future__ import annotations

import time
from typing import TYPE_CHECKING, cast

import requests

if TYPE_CHECKING:
    from typing import Any, Dict, Literal, Optional, Union

    from fixtures.pg_version import PgVersion


def connection_parameters_to_env(params: Dict[str, str]) -> Dict[str, str]:
    return {
        "PGHOST": params["host"],
        "PGDATABASE": params["database"],
        "PGUSER": params["role"],
        "PGPASSWORD": params["password"],
    }


class NeonAPI:
    def __init__(self, neon_api_key: str, neon_api_base_url: str):
        self.__neon_api_key = neon_api_key
        self.__neon_api_base_url = neon_api_base_url.strip("/")

    def __request(
        self, method: Union[str, bytes], endpoint: str, **kwargs: Any
    ) -> requests.Response:
        if "headers" not in kwargs:
            kwargs["headers"] = {}
        kwargs["headers"]["Authorization"] = f"Bearer {self.__neon_api_key}"

        return requests.request(method, f"{self.__neon_api_base_url}{endpoint}", **kwargs)

    def create_project(
        self,
        pg_version: Optional[PgVersion] = None,
        name: Optional[str] = None,
        branch_name: Optional[str] = None,
        branch_role_name: Optional[str] = None,
        branch_database_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        data: Dict[str, Any] = {
            "project": {
                "branch": {},
            },
        }
        if name:
            data["project"]["name"] = name
        if pg_version:
            data["project"]["pg_version"] = int(pg_version)
        if branch_name:
            data["project"]["branch"]["name"] = branch_name
        if branch_role_name:
            data["project"]["branch"]["role_name"] = branch_role_name
        if branch_database_name:
            data["project"]["branch"]["database_name"] = branch_database_name

        resp = self.__request(
            "POST",
            "/projects",
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
            json=data,
        )

        assert resp.status_code == 201

        return cast("Dict[str, Any]", resp.json())

    def get_project_details(self, project_id: str) -> Dict[str, Any]:
        resp = self.__request(
            "GET",
            f"/projects/{project_id}",
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        assert resp.status_code == 200
        return cast("Dict[str, Any]", resp.json())

    def delete_project(
        self,
        project_id: str,
    ) -> Dict[str, Any]:
        resp = self.__request(
            "DELETE",
            f"/projects/{project_id}",
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def start_endpoint(
        self,
        project_id: str,
        endpoint_id: str,
    ) -> Dict[str, Any]:
        resp = self.__request(
            "POST",
            f"/projects/{project_id}/endpoints/{endpoint_id}/start",
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def suspend_endpoint(
        self,
        project_id: str,
        endpoint_id: str,
    ) -> Dict[str, Any]:
        resp = self.__request(
            "POST",
            f"/projects/{project_id}/endpoints/{endpoint_id}/suspend",
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def restart_endpoint(
        self,
        project_id: str,
        endpoint_id: str,
    ) -> Dict[str, Any]:
        resp = self.__request(
            "POST",
            f"/projects/{project_id}/endpoints/{endpoint_id}/restart",
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def create_endpoint(
        self,
        project_id: str,
        branch_id: str,
        endpoint_type: Literal["read_write", "read_only"],
        settings: Dict[str, Any],
    ) -> Dict[str, Any]:
        data: Dict[str, Any] = {
            "endpoint": {
                "branch_id": branch_id,
            },
        }

        if endpoint_type:
            data["endpoint"]["type"] = endpoint_type
        if settings:
            data["endpoint"]["settings"] = settings

        resp = self.__request(
            "POST",
            f"/projects/{project_id}/endpoints",
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
            json=data,
        )

        assert resp.status_code == 201

        return cast("Dict[str, Any]", resp.json())

    def get_connection_uri(
        self,
        project_id: str,
        branch_id: Optional[str] = None,
        endpoint_id: Optional[str] = None,
        database_name: str = "neondb",
        role_name: str = "neondb_owner",
        pooled: bool = True,
    ) -> Dict[str, Any]:
        resp = self.__request(
            "GET",
            f"/projects/{project_id}/connection_uri",
            params={
                "branch_id": branch_id,
                "endpoint_id": endpoint_id,
                "database_name": database_name,
                "role_name": role_name,
                "pooled": pooled,
            },
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def get_branches(self, project_id: str) -> Dict[str, Any]:
        resp = self.__request(
            "GET",
            f"/projects/{project_id}/branches",
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def get_endpoints(self, project_id: str) -> Dict[str, Any]:
        resp = self.__request(
            "GET",
            f"/projects/{project_id}/endpoints",
            headers={
                "Accept": "application/json",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def get_operations(self, project_id: str) -> Dict[str, Any]:
        resp = self.__request(
            "GET",
            f"/projects/{project_id}/operations",
            headers={
                "Accept": "application/json",
                "Authorization": f"Bearer {self.__neon_api_key}",
            },
        )

        assert resp.status_code == 200

        return cast("Dict[str, Any]", resp.json())

    def wait_for_operation_to_finish(self, project_id: str):
        has_running = True
        while has_running:
            has_running = False
            operations = self.get_operations(project_id)["operations"]
            for op in operations:
                if op["status"] in {"scheduling", "running", "cancelling"}:
                    has_running = True
            time.sleep(0.5)
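`wait_for_operation_to_finish` above is a plain poll-until-quiescent loop, and the lag-measuring helpers later in this diff use the same shape with a deadline. A minimal sketch of that pattern, with `is_done` as a hypothetical probe:

use std::time::{Duration, Instant};

fn poll_until(mut is_done: impl FnMut() -> bool, timeout: Duration) -> Result<Duration, String> {
    let start = Instant::now();
    while start.elapsed() < timeout {
        if is_done() {
            // Report how long the wait took, the same number the lag tests record.
            return Ok(start.elapsed());
        }
        std::thread::sleep(Duration::from_millis(500));
    }
    Err(format!("condition not met within {timeout:?}"))
}

fn main() {
    let mut calls = 0;
    let waited = poll_until(
        || {
            calls += 1;
            calls >= 3
        },
        Duration::from_secs(10),
    )
    .unwrap();
    println!("done after {waited:?}");
}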
@@ -87,8 +87,6 @@ from fixtures.utils import (
)
from fixtures.utils import AuxFileStore as AuxFileStore # reexport

from .neon_api import NeonAPI

"""
This file contains pytest fixtures. A fixture is a test resource that can be
summoned by placing its name in the test's arguments.
@@ -186,25 +184,6 @@ def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Ite
    yield versioned_dir


@pytest.fixture(scope="session")
def neon_api_key() -> str:
    api_key = os.getenv("NEON_API_KEY")
    if not api_key:
        raise AssertionError("Set the NEON_API_KEY environment variable")

    return api_key


@pytest.fixture(scope="session")
def neon_api_base_url() -> str:
    return os.getenv("NEON_API_BASE_URL", "https://console-stage.neon.build/api/v2")


@pytest.fixture(scope="session")
def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
    return NeonAPI(neon_api_key, neon_api_base_url)


def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
"""Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
|
||||
|
||||
@@ -964,8 +943,6 @@ class NeonEnvBuilder:
                # if the test threw an exception, don't check for errors
                # as a failing assertion would cause the cleanup below to fail
                ps_assert_metric_no_errors=(exc_type is None),
                # do not fail on endpoint errors to allow the rest of cleanup to proceed
                fail_on_endpoint_errors=False,
            )
            cleanup_error = None

@@ -1237,11 +1214,11 @@ class NeonEnv:
        for f in futs:
            f.result()

    def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
    def stop(self, immediate=False, ps_assert_metric_no_errors=False):
        """
        After this method returns, there should be no child processes running.
        """
        self.endpoints.stop_all(fail_on_endpoint_errors)
        self.endpoints.stop_all()

        # Stop storage controller before pageservers: we don't want it to spuriously
        # detect a pageserver "failure" during test teardown
@@ -2883,45 +2860,14 @@ class PgBin:
        env.update(env_add)
        return env

    def _log_env(self, env: dict[str, str]) -> None:
        env_s = {}
        for k, v in env.items():
            if k.startswith("PG") and k != "PGPASSWORD":
                env_s[k] = v
        log.debug(f"Environment: {env_s}")

    def run_nonblocking(
        self,
        command: List[str],
        env: Optional[Env] = None,
        cwd: Optional[Union[str, Path]] = None,
    ) -> subprocess.Popen[Any]:
        """
        Run one of the postgres binaries, not waiting for it to finish

        The command should be in list form, e.g. ['pgbench', '-p', '55432']

        All the necessary environment variables will be set.

        If the first argument (the command name) doesn't include a path (no '/'
        characters present), then it will be edited to include the correct path.

        If you want stdout/stderr captured to files, use `run_capture` instead.
        """
        self._fixpath(command)
        log.info(f"Running command '{' '.join(command)}'")
        env = self._build_env(env)
        self._log_env(env)
        return subprocess.Popen(command, env=env, cwd=cwd, stdout=subprocess.PIPE, text=True)

    def run(
        self,
        command: List[str],
        env: Optional[Env] = None,
        cwd: Optional[Union[str, Path]] = None,
    ) -> None:
    ):
        """
        Run one of the postgres binaries, waiting for it to finish
        Run one of the postgres binaries.

        The command should be in list form, e.g. ['pgbench', '-p', '55432']

@@ -2932,10 +2878,11 @@ class PgBin:

        If you want stdout/stderr captured to files, use `run_capture` instead.
        """
        proc = self.run_nonblocking(command, env, cwd)
        proc.wait()
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, proc.args)

        self._fixpath(command)
        log.info(f"Running command '{' '.join(command)}'")
        env = self._build_env(env)
        subprocess.run(command, env=env, cwd=cwd, check=True)

    def run_capture(
        self,
@@ -2955,7 +2902,6 @@ class PgBin:
        self._fixpath(command)
        log.info(f"Running command '{' '.join(command)}'")
        env = self._build_env(env)
        self._log_env(env)
        base_path, _, _ = subprocess_capture(
            self.log_dir,
            command,
@@ -3953,17 +3899,9 @@ class EndpointFactory:
            pageserver_id=pageserver_id,
        )

    def stop_all(self, fail_on_error=True) -> "EndpointFactory":
        exception = None
    def stop_all(self) -> "EndpointFactory":
        for ep in self.endpoints:
            try:
                ep.stop()
            except Exception as e:
                log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
                exception = e

        if fail_on_error and exception is not None:
            raise exception
            ep.stop()

        return self


@@ -1,24 +1,8 @@
from __future__ import annotations

import time
import traceback
from typing import TYPE_CHECKING

import psycopg2
import psycopg2.extras
import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.common_types import Lsn
from fixtures.log_helper import log
from fixtures.neon_api import connection_parameters_to_env
from fixtures.neon_fixtures import AuxFileStore, logical_replication_sync
from fixtures.pg_version import PgVersion

if TYPE_CHECKING:
    from fixtures.benchmark_fixture import NeonBenchmarker
    from fixtures.neon_api import NeonAPI
    from fixtures.neon_fixtures import NeonEnv, PgBin
    from fixtures.pg_version import PgVersion
from fixtures.neon_fixtures import AuxFileStore, NeonEnv, PgBin, logical_replication_sync


@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.V2])
@@ -42,6 +26,7 @@ def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg
    vanilla_pg.safe_psql("truncate table pgbench_history")

    connstr = endpoint.connstr().replace("'", "''")
    print(f"connstr='{connstr}'")
    vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1")

    # Wait for the logical replication channel to be established
@@ -57,286 +42,3 @@ def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg
    sum_master = endpoint.safe_psql("select sum(abalance) from pgbench_accounts")[0][0]
    sum_replica = vanilla_pg.safe_psql("select sum(abalance) from pgbench_accounts")[0][0]
    assert sum_master == sum_replica


def check_pgbench_still_running(pgbench, label=""):
    rc = pgbench.poll()
    if rc is not None:
        raise RuntimeError(f"{label} pgbench terminated early with return code {rc}")


def measure_logical_replication_lag(sub_cur, pub_cur, timeout_sec=600):
    start = time.time()
    pub_cur.execute("SELECT pg_current_wal_flush_lsn()")
    pub_lsn = Lsn(pub_cur.fetchall()[0][0])
    while (time.time() - start) < timeout_sec:
        sub_cur.execute("SELECT latest_end_lsn FROM pg_catalog.pg_stat_subscription")
        res = sub_cur.fetchall()[0][0]
        if res:
            log.info(f"subscriber_lsn={res}")
            sub_lsn = Lsn(res)
            log.info(f"Subscriber LSN={sub_lsn}, publisher LSN={pub_lsn}")
            if sub_lsn >= pub_lsn:
                return time.time() - start
        time.sleep(0.5)
    raise TimeoutError(f"Logical replication sync took more than {timeout_sec} sec")


@pytest.mark.remote_cluster
@pytest.mark.timeout(2 * 60 * 60)
def test_subscriber_lag(
    pg_bin: PgBin,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    """
    Creates a publisher and subscriber, runs pgbench inserts on publisher and pgbench selects
    on subscriber. Periodically restarts subscriber while still running the inserts, and
    measures how long sync takes after restart.
    """
    test_duration_min = 60
    sync_interval_min = 5
    pgbench_duration = f"-T{test_duration_min * 60 * 2}"

    pub_project = neon_api.create_project(pg_version)
    pub_project_id = pub_project["project"]["id"]
    neon_api.wait_for_operation_to_finish(pub_project_id)
    error_occurred = False
    try:
        sub_project = neon_api.create_project(pg_version)
        sub_project_id = sub_project["project"]["id"]
        sub_endpoint_id = sub_project["endpoints"][0]["id"]
        neon_api.wait_for_operation_to_finish(sub_project_id)
        try:
            pub_env = connection_parameters_to_env(
                pub_project["connection_uris"][0]["connection_parameters"]
            )
            sub_env = connection_parameters_to_env(
                sub_project["connection_uris"][0]["connection_parameters"]
            )
            pub_connstr = pub_project["connection_uris"][0]["connection_uri"]
            sub_connstr = sub_project["connection_uris"][0]["connection_uri"]

            pg_bin.run_capture(["pgbench", "-i", "-s100"], env=pub_env)
            pg_bin.run_capture(["pgbench", "-i", "-s100"], env=sub_env)

            pub_conn = psycopg2.connect(pub_connstr)
            sub_conn = psycopg2.connect(sub_connstr)
            pub_conn.autocommit = True
            sub_conn.autocommit = True
            with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
                sub_cur.execute("truncate table pgbench_accounts")
                sub_cur.execute("truncate table pgbench_history")

                pub_cur.execute(
                    "create publication pub1 for table pgbench_accounts, pgbench_history"
                )
                sub_cur.execute(
                    f"create subscription sub1 connection '{pub_connstr}' publication pub1"
                )

                initial_sync_lag = measure_logical_replication_lag(sub_cur, pub_cur)
            pub_conn.close()
            sub_conn.close()

            zenbenchmark.record(
                "initial_sync_lag", initial_sync_lag, "s", MetricReport.LOWER_IS_BETTER
            )

            pub_workload = pg_bin.run_nonblocking(
                ["pgbench", "-c10", pgbench_duration, "-Mprepared"], env=pub_env
            )
            try:
                sub_workload = pg_bin.run_nonblocking(
                    ["pgbench", "-c10", pgbench_duration, "-S"],
                    env=sub_env,
                )
                try:
                    start = time.time()
                    while time.time() - start < test_duration_min * 60:
                        time.sleep(sync_interval_min * 60)
                        check_pgbench_still_running(pub_workload, "pub")
                        check_pgbench_still_running(sub_workload, "sub")

                        with psycopg2.connect(pub_connstr) as pub_conn, psycopg2.connect(
                            sub_connstr
                        ) as sub_conn:
                            with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
                                lag = measure_logical_replication_lag(sub_cur, pub_cur)

                        log.info(f"Replica lagged behind master by {lag} seconds")
                        zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
                        sub_workload.terminate()
                        neon_api.restart_endpoint(
                            sub_project_id,
                            sub_endpoint_id,
                        )
                        neon_api.wait_for_operation_to_finish(sub_project_id)
                        sub_workload = pg_bin.run_nonblocking(
                            ["pgbench", "-c10", pgbench_duration, "-S"],
                            env=sub_env,
                        )

                        # Measure storage to make sure replication information isn't bloating storage
                        sub_storage = neon_api.get_project_details(sub_project_id)["project"][
                            "synthetic_storage_size"
                        ]
                        pub_storage = neon_api.get_project_details(pub_project_id)["project"][
                            "synthetic_storage_size"
                        ]
                        zenbenchmark.record(
                            "sub_storage", sub_storage, "B", MetricReport.LOWER_IS_BETTER
                        )
                        zenbenchmark.record(
                            "pub_storage", pub_storage, "B", MetricReport.LOWER_IS_BETTER
                        )

                finally:
                    sub_workload.terminate()
            finally:
                pub_workload.terminate()
        except Exception as e:
            error_occurred = True
            log.error(f"Caught exception {e}")
            log.error(traceback.format_exc())
        finally:
            if not error_occurred:
                neon_api.delete_project(sub_project_id)
    except Exception as e:
        error_occurred = True
        log.error(f"Caught exception {e}")
        log.error(traceback.format_exc())
    finally:
        assert not error_occurred
        neon_api.delete_project(pub_project_id)


@pytest.mark.remote_cluster
@pytest.mark.timeout(2 * 60 * 60)
def test_publisher_restart(
    pg_bin: PgBin,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    """
    Creates a publisher and subscriber, runs pgbench inserts on publisher and pgbench selects
    on subscriber. Periodically restarts publisher (to exercise on-demand WAL download), and
    measures how long sync takes after restart.
    """
    test_duration_min = 60
    sync_interval_min = 5
    pgbench_duration = f"-T{test_duration_min * 60 * 2}"

    pub_project = neon_api.create_project(pg_version)
    pub_project_id = pub_project["project"]["id"]
    pub_endpoint_id = pub_project["endpoints"][0]["id"]
    neon_api.wait_for_operation_to_finish(pub_project_id)
    error_occurred = False
    try:
        sub_project = neon_api.create_project(pg_version)
        sub_project_id = sub_project["project"]["id"]
        neon_api.wait_for_operation_to_finish(sub_project_id)
        try:
            pub_env = connection_parameters_to_env(
                pub_project["connection_uris"][0]["connection_parameters"]
            )
            sub_env = connection_parameters_to_env(
                sub_project["connection_uris"][0]["connection_parameters"]
            )
            pub_connstr = pub_project["connection_uris"][0]["connection_uri"]
            sub_connstr = sub_project["connection_uris"][0]["connection_uri"]

            pg_bin.run_capture(["pgbench", "-i", "-s100"], env=pub_env)
            pg_bin.run_capture(["pgbench", "-i", "-s100"], env=sub_env)

            pub_conn = psycopg2.connect(pub_connstr)
            sub_conn = psycopg2.connect(sub_connstr)
            pub_conn.autocommit = True
            sub_conn.autocommit = True
            with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
                sub_cur.execute("truncate table pgbench_accounts")
                sub_cur.execute("truncate table pgbench_history")

                pub_cur.execute(
                    "create publication pub1 for table pgbench_accounts, pgbench_history"
                )
                sub_cur.execute(
                    f"create subscription sub1 connection '{pub_connstr}' publication pub1"
                )

                initial_sync_lag = measure_logical_replication_lag(sub_cur, pub_cur)
            pub_conn.close()
            sub_conn.close()

            zenbenchmark.record(
                "initial_sync_lag", initial_sync_lag, "s", MetricReport.LOWER_IS_BETTER
            )

            pub_workload = pg_bin.run_nonblocking(
                ["pgbench", "-c10", pgbench_duration, "-Mprepared"], env=pub_env
            )
            try:
                sub_workload = pg_bin.run_nonblocking(
                    ["pgbench", "-c10", pgbench_duration, "-S"],
                    env=sub_env,
                )
                try:
                    start = time.time()
                    while time.time() - start < test_duration_min * 60:
                        time.sleep(sync_interval_min * 60)
                        check_pgbench_still_running(pub_workload, "pub")
                        check_pgbench_still_running(sub_workload, "sub")

                        pub_workload.terminate()
                        neon_api.restart_endpoint(
                            pub_project_id,
                            pub_endpoint_id,
                        )
                        neon_api.wait_for_operation_to_finish(pub_project_id)
                        pub_workload = pg_bin.run_nonblocking(
                            ["pgbench", "-c10", pgbench_duration, "-Mprepared"],
                            env=pub_env,
                        )
                        with psycopg2.connect(pub_connstr) as pub_conn, psycopg2.connect(
                            sub_connstr
                        ) as sub_conn:
                            with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
                                lag = measure_logical_replication_lag(sub_cur, pub_cur)

                        log.info(f"Replica lagged behind master by {lag} seconds")
                        zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)

                        # Measure storage to make sure replication information isn't bloating storage
                        sub_storage = neon_api.get_project_details(sub_project_id)["project"][
                            "synthetic_storage_size"
                        ]
                        pub_storage = neon_api.get_project_details(pub_project_id)["project"][
                            "synthetic_storage_size"
                        ]
                        zenbenchmark.record(
                            "sub_storage", sub_storage, "B", MetricReport.LOWER_IS_BETTER
                        )
                        zenbenchmark.record(
                            "pub_storage", pub_storage, "B", MetricReport.LOWER_IS_BETTER
                        )

                finally:
                    sub_workload.terminate()
            finally:
                pub_workload.terminate()
        except Exception as e:
            error_occurred = True
            log.error(f"Caught exception {e}")
            log.error(traceback.format_exc())
        finally:
            if not error_occurred:
                neon_api.delete_project(sub_project_id)
    except Exception as e:
        error_occurred = True
        log.error(f"Caught exception {e}")
        log.error(traceback.format_exc())
    finally:
        assert not error_occurred
        neon_api.delete_project(pub_project_id)

@@ -1,296 +0,0 @@
from __future__ import annotations

import csv
import os
import subprocess
import time
import traceback
from pathlib import Path
from typing import TYPE_CHECKING

import psycopg2
import psycopg2.extras
import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.common_types import Lsn
from fixtures.log_helper import log
from fixtures.neon_api import connection_parameters_to_env
from fixtures.pg_version import PgVersion

if TYPE_CHECKING:
    from typing import Any, List, Optional

    from fixtures.benchmark_fixture import NeonBenchmarker
    from fixtures.neon_api import NeonAPI
    from fixtures.neon_fixtures import PgBin


# Granularity of ~0.5 sec
def measure_replication_lag(master, replica, timeout_sec=600):
    start = time.time()
    master.execute("SELECT pg_current_wal_flush_lsn()")
    master_lsn = Lsn(master.fetchall()[0][0])
    while (time.time() - start) < timeout_sec:
        replica.execute("select pg_last_wal_replay_lsn()")
        replica_lsn = replica.fetchall()[0][0]
        if replica_lsn:
            if Lsn(replica_lsn) >= master_lsn:
                return time.time() - start
        time.sleep(0.5)
    raise TimeoutError(f"Replication sync took more than {timeout_sec} sec")


def check_pgbench_still_running(pgbench):
    rc = pgbench.poll()
    if rc is not None:
        raise RuntimeError(f"Pgbench terminated early with return code {rc}")


@pytest.mark.remote_cluster
@pytest.mark.timeout(2 * 60 * 60)
def test_ro_replica_lag(
    pg_bin: PgBin,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    test_duration_min = 60
    sync_interval_min = 10

    pgbench_duration = f"-T{test_duration_min * 60 * 2}"

    project = neon_api.create_project(pg_version)
    project_id = project["project"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)
    error_occurred = False
    try:
        branch_id = project["branch"]["id"]
        master_connstr = project["connection_uris"][0]["connection_uri"]
        master_env = connection_parameters_to_env(
            project["connection_uris"][0]["connection_parameters"]
        )

        replica = neon_api.create_endpoint(
            project_id,
            branch_id,
            endpoint_type="read_only",
            settings={"pg_settings": {"hot_standby_feedback": "on"}},
        )
        replica_env = master_env.copy()
        replica_env["PGHOST"] = replica["endpoint"]["host"]
        neon_api.wait_for_operation_to_finish(project_id)

        replica_connstr = neon_api.get_connection_uri(
            project_id,
            endpoint_id=replica["endpoint"]["id"],
        )["uri"]

        pg_bin.run_capture(["pgbench", "-i", "-s100"], env=master_env)

        master_workload = pg_bin.run_nonblocking(
            ["pgbench", "-c10", pgbench_duration, "-Mprepared"],
            env=master_env,
        )
        try:
            replica_workload = pg_bin.run_nonblocking(
                ["pgbench", "-c10", pgbench_duration, "-S"],
                env=replica_env,
            )
            try:
                start = time.time()
                while time.time() - start < test_duration_min * 60:
                    check_pgbench_still_running(master_workload)
                    check_pgbench_still_running(replica_workload)
                    time.sleep(sync_interval_min * 60)
                    with psycopg2.connect(master_connstr) as conn_master, psycopg2.connect(
                        replica_connstr
                    ) as conn_replica:
                        with conn_master.cursor() as cur_master, conn_replica.cursor() as cur_replica:
                            lag = measure_replication_lag(cur_master, cur_replica)
                    log.info(f"Replica lagged behind master by {lag} seconds")
                    zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
            finally:
                replica_workload.terminate()
        finally:
            master_workload.terminate()
    except Exception as e:
        error_occurred = True
        log.error(f"Caught exception: {e}")
        log.error(traceback.format_exc())
    finally:
        assert not error_occurred  # Fail the test if an error occurred
        neon_api.delete_project(project_id)


def report_pgbench_aggregate_intervals(
    output_dir: Path,
    prefix: str,
    zenbenchmark: NeonBenchmarker,
):
    for filename in os.listdir(output_dir):
        if filename.startswith(prefix):
            # The file will be in the form <prefix>_<node>.<pid>
            # So we first lop off the .<pid>, and then lop off the prefix and the _
            node = filename.split(".")[0][len(prefix) + 1 :]
            with open(output_dir / filename) as f:
                reader = csv.reader(f, delimiter=" ")
                for line in reader:
                    num_transactions = int(line[1])
                    if num_transactions == 0:
                        continue
                    sum_latency = int(line[2])
                    sum_lag = int(line[3])
                    zenbenchmark.record(
                        f"{node}_num_txns", num_transactions, "txns", MetricReport.HIGHER_IS_BETTER
                    )
                    zenbenchmark.record(
                        f"{node}_avg_latency",
                        sum_latency / num_transactions,
                        "s",
                        MetricReport.LOWER_IS_BETTER,
                    )
                    zenbenchmark.record(
                        f"{node}_avg_lag",
                        sum_lag / num_transactions,
                        "s",
                        MetricReport.LOWER_IS_BETTER,
                    )


@pytest.mark.remote_cluster
@pytest.mark.timeout(2 * 60 * 60)
def test_replication_start_stop(
    pg_bin: PgBin,
    test_output_dir: Path,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    """
    Cycles through different configurations of read replicas being enabled and disabled. The whole time,
    there's a pgbench read/write workload going on the master. For each replica, we either turn it
    on or off, and see how long it takes to catch up after some set amount of time of replicating
    the pgbench.
    """

    prefix = "pgbench_agg"
    num_replicas = 2
    configuration_test_time_sec = 10 * 60
    pgbench_duration = f"-T{2 ** num_replicas * configuration_test_time_sec}"
    error_occurred = False

    project = neon_api.create_project(pg_version)
    project_id = project["project"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)
    try:
        branch_id = project["branch"]["id"]
        master_connstr = project["connection_uris"][0]["connection_uri"]
        master_env = connection_parameters_to_env(
            project["connection_uris"][0]["connection_parameters"]
        )

        replicas = []
        for _ in range(num_replicas):
            replicas.append(
                neon_api.create_endpoint(
                    project_id,
                    branch_id,
                    endpoint_type="read_only",
                    settings={"pg_settings": {"hot_standby_feedback": "on"}},
                )
            )
            neon_api.wait_for_operation_to_finish(project_id)

        replica_connstr = [
            neon_api.get_connection_uri(
                project_id,
                endpoint_id=replicas[i]["endpoint"]["id"],
            )["uri"]
            for i in range(num_replicas)
        ]
        replica_env = [master_env.copy() for _ in range(num_replicas)]
        for i in range(num_replicas):
            replica_env[i]["PGHOST"] = replicas[i]["endpoint"]["host"]

        pg_bin.run_capture(["pgbench", "-i", "-s10"], env=master_env)

        # Sync replicas
        with psycopg2.connect(master_connstr) as conn_master:
            with conn_master.cursor() as cur_master:
                for i in range(num_replicas):
                    conn_replica = psycopg2.connect(replica_connstr[i])
                    measure_replication_lag(cur_master, conn_replica.cursor())

        master_pgbench = pg_bin.run_nonblocking(
            [
                "pgbench",
                "-c10",
                pgbench_duration,
                "-Mprepared",
                "--log",
                f"--log-prefix={test_output_dir}/{prefix}_master",
                f"--aggregate-interval={configuration_test_time_sec}",
            ],
            env=master_env,
        )
        replica_pgbench: List[Optional[subprocess.Popen[Any]]] = [None for _ in range(num_replicas)]

        # Use the bits of iconfig to tell us which configuration we are on. For example
        # an iconfig of 2 is 10 in binary, indicating replica 0 is suspended and replica 1 is
        # alive.
        for iconfig in range((1 << num_replicas) - 1, -1, -1):

            def replica_enabled(iconfig: int = iconfig):
                return bool((iconfig >> ireplica) & 1)

            # Change configuration
            for ireplica in range(num_replicas):
                if replica_enabled() and replica_pgbench[ireplica] is None:
                    replica_pgbench[ireplica] = pg_bin.run_nonblocking(
                        [
                            "pgbench",
                            "-c10",
                            "-S",
                            pgbench_duration,
                            "--log",
                            f"--log-prefix={test_output_dir}/{prefix}_replica_{ireplica}",
                            f"--aggregate-interval={configuration_test_time_sec}",
                        ],
                        env=replica_env[ireplica],
                    )
                elif not replica_enabled() and replica_pgbench[ireplica] is not None:
                    pgb = replica_pgbench[ireplica]
                    assert pgb is not None
                    pgb.terminate()
                    pgb.wait()
                    replica_pgbench[ireplica] = None

                    neon_api.suspend_endpoint(
                        project_id,
                        replicas[ireplica]["endpoint"]["id"],
                    )
                    neon_api.wait_for_operation_to_finish(project_id)

            time.sleep(configuration_test_time_sec)

            with psycopg2.connect(master_connstr) as conn_master:
                with conn_master.cursor() as cur_master:
                    for ireplica in range(num_replicas):
                        replica_conn = psycopg2.connect(replica_connstr[ireplica])
                        lag = measure_replication_lag(cur_master, replica_conn.cursor())
                        zenbenchmark.record(
                            f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
                        )
                        log.info(
                            f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
                        )
        master_pgbench.terminate()
    except Exception as e:
        error_occurred = True
        log.error(f"Caught exception {e}")
        log.error(traceback.format_exc())
    finally:
        assert not error_occurred
        neon_api.delete_project(project_id)
    # Only report results if we didn't error out
    report_pgbench_aggregate_intervals(test_output_dir, prefix, zenbenchmark)
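The bitmask walk in the test above (each bit of iconfig encodes whether one replica is running) is easy to check in isolation. A small sketch of the same enumeration:

fn main() {
    let num_replicas = 2u32;
    // Walk configurations from all-replicas-on (0b11) down to all-off (0b00),
    // reading bit i as the on/off state of replica i.
    for iconfig in (0..(1u32 << num_replicas)).rev() {
        let states: Vec<bool> = (0..num_replicas).map(|i| (iconfig >> i) & 1 == 1).collect();
        println!("config {iconfig:02b}: {states:?}");
    }
}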
@@ -1,23 +0,0 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql_func",
|
||||
[
|
||||
"trigger_panic",
|
||||
"trigger_segfault",
|
||||
"💣", # calls `trigger_segfault` internally
|
||||
],
|
||||
)
|
||||
def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
|
||||
"""
|
||||
Test that triggering crash from neon_test_utils crashes the endpoint
|
||||
"""
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_endpoint_crash")
|
||||
endpoint = env.endpoints.create_start("test_endpoint_crash")
|
||||
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
|
||||
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
|
||||
endpoint.safe_psql(f"SELECT {sql_func}();")

@@ -1,4 +1,3 @@
import time
from pathlib import Path

from fixtures.log_helper import log

@@ -73,46 +72,3 @@ WITH (fillfactor='100');
    blocks = query_scalar(cur, "select approximate_working_set_size(true)")
    log.info(f"working set size after some index access of a few select pages only {blocks}")
    assert blocks < 10


def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
    env = neon_simple_env

    endpoint = env.endpoints.create_start(
        branch_name="main",
        config_lines=[
            "autovacuum = off",
            "shared_buffers=1MB",
            "neon.max_file_cache_size=256MB",
            "neon.file_cache_size_limit=245MB",
        ],
    )
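    # shared_buffers is deliberately tiny so that most page reads go through
    # Neon's local file cache, which is presumably where the extension's
    # working-set sketch observes accesses; autovacuum is off so no background
    # process perturbs the measurement.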
    conn = endpoint.connect()
    cur = conn.cursor()
    cur.execute("create extension neon version '1.4'")
    cur.execute(
        "create table t(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
    )
    cur.execute("insert into t (pk) values (generate_series(1,1000000))")
    time.sleep(2)
    before_10k = time.monotonic()
    cur.execute("select sum(count) from t where pk between 10000 and 20000")
    time.sleep(2)
    before_1k = time.monotonic()
    cur.execute("select sum(count) from t where pk between 1000 and 2000")
    after = time.monotonic()

    cur.execute(f"select approximate_working_set_size_seconds({int(after - before_1k + 1)})")
    estimation_1k = cur.fetchall()[0][0]
    log.info(f"Working set size for selecting 1k records {estimation_1k}")

    cur.execute(f"select approximate_working_set_size_seconds({int(after - before_10k + 1)})")
    estimation_10k = cur.fetchall()[0][0]
    log.info(f"Working set size for selecting 10k records {estimation_10k}")

    cur.execute("select pg_table_size('t')")
    size = cur.fetchall()[0][0] // 8192
    log.info(f"Table size {size} blocks")

    assert estimation_1k >= 20 and estimation_1k <= 40
    assert estimation_10k >= 200 and estimation_10k <= 400
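    # Rough sanity check on the bounds: each row is about 150 bytes (4-byte pk,
    # 4-byte count, ~130-byte payload plus tuple overhead), so an 8 KB page
    # holds around 50 rows; scanning ~1000 consecutive pks therefore touches
    # roughly 20-40 pages, and ~10000 pks about ten times that.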

@@ -50,7 +50,7 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
    # Ensure that the default version is also updated in the neon.control file
    assert cur.fetchone() == ("1.3",)
    cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
    all_versions = ["1.4", "1.3", "1.2", "1.1", "1.0"]
    all_versions = ["1.3", "1.2", "1.1", "1.0"]
    current_version = "1.3"
    for idx, begin_version in enumerate(all_versions):
        for target_version in all_versions[idx + 1 :]:
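            # all_versions is ordered newest-first, so every (begin_version,
            # target_version) pair visited here has target_version strictly
            # older than begin_version.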

@@ -16,8 +16,6 @@ from fixtures.pageserver.utils import (
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
from fixtures.utils import wait_until
from fixtures.workload import Workload
from werkzeug.wrappers.request import Request
from werkzeug.wrappers.response import Response

# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.

@@ -61,7 +59,7 @@ def evict_random_layers(


@pytest.mark.parametrize("seed", [1, 2, 3])
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver, seed: int):
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
    """
    Issue many location configuration changes, ensure that tenants
    remain readable & we don't get any unexpected errors. We should

@@ -75,20 +73,6 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
    neon_env_builder.enable_pageserver_remote_storage(
        remote_storage_kind=s3_storage(),
    )
    neon_env_builder.control_plane_compute_hook_api = (
        f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach"
    )

    def ignore_notify(request: Request):
        # This test does all its own compute configuration (by passing an explicit pageserver ID to Workload functions),
        # so we send controller notifications to /dev/null to prevent it fighting the test for control of the compute.
        log.info(f"Ignoring storage controller compute notification: {request.json}")
        return Response(status=200)

    make_httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(
        ignore_notify
    )

    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)

    pageservers = env.pageservers

@@ -115,15 +99,6 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
    workload.init(env.pageservers[0].id)
    workload.write_rows(256, env.pageservers[0].id)

    # Discourage the storage controller from interfering with the changes we will make directly on the pageserver
    env.storage_controller.tenant_policy_update(
        tenant_id,
        {
            "scheduling": "Stop",
        },
    )
    env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy Stop.*")

    # We use a fixed seed to make the test reproducible: we want a randomly
    # chosen order, but not to change the order every time we run the test.
    rng = random.Random(seed)

@@ -8,11 +8,8 @@ from typing import TYPE_CHECKING, cast

import pytest
from fixtures.neon_fixtures import (
    Endpoint,
    NeonEnv,
    NeonEnvBuilder,
    check_restored_datadir_content,
    tenant_get_shards,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import s3_storage

@@ -24,97 +21,6 @@ if TYPE_CHECKING:
    from pytest import CaptureFixture


TENANT_CONF = {
    # Scaled down thresholds so that we are exercising the pageserver beyond just writing
    # ephemeral/L0 layers, and because debug-mode code is slow to read from full sized ephemeral layer files.
    "pitr_interval": "60s",
    "checkpoint_distance": f"{8 * 1024 * 1024}",
    "compaction_target_size": f"{8 * 1024 * 1024}",
}

# # Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
# # There should have been compactions mid-test as well; this final check is in addition to those.
# for (shard, pageserver) in tenant_get_shards(env, env.initial_tenant):
#     pageserver.http_client().timeline_checkpoint(env.initial_tenant, env.initial_timeline, force_repartition=True, force_image_layer_creation=True)


def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: Endpoint):
    """
    After running some opaque tests that create interesting content in a timeline, run
    some generic integrity checks that the storage stack is able to reproduce the written
    data properly.
    """

    ignored_files: Optional[list[str]] = None

    # Neon handles unlogged relations in a special manner. During a
    # basebackup, we ship the init fork as the main fork. This presents a
    # problem in that the endpoint's data directory and the basebackup will
    # have differences and will fail the eventual file comparison.
    #
    # Unlogged tables were introduced in version 9.1. ALTER TABLE grew
    # support for setting the persistence of a table in 9.5. The reason that
    # this doesn't affect versions < 15 (but probably would between 9.1 and
    # 9.5) is that all the regression tests that deal with unlogged tables
    # up until that point dropped the unlogged tables or set them to logged
    # at some point during the test.
    #
    # In version 15, Postgres grew support for unlogged sequences, and with
    # that came a few more regression tests. These tests did not all drop
    # the unlogged tables/sequences prior to finishing.
    #
    # But unlogged sequences came with a bug: sequences didn't
    # inherit the persistence of their "parent" tables if they had one. This
    # was fixed and backported to 15, thus exacerbating our problem a bit.
    #
    # So what we can do is just ignore file differences between the data
    # directory and basebackup for unlogged relations.
    results = cast(
        "list[tuple[str, str]]",
        endpoint.safe_psql(
            """
            SELECT
                relkind,
                pg_relation_filepath(
                    pg_filenode_relation(reltablespace, relfilenode)
                ) AS unlogged_relation_paths
            FROM pg_class
            WHERE relpersistence = 'u'
            """,
            dbname=db_name,
        ),
    )

    unlogged_relation_files: list[str] = []
    for r in results:
        unlogged_relation_files.append(r[1])
        # This is related to the following Postgres commit:
        #
        # commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
        # Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
        # Date:   2023-08-23 09:21:31 -0500
        #
        # Use the buffer cache when initializing an unlogged index.
        #
        # This patch was backpatched to 16. Without it, the LSN in the
        # page header would be 0/0 in the data directory, which wouldn't
        # match the LSN generated during the basebackup, thus creating
        # a difference.
        if env.pg_version <= PgVersion.V15 and r[0] == "i":
            unlogged_relation_files.append(f"{r[1]}_init")

    ignored_files = unlogged_relation_files

    check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)

    # Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
    # There should have been compactions mid-test as well; this final check is in addition to those.
    for shard, pageserver in tenant_get_shards(env, env.initial_tenant):
        pageserver.http_client().timeline_checkpoint(
            shard, env.initial_timeline, force_repartition=True, force_image_layer_creation=True
        )


# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.timeout(600)

@@ -139,10 +45,7 @@ def test_pg_regress(

    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(
        initial_tenant_conf=TENANT_CONF,
        initial_tenant_shard_count=shard_count,
    )
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")

@@ -181,7 +84,67 @@ def test_pg_regress(
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    post_checks(env, test_output_dir, DBNAME, endpoint)
    ignored_files: Optional[list[str]] = None

    # Neon handles unlogged relations in a special manner. During a
    # basebackup, we ship the init fork as the main fork. This presents a
    # problem in that the endpoint's data directory and the basebackup will
    # have differences and will fail the eventual file comparison.
    #
    # Unlogged tables were introduced in version 9.1. ALTER TABLE grew
    # support for setting the persistence of a table in 9.5. The reason that
    # this doesn't affect versions < 15 (but probably would between 9.1 and
    # 9.5) is that all the regression tests that deal with unlogged tables
    # up until that point dropped the unlogged tables or set them to logged
    # at some point during the test.
    #
    # In version 15, Postgres grew support for unlogged sequences, and with
    # that came a few more regression tests. These tests did not all drop
    # the unlogged tables/sequences prior to finishing.
    #
    # But unlogged sequences came with a bug: sequences didn't
    # inherit the persistence of their "parent" tables if they had one. This
    # was fixed and backported to 15, thus exacerbating our problem a bit.
    #
    # So what we can do is just ignore file differences between the data
    # directory and basebackup for unlogged relations.
    results = cast(
        "list[tuple[str, str]]",
        endpoint.safe_psql(
            """
            SELECT
                relkind,
                pg_relation_filepath(
                    pg_filenode_relation(reltablespace, relfilenode)
                ) AS unlogged_relation_paths
            FROM pg_class
            WHERE relpersistence = 'u'
            """,
            dbname=DBNAME,
        ),
    )

    unlogged_relation_files: list[str] = []
    for r in results:
        unlogged_relation_files.append(r[1])
        # This is related to the following Postgres commit:
        #
        # commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
        # Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
        # Date:   2023-08-23 09:21:31 -0500
        #
        # Use the buffer cache when initializing an unlogged index.
        #
        # This patch was backpatched to 16. Without it, the LSN in the
        # page header would be 0/0 in the data directory, which wouldn't
        # match the LSN generated during the basebackup, thus creating
        # a difference.
        if env.pg_version <= PgVersion.V15 and r[0] == "i":
            unlogged_relation_files.append(f"{r[1]}_init")

    ignored_files = unlogged_relation_files

    check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)


# Run the PostgreSQL "isolation" tests, in src/test/isolation.

@@ -196,20 +159,16 @@ def test_isolation(
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    DBNAME = "isolation_regression"

    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(
        initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
    )
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    # isolation tests use prepared transactions, so enable them
    endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=100"])
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
    endpoint.safe_psql("CREATE DATABASE isolation_regression")

    # Create some local directories for pg_isolation_regress to run in.
    runpath = test_output_dir / "regress"

@@ -243,9 +202,6 @@ def test_isolation(
    with capsys.disabled():
        pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath)

    # This fails with a mismatch on `pg_multixact/offsets/0000`
    # post_checks(env, test_output_dir, DBNAME, endpoint)


# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.

@@ -259,19 +215,15 @@ def test_sql_regress(
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    DBNAME = "regression"

    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(
        initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
    )
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
    endpoint.safe_psql("CREATE DATABASE regression")

    # Create some local directories for pg_regress to run in.
    runpath = test_output_dir / "regress"

@@ -306,4 +258,4 @@ def test_sql_regress(
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    post_checks(env, test_output_dir, DBNAME, endpoint)
    check_restored_datadir_content(test_output_dir, env, endpoint)

@@ -1,11 +1,7 @@
from __future__ import annotations

import random
import time
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnv
from fixtures.neon_fixtures import NeonEnv


def test_physical_replication(neon_simple_env: NeonEnv):

@@ -54,4 +54,4 @@ def test_subscriber_restart(neon_simple_env: NeonEnv):
            pcur.execute(f"INSERT into t values ({n_records}, 0)")
            n_records += 1
        with sub.cursor() as scur:
            wait_until(60, 0.5, check_that_changes_propagated)
            wait_until(10, 0.5, check_that_changes_propagated)
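            # Assuming wait_until(iterations, interval_sec, fn) semantics, this
            # change cuts the maximum wait for the subscriber to catch up from
            # 60 * 0.5 = 30s to 10 * 0.5 = 5s.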

@@ -67,9 +67,8 @@ def test_tenant_delete_smoke(

    # first try to delete a non-existent tenant
    tenant_id = TenantId.generate()
    env.pageserver.allowed_errors.extend(
        [".*NotFound.*", ".*simulated failure.*", ".*failed to delete .+ objects.*"]
    )
    env.pageserver.allowed_errors.append(".*NotFound.*")
    env.pageserver.allowed_errors.append(".*simulated failure.*")

    # Check that deleting a non-existent tenant gives the expected result: this is a loop because we
    # may need to retry on some remote storage errors injected by the test harness

@@ -720,30 +720,9 @@ def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path,
    They should have the same effect.
    """

    def assert_size_approx_equal_for_lease_test(size_lease, size_branch):
        """
        Tests that evaluate sizes are checking the pageserver space consumption
        that sits many layers below the user input. The exact space needed
        varies slightly depending on postgres behavior.

        Rather than expecting postgres to be deterministic and occasionally
        failing the test, we permit sizes for the same data to vary by a few pages.
        """

        # FIXME(yuchen): The delta is too large, used as a temporary solution to pass the test reliably.
        # Investigate and reduce the threshold.
        threshold = 22 * 8272
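        # 8272 is presumably the WAL footprint of one full-page image (an
        # 8192-byte page plus record headers), making the tolerance roughly
        # 22 pages' worth of data.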

        log.info(
            f"delta: size_branch({size_branch}) - size_lease({size_lease}) = {size_branch - size_lease}"
        )

        assert size_lease == pytest.approx(size_branch, abs=threshold)

    conf = {
        "pitr_interval": "0s" if zero_gc else "3600s",
        "gc_period": "0s",
        "compaction_period": "0s",
    }

    env = neon_env_builder.init_start(initial_tenant_conf=conf)

@@ -755,7 +734,7 @@ def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path,
    tenant, timeline = env.neon_cli.create_tenant(conf=conf)
    lease_res = insert_with_action(env, tenant, timeline, test_output_dir, action="lease")

    assert_size_approx_equal_for_lease_test(lease_res, ro_branch_res)
    assert_size_approx_equal(lease_res, ro_branch_res)


def insert_with_action(

@@ -775,11 +754,7 @@ def insert_with_action(
    """

    client = env.pageserver.http_client()
    with env.endpoints.create_start(
        "main",
        tenant_id=tenant,
        config_lines=["autovacuum=off"],
    ) as ep:
    with env.endpoints.create_start("main", tenant_id=tenant) as ep:
        initial_size = client.tenant_size(tenant)
        log.info(f"initial size: {initial_size}")

@@ -152,12 +152,10 @@ def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):

    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)

    size_limit_mb = 30

    endpoint_main = env.endpoints.create(
        "test_timeline_size_quota_on_startup",
        # Set a small limit for the test
        config_lines=[f"neon.max_cluster_size={size_limit_mb}MB"],
        config_lines=["neon.max_cluster_size=30MB"],
    )
    endpoint_main.start()

@@ -167,39 +165,17 @@ def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):

    # Insert many rows. This query must fail because of the space limit
    try:

        def write_rows(count):
            for _i in range(count):
                cur.execute(
                    """
                    INSERT INTO foo
                    SELECT 'long string to consume some space' || g
                    FROM generate_series(1, 100) g
        for _i in range(5000):
            cur.execute(
                """
                    """
                )
                INSERT INTO foo
                SELECT 'long string to consume some space' || g
                FROM generate_series(1, 100) g
                """
            )

        # Write some data that exceeds the limit, then let the pageserver ingest it to guarantee that some feedback has made it to
        # the safekeeper, then try to write some more. We expect either the initial writes or the ones after
        # the wait_for_last_flush_lsn to generate an exception.
        #
        # Without the wait_for_last_flush_lsn, the size limit sometimes isn't enforced (see https://github.com/neondatabase/neon/issues/6562)
        write_rows(2500)
        wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
        logical_size = env.pageserver.http_client().timeline_detail(
            env.initial_tenant, new_timeline_id
        )["current_logical_size"]
        assert logical_size > size_limit_mb * 1024 * 1024
        write_rows(2500)

        # If we get here, the timeline size limit failed. Find out from the pageserver how large it
        # thinks the timeline is.
        wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
        logical_size = env.pageserver.http_client().timeline_detail(
            env.initial_tenant, new_timeline_id
        )["current_logical_size"]
        log.error(
            f"Query unexpectedly succeeded, pageserver logical size is {logical_size}"
        )
        # If we get here, the timeline size limit failed
        log.error("Query unexpectedly succeeded")
        raise AssertionError()

    except psycopg2.errors.DiskFull as err:

@@ -30,12 +30,13 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd
clap = { version = "4", features = ["derive", "string"] }
clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] }
crossbeam-utils = { version = "0.8" }
deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] }
either = { version = "1" }
fail = { version = "0.5", default-features = false, features = ["failpoints"] }
futures-channel = { version = "0.3", features = ["sink"] }
futures-core = { version = "0.3" }
futures-executor = { version = "0.3" }
futures-io = { version = "0.3" }
futures-sink = { version = "0.3" }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] }

@@ -68,7 +69,6 @@ sha2 = { version = "0.10", features = ["asm"] }
smallvec = { version = "1", default-features = false, features = ["const_new", "write"] }
subtle = { version = "2" }
sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] }
tikv-jemalloc-sys = { version = "0.5" }
time = { version = "0.3", features = ["macros", "serde-well-known"] }
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
tokio-rustls = { version = "0.24" }

@@ -106,9 +106,7 @@ num-integer = { version = "0.1", features = ["i128"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
proc-macro2 = { version = "1" }
prost = { version = "0.11" }
quote = { version = "1" }
regex = { version = "1" }
regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
regex-syntax = { version = "0.8" }