Add wait events without query to metric.

Add query to pg_wait_sampling metric
Add pg_wait_sampling metric for vms.
2026-03-06 01:40:37 +00:00 · 2023-11-16 23:56:04 +01:00 · 2023-11-16 22:42:08 +01:00 · 2023-11-16 22:04:29 +01:00 · 2023-11-16 20:54:02 +00:00 · 2023-11-16 20:54:02 +00:00
76 changed files with 483 additions and 2096 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -1,3 +1,17 @@
+# The binaries are really slow if you compile them in 'dev' mode with the defaults.
+# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
+# optimizations enabled by "opt-level=1" don't affect debuggability too much.
+#
+# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
+#
+[profile.dev.package."*"]
+# Set the default for dependencies in Development mode.
+opt-level = 3
+
+[profile.dev]
+# Turn on a small amount of optimization in Development mode.
+opt-level = 1
+
 [build]
 # This is only present for local builds, as it will be overridden
 # by the RUSTDOCFLAGS env var in CI.
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,7 +2,7 @@ name: Create Release Branch

 on:
  schedule:
-    - cron: '0 6 * * 1'
+    - cron: '0 7 * * 5'
  workflow_dispatch:

 jobs:
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -9,24 +9,6 @@ refactoring, additional comments, and so forth. Let's try to raise the
 bar, and clean things up as we go. Try to leave code in a better shape
 than it was before.

-## Pre-commit hook
-
-We have a sample pre-commit hook in `pre-commit.py`.
-To set it up, run:
-
-```bash
-ln -s ../../pre-commit.py .git/hooks/pre-commit
-```
-
-This will run following checks on staged files before each commit:
- `rustfmt`
- checks for python files, see [obligatory checks](/docs/sourcetree.md#obligatory-checks).
-
-There is also a separate script `./run_clippy.sh` that runs `cargo clippy` on the whole project
-and `./scripts/reformat` that runs all formatting tools to ensure the project is up to date.
-
-If you want to skip the hook, run `git commit` with `--no-verify` option.
-
 ## Submitting changes

 1. Get at least one +1 on your PR before you push.
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -193,8 +193,6 @@ dependencies = [
 "memchr",
 "pin-project-lite",
 "tokio",
- "zstd",
- "zstd-safe",
 ]

 [[package]]
@@ -2907,8 +2905,6 @@ dependencies = [
 "git-version",
 "pageserver",
 "postgres_ffi",
- "serde",
- "serde_json",
 "svg_fmt",
 "tokio",
 "utils",
@@ -3225,7 +3221,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#988d0ddb4184c408fa7fc1bd0ecca7993c02978f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=6ce32f791526e27533cab0232a6bb243b2c32584#6ce32f791526e27533cab0232a6bb243b2c32584"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -3238,7 +3234,7 @@ dependencies = [
 [[package]]
 name = "postgres-native-tls"
 version = "0.5.0"
-source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#988d0ddb4184c408fa7fc1bd0ecca7993c02978f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=6ce32f791526e27533cab0232a6bb243b2c32584#6ce32f791526e27533cab0232a6bb243b2c32584"
 dependencies = [
 "native-tls",
 "tokio",
@@ -3249,7 +3245,7 @@ dependencies = [
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#988d0ddb4184c408fa7fc1bd0ecca7993c02978f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=6ce32f791526e27533cab0232a6bb243b2c32584#6ce32f791526e27533cab0232a6bb243b2c32584"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -3267,7 +3263,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#988d0ddb4184c408fa7fc1bd0ecca7993c02978f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=6ce32f791526e27533cab0232a6bb243b2c32584#6ce32f791526e27533cab0232a6bb243b2c32584"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -4137,7 +4133,6 @@ dependencies = [
 "reqwest",
 "safekeeper_api",
 "scopeguard",
- "sd-notify",
 "serde",
 "serde_json",
 "serde_with",
@@ -4200,12 +4195,6 @@ dependencies = [
 "untrusted",
 ]

-[[package]]
-name = "sd-notify"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32"
-
 [[package]]
 name = "security-framework"
 version = "2.9.1"
@@ -4944,7 +4933,7 @@ dependencies = [
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#988d0ddb4184c408fa7fc1bd0ecca7993c02978f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=6ce32f791526e27533cab0232a6bb243b2c32584#6ce32f791526e27533cab0232a6bb243b2c32584"
 dependencies = [
 "async-trait",
 "byteorder",
@@ -6042,9 +6031,6 @@ dependencies = [
 "tungstenite",
 "url",
 "uuid",
- "zstd",
- "zstd-safe",
- "zstd-sys",
 ]

 [[package]]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ license = "Apache-2.0"
 [workspace.dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
-async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
+async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
 azure_core = "0.16"
 azure_identity = "0.16"
 azure_storage = "0.16"
@@ -122,7 +122,6 @@ rustls-pemfile = "1"
 rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
-sd-notify = "0.4.1"
 sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
@@ -166,11 +165,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }

 ## Other git libraries
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
@@ -207,7 +206,7 @@ tonic-build = "0.9"

 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }

 ################# Binary contents sections

--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -714,6 +714,23 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -
    cargo pgrx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control

+#########################################################################################
+#
+# Layer "pg-wait-sampling-pg-build"
+# compile pg_wait_sampling extension
+#
+#########################################################################################
+FROM build-deps AS pg-wait-sampling-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/postgrespro/pg_wait_sampling/archive/refs/tags/v1.1.5.tar.gz -O pg_wait_sampling.tar.gz && \
+    echo 'a03da6a413f5652ce470a3635ed6ebba528c74cb26aa4cfced8aff8a8441f81ec6dd657ff62cd6ce96a4e6ce02cad9f2519ae9525367ece60497aa20faafde5c  pg_wait_sampling.tar.gz' | sha512sum -c && \
+    mkdir pg_wait_sampling-src && cd pg_wait_sampling-src && tar xvzf ../pg_wait_sampling.tar.gz --strip-components=1 -C . && \
+    make USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN) && \
+    make USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_wait_sampling.control
+
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -750,6 +767,7 @@ COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-wait-sampling-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -479,6 +479,13 @@ fn cli() -> clap::Command {
                )
                .value_name("FILECACHE_CONNSTR"),
        )
+        .arg(
+            // DEPRECATED, NO LONGER DOES ANYTHING.
+            // See https://github.com/neondatabase/cloud/issues/7516
+            Arg::new("file-cache-on-disk")
+                .long("file-cache-on-disk")
+                .action(clap::ArgAction::SetTrue),
+        )
 }

 #[test]
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 use std::env;
 use std::fs;
 use std::io::BufRead;
+use std::io::Write;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -14,6 +15,7 @@ use chrono::{DateTime, Utc};
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
+use notify::event;
 use postgres::{Client, NoTls};
 use tokio;
 use tokio_postgres;
@@ -644,9 +646,30 @@ impl ComputeNode {
            } else {
                vec![]
            })
+            .stderr(Stdio::piped())
            .spawn()
            .expect("cannot start postgres process");

+        let stderr = pg.stderr.take().unwrap();
+        std::thread::spawn(move || {
+            let reader = std::io::BufReader::new(stderr);
+            let mut last_lines = vec![];
+            for line in reader.lines() {
+                if let Ok(line) = line {
+                    if line.starts_with("2023-") {
+                        // a new timestamped entry begins: emit the buffered lines of the previous log entry as one record
+                        let combined = format!("PG:{}\n", last_lines.join("\u{200B}"));
+                        let res = std::io::stderr().lock().write_all(combined.as_bytes());
+                        if let Err(e) = res {
+                            error!("failed to write to stderr: {}", e);
+                        }
+                        last_lines.clear();
+                    }
+                    last_lines.push(line);
+                }
+            }
+        });
+
        wait_for_postgres(&mut pg, pgdata_path)?;

        Ok(pg)
@@ -698,7 +721,6 @@ impl ComputeNode {
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
        handle_grants(spec, &mut client, self.connstr.as_str())?;
        handle_extensions(spec, &mut client)?;
-        handle_extension_neon(&mut client)?;
        create_availability_check_data(&mut client)?;

        // 'Close' connection
@@ -743,7 +765,6 @@ impl ComputeNode {
            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
            handle_grants(&spec, &mut client, self.connstr.as_str())?;
            handle_extensions(&spec, &mut client)?;
-            handle_extension_neon(&mut client)?;
        }

        // 'Close' connection
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -670,37 +670,13 @@ pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()>
            info!("creating system extensions with query: {}", query);
            client.simple_query(query)?;
        }
+        if libs.contains("pg_wait_sampling") {
+            // Create extension only if this compute really needs it
+            let query = "CREATE EXTENSION IF NOT EXISTS pg_wait_sampling";
+            info!("creating system extensions with query: {}", query);
+            client.simple_query(query)?;
+        }
    }

    Ok(())
 }
-
-/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database
-#[instrument(skip_all)]
-pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
-    info!("handle extension neon");
-
-    let mut query = "CREATE SCHEMA IF NOT EXISTS neon";
-    client.simple_query(query)?;
-
-    query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon";
-    info!("create neon extension with query: {}", query);
-    client.simple_query(query)?;
-
-    query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'";
-    client.simple_query(query)?;
-
-    query = "ALTER EXTENSION neon SET SCHEMA neon";
-    info!("alter neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    // this will be a no-op if extension is already up to date,
-    // which may happen in two cases:
-    // - extension was just installed
-    // - extension was already installed and is up to date
-    let query = "ALTER EXTENSION neon UPDATE";
-    info!("update neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    Ok(())
-}
--- a/control_plane/src/bin/attachment_service.rs
+++ b/control_plane/src/bin/attachment_service.rs
@@ -286,7 +286,6 @@ async fn main() -> anyhow::Result<()> {
    logging::init(
        LogFormat::Plain,
        logging::TracingErrorLayerEnablement::Disabled,
-        logging::Output::Stdout,
    )?;

    let args = Cli::parse();
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -487,15 +487,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            let new_timeline_id_opt = parse_timeline_id(create_match)?;
-
-            let timeline_info = pageserver.timeline_create(
-                tenant_id,
-                new_timeline_id_opt,
-                None,
-                None,
-                Some(pg_version),
-            )?;
+            let timeline_info =
+                pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
            let new_timeline_id = timeline_info.timeline_id;

            let last_record_lsn = timeline_info.last_record_lsn;
@@ -1315,7 +1308,6 @@ fn cli() -> Command {
            .subcommand(Command::new("create")
                .about("Create a new blank timeline")
                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
                .arg(branch_name_arg.clone())
                .arg(pg_version_arg.clone())
            )
--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -281,7 +281,6 @@ fn ensure_logging_ready() {
        utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
        )
        .expect("logging init failed");
    });
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -210,7 +210,6 @@ fn ensure_logging_ready() {
        utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
        )
        .expect("logging init failed");
    });
--- a/libs/utils/scripts/restore_from_wal_initdb.sh
+++ b/libs/utils/scripts/restore_from_wal_initdb.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# like restore_from_wal.sh, but takes existing initdb.tar.zst
-
-set -euxo pipefail
-
-PG_BIN=$1
-WAL_PATH=$2
-DATA_DIR=$3
-PORT=$4
-echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf
-echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf
-REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
-declare -i WAL_SIZE=$REDO_POS+114
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
-cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
-cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
-for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
-dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
-rm -f 000000010000000000000001
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -66,17 +66,9 @@ pub enum TracingErrorLayerEnablement {
    EnableWithRustLogFilter,
 }

-/// Where the logging should output to.
-#[derive(Clone, Copy)]
-pub enum Output {
-    Stdout,
-    Stderr,
-}
-
 pub fn init(
    log_format: LogFormat,
    tracing_error_layer_enablement: TracingErrorLayerEnablement,
-    output: Output,
 ) -> anyhow::Result<()> {
    // We fall back to printing all spans at info-level or above if
    // the RUST_LOG environment variable is not set.
@@ -93,12 +85,7 @@ pub fn init(
        let log_layer = tracing_subscriber::fmt::layer()
            .with_target(false)
            .with_ansi(false)
-            .with_writer(move || -> Box<dyn std::io::Write> {
-                match output {
-                    Output::Stdout => Box::new(std::io::stdout()),
-                    Output::Stderr => Box::new(std::io::stderr()),
-                }
-            });
+            .with_writer(std::io::stdout);
        let log_layer = match log_format {
            LogFormat::Json => log_layer.json().boxed(),
            LogFormat::Plain => log_layer.boxed(),
--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -18,5 +18,3 @@ tokio.workspace = true
 utils.workspace = true
 svg_fmt.workspace = true
 workspace_hack.workspace = true
-serde.workspace = true
-serde_json.workspace = true
--- a/pageserver/ctl/src/index_part.rs
+++ b/pageserver/ctl/src/index_part.rs
@@ -1,38 +0,0 @@
-use std::collections::HashMap;
-
-use anyhow::Context;
-use camino::Utf8PathBuf;
-use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
-use pageserver::tenant::storage_layer::LayerFileName;
-use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
-use utils::lsn::Lsn;
-
-#[derive(clap::Subcommand)]
-pub(crate) enum IndexPartCmd {
-    Dump { path: Utf8PathBuf },
-}
-
-pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
-    match cmd {
-        IndexPartCmd::Dump { path } => {
-            let bytes = tokio::fs::read(path).await.context("read file")?;
-            let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
-            #[derive(serde::Serialize)]
-            struct Output<'a> {
-                layer_metadata: &'a HashMap<LayerFileName, IndexLayerMetadata>,
-                disk_consistent_lsn: Lsn,
-                timeline_metadata: &'a TimelineMetadata,
-            }
-
-            let output = Output {
-                layer_metadata: &des.layer_metadata,
-                disk_consistent_lsn: des.get_disk_consistent_lsn(),
-                timeline_metadata: &des.metadata,
-            };
-
-            let output = serde_json::to_string_pretty(&output).context("serialize output")?;
-            println!("{output}");
-            Ok(())
-        }
-    }
-}
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -5,13 +5,11 @@
 //! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.

 mod draw_timeline_dir;
-mod index_part;
 mod layer_map_analyzer;
 mod layers;

 use camino::{Utf8Path, Utf8PathBuf};
 use clap::{Parser, Subcommand};
-use index_part::IndexPartCmd;
 use layers::LayerCmd;
 use pageserver::{
    context::{DownloadBehavior, RequestContext},
@@ -40,8 +38,6 @@ struct CliOpts {
 #[derive(Subcommand)]
 enum Commands {
    Metadata(MetadataCmd),
-    #[command(subcommand)]
-    IndexPart(IndexPartCmd),
    PrintLayerFile(PrintLayerFileCmd),
    DrawTimeline {},
    AnalyzeLayerMap(AnalyzeLayerMapCmd),
@@ -87,9 +83,6 @@ async fn main() -> anyhow::Result<()> {
        Commands::Metadata(cmd) => {
            handle_metadata(&cmd)?;
        }
-        Commands::IndexPart(cmd) => {
-            index_part::main(&cmd).await?;
-        }
        Commands::DrawTimeline {} => {
            draw_timeline_dir::main()?;
        }
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -103,11 +103,7 @@ fn main() -> anyhow::Result<()> {
    } else {
        TracingErrorLayerEnablement::Disabled
    };
-    logging::init(
-        conf.log_format,
-        tracing_error_layer_enablement,
-        logging::Output::Stdout,
-    )?;
+    logging::init(conf.log_format, tracing_error_layer_enablement)?;

    // mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
    // disarming this hook on pageserver, because we never tear down tracing.
@@ -625,7 +621,6 @@ fn start_pageserver(
                    conf.synthetic_size_calculation_interval,
                    conf.id,
                    local_disk_storage,
-                    cancel,
                    metrics_ctx,
                )
                .instrument(info_span!("metrics_collection"))
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -3,7 +3,7 @@
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::tasks::BackgroundLoopKind;
-use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError};
+use crate::tenant::{mgr, LogicalSizeCalculationCause};
 use camino::Utf8PathBuf;
 use consumption_metrics::EventType;
 use pageserver_api::models::TenantState;
@@ -12,7 +12,6 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::{Duration, SystemTime};
 use tokio::time::Instant;
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::NodeId;

@@ -38,7 +37,6 @@ type RawMetric = (MetricsKey, (EventType, u64));
 type Cache = HashMap<MetricsKey, (EventType, u64)>;

 /// Main thread that serves metrics collection
-#[allow(clippy::too_many_arguments)]
 pub async fn collect_metrics(
    metric_collection_endpoint: &Url,
    metric_collection_interval: Duration,
@@ -46,7 +44,6 @@ pub async fn collect_metrics(
    synthetic_size_calculation_interval: Duration,
    node_id: NodeId,
    local_disk_storage: Utf8PathBuf,
-    cancel: CancellationToken,
    ctx: RequestContext,
 ) -> anyhow::Result<()> {
    if _cached_metric_collection_interval != Duration::ZERO {
@@ -66,13 +63,9 @@ pub async fn collect_metrics(
        "synthetic size calculation",
        false,
        async move {
-            calculate_synthetic_size_worker(
-                synthetic_size_calculation_interval,
-                &cancel,
-                &worker_ctx,
-            )
-            .instrument(info_span!("synthetic_size_worker"))
-            .await?;
+            calculate_synthetic_size_worker(synthetic_size_calculation_interval, &worker_ctx)
+                .instrument(info_span!("synthetic_size_worker"))
+                .await?;
            Ok(())
        },
    );
@@ -248,7 +241,6 @@ async fn reschedule(
 /// Caclculate synthetic size for each active tenant
 async fn calculate_synthetic_size_worker(
    synthetic_size_calculation_interval: Duration,
-    cancel: &CancellationToken,
    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
    info!("starting calculate_synthetic_size_worker");
@@ -280,12 +272,7 @@ async fn calculate_synthetic_size_worker(
                // Same for the loop that fetches computed metrics.
                // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
                // which turns out is really handy to understand the system.
-                if let Err(e) = tenant.calculate_synthetic_size(cause, cancel, ctx).await {
-                    if let Some(PageReconstructError::Cancelled) =
-                        e.downcast_ref::<PageReconstructError>()
-                    {
-                        return Ok(());
-                    }
+                if let Err(e) = tenant.calculate_synthetic_size(cause, ctx).await {
                    error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
                }
            }
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -513,7 +513,6 @@ impl DeletionQueueClient {
    ) -> Result<(), DeletionQueueError> {
        if current_generation.is_none() {
            debug!("Enqueuing deletions in legacy mode, skipping queue");
-
            let mut layer_paths = Vec::new();
            for (layer, generation) in layers {
                layer_paths.push(remote_layer_path(
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -6,7 +6,6 @@ use std::str::FromStr;
 use std::sync::Arc;

 use anyhow::{anyhow, Context, Result};
-use enumset::EnumSet;
 use futures::TryFutureExt;
 use humantime::format_rfc3339;
 use hyper::header;
@@ -43,7 +42,6 @@ use crate::tenant::mgr::{
 };
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::timeline::CompactFlags;
 use crate::tenant::timeline::Timeline;
 use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, TenantSharedResources};
 use crate::{config::PageServerConf, tenant::mgr};
@@ -550,7 +548,7 @@ async fn timeline_detail_handler(

 async fn get_lsn_by_timestamp_handler(
    request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
@@ -566,9 +564,7 @@ async fn get_lsn_by_timestamp_handler(

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let result = timeline
-        .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
-        .await?;
+    let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;

    if version.unwrap_or(0) > 1 {
        #[derive(serde::Serialize)]
@@ -844,7 +840,7 @@ async fn tenant_delete_handler(
 /// without modifying anything anyway.
 async fn tenant_size_handler(
    request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
@@ -860,7 +856,6 @@ async fn tenant_size_handler(
        .gather_size_inputs(
            retention_period,
            LogicalSizeCalculationCause::TenantSizeHandler,
-            &cancel,
            &ctx,
        )
        .await
@@ -1245,7 +1240,7 @@ async fn failpoints_handler(
 // Run GC immediately on given timeline.
 async fn timeline_gc_handler(
    mut request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -1254,7 +1249,7 @@ async fn timeline_gc_handler(
    let gc_req: TimelineGcRequest = json_request(&mut request).await?;

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, &ctx).await?;
    let gc_result = wait_task_done
        .await
        .context("wait for gc task")
@@ -1273,15 +1268,11 @@ async fn timeline_compact_handler(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let mut flags = EnumSet::empty();
-    if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
-        flags |= CompactFlags::ForceRepartition;
-    }
    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
        timeline
-            .compact(&cancel, flags, &ctx)
+            .compact(&cancel, &ctx)
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;
        json_response(StatusCode::OK, ())
@@ -1298,11 +1289,6 @@ async fn timeline_checkpoint_handler(
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;
-
-    let mut flags = EnumSet::empty();
-    if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
-        flags |= CompactFlags::ForceRepartition;
-    }
    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
@@ -1311,7 +1297,7 @@ async fn timeline_checkpoint_handler(
            .await
            .map_err(ApiError::InternalServerError)?;
        timeline
-            .compact(&cancel, flags, &ctx)
+            .compact(&cancel, &ctx)
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;

@@ -1689,24 +1675,8 @@ where
                let token_cloned = token.clone();
                let result = handler(r, token).await;
                if token_cloned.is_cancelled() {
-                    // dropguard has executed: we will never turn this result into response.
-                    //
-                    // at least temporarily do {:?} logging; these failures are rare enough but
-                    // could hide difficult errors.
-                    match &result {
-                        Ok(response) => {
-                            let status = response.status();
-                            info!(%status, "Cancelled request finished successfully")
-                        }
-                        Err(e) => error!("Cancelled request finished with an error: {e:?}"),
-                    }
+                    info!("Cancelled request finished");
                }
-                // only logging for cancelled panicked request handlers is the tracing_panic_hook,
-                // which should suffice.
-                //
-                // there is still a chance to lose the result due to race between
-                // returning from here and the actual connection closing happening
-                // before outer task gets to execute. leaving that up for #5815.
                result
            }
            .in_current_span(),
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -3,25 +3,18 @@
 //! a neon Timeline.
 //!
 use std::path::{Path, PathBuf};
-use std::pin::Pin;
-use std::task::{self, Poll};

 use anyhow::{bail, ensure, Context, Result};
-use async_compression::{tokio::write::ZstdEncoder, zstd::CParameter, Level};
 use bytes::Bytes;
 use camino::Utf8Path;
 use futures::StreamExt;
-use nix::NixPath;
-use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncReadExt};
 use tokio_tar::Archive;
-use tokio_tar::Builder;
-use tokio_tar::HeaderMode;
 use tracing::*;
 use walkdir::WalkDir;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::*;
-use crate::tenant::remote_timeline_client::INITDB_PATH;
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
 use crate::walrecord::DecodedWALRecord;
@@ -40,9 +33,7 @@ use utils::lsn::Lsn;
 pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {
    // Read control file to extract the LSN
    let controlfile_path = path.join("global").join("pg_control");
-    let controlfile_buf = std::fs::read(&controlfile_path)
-        .with_context(|| format!("reading controlfile: {controlfile_path}"))?;
-    let controlfile = ControlFileData::decode(&controlfile_buf)?;
+    let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
    let lsn = controlfile.checkPoint;

    Ok(Lsn(lsn))
@@ -627,108 +618,3 @@ async fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result<Bytes>
    reader.read_to_end(&mut buf).await?;
    Ok(Bytes::from(buf))
 }
-
-/// An in-memory buffer implementing `AsyncWrite`, inserting yields every now and then
-///
-/// The number of yields is bounded by above by the number of times poll_write is called,
-/// so calling it with 8 KB chunks and 8 MB chunks gives the same number of yields in total.
-/// This is an explicit choice as the `YieldingVec` is meant to give the async executor
-/// breathing room between units of CPU intensive preparation of buffers to be written.
-/// Once a write call is issued, the whole buffer has been prepared already, so there is no
-/// gain in splitting up the memcopy further.
-struct YieldingVec {
-    yield_budget: usize,
-    // the buffer written into
-    buf: Vec<u8>,
-}
-
-impl YieldingVec {
-    fn new() -> Self {
-        Self {
-            yield_budget: 0,
-            buf: Vec::new(),
-        }
-    }
-    // Whether we should yield for a read operation of given size
-    fn should_yield(&mut self, add_buf_len: usize) -> bool {
-        // Set this limit to a small value so that we are a
-        // good async citizen and yield repeatedly (but not
-        // too often for many small writes to cause many yields)
-        const YIELD_DIST: usize = 1024;
-
-        let target_buf_len = self.buf.len() + add_buf_len;
-        let ret = self.yield_budget / YIELD_DIST < target_buf_len / YIELD_DIST;
-        if self.yield_budget < target_buf_len {
-            self.yield_budget += add_buf_len;
-        }
-        ret
-    }
-}
-
-impl AsyncWrite for YieldingVec {
-    fn poll_write(
-        mut self: Pin<&mut Self>,
-        cx: &mut task::Context<'_>,
-        buf: &[u8],
-    ) -> Poll<std::io::Result<usize>> {
-        if self.should_yield(buf.len()) {
-            cx.waker().wake_by_ref();
-            return Poll::Pending;
-        }
-        self.get_mut().buf.extend_from_slice(buf);
-        Poll::Ready(Ok(buf.len()))
-    }
-
-    fn poll_flush(self: Pin<&mut Self>, _cx: &mut task::Context<'_>) -> Poll<std::io::Result<()>> {
-        Poll::Ready(Ok(()))
-    }
-
-    fn poll_shutdown(
-        self: Pin<&mut Self>,
-        _cx: &mut task::Context<'_>,
-    ) -> Poll<std::io::Result<()>> {
-        Poll::Ready(Ok(()))
-    }
-}
-
-pub async fn create_tar_zst(pgdata_path: &Utf8Path) -> Result<Vec<u8>> {
-    let mut paths = Vec::new();
-    for entry in WalkDir::new(pgdata_path) {
-        let entry = entry?;
-        let metadata = entry.metadata().expect("error getting dir entry metadata");
-        // Also allow directories so that we also get empty directories
-        if !(metadata.is_file() || metadata.is_dir()) {
-            continue;
-        }
-        let path = entry.into_path();
-        paths.push(path);
-    }
-    // Do a sort to get a more consistent listing
-    paths.sort_unstable();
-    let zstd = ZstdEncoder::with_quality_and_params(
-        YieldingVec::new(),
-        Level::Default,
-        &[CParameter::enable_long_distance_matching(true)],
-    );
-    let mut builder = Builder::new(zstd);
-    // Use reproducible header mode
-    builder.mode(HeaderMode::Deterministic);
-    for path in paths {
-        let rel_path = path.strip_prefix(pgdata_path)?;
-        if rel_path.is_empty() {
-            // The top directory should not be compressed,
-            // the tar crate doesn't like that
-            continue;
-        }
-        builder.append_path_with_name(&path, rel_path).await?;
-    }
-    let mut zstd = builder.into_inner().await?;
-    zstd.shutdown().await?;
-    let compressed = zstd.into_inner();
-    let compressed_len = compressed.buf.len();
-    const INITDB_TAR_ZST_WARN_LIMIT: usize = 2_000_000;
-    if compressed_len > INITDB_TAR_ZST_WARN_LIMIT {
-        warn!("compressed {INITDB_PATH} size of {compressed_len} is above limit {INITDB_TAR_ZST_WARN_LIMIT}.");
-    }
-    Ok(compressed.buf)
-}
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -638,7 +638,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
 ///
 /// Operations:
 /// - open ([`std::fs::OpenOptions::open`])
-/// - close (dropping [`crate::virtual_file::VirtualFile`])
+/// - close (dropping [`std::fs::File`])
 /// - close-by-replace (close by replacement algorithm)
 /// - read (`read_at`)
 /// - write (`write_at`)
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -21,7 +21,6 @@ use serde::{Deserialize, Serialize};
 use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::ops::Range;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
 use utils::bin_ser::DeserializeError;
 use utils::{bin_ser::BeSer, lsn::Lsn};
@@ -366,7 +365,6 @@ impl Timeline {
    pub async fn find_lsn_for_timestamp(
        &self,
        search_timestamp: TimestampTz,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> Result<LsnForTimestamp, PageReconstructError> {
        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
@@ -385,9 +383,6 @@ impl Timeline {
        let mut found_smaller = false;
        let mut found_larger = false;
        while low < high {
-            if cancel.is_cancelled() {
-                return Err(PageReconstructError::Cancelled);
-            }
            // cannot overflow, high and low are both smaller than u64::MAX / 2
            let mid = (high + low) / 2;

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -12,9 +12,7 @@
 //!

 use anyhow::{bail, Context};
-use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
-use enumset::EnumSet;
 use futures::FutureExt;
 use pageserver_api::models::TimelineState;
 use remote_storage::DownloadError;
@@ -25,7 +23,6 @@ use tokio::sync::watch;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
-use utils::backoff;
 use utils::completion;
 use utils::crashsafe::path_with_suffix_extension;
 use utils::fs_ext;
@@ -294,16 +291,6 @@ impl From<harness::TestRedoManager> for WalRedoManager {
 }

 impl WalRedoManager {
-    pub(crate) fn maybe_quiesce(&self, idle_timeout: Duration) {
-        match self {
-            Self::Prod(mgr) => mgr.maybe_quiesce(idle_timeout),
-            #[cfg(test)]
-            Self::Test(_) => {
-                // Not applicable to test redo manager
-            }
-        }
-    }
-
    pub async fn request_redo(
        &self,
        key: crate::repository::Key,
@@ -1632,7 +1619,6 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<GcResult> {
        // Don't start doing work during shutdown
@@ -1655,7 +1641,7 @@ impl Tenant {
            }
        }

-        self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx)
+        self.gc_iteration_internal(target_timeline_id, horizon, pitr, ctx)
            .await
    }

@@ -1663,16 +1649,22 @@ impl Tenant {
    /// This function is periodically called by compactor task.
    /// Also it can be explicitly requested per timeline through page server
    /// api's 'compact' command.
-    async fn compaction_iteration(
+    pub async fn compaction_iteration(
        &self,
        cancel: &CancellationToken,
        ctx: &RequestContext,
-    ) -> anyhow::Result<(), timeline::CompactionError> {
-        // Don't start doing work during shutdown, or when broken, we do not need those in the logs
-        if !self.is_active() {
+    ) -> anyhow::Result<()> {
+        // Don't start doing work during shutdown
+        if let TenantState::Stopping { .. } = self.current_state() {
            return Ok(());
        }

+        // We should only be called once the tenant has activated.
+        anyhow::ensure!(
+            self.is_active(),
+            "Cannot run compaction iteration on inactive tenant"
+        );
+
        {
            let conf = self.tenant_conf.read().unwrap();
            if !conf.location.may_delete_layers_hint() || !conf.location.may_upload_layers_hint() {
@@ -1703,7 +1695,7 @@ impl Tenant {

        for (timeline_id, timeline) in &timelines_to_compact {
            timeline
-                .compact(cancel, EnumSet::empty(), ctx)
+                .compact(cancel, ctx)
                .instrument(info_span!("compact_timeline", %timeline_id))
                .await?;
        }
@@ -1858,7 +1850,6 @@ impl Tenant {
                });
            })
        };
-        // test_long_timeline_create_then_tenant_delete is leaning on this message
        tracing::info!("Waiting for timelines...");
        while let Some(res) = js.join_next().await {
            match res {
@@ -2573,30 +2564,14 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<GcResult> {
        let mut totals: GcResult = Default::default();
        let now = Instant::now();

-        let gc_timelines = match self
-            .refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)
-            .await
-        {
-            Ok(result) => result,
-            Err(e) => {
-                if let Some(PageReconstructError::Cancelled) =
-                    e.downcast_ref::<PageReconstructError>()
-                {
-                    // Handle cancellation
-                    totals.elapsed = now.elapsed();
-                    return Ok(totals);
-                } else {
-                    // Propagate other errors
-                    return Err(e);
-                }
-            }
-        };
+        let gc_timelines = self
+            .refresh_gc_info_internal(target_timeline_id, horizon, pitr, ctx)
+            .await?;

        crate::failpoint_support::sleep_millis_async!(
            "gc_iteration_internal_after_getting_gc_timelines"
@@ -2620,7 +2595,7 @@ impl Tenant {
        // See comments in [`Tenant::branch_timeline`] for more information
        // about why branch creation task can run concurrently with timeline's GC iteration.
        for timeline in gc_timelines {
-            if task_mgr::is_shutdown_requested() || cancel.is_cancelled() {
+            if task_mgr::is_shutdown_requested() {
                // We were requested to shut down. Stop and return with the progress we
                // made.
                break;
@@ -2640,7 +2615,6 @@ impl Tenant {
    /// This is usually executed as part of periodic gc, but can now be triggered more often.
    pub async fn refresh_gc_info(
        &self,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
        // since this method can now be called at different rates than the configured gc loop, it
@@ -2652,7 +2626,7 @@ impl Tenant {
        // refresh all timelines
        let target_timeline_id = None;

-        self.refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)
+        self.refresh_gc_info_internal(target_timeline_id, horizon, pitr, ctx)
            .await
    }

@@ -2661,7 +2635,6 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
        // grab mutex to prevent new timelines from being created here.
@@ -2735,7 +2708,7 @@ impl Tenant {
                    .map(|&x| x.1)
                    .collect();
                timeline
-                    .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
+                    .update_gc_info(branchpoints, cutoff, pitr, ctx)
                    .await?;

                gc_timelines.push(timeline);
@@ -2898,7 +2871,7 @@ impl Tenant {
    }

    /// - run initdb to init temporary instance and get bootstrap data
-    /// - after initialization completes, tar up the temp dir and upload it to S3.
+    /// - after initialization completes, remove the temp dir.
    ///
    /// The caller is responsible for activating the returned timeline.
    async fn bootstrap_timeline(
@@ -2939,30 +2912,6 @@ impl Tenant {
        let pgdata_path = &initdb_path;
        let pgdata_lsn = import_datadir::get_lsn_from_controlfile(pgdata_path)?.align();

-        // Upload the created data dir to S3
-        if let Some(storage) = &self.remote_storage {
-            let pgdata_zstd = import_datadir::create_tar_zst(pgdata_path).await?;
-            let pgdata_zstd = Bytes::from(pgdata_zstd);
-            backoff::retry(
-                || async {
-                    self::remote_timeline_client::upload_initdb_dir(
-                        storage,
-                        &self.tenant_id,
-                        &timeline_id,
-                        pgdata_zstd.clone(),
-                    )
-                    .await
-                },
-                |_| false,
-                3,
-                u32::MAX,
-                "persist_initdb_tar_zst",
-                // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-                backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
-            )
-            .await?;
-        }
-
        // Import the contents of the data directory at the initial checkpoint
        // LSN, and any WAL after that.
        // Initdb lsn will be equal to last_record_lsn which will be set after import.
@@ -3147,10 +3096,7 @@ impl Tenant {
        let uninit_mark_path = self
            .conf
            .timeline_uninit_mark_file_path(tenant_id, timeline_id);
-        fs::OpenOptions::new()
-            .write(true)
-            .create_new(true)
-            .open(&uninit_mark_path)
+        fs::File::create(&uninit_mark_path)
            .context("Failed to create uninit mark file")
            .and_then(|_| {
                crashsafe::fsync_file_and_parent(&uninit_mark_path)
@@ -3175,7 +3121,6 @@ impl Tenant {
        // (only if it is shorter than the real cutoff).
        max_retention_period: Option<u64>,
        cause: LogicalSizeCalculationCause,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<size::ModelInputs> {
        let logical_sizes_at_once = self
@@ -3198,7 +3143,6 @@ impl Tenant {
            max_retention_period,
            &mut shared_cache,
            cause,
-            cancel,
            ctx,
        )
        .await
@@ -3211,10 +3155,9 @@ impl Tenant {
    pub async fn calculate_synthetic_size(
        &self,
        cause: LogicalSizeCalculationCause,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<u64> {
-        let inputs = self.gather_size_inputs(None, cause, cancel, ctx).await?;
+        let inputs = self.gather_size_inputs(None, cause, ctx).await?;

        let size = inputs.calculate()?;

@@ -3557,7 +3500,6 @@ pub(crate) mod harness {
                // enable it in case the tests exercise code paths that use
                // debug_assert_current_span_has_tenant_and_timeline_id
                logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
-                logging::Output::Stdout,
            )
            .expect("Failed to init test logging")
        });
@@ -3986,13 +3928,7 @@ mod tests {
        // and compaction works. But it does set the 'cutoff' point so that the cross check
        // below should fail.
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // try to branch at lsn 25, should fail because we already garbage collected the data
@@ -4095,13 +4031,7 @@ mod tests {
        tline.set_broken("test".to_owned());

        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // The branchpoints should contain all timelines, even ones marked
@@ -4147,13 +4077,7 @@ mod tests {
            .expect("Should have a local timeline");
        // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;
        assert!(newtline.get(*TEST_KEY, Lsn(0x25), &ctx).await.is_ok());

@@ -4181,13 +4105,7 @@ mod tests {

        // run gc on parent
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // Check that the data is still accessible on the branch.
@@ -4376,9 +4294,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4393,9 +4309,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4410,9 +4324,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4427,9 +4339,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        assert_eq!(
            tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,
@@ -4497,18 +4407,10 @@ mod tests {
            let cutoff = tline.get_last_record_lsn();

            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

@@ -4585,18 +4487,10 @@ mod tests {
            // Perform a cycle of flush, compact, and GC
            let cutoff = tline.get_last_record_lsn();
            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

@@ -4683,18 +4577,10 @@ mod tests {
            // Perform a cycle of flush, compact, and GC
            let cutoff = tline.get_last_record_lsn();
            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -1944,7 +1944,6 @@ pub(crate) async fn immediate_gc(
    tenant_id: TenantId,
    timeline_id: TimelineId,
    gc_req: TimelineGcRequest,
-    cancel: CancellationToken,
    ctx: &RequestContext,
 ) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
    let guard = TENANTS.read().unwrap();
@@ -1971,7 +1970,7 @@ pub(crate) async fn immediate_gc(
        async move {
            fail::fail_point!("immediate_gc_task_pre");
            let result = tenant
-                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
+                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &ctx)
                .instrument(info_span!("manual_gc", %tenant_id, %timeline_id))
                .await;
                // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -190,7 +190,6 @@ use chrono::{NaiveDateTime, Utc};

 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
-pub(crate) use upload::upload_initdb_dir;
 use utils::backoff::{
    self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
 };
@@ -250,8 +249,6 @@ pub(crate) const FAILED_REMOTE_OP_RETRIES: u32 = 10;
 // retries. Uploads and deletions are retried forever, though.
 pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;

-pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
-
 pub enum MaybeDeletedIndexPart {
    IndexPart(IndexPart),
    Deleted(IndexPart),
@@ -819,7 +816,7 @@ impl RemoteTimelineClient {
        let mut receiver = {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
-            self.schedule_barrier0(upload_queue)
+            self.schedule_barrier(upload_queue)
        };

        if receiver.changed().await.is_err() {
@@ -828,14 +825,7 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    pub(crate) fn schedule_barrier(self: &Arc<Self>) -> anyhow::Result<()> {
-        let mut guard = self.upload_queue.lock().unwrap();
-        let upload_queue = guard.initialized_mut()?;
-        self.schedule_barrier0(upload_queue);
-        Ok(())
-    }
-
-    fn schedule_barrier0(
+    fn schedule_barrier(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
    ) -> tokio::sync::watch::Receiver<()> {
@@ -1239,18 +1229,16 @@ impl RemoteTimelineClient {
                    }
                    res
                }
-                UploadOp::Delete(delete) => {
-                    pausable_failpoint!("before-delete-layer-pausable");
-                    self.deletion_queue_client
-                        .push_layers(
-                            self.tenant_id,
-                            self.timeline_id,
-                            self.generation,
-                            delete.layers.clone(),
-                        )
-                        .await
-                        .map_err(|e| anyhow::anyhow!(e))
-                }
+                UploadOp::Delete(delete) => self
+                    .deletion_queue_client
+                    .push_layers(
+                        self.tenant_id,
+                        self.timeline_id,
+                        self.generation,
+                        delete.layers.clone(),
+                    )
+                    .await
+                    .map_err(|e| anyhow::anyhow!(e)),
                UploadOp::Barrier(_) => {
                    // unreachable. Barrier operations are handled synchronously in
                    // launch_queued_tasks
@@ -1540,13 +1528,6 @@ pub fn remote_layer_path(
    RemotePath::from_string(&path).expect("Failed to construct path")
 }

-pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
-    RemotePath::from_string(&format!(
-        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
-    ))
-    .expect("Failed to construct path")
-}
-
 pub fn remote_index_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -128,14 +128,6 @@ impl IndexPart {
    pub fn get_disk_consistent_lsn(&self) -> Lsn {
        self.disk_consistent_lsn
    }
-
-    pub fn from_s3_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
-        serde_json::from_slice::<IndexPart>(bytes)
-    }
-
-    pub fn to_s3_bytes(&self) -> serde_json::Result<Vec<u8>> {
-        serde_json::to_vec(self)
-    }
 }

 impl TryFrom<&UploadQueueInitialized> for IndexPart {
@@ -209,7 +201,7 @@ mod tests {
            deleted_at: None,
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -247,7 +239,7 @@ mod tests {
            deleted_at: None,
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -287,7 +279,7 @@ mod tests {
                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -331,7 +323,7 @@ mod tests {
            deleted_at: None,
        };

-        let empty_layers_parsed = IndexPart::from_s3_bytes(empty_layers_json.as_bytes()).unwrap();
+        let empty_layers_parsed = serde_json::from_str::<IndexPart>(empty_layers_json).unwrap();

        assert_eq!(empty_layers_parsed, expected);
    }
@@ -369,7 +361,7 @@ mod tests {
                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }
 }
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -1,7 +1,6 @@
 //! Helper functions to upload files to remote storage with a RemoteStorage

 use anyhow::{bail, Context};
-use bytes::Bytes;
 use camino::Utf8Path;
 use fail::fail_point;
 use std::io::ErrorKind;
@@ -10,9 +9,7 @@ use tokio::fs;
 use super::Generation;
 use crate::{
    config::PageServerConf,
-    tenant::remote_timeline_client::{
-        index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
-    },
+    tenant::remote_timeline_client::{index::IndexPart, remote_index_path, remote_path},
 };
 use remote_storage::GenericRemoteStorage;
 use utils::id::{TenantId, TimelineId};
@@ -36,9 +33,8 @@ pub(super) async fn upload_index_part<'a>(
    });
    pausable_failpoint!("before-upload-index-pausable");

-    let index_part_bytes = index_part
-        .to_s3_bytes()
-        .context("serialize index part file into bytes")?;
+    let index_part_bytes =
+        serde_json::to_vec(&index_part).context("serialize index part file into bytes")?;
    let index_part_size = index_part_bytes.len();
    let index_part_bytes = tokio::io::BufReader::new(std::io::Cursor::new(index_part_bytes));

@@ -107,22 +103,3 @@ pub(super) async fn upload_timeline_layer<'a>(

    Ok(())
 }
-
-/// Uploads the given `initdb` data to the remote storage.
-pub(crate) async fn upload_initdb_dir(
-    storage: &GenericRemoteStorage,
-    tenant_id: &TenantId,
-    timeline_id: &TimelineId,
-    initdb_dir: Bytes,
-) -> anyhow::Result<()> {
-    tracing::trace!("uploading initdb dir");
-
-    let size = initdb_dir.len();
-    let bytes = tokio::io::BufReader::new(std::io::Cursor::new(initdb_dir));
-
-    let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);
-    storage
-        .upload_storage_object(bytes, size, &remote_path)
-        .await
-        .with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
-}
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -6,7 +6,6 @@ use std::sync::Arc;
 use anyhow::{bail, Context};
 use tokio::sync::oneshot::error::RecvError;
 use tokio::sync::Semaphore;
-use tokio_util::sync::CancellationToken;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
@@ -114,12 +113,11 @@ pub(super) async fn gather_inputs(
    max_retention_period: Option<u64>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
    cause: LogicalSizeCalculationCause,
-    cancel: &CancellationToken,
    ctx: &RequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
    tenant
-        .refresh_gc_info(cancel, ctx)
+        .refresh_gc_info(ctx)
        .await
        .context("Failed to refresh gc_info before gathering inputs")?;

--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -289,9 +289,7 @@ impl DeltaLayer {
    async fn load_inner(&self, ctx: &RequestContext) -> Result<Arc<DeltaLayerInner>> {
        let path = self.path();

-        let loaded = DeltaLayerInner::load(&path, None, ctx)
-            .await
-            .and_then(|res| res)?;
+        let loaded = DeltaLayerInner::load(&path, None, ctx).await?;

        // not production code
        let actual_filename = path.file_name().unwrap().to_owned();
@@ -612,28 +610,18 @@ impl Drop for DeltaLayerWriter {
 }

 impl DeltaLayerInner {
-    /// Returns nested result following Result<Result<_, OpErr>, Critical>:
-    /// - inner has the success or transient failure
-    /// - outer has the permanent failure
    pub(super) async fn load(
        path: &Utf8Path,
        summary: Option<Summary>,
        ctx: &RequestContext,
-    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
-        let file = match VirtualFile::open(path).await {
-            Ok(file) => file,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
-        };
+    ) -> anyhow::Result<Self> {
+        let file = VirtualFile::open(path)
+            .await
+            .with_context(|| format!("Failed to open file '{path}'"))?;
        let file = FileBlockReader::new(file);

-        let summary_blk = match file.read_blk(0, ctx).await {
-            Ok(blk) => blk,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("read first block"))),
-        };
-
-        // TODO: this should be an assertion instead; see ImageLayerInner::load
-        let actual_summary =
-            Summary::des_prefix(summary_blk.as_ref()).context("deserialize first block")?;
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;

        if let Some(mut expected_summary) = summary {
            // production code path
@@ -648,11 +636,11 @@ impl DeltaLayerInner {
            }
        }

-        Ok(Ok(DeltaLayerInner {
+        Ok(DeltaLayerInner {
            file,
            index_start_blk: actual_summary.index_start_blk,
            index_root_blk: actual_summary.index_root_blk,
-        }))
+        })
    }

    pub(super) async fn get_value_reconstruct_data(
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -249,9 +249,7 @@ impl ImageLayer {
    async fn load_inner(&self, ctx: &RequestContext) -> Result<ImageLayerInner> {
        let path = self.path();

-        let loaded = ImageLayerInner::load(&path, self.desc.image_layer_lsn(), None, ctx)
-            .await
-            .and_then(|res| res)?;
+        let loaded = ImageLayerInner::load(&path, self.desc.image_layer_lsn(), None, ctx).await?;

        // not production code
        let actual_filename = path.file_name().unwrap().to_owned();
@@ -297,31 +295,18 @@ impl ImageLayer {
 }

 impl ImageLayerInner {
-    /// Returns nested result following Result<Result<_, OpErr>, Critical>:
-    /// - inner has the success or transient failure
-    /// - outer has the permanent failure
    pub(super) async fn load(
        path: &Utf8Path,
        lsn: Lsn,
        summary: Option<Summary>,
        ctx: &RequestContext,
-    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
-        let file = match VirtualFile::open(path).await {
-            Ok(file) => file,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
-        };
+    ) -> anyhow::Result<Self> {
+        let file = VirtualFile::open(path)
+            .await
+            .with_context(|| format!("Failed to open file '{}'", path))?;
        let file = FileBlockReader::new(file);
-        let summary_blk = match file.read_blk(0, ctx).await {
-            Ok(blk) => blk,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("read first block"))),
-        };
-
-        // length is the only way how this could fail, so it's not actually likely at all unless
-        // read_blk returns wrong sized block.
-        //
-        // TODO: confirm and make this into assertion
-        let actual_summary =
-            Summary::des_prefix(summary_blk.as_ref()).context("deserialize first block")?;
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;

        if let Some(mut expected_summary) = summary {
            // production code path
@@ -337,12 +322,12 @@ impl ImageLayerInner {
            }
        }

-        Ok(Ok(ImageLayerInner {
+        Ok(ImageLayerInner {
            index_start_blk: actual_summary.index_start_blk,
            index_root_blk: actual_summary.index_root_blk,
            lsn,
            file,
-        }))
+        })
    }

    pub(super) async fn get_value_reconstruct_data(
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -868,9 +868,6 @@ impl LayerInner {
            }
            Ok((Err(e), _permit)) => {
                // FIXME: this should be with the spawned task and be cancellation sensitive
-                //
-                // while we should not need this, this backoff has turned out to be useful with
-                // a bug of unexpectedly deleted remote layer file (#5787).
                let consecutive_failures =
                    self.consecutive_failures.fetch_add(1, Ordering::Relaxed);
                tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
@@ -1199,7 +1196,7 @@ impl DownloadedLayer {
                ));
                delta_layer::DeltaLayerInner::load(&owner.path, summary, ctx)
                    .await
-                    .map(|res| res.map(LayerKind::Delta))
+                    .map(LayerKind::Delta)
            } else {
                let lsn = owner.desc.image_layer_lsn();
                let summary = Some(image_layer::Summary::expected(
@@ -1210,32 +1207,23 @@ impl DownloadedLayer {
                ));
                image_layer::ImageLayerInner::load(&owner.path, lsn, summary, ctx)
                    .await
-                    .map(|res| res.map(LayerKind::Image))
-            };
-
-            match res {
-                Ok(Ok(layer)) => Ok(Ok(layer)),
-                Ok(Err(transient)) => Err(transient),
-                Err(permanent) => {
-                    LAYER_IMPL_METRICS.inc_permanent_loading_failures();
-                    // TODO(#5815): we are not logging all errors, so temporarily log them **once**
-                    // here as well
-                    let permanent = permanent.context("load layer");
-                    tracing::error!("layer loading failed permanently: {permanent:#}");
-                    Ok(Err(permanent))
-                }
+                    .map(LayerKind::Image)
            }
+            // this will be a permanent failure
+            .context("load layer");
+
+            if let Err(e) = res.as_ref() {
+                LAYER_IMPL_METRICS.inc_permanent_loading_failures();
+                // TODO(#5815): we are not logging all errors, so temporarily log them here as well
+                tracing::error!("layer loading failed permanently: {e:#}");
+            }
+            res
        };
-        self.kind
-            .get_or_try_init(init)
-            // return transient errors using `?`
-            .await?
-            .as_ref()
-            .map_err(|e| {
-                // errors are not clonabled, cannot but stringify
-                // test_broken_timeline matches this string
-                anyhow::anyhow!("layer loading failed: {e:#}")
-            })
+        self.kind.get_or_init(init).await.as_ref().map_err(|e| {
+            // errors are not cloneable, so all we can do is stringify them
+            // test_broken_timeline matches this string
+            anyhow::anyhow!("layer loading failed: {e:#}")
+        })
    }

    async fn get_value_reconstruct_data(
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -180,16 +180,16 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                // Run compaction
                if let Err(e) = tenant.compaction_iteration(&cancel, &ctx).await {
                    let wait_duration = backoff::exponential_backoff_duration_seconds(
-                        error_run_count + 1,
+                        error_run_count,
                        1.0,
                        MAX_BACKOFF_SECS,
                    );
                    error_run_count += 1;
-                    let wait_duration = Duration::from_secs_f64(wait_duration);
                    error!(
-                        "Compaction failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
+                        "Compaction failed {error_run_count} times, retrying in {:?}: {e:?}",
+                        wait_duration
                    );
-                    wait_duration
+                    Duration::from_secs_f64(wait_duration)
                } else {
                    error_run_count = 0;
                    period
@@ -198,10 +198,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {

            warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Compaction);

-            // Perhaps we did no work and the walredo process has been idle for some time:
-            // give it a chance to shut down to avoid leaving walredo process running indefinitely.
-            tenant.walredo_mgr.maybe_quiesce(period * 10);
-
            // Sleep
            if tokio::time::timeout(sleep_duration, cancel.cancelled())
                .await
@@ -261,20 +257,20 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
            } else {
                // Run gc
                let res = tenant
-                    .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &cancel, &ctx)
+                    .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx)
                    .await;
                if let Err(e) = res {
                    let wait_duration = backoff::exponential_backoff_duration_seconds(
-                        error_run_count + 1,
+                        error_run_count,
                        1.0,
                        MAX_BACKOFF_SECS,
                    );
                    error_run_count += 1;
-                    let wait_duration = Duration::from_secs_f64(wait_duration);
                    error!(
-                        "Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
+                        "Gc failed {error_run_count} times, retrying in {:?}: {e:?}",
+                        wait_duration
                    );
-                    wait_duration
+                    Duration::from_secs_f64(wait_duration)
                } else {
                    error_run_count = 0;
                    period
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -10,7 +10,6 @@ mod walreceiver;
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
-use enumset::EnumSet;
 use fail::fail_point;
 use itertools::Itertools;
 use pageserver_api::models::{
@@ -438,11 +437,6 @@ pub enum LogicalSizeCalculationCause {
    TenantSizeHandler,
 }

-#[derive(enumset::EnumSetType)]
-pub(crate) enum CompactFlags {
-    ForceRepartition,
-}
-
 /// Public interface functions
 impl Timeline {
    /// Get the LSN where this branch was created
@@ -700,7 +694,6 @@ impl Timeline {
    pub(crate) async fn compact(
        self: &Arc<Self>,
        cancel: &CancellationToken,
-        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
    ) -> Result<(), CompactionError> {
        // this wait probably never needs any "long time spent" logging, because we already nag if
@@ -773,7 +766,6 @@ impl Timeline {
            .repartition(
                self.get_last_record_lsn(),
                self.get_compaction_target_size(),
-                flags,
                ctx,
            )
            .await
@@ -1719,30 +1711,6 @@ impl Timeline {
        if let Some(rtc) = self.remote_client.as_ref() {
            rtc.schedule_layer_file_deletion(&needs_cleanup)?;
            rtc.schedule_index_upload_for_file_changes()?;
-            // This barrier orders above DELETEs before any later operations.
-            // This is critical because code executing after the barrier might
-            // create again objects with the same key that we just scheduled for deletion.
-            // For example, if we just scheduled deletion of an image layer "from the future",
-            // later compaction might run again and re-create the same image layer.
-            // "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
-            // "same" here means same key range and LSN.
-            //
-            // Without a barrier between above DELETEs and the re-creation's PUTs,
-            // the upload queue may execute the PUT first, then the DELETE.
-            // In our example, we will end up with an IndexPart referencing a non-existent object.
-            //
-            // 1. a future image layer is created and uploaded
-            // 2. ps restart
-            // 3. the future layer from (1) is deleted during load layer map
-            // 4. image layer is re-created and uploaded
-            // 5. deletion queue would like to delete (1) but actually deletes (4)
-            // 6. delete by name works as expected, but it now deletes the wrong (later) version
-            //
-            // See https://github.com/neondatabase/neon/issues/5878
-            //
-            // NB: generation numbers naturally protect against this because they disambiguate
-            //     (1) and (4)
-            rtc.schedule_barrier()?;
            // Tenant::create_timeline will wait for these uploads to happen before returning, or
            // on retry.
        }
@@ -2557,12 +2525,7 @@ impl Timeline {
                // Note: The 'ctx' in use here has DownloadBehavior::Error. We should not
                // require downloading anything during initial import.
                let (partitioning, _lsn) = self
-                    .repartition(
-                        self.initdb_lsn,
-                        self.get_compaction_target_size(),
-                        EnumSet::empty(),
-                        ctx,
-                    )
+                    .repartition(self.initdb_lsn, self.get_compaction_target_size(), ctx)
                    .await?;

                if self.cancel.is_cancelled() {
@@ -2600,8 +2563,6 @@ impl Timeline {
                )
            };

-        pausable_failpoint!("flush-layer-cancel-after-writing-layer-out-pausable");
-
        if self.cancel.is_cancelled() {
            return Err(FlushLayerError::Cancelled);
        }
@@ -2783,16 +2744,12 @@ impl Timeline {
        &self,
        lsn: Lsn,
        partition_size: u64,
-        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
    ) -> anyhow::Result<(KeyPartitioning, Lsn)> {
        {
            let partitioning_guard = self.partitioning.lock().unwrap();
            let distance = lsn.0 - partitioning_guard.1 .0;
-            if partitioning_guard.1 != Lsn(0)
-                && distance <= self.repartition_threshold
-                && !flags.contains(CompactFlags::ForceRepartition)
-            {
+            if partitioning_guard.1 != Lsn(0) && distance <= self.repartition_threshold {
                debug!(
                    distance,
                    threshold = self.repartition_threshold,
@@ -3540,22 +3497,21 @@ impl Timeline {
            }

            // FIXME: the writer already fsyncs all data, only rename needs to be fsynced here
-            let layer_paths: Vec<Utf8PathBuf> = new_layers
+            let mut layer_paths: Vec<Utf8PathBuf> = new_layers
                .iter()
                .map(|l| l.local_path().to_owned())
                .collect();

            // Fsync all the layer files and directory using multiple threads to
            // minimize latency.
-            par_fsync::par_fsync_async(&layer_paths)
-                .await
-                .context("fsync all new layers")?;
+            //
+            // FIXME: spawn_blocking above for this
+            par_fsync::par_fsync(&layer_paths).context("fsync all new layers")?;

-            let timeline_dir = self.conf.timeline_path(&self.tenant_id, &self.timeline_id);
-
-            par_fsync::par_fsync_async(&[timeline_dir])
-                .await
+            par_fsync::par_fsync(&[self.conf.timeline_path(&self.tenant_id, &self.timeline_id)])
                .context("fsync of timeline dir")?;
+
+            layer_paths.pop().unwrap();
        }

        stats.write_layer_files_micros = stats.read_lock_drop_micros.till_now();
@@ -3728,7 +3684,6 @@ impl Timeline {
        retain_lsns: Vec<Lsn>,
        cutoff_horizon: Lsn,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // First, calculate pitr_cutoff_timestamp and then convert it to LSN.
@@ -3742,10 +3697,7 @@ impl Timeline {
            if let Some(pitr_cutoff_timestamp) = now.checked_sub(pitr) {
                let pitr_timestamp = to_pg_timestamp(pitr_cutoff_timestamp);

-                match self
-                    .find_lsn_for_timestamp(pitr_timestamp, cancel, ctx)
-                    .await?
-                {
+                match self.find_lsn_for_timestamp(pitr_timestamp, ctx).await? {
                    LsnForTimestamp::Present(lsn) => lsn,
                    LsnForTimestamp::Future(lsn) => {
                        // The timestamp is in the future. That sounds impossible,
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -351,7 +351,7 @@ impl Timeline {
        match state.last_layer_access_imitation {
            Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }
            _ => {
-                self.imitate_synthetic_size_calculation_worker(&tenant, cancel, ctx)
+                self.imitate_synthetic_size_calculation_worker(&tenant, ctx, cancel)
                    .await;
                state.last_layer_access_imitation = Some(tokio::time::Instant::now());
            }
@@ -417,8 +417,8 @@ impl Timeline {
    async fn imitate_synthetic_size_calculation_worker(
        &self,
        tenant: &Arc<Tenant>,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
+        cancel: &CancellationToken,
    ) {
        if self.conf.metric_collection_endpoint.is_none() {
            // We don't start the consumption metrics task if this is not set in the config.
@@ -457,7 +457,6 @@ impl Timeline {
            None,
            &mut throwaway_cache,
            LogicalSizeCalculationCause::EvictionTaskImitation,
-            cancel,
            ctx,
        )
        .instrument(info_span!("gather_inputs"));
--- a/pageserver/src/tenant/timeline/uninit.rs
+++ b/pageserver/src/tenant/timeline/uninit.rs
@@ -45,20 +45,12 @@ impl<'t> UninitializedTimeline<'t> {
        let timeline_id = self.timeline_id;
        let tenant_id = self.owning_tenant.tenant_id;

-        if self.raw_timeline.is_none() {
-            return Err(anyhow::anyhow!(
-                "No timeline for initialization found for {tenant_id}/{timeline_id}"
-            ));
-        }
+        let (new_timeline, uninit_mark) = self.raw_timeline.take().with_context(|| {
+            format!("No timeline for initalization found for {tenant_id}/{timeline_id}")
+        })?;

        // Check that the caller initialized disk_consistent_lsn
-        let new_disk_consistent_lsn = self
-            .raw_timeline
-            .as_ref()
-            .expect("checked above")
-            .0
-            .get_disk_consistent_lsn();
-
+        let new_disk_consistent_lsn = new_timeline.get_disk_consistent_lsn();
        anyhow::ensure!(
            new_disk_consistent_lsn.is_valid(),
            "new timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn"
@@ -70,13 +62,6 @@ impl<'t> UninitializedTimeline<'t> {
                "Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map"
            ),
            Entry::Vacant(v) => {
-                // after taking here should be no fallible operations, because the drop guard will not
-                // cleanup after and would block for example the tenant deletion
-                let (new_timeline, uninit_mark) =
-                    self.raw_timeline.take().expect("already checked");
-
-                // this is the mutual exclusion between different retries to create the timeline;
-                // this should be an assertion.
                uninit_mark.remove_uninit_mark().with_context(|| {
                    format!(
                        "Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}"
@@ -85,10 +70,10 @@ impl<'t> UninitializedTimeline<'t> {
                v.insert(Arc::clone(&new_timeline));

                new_timeline.maybe_spawn_flush_loop();
-
-                Ok(new_timeline)
            }
        }
+
+        Ok(new_timeline)
    }

    /// Prepares timeline data by loading it from the basebackup archive.
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -91,7 +91,6 @@ struct ProcessOutput {
 pub struct PostgresRedoManager {
    tenant_id: TenantId,
    conf: &'static PageServerConf,
-    last_redo_at: std::sync::Mutex<Option<Instant>>,
    redo_process: RwLock<Option<Arc<WalRedoProcess>>>,
 }

@@ -188,26 +187,10 @@ impl PostgresRedoManager {
        PostgresRedoManager {
            tenant_id,
            conf,
-            last_redo_at: std::sync::Mutex::default(),
            redo_process: RwLock::new(None),
        }
    }

-    /// This type doesn't have its own background task to check for idleness: we
-    /// rely on our owner calling this function periodically in its own housekeeping
-    /// loops.
-    pub(crate) fn maybe_quiesce(&self, idle_timeout: Duration) {
-        if let Ok(g) = self.last_redo_at.try_lock() {
-            if let Some(last_redo_at) = *g {
-                if last_redo_at.elapsed() >= idle_timeout {
-                    drop(g);
-                    let mut guard = self.redo_process.write().unwrap();
-                    *guard = None;
-                }
-            }
-        }
-    }
-
    ///
    /// Process one request for WAL redo using wal-redo postgres
    ///
@@ -222,8 +205,6 @@ impl PostgresRedoManager {
        wal_redo_timeout: Duration,
        pg_version: u32,
    ) -> anyhow::Result<Bytes> {
-        *(self.last_redo_at.lock().unwrap()) = Some(Instant::now());
-
        let (rel, blknum) = key_to_rel_block(key).context("invalid record")?;
        const MAX_RETRY_ATTEMPTS: u32 = 1;
        let mut n_attempts = 0u32;
@@ -367,13 +348,12 @@ impl PostgresRedoManager {
            self.apply_record_neon(key, &mut page, *record_lsn, record)?;
        }
        // Success!
-        let duration = start_time.elapsed();
-        // FIXME: using the same metric here creates a bimodal distribution by default, and because
-        // there could be multiple batch sizes this would be N+1 modal.
+        let end_time = Instant::now();
+        let duration = end_time.duration_since(start_time);
        WAL_REDO_TIME.observe(duration.as_secs_f64());

        debug!(
-            "neon applied {} WAL records in {} us to reconstruct page image at LSN {}",
+            "neon applied {} WAL records in {} ms to reconstruct page image at LSN {}",
            records.len(),
            duration.as_micros(),
            lsn
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -20,7 +20,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql
+DATA = neon--1.0.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"

 EXTRA_CLEAN = \
--- a/pgxn/neon/README.md
+++ b/pgxn/neon/README.md
@@ -1,20 +0,0 @@
-neon extension consists of several parts:
-
-### shared preload library `neon.so`
-
- implements storage manager API and network communications with remote page server.
-
- walproposer: implements broadcast protocol between postgres and WAL safekeepers.
-
- control plane connector:  Captures updates to roles/databases using ProcessUtility_hook and sends them to the control ProcessUtility_hook.
-
- remote extension server: Request compute_ctl to download extension files.
-
- file_cache: Local file cache is used to temporary store relations pages in local file system for better performance.
-
- relsize_cache: Relation size cache for better neon performance.
-
-### SQL functions in `neon--*.sql`
-
-Utility functions to expose neon specific information to user and metrics collection.
-This extension is created in all databases in the cluster by default.
--- a/pgxn/neon/control_plane_connector.c
+++ b/pgxn/neon/control_plane_connector.c
@@ -475,12 +475,6 @@ NeonXactCallback(XactEvent event, void *arg)
 	Assert(CurrentDdlTable == &RootTable);
 }

-static bool
-RoleIsNeonSuperuser(const char *role_name)
-{
-    return strcmp(role_name, "neon_superuser") == 0;
-}
-
 static void
 HandleCreateDb(CreatedbStmt *stmt)
 {
@@ -507,16 +501,9 @@ HandleCreateDb(CreatedbStmt *stmt)

 	entry->type = Op_Set;
 	if (downer && downer->arg)
-	{
-		const char *owner_name = defGetString(downer);
-		if (RoleIsNeonSuperuser(owner_name))
-			elog(ERROR, "can't create a database with owner neon_superuser");
-		entry->owner = get_role_oid(owner_name, false);
-	}
+		entry->owner = get_role_oid(defGetString(downer), false);
 	else
-	{
 		entry->owner = GetUserId();
-	}
 }

 static void
@@ -535,10 +522,8 @@ HandleAlterOwner(AlterOwnerStmt *stmt)

 	if (!found)
 		memset(entry->old_name, 0, sizeof(entry->old_name));
-	const char *new_owner = get_rolespec_name(stmt->newowner);
-	if (RoleIsNeonSuperuser(new_owner))
-		elog(ERROR, "can't alter owner to neon_superuser");
-	entry->owner = get_role_oid(new_owner, false);
+
+	entry->owner = get_role_oid(get_rolespec_name(stmt->newowner), false);
 	entry->type = Op_Set;
 }

@@ -632,9 +617,6 @@ HandleAlterRole(AlterRoleStmt *stmt)
 	InitRoleTableIfNeeded();
 	DefElem    *dpass = NULL;
 	ListCell   *option;
-	const char *role_name = stmt->role->rolename;
-	if (RoleIsNeonSuperuser(role_name))
-		elog(ERROR, "can't ALTER neon_superuser");

 	foreach(option, stmt->options)
 	{
@@ -649,7 +631,7 @@ HandleAlterRole(AlterRoleStmt *stmt)
 	bool		found = false;
 	RoleEntry  *entry = hash_search(
 									CurrentDdlTable->role_table,
-									role_name,
+									stmt->role->rolename,
 									HASH_ENTER,
 									&found);

--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -32,13 +32,11 @@
 #include "storage/latch.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
-#include "utils/builtins.h"
 #include "utils/dynahash.h"
 #include "utils/guc.h"
 #include "storage/fd.h"
 #include "storage/pg_shmem.h"
 #include "storage/buf_internals.h"
-#include "pgstat.h"

 /*
 * Local file cache is used to temporary store relations pages in local file system.
@@ -67,7 +65,6 @@
 typedef struct FileCacheEntry
 {
 	BufferTag	key;
-	uint32      hash;
 	uint32		offset;
 	uint32		access_count;
 	uint32		bitmap[BLOCKS_PER_CHUNK/32];
@@ -79,10 +76,6 @@ typedef struct FileCacheControl
 	uint64 generation; /* generation is needed to handle correct hash reenabling */
 	uint32 size; /* size of cache file in chunks */
 	uint32 used; /* number of used chunks */
-	uint32 limit; /* shared copy of lfc_size_limit */
-	uint64 hits;
-	uint64 misses;
-	uint64 writes;
 	dlist_head lru; /* double linked list for LRU replacement algorithm */
 } FileCacheControl;

@@ -98,12 +91,10 @@ static shmem_startup_hook_type prev_shmem_startup_hook;
 static shmem_request_hook_type prev_shmem_request_hook;
 #endif

-#define LFC_ENABLED() (lfc_ctl->limit != 0)
-
-void PGDLLEXPORT FileCacheMonitorMain(Datum main_arg);
+void FileCacheMonitorMain(Datum main_arg);

 /*
- * Local file cache is optional and Neon can work without it.
+ * Local file cache is optional and Neon can work without it.
 * In case of any any errors with this cache, we should disable it but to not throw error.
 * Also we should allow  re-enable it if source of failure (lack of disk space, permissions,...) is fixed.
 * All cache content should be invalidated to avoid reading of stale or corrupted data
@@ -111,77 +102,49 @@ void PGDLLEXPORT FileCacheMonitorMain(Datum main_arg);
 static void
 lfc_disable(char const* op)
 {
-	int fd;
+	HASH_SEQ_STATUS status;
+	FileCacheEntry* entry;
+
 	elog(WARNING, "Failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path);

-	/* Invalidate hash */
-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (LFC_ENABLED())
-	{
-		HASH_SEQ_STATUS status;
-		FileCacheEntry* entry;
-
-		hash_seq_init(&status, lfc_hash);
-		while ((entry = hash_seq_search(&status)) != NULL)
-		{
-			hash_search_with_hash_value(lfc_hash, &entry->key, entry->hash, HASH_REMOVE, NULL);
-		}
-		lfc_ctl->generation += 1;
-		lfc_ctl->size = 0;
-		lfc_ctl->used = 0;
-		lfc_ctl->limit = 0;
-		dlist_init(&lfc_ctl->lru);
-
-		if (lfc_desc > 0)
-		{
-			/* If the reason of error is ENOSPC, then truncation of file may help to reclaim some space */
-			int rc = ftruncate(lfc_desc, 0);
-			if (rc < 0)
-				elog(WARNING, "Failed to truncate local file cache %s: %m", lfc_path);
-		}
-	}
-	/* We need to use unlink to to avoid races in LFC write, because it is not protectedby */
-	unlink(lfc_path);
-
-	fd = BasicOpenFile(lfc_path, O_RDWR|O_CREAT|O_TRUNC);
-	if (fd < 0)
-		elog(WARNING, "Failed to recreate local file cache %s: %m", lfc_path);
-	else
-		close(fd);
-
-	LWLockRelease(lfc_lock);
-
 	if (lfc_desc > 0)
 		close(lfc_desc);

 	lfc_desc = -1;
-}
+	lfc_size_limit = 0;

-/*
- * This check is done without obtaining lfc_lock, so it is unreliable
- */
-static bool
-lfc_maybe_disabled(void)
-{
-	return !lfc_ctl || !LFC_ENABLED();
+	/* Invalidate hash */
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	hash_seq_init(&status, lfc_hash);
+	while ((entry = hash_seq_search(&status)) != NULL)
+	{
+		hash_search(lfc_hash, &entry->key, HASH_REMOVE, NULL);
+		memset(entry->bitmap, 0, sizeof entry->bitmap);
+	}
+	hash_seq_term(&status);
+	lfc_ctl->generation += 1;
+	lfc_ctl->size = 0;
+	lfc_ctl->used = 0;
+	dlist_init(&lfc_ctl->lru);
+
+	LWLockRelease(lfc_lock);
 }

 static bool
 lfc_ensure_opened(void)
 {
-	bool enabled = !lfc_maybe_disabled();
 	/* Open cache file if not done yet */
-	if (lfc_desc <= 0 && enabled)
+	if (lfc_desc <= 0)
 	{
-		lfc_desc = BasicOpenFile(lfc_path, O_RDWR);
+		lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);

 		if (lfc_desc < 0) {
 			lfc_disable("open");
 			return false;
 		}
 	}
-	return enabled;
+	return true;
 }

 static void
@@ -200,7 +163,6 @@ lfc_shmem_startup(void)
 	lfc_ctl = (FileCacheControl*)ShmemInitStruct("lfc", sizeof(FileCacheControl), &found);
 	if (!found)
 	{
-		int fd;
 		uint32 lfc_size = SIZE_MB_TO_CHUNKS(lfc_max_size);
 		lfc_lock = (LWLockId)GetNamedLWLockTranche("lfc_lock");
 		info.keysize = sizeof(BufferTag);
@@ -213,23 +175,10 @@ lfc_shmem_startup(void)
 		lfc_ctl->generation = 0;
 		lfc_ctl->size = 0;
 		lfc_ctl->used = 0;
-		lfc_ctl->hits = 0;
-		lfc_ctl->misses = 0;
-		lfc_ctl->writes = 0;
 		dlist_init(&lfc_ctl->lru);

-		/* Recreate file cache on restart */
-		fd = BasicOpenFile(lfc_path, O_RDWR|O_CREAT|O_TRUNC);
-		if (fd < 0)
-		{
-			elog(WARNING, "Failed to create local file cache %s: %m", lfc_path);
-			lfc_ctl->limit = 0;
-		}
-		else
-		{
-			close(fd);
-			lfc_ctl->limit = SIZE_MB_TO_CHUNKS(lfc_size_limit);
-		}
+		/* Remove file cache on restart */
+		(void)unlink(lfc_path);
 	}
 	LWLockRelease(AddinShmemInitLock);
 }
@@ -246,17 +195,6 @@ lfc_shmem_request(void)
 	RequestNamedLWLockTranche("lfc_lock", 1);
 }

-static bool
-is_normal_backend(void)
-{
-	/*
-	 * Stats collector detach shared memory, so we should not try to access shared memory here.
-	 * Parallel workers first assign default value (0), so not perform truncation in parallel workers.
-	 * The Postmaster can handle SIGHUP and it has access to shared memory (UsedShmemSegAddr != NULL), but has no PGPROC.
-	 */
-	return lfc_ctl && MyProc && UsedShmemSegAddr && !IsParallelWorker();
-}
-
 static bool
 lfc_check_limit_hook(int *newval, void **extra, GucSource source)
 {
@@ -272,15 +210,25 @@ static void
 lfc_change_limit_hook(int newval, void *extra)
 {
 	uint32 new_size = SIZE_MB_TO_CHUNKS(newval);
-
-	if (!is_normal_backend())
-		return;
-
-	if (!lfc_ensure_opened())
+	/*
+	 * The stats collector detaches shared memory, so we must not try to access shared memory here.
+	 * Parallel workers first assign the default value (0), so do not perform truncation in parallel workers.
+	 * The postmaster can handle SIGHUP and has access to shared memory (UsedShmemSegAddr != NULL), but it has no PGPROC.
+	 */
+	if (!lfc_ctl || !MyProc || !UsedShmemSegAddr || IsParallelWorker())
 		return;

+	/* Open cache file if not done yet */
+	if (lfc_desc <= 0)
+	{
+		lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);
+		if (lfc_desc < 0) {
+			elog(WARNING, "Failed to open file cache %s: %m, disabling file cache", lfc_path);
+			lfc_size_limit = 0; /* disable file cache */
+			return;
+		}
+	}
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
 	while (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru))
 	{
 		/* Shrink cache by throwing away least recently accessed chunks and returning their space to file system */
@@ -290,12 +238,10 @@ lfc_change_limit_hook(int newval, void *extra)
 		if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, (off_t)victim->offset*BLOCKS_PER_CHUNK*BLCKSZ, BLOCKS_PER_CHUNK*BLCKSZ) < 0)
 			elog(LOG, "Failed to punch hole in file: %m");
 #endif
-		hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+		hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
 		lfc_ctl->used -= 1;
 	}
-	lfc_ctl->limit = new_size;
 	elog(DEBUG1, "set local file cache limit to %d", new_size);
-
 	LWLockRelease(lfc_lock);
 }

@@ -309,7 +255,6 @@ lfc_init(void)
 	if (!process_shared_preload_libraries_in_progress)
 		elog(ERROR, "Neon module should be loaded via shared_preload_libraries");

-
 	DefineCustomIntVariable("neon.max_file_cache_size",
 							"Maximal size of Neon local file cache",
 							NULL,
@@ -370,10 +315,10 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	BufferTag tag;
 	FileCacheEntry* entry;
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
-	bool found = false;
+	bool found;
 	uint32 hash;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return false;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
@@ -382,11 +327,8 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_SHARED);
-	if (LFC_ENABLED())
-	{
-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
-		found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0;
-	}
+	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+	found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0;
 	LWLockRelease(lfc_lock);
 	return found;
 }
@@ -403,7 +345,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
 	uint32 hash;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
@@ -413,13 +355,6 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, &found);

 	if (!found)
@@ -470,7 +405,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 /*
 * Try to read page from local cache.
 * Returns true if page is found in local cache.
- * In case of error local file cache is disabled (lfc->limit is set to zero).
+ * In case of error lfc_size_limit is set to zero to disable any further operations with the cache.
 */
 bool
 lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
@@ -485,7 +420,7 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint64 generation;
 	uint32 entry_offset;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return false;

 	if (!lfc_ensure_opened())
@@ -497,18 +432,10 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return false;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
 	if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
 	{
 		/* Page is not cached */
-		lfc_ctl->misses += 1;
 		LWLockRelease(lfc_lock);
 		return false;
 	}
@@ -529,11 +456,8 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	/* Place entry to the head of LRU list */
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
 	if (lfc_ctl->generation == generation)
 	{
-		Assert(LFC_ENABLED());
-		lfc_ctl->hits += 1;
 		Assert(entry->access_count > 0);
 		if (--entry->access_count == 0)
 			dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
@@ -564,10 +488,8 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	bool found;
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
 	uint32 hash;
-	uint64 generation;
-	uint32 entry_offset;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return;

 	if (!lfc_ensure_opened())
@@ -575,17 +497,12 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	tag.forkNum = forkNum;
 	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
+	
 	CopyNRelFileInfoToBufTag(tag, rinfo);
+	
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);

 	if (found)
@@ -604,13 +521,13 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		 * there are should be very large number of concurrent IO operations and them are limited by max_connections,
 		 * we prefer not to complicate code and use second approach.
 		 */
-		if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
+		if (lfc_ctl->used >= SIZE_MB_TO_CHUNKS(lfc_size_limit) && !dlist_is_empty(&lfc_ctl->lru))
 		{
 			/* Cache overflow: evict least recently used chunk */
 			FileCacheEntry* victim = dlist_container(FileCacheEntry, lru_node, dlist_pop_head_node(&lfc_ctl->lru));
 			Assert(victim->access_count == 0);
 			entry->offset = victim->offset; /* grab victim's chunk */
-			hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+			hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
 			elog(DEBUG2, "Swap file cache page");
 		}
 		else
@@ -619,140 +536,27 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			entry->offset = lfc_ctl->size++; /* allocate new chunk at end of file */
 		}
 		entry->access_count = 1;
-		entry->hash = hash;
 		memset(entry->bitmap, 0, sizeof entry->bitmap);
 	}

-	generation = lfc_ctl->generation;
-	entry_offset = entry->offset;
-	lfc_ctl->writes += 1;
-	LWLockRelease(lfc_lock);
-
-	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry_offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
+	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
 	if (rc != BLCKSZ)
 	{
+		LWLockRelease(lfc_lock);
 		lfc_disable("write");
 	}
 	else
 	{
-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		if (lfc_ctl->generation == generation)
-		{
-			Assert(LFC_ENABLED());
-			/* Place entry to the head of LRU list */
-			Assert(entry->access_count > 0);
-			if (--entry->access_count == 0)
-				dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
-
-			entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
-		}
+		/* Place entry to the head of LRU list */
+		Assert(entry->access_count > 0);
+		if (--entry->access_count == 0)
+			dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);

+		entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
 		LWLockRelease(lfc_lock);
 	}
 }

-typedef struct
-{
-	TupleDesc	tupdesc;
-} NeonGetStatsCtx;
-
-#define NUM_NEON_GET_STATS_COLS	2
-#define NUM_NEON_GET_STATS_ROWS	3
-
-PG_FUNCTION_INFO_V1(neon_get_lfc_stats);
-Datum
-neon_get_lfc_stats(PG_FUNCTION_ARGS)
-{
-	FuncCallContext *funcctx;
-	NeonGetStatsCtx* fctx;
-	MemoryContext oldcontext;
-	TupleDesc	tupledesc;
-	Datum		result;
-	HeapTuple	tuple;
-	char const* key;
-	uint64      value;
-	Datum		values[NUM_NEON_GET_STATS_COLS];
-	bool		nulls[NUM_NEON_GET_STATS_COLS];
-
-	if (SRF_IS_FIRSTCALL())
-	{
-		funcctx = SRF_FIRSTCALL_INIT();
-
-		/* Switch context when allocating stuff to be used in later calls */
-		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
-		/* Create a user function context for cross-call persistence */
-		fctx = (NeonGetStatsCtx*) palloc(sizeof(NeonGetStatsCtx));
-
-		/* Construct a tuple descriptor for the result rows. */
-		tupledesc = CreateTemplateTupleDesc(NUM_NEON_GET_STATS_COLS);
-
-		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "lfc_key",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "lfc_value",
-						   INT8OID, -1, 0);
-
-		fctx->tupdesc = BlessTupleDesc(tupledesc);
-		funcctx->max_calls = NUM_NEON_GET_STATS_ROWS;
-		funcctx->user_fctx = fctx;
-
-		/* Return to original context when allocating transient memory */
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	funcctx = SRF_PERCALL_SETUP();
-
-	/* Get the saved state */
-	fctx = (NeonGetStatsCtx*) funcctx->user_fctx;
-
-	switch (funcctx->call_cntr)
-	{
-		case 0:
-			key = "file_cache_misses";
-			if (lfc_ctl)
-				value = lfc_ctl->misses;
-			break;
-		case 1:
-			key = "file_cache_hits";
-			if (lfc_ctl)
-				value = lfc_ctl->hits;
-			break;
-		case 2:
-			key = "file_cache_used";
-			if (lfc_ctl)
-				value = lfc_ctl->used;
-			break;
-		case 3:
-			key = "file_cache_writes";
-			if (lfc_ctl)
-				value = lfc_ctl->writes;
-			break;
-		default:
-			SRF_RETURN_DONE(funcctx);
-	}
-	values[0] = PointerGetDatum(cstring_to_text(key));
-	nulls[0] = false;
-	if (lfc_ctl)
-	{
-		nulls[1] = false;
-		values[1] = Int64GetDatum(value);
-	}
-	else
-		nulls[1] = true;
-
-	tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
-	result = HeapTupleGetDatum(tuple);
-	SRF_RETURN_NEXT(funcctx, result);
-}
-
-
-/*
- * Function returning data from the local file cache
- * relation node/tablespace/database/blocknum and access_counter
- */
-PG_FUNCTION_INFO_V1(local_cache_pages);
-
 /*
 * Record structure holding the to be exposed cache data.
 */
@@ -776,6 +580,11 @@ typedef struct
 	LocalCachePagesRec *record;
 } LocalCachePagesContext;

+/*
+ * Function returning data from the local file cache
+ * relation node/tablespace/database/blocknum and access_counter
+ */
+PG_FUNCTION_INFO_V1(local_cache_pages);

 #define NUM_LOCALCACHE_PAGES_ELEM	7

@@ -842,20 +651,15 @@ local_cache_pages(PG_FUNCTION_ARGS)

 		fctx->tupdesc = BlessTupleDesc(tupledesc);

-		if (lfc_ctl)
-		{
-			LWLockAcquire(lfc_lock, LW_SHARED);
+		LWLockAcquire(lfc_lock, LW_SHARED);

-			if (LFC_ENABLED())
-			{
-				hash_seq_init(&status, lfc_hash);
-				while ((entry = hash_seq_search(&status)) != NULL)
-				{
-					for (int i = 0; i < BLOCKS_PER_CHUNK/32; i++)
-						n_pages += pg_popcount32(entry->bitmap[i]);
-				}
-			}
+        hash_seq_init(&status, lfc_hash);
+        while ((entry = hash_seq_search(&status)) != NULL)
+		{
+			for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+				n_pages += (entry->bitmap[i >> 5] & (1 << (i & 31))) != 0;
 		}
+		hash_seq_term(&status);
 		fctx->record = (LocalCachePagesRec *)
 			MemoryContextAllocHuge(CurrentMemoryContext,
 								   sizeof(LocalCachePagesRec) * n_pages);
@@ -867,35 +671,36 @@ local_cache_pages(PG_FUNCTION_ARGS)
 		/* Return to original context when allocating transient memory */
 		MemoryContextSwitchTo(oldcontext);

-		if (n_pages != 0)
+		/*
+		 * Scan through all the cache entries, saving the relevant fields in
+		 * the fctx->record structure.
+		 *
+		 * lfc_lock was acquired in shared mode before the counting pass above
+		 * and is released only after this scan, so both passes are expected
+		 * to see the same set of entries (see the Assert below).
+		 */
+		n_pages = 0;
+        hash_seq_init(&status, lfc_hash);
+        while ((entry = hash_seq_search(&status)) != NULL)
 		{
-			/*
-			 * Scan through all the cache entries, saving the relevant fields in the
-			 * fctx->record structure.
-			 */
-			uint32 n = 0;
-			hash_seq_init(&status, lfc_hash);
-			while ((entry = hash_seq_search(&status)) != NULL)
+			for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 			{
-				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+				if (entry->bitmap[i >> 5] & (1 << (i & 31)))
 				{
-					if (entry->bitmap[i >> 5] & (1 << (i & 31)))
-					{
-						fctx->record[n].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
-						fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].forknum = entry->key.forkNum;
-						fctx->record[n].blocknum = entry->key.blockNum + i;
-						fctx->record[n].accesscount = entry->access_count;
-						n += 1;
-					}
+					fctx->record[n_pages].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
+					fctx->record[n_pages].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].forknum = entry->key.forkNum;
+					fctx->record[n_pages].blocknum = entry->key.blockNum + i;
+					fctx->record[n_pages].accesscount = entry->access_count;
+					n_pages += 1;
 				}
 			}
-			Assert(n_pages == n);
 		}
-		if (lfc_ctl)
-			LWLockRelease(lfc_lock);
+		hash_seq_term(&status);
+		Assert(n_pages == funcctx->max_calls);
+		LWLockRelease(lfc_lock);
 	}

 	funcctx = SRF_PERCALL_SETUP();
--- a/pgxn/neon/neon--1.0--1.1.sql
+++ b/pgxn/neon/neon--1.0--1.1.sql
@@ -1,10 +0,0 @@
-\echo Use "ALTER EXTENSION neon UPDATE TO '1.1'" to load this file. \quit
-
-CREATE FUNCTION neon_get_lfc_stats()
-RETURNS SETOF RECORD
-AS 'MODULE_PATHNAME', 'neon_get_lfc_stats'
-LANGUAGE C PARALLEL SAFE;
-
-- Create a view for convenient access.
-CREATE VIEW neon_lfc_stats AS
-	SELECT P.* FROM neon_get_lfc_stats() AS P (lfc_key text, lfc_value bigint);
--- a/pgxn/neon/neon.control
+++ b/pgxn/neon/neon.control
@@ -1,5 +1,4 @@
 # neon extension
 comment = 'cloud storage for PostgreSQL'
-default_version = '1.1'
+default_version = '1.0'
 module_pathname = '$libdir/neon'
-relocatable = true
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -1687,9 +1687,9 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 		if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
 			ereport(ERROR,
 					(errcode(ERRCODE_DISK_FULL),
-					 errmsg("could not extend file because project size limit (%d MB) has been exceeded",
+					 errmsg("could not extend file because cluster size limit (%d MB) has been exceeded",
 							max_cluster_size),
-					 errhint("This limit is defined externally by the project size limit, and internally by neon.max_cluster_size GUC")));
+					 errhint("This limit is defined by neon.max_cluster_size GUC")));
 	}

 	/*
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -168,18 +168,9 @@ async fn task_main(
                    .instrument(tracing::info_span!("handle_client", ?session_id))
                );
            }
-            // Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
-            // If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
-            // This only counts for this loop and it will be enabled again on next `select!`.
-            //
-            // Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
-            // When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
-            // not get called again, even if there are more connections to remove.
-            Some(res) = connections.join_next() => {
-                if let Err(e) = res {
-                    if !e.is_panic() && !e.is_cancelled() {
-                        warn!("unexpected error from joined connection task: {e:?}");
-                    }
+            Some(Err(e)) = connections.join_next(), if !connections.is_empty() => {
+                if !e.is_panic() && !e.is_cancelled() {
+                    warn!("unexpected error from joined connection task: {e:?}");
                }
            }
            _ = cancellation_token.cancelled() => {
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -294,18 +294,9 @@ pub async fn task_main(
                    }),
                );
            }
-            // Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
-            // If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
-            // This only counts for this loop and it will be enabled again on next `select!`.
-            //
-            // Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
-            // When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
-            // not get called again, even if there are more connections to remove.
-            Some(res) = connections.join_next() => {
-                if let Err(e) = res {
-                    if !e.is_panic() && !e.is_cancelled() {
-                        warn!("unexpected error from joined connection task: {e:?}");
-                    }
+            Some(Err(e)) = connections.join_next(), if !connections.is_empty() => {
+                if !e.is_panic() && !e.is_cancelled() {
+                    warn!("unexpected error from joined connection task: {e:?}");
                }
            }
            _ = cancellation_token.cancelled() => {
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.74.0"
+channel = "1.73.0"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -46,7 +46,6 @@ postgres_ffi.workspace = true
 pq_proto.workspace = true
 remote_storage.workspace = true
 safekeeper_api.workspace = true
-sd-notify.workspace = true
 storage_broker.workspace = true
 tokio-stream.workspace = true
 utils.workspace = true
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -8,7 +8,6 @@ use futures::future::BoxFuture;
 use futures::stream::FuturesUnordered;
 use futures::{FutureExt, StreamExt};
 use remote_storage::RemoteStorageConfig;
-use sd_notify::NotifyState;
 use tokio::runtime::Handle;
 use tokio::signal::unix::{signal, SignalKind};
 use tokio::task::JoinError;
@@ -203,7 +202,6 @@ async fn main() -> anyhow::Result<()> {
    logging::init(
        LogFormat::from_config(&args.log_format)?,
        logging::TracingErrorLayerEnablement::Disabled,
-        logging::Output::Stdout,
    )?;
    logging::replace_panic_hook_with_tracing_panic_hook().forget();
    info!("version: {GIT_VERSION}");
@@ -435,12 +433,6 @@ async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
    let mut sigint_stream = signal(SignalKind::interrupt())?;
    let mut sigterm_stream = signal(SignalKind::terminate())?;

-    // Notify systemd that we are ready. This is important as currently loading
-    // timelines takes significant time (~30s in busy regions).
-    if let Err(e) = sd_notify::notify(true, &[NotifyState::Ready]) {
-        warn!("systemd notify failed: {:?}", e);
-    }
-
    tokio::select! {
        Some((task_name, res)) = tasks_handles.next()=> {
            error!("{} task failed: {:?}, exiting", task_name, res);
--- a/storage_broker/src/bin/storage_broker.rs
+++ b/storage_broker/src/bin/storage_broker.rs
@@ -434,7 +434,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    logging::init(
        LogFormat::from_config(&args.log_format)?,
        logging::TracingErrorLayerEnablement::Disabled,
-        logging::Output::Stdout,
    )?;
    logging::replace_panic_hook_with_tracing_panic_hook().forget();
    // initialize sentry if SENTRY_DSN is provided
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -41,12 +41,7 @@ from urllib3.util.retry import Retry

 from fixtures.broker import NeonBroker
 from fixtures.log_helper import log
-from fixtures.pageserver.allowed_errors import (
-    DEFAULT_PAGESERVER_ALLOWED_ERRORS,
-    scan_pageserver_log_for_errors,
-)
 from fixtures.pageserver.http import PageserverHttpClient
-from fixtures.pageserver.types import IndexPartDump
 from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
 from fixtures.pg_version import PgVersion
 from fixtures.port_distributor import PortDistributor
@@ -707,7 +702,6 @@ class NeonEnv:
        self.port_distributor = config.port_distributor
        self.s3_mock_server = config.mock_s3_server
        self.neon_cli = NeonCli(env=self)
-        self.pagectl = Pagectl(env=self)
        self.endpoints = EndpointFactory(self)
        self.safekeepers: List[Safekeeper] = []
        self.pageservers: List[NeonPageserver] = []
@@ -1228,7 +1222,6 @@ class NeonCli(AbstractNeonCli):
        self,
        new_branch_name: str,
        tenant_id: Optional[TenantId] = None,
-        timeline_id: Optional[TimelineId] = None,
    ) -> TimelineId:
        cmd = [
            "timeline",
@@ -1241,9 +1234,6 @@ class NeonCli(AbstractNeonCli):
            self.env.pg_version,
        ]

-        if timeline_id is not None:
-            cmd.extend(["--timeline-id", str(timeline_id)])
-
        res = self.raw_cli(cmd)
        res.check_returncode()

@@ -1568,20 +1558,6 @@ class ComputeCtl(AbstractNeonCli):
    COMMAND = "compute_ctl"


-class Pagectl(AbstractNeonCli):
-    """
-    A typed wrapper around the `pagectl` utility CLI tool.
-    """
-
-    COMMAND = "pagectl"
-
-    def dump_index_part(self, path: Path) -> IndexPartDump:
-        res = self.raw_cli(["index-part", "dump", str(path)])
-        res.check_returncode()
-        parsed = json.loads(res.stdout)
-        return IndexPartDump.from_json(parsed)
-
-
 class NeonAttachmentService:
    def __init__(self, env: NeonEnv):
        self.env = env
@@ -1646,7 +1622,57 @@ class NeonPageserver(PgProtocol):
        # env.pageserver.allowed_errors.append(".*could not open garage door.*")
        #
        # The entries in the list are regular experessions.
-        self.allowed_errors: List[str] = list(DEFAULT_PAGESERVER_ALLOWED_ERRORS)
+        self.allowed_errors = [
+            # All tests print these, when starting up or shutting down
+            ".*wal receiver task finished with an error: walreceiver connection handling failure.*",
+            ".*Shutdown task error: walreceiver connection handling failure.*",
+            ".*wal_connection_manager.*tcp connect error: Connection refused.*",
+            ".*query handler for .* failed: Socket IO error: Connection reset by peer.*",
+            ".*serving compute connection task.*exited with error: Postgres connection error.*",
+            ".*serving compute connection task.*exited with error: Connection reset by peer.*",
+            ".*serving compute connection task.*exited with error: Postgres query error.*",
+            ".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
+            # FIXME: the replication patch for tokio_postgres regards any message other than CopyDone/CopyData in a CopyBoth stream as unexpected
+            ".*Connection aborted: unexpected message from server*",
+            ".*kill_and_wait_impl.*: wait successful.*",
+            ".*query handler for 'pagestream.*failed: Broken pipe.*",  # pageserver notices compute shut down
+            ".*query handler for 'pagestream.*failed: Connection reset by peer.*",  # pageserver notices compute shut down
+            # safekeeper connection can fail with this, in the window between timeline creation
+            # and streaming start
+            ".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
+            # Tests related to authentication and authorization print these
+            ".*Error processing HTTP request: Forbidden",
+            # intentional failpoints
+            ".*failpoint ",
+            # FIXME: These need investigation
+            ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
+            ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
+            ".*Removing intermediate uninit mark file.*",
+            # Tenant::delete_timeline() can cause any of the four following errors.
+            # FIXME: we shouldn't be considering it an error: https://github.com/neondatabase/neon/issues/2946
+            ".*could not flush frozen layer.*queue is in state Stopped",  # when schedule layer upload fails because queued got closed before compaction got killed
+            ".*wait for layer upload ops to complete.*",  # .*Caused by:.*wait_completion aborted because upload queue was stopped
+            ".*gc_loop.*Gc failed, retrying in.*timeline is Stopping",  # When gc checks timeline state after acquiring layer_removal_cs
+            ".*gc_loop.*Gc failed, retrying in.*: Cannot run GC iteration on inactive tenant",  # Tenant::gc precondition
+            ".*compaction_loop.*Compaction failed.*, retrying in.*timeline or pageserver is shutting down",  # When compaction checks timeline state after acquiring layer_removal_cs
+            ".*query handler for 'pagestream.*failed: Timeline .* was not found",  # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock()
+            ".*query handler for 'pagestream.*failed: Timeline .* is not active",  # timeline delete in progress
+            ".*task iteration took longer than the configured period.*",
+            # this is until #3501
+            ".*Compaction failed.*, retrying in [^:]+: Cannot run compaction iteration on inactive tenant",
+            # these can happen anytime we do compactions from background task and shutdown pageserver
+            r".*ERROR.*ancestor timeline \S+ is being stopped",
+            # this is expected given our collaborative shutdown approach for the UploadQueue
+            ".*Compaction failed.*, retrying in .*: queue is in state Stopped.*",
+            # Pageserver timeline deletion should be polled until it gets 404, so ignore it globally
+            ".*Error processing HTTP request: NotFound: Timeline .* was not found",
+            ".*took more than expected to complete.*",
+            # these can happen during shutdown, but it should not be a reason to fail a test
+            ".*completed, took longer than expected.*",
+            # AWS S3 may emit 500 errors for keys in a DeleteObjects response: we retry these
+            # and it is not a failure of our code when it happens.
+            ".*DeleteObjects.*We encountered an internal error. Please try again.*",
+        ]

    def timeline_dir(self, tenant_id: TenantId, timeline_id: Optional[TimelineId] = None) -> Path:
        """Get a timeline directory's path based on the repo directory of the test environment"""
@@ -1756,9 +1782,27 @@ class NeonPageserver(PgProtocol):

    def assert_no_errors(self):
        logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
-        errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
+        error_or_warn = re.compile(r"\s(ERROR|WARN)")
+        errors = []
+        while True:
+            line = logfile.readline()
+            if not line:
+                break

-        for _lineno, error in errors:
+            if error_or_warn.search(line):
+                # Is this a torn log line?  This happens when force-killing a process and restarting
+                # Example: "2023-10-25T09:38:31.752314Z  WARN deletion executo2023-10-25T09:38:31.875947Z  INFO version: git-env:0f9452f76e8ccdfc88291bccb3f53e3016f40192"
+                if re.match("\\d{4}-\\d{2}-\\d{2}T.+\\d{4}-\\d{2}-\\d{2}T.+INFO version.+", line):
+                    continue
+
+                # It's an ERROR or WARN. Is it in the allow-list?
+                for a in self.allowed_errors:
+                    if re.match(a, line):
+                        break
+                else:
+                    errors.append(line)
+
+        for error in errors:
            log.info(f"not allowed error: {error.strip()}")

        assert not errors
--- a/test_runner/fixtures/pageserver/allowed_errors.py
+++ b/test_runner/fixtures/pageserver/allowed_errors.py
@@ -1,116 +0,0 @@
-#! /usr/bin/env python3
-
-import argparse
-import re
-import sys
-from typing import Iterable, List, Tuple
-
-
-def scan_pageserver_log_for_errors(
-    input: Iterable[str], allowed_errors: List[str]
-) -> List[Tuple[int, str]]:
-    error_or_warn = re.compile(r"\s(ERROR|WARN)")
-    errors = []
-    for lineno, line in enumerate(input, start=1):
-        if len(line) == 0:
-            continue
-
-        if error_or_warn.search(line):
-            # Is this a torn log line?  This happens when force-killing a process and restarting
-            # Example: "2023-10-25T09:38:31.752314Z  WARN deletion executo2023-10-25T09:38:31.875947Z  INFO version: git-env:0f9452f76e8ccdfc88291bccb3f53e3016f40192"
-            if re.match("\\d{4}-\\d{2}-\\d{2}T.+\\d{4}-\\d{2}-\\d{2}T.+INFO version.+", line):
-                continue
-
-            # It's an ERROR or WARN. Is it in the allow-list?
-            for a in allowed_errors:
-                if re.match(a, line):
-                    break
-            else:
-                errors.append((lineno, line))
-    return errors
-
-
-DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
-    # All tests print these, when starting up or shutting down
-    ".*wal receiver task finished with an error: walreceiver connection handling failure.*",
-    ".*Shutdown task error: walreceiver connection handling failure.*",
-    ".*wal_connection_manager.*tcp connect error: Connection refused.*",
-    ".*query handler for .* failed: Socket IO error: Connection reset by peer.*",
-    ".*serving compute connection task.*exited with error: Postgres connection error.*",
-    ".*serving compute connection task.*exited with error: Connection reset by peer.*",
-    ".*serving compute connection task.*exited with error: Postgres query error.*",
-    ".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
-    # FIXME: replication patch for tokio_postgres regards  any but CopyDone/CopyData message in CopyBoth stream as unexpected
-    ".*Connection aborted: unexpected message from server*",
-    ".*kill_and_wait_impl.*: wait successful.*",
-    ".*query handler for 'pagestream.*failed: Broken pipe.*",  # pageserver notices compute shut down
-    ".*query handler for 'pagestream.*failed: Connection reset by peer.*",  # pageserver notices compute shut down
-    # safekeeper connection can fail with this, in the window between timeline creation
-    # and streaming start
-    ".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
-    # Tests related to authentication and authorization print these
-    ".*Error processing HTTP request: Forbidden",
-    # intentional failpoints
-    ".*failpoint ",
-    # FIXME: These need investigation
-    ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
-    ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
-    ".*Removing intermediate uninit mark file.*",
-    # Tenant::delete_timeline() can cause any of the four following errors.
-    # FIXME: we shouldn't be considering it an error: https://github.com/neondatabase/neon/issues/2946
-    ".*could not flush frozen layer.*queue is in state Stopped",  # when schedule layer upload fails because queued got closed before compaction got killed
-    ".*wait for layer upload ops to complete.*",  # .*Caused by:.*wait_completion aborted because upload queue was stopped
-    ".*gc_loop.*Gc failed, retrying in.*timeline is Stopping",  # When gc checks timeline state after acquiring layer_removal_cs
-    ".*gc_loop.*Gc failed, retrying in.*: Cannot run GC iteration on inactive tenant",  # Tenant::gc precondition
-    ".*compaction_loop.*Compaction failed.*, retrying in.*timeline or pageserver is shutting down",  # When compaction checks timeline state after acquiring layer_removal_cs
-    ".*query handler for 'pagestream.*failed: Timeline .* was not found",  # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock()
-    ".*query handler for 'pagestream.*failed: Timeline .* is not active",  # timeline delete in progress
-    ".*task iteration took longer than the configured period.*",
-    # these can happen anytime we do compactions from background task and shutdown pageserver
-    r".*ERROR.*ancestor timeline \S+ is being stopped",
-    # this is expected given our collaborative shutdown approach for the UploadQueue
-    ".*Compaction failed.*, retrying in .*: Other\\(queue is in state Stopped.*",
-    ".*Compaction failed.*, retrying in .*: ShuttingDown",
-    # Pageserver timeline deletion should be polled until it gets 404, so ignore it globally
-    ".*Error processing HTTP request: NotFound: Timeline .* was not found",
-    ".*took more than expected to complete.*",
-    # these can happen during shutdown, but it should not be a reason to fail a test
-    ".*completed, took longer than expected.*",
-    # AWS S3 may emit 500 errors for keys in a DeleteObjects response: we retry these
-    # and it is not a failure of our code when it happens.
-    ".*DeleteObjects.*We encountered an internal error. Please try again.*",
-)
-
-
-def _check_allowed_errors(input):
-    allowed_errors: List[str] = list(DEFAULT_PAGESERVER_ALLOWED_ERRORS)
-
-    # add any test specifics here; cli parsing is not provided for the
-    # difficulty of copypasting regexes as arguments without any quoting
-    # errors.
-
-    errors = scan_pageserver_log_for_errors(input, allowed_errors)
-
-    for lineno, error in errors:
-        print(f"-:{lineno}: {error.strip()}", file=sys.stderr)
-
-    print(f"\n{len(errors)} not allowed errors", file=sys.stderr)
-
-    return errors
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="check input against pageserver global allowed_errors"
-    )
-    parser.add_argument(
-        "-i",
-        "--input",
-        type=argparse.FileType("r"),
-        default=sys.stdin,
-        help="Pageserver logs file. Reads from stdin if no file is provided.",
-    )
-    args = parser.parse_args()
-    errors = _check_allowed_errors(args.input)
-
-    sys.exit(len(errors) > 0)
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -432,18 +432,12 @@ class PageserverHttpClient(requests.Session):
        assert isinstance(res_json, dict)
        return res_json

-    def timeline_compact(
-        self, tenant_id: TenantId, timeline_id: TimelineId, force_repartition=False
-    ):
+    def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId):
        self.is_testing_enabled_or_skip()
-        query = {}
-        if force_repartition:
-            query["force_repartition"] = "true"

        log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact",
-            params=query,
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact"
        )
        log.info(f"Got compact request response code: {res.status_code}")
        self.verbose_error(res)
@@ -472,18 +466,12 @@ class PageserverHttpClient(requests.Session):
        res_json = res.json()
        return res_json

-    def timeline_checkpoint(
-        self, tenant_id: TenantId, timeline_id: TimelineId, force_repartition=False
-    ):
+    def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
        self.is_testing_enabled_or_skip()
-        query = {}
-        if force_repartition:
-            query["force_repartition"] = "true"

        log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint",
-            params=query,
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint"
        )
        log.info(f"Got checkpoint request response code: {res.status_code}")
        self.verbose_error(res)
--- a/test_runner/fixtures/pageserver/types.py
+++ b/test_runner/fixtures/pageserver/types.py
@@ -1,146 +0,0 @@
-from dataclasses import dataclass
-from typing import Any, Dict, Tuple, Union
-
-from fixtures.types import KEY_MAX, KEY_MIN, Key, Lsn
-
-
-@dataclass
-class IndexLayerMetadata:
-    @classmethod
-    def from_json(cls, d: Dict[str, Any]):
-        return {}
-
-
-@dataclass(frozen=True)
-class ImageLayerFileName:
-    lsn: Lsn
-    key_start: Key
-    key_end: Key
-
-    def to_str(self):
-        ret = (
-            f"{self.key_start.as_int():036X}-{self.key_end.as_int():036X}__{self.lsn.as_int():016X}"
-        )
-        assert self == parse_layer_file_name(ret)
-        return ret
-
-
-@dataclass(frozen=True)
-class DeltaLayerFileName:
-    lsn_start: Lsn
-    lsn_end: Lsn
-    key_start: Key
-    key_end: Key
-
-    def is_l0(self):
-        return self.key_start == KEY_MIN and self.key_end == KEY_MAX
-
-    def to_str(self):
-        ret = f"{self.key_start.as_int():036X}-{self.key_end.as_int():036X}__{self.lsn_start.as_int():016X}-{self.lsn_end.as_int():016X}"
-        assert self == parse_layer_file_name(ret)
-        return ret
-
-
-LayerFileName = Union[ImageLayerFileName, DeltaLayerFileName]
-
-
-class InvalidFileName(Exception):
-    pass
-
-
-def parse_image_layer(f_name: str) -> Tuple[int, int, int]:
-    """Parse an image layer file name. Return key start, key end, and snapshot lsn"""
-    parts = f_name.split("__")
-    if len(parts) != 2:
-        raise InvalidFileName(f"expecting two parts separated by '__', got: {parts}")
-    key_parts = parts[0].split("-")
-    if len(key_parts) != 2:
-        raise InvalidFileName(
-            f"expecting two key parts separated by '--' in parts[0], got: {key_parts}"
-        )
-    try:
-        return int(key_parts[0], 16), int(key_parts[1], 16), int(parts[1], 16)
-    except ValueError as e:
-        raise InvalidFileName(f"conversion error: {f_name}") from e
-
-
-def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]:
-    """Parse a delta layer file name. Return key start, key end, lsn start, and lsn end"""
-    parts = f_name.split("__")
-    if len(parts) != 2:
-        raise InvalidFileName(f"expecting two parts separated by '__', got: {parts}")
-    key_parts = parts[0].split("-")
-    if len(key_parts) != 2:
-        raise InvalidFileName(
-            f"expecting two key parts separated by '--' in parts[0], got: {key_parts}"
-        )
-    lsn_parts = parts[1].split("-")
-    if len(lsn_parts) != 2:
-        raise InvalidFileName(
-            f"expecting two lsn parts separated by '--' in parts[1], got: {lsn_parts}"
-        )
-    try:
-        return (
-            int(key_parts[0], 16),
-            int(key_parts[1], 16),
-            int(lsn_parts[0], 16),
-            int(lsn_parts[1], 16),
-        )
-    except ValueError as e:
-        raise InvalidFileName(f"conversion error: {f_name}") from e
-
-
-def parse_layer_file_name(file_name: str) -> LayerFileName:
-    try:
-        key_start, key_end, lsn = parse_image_layer(file_name)
-        return ImageLayerFileName(lsn=Lsn(lsn), key_start=Key(key_start), key_end=Key(key_end))
-    except InvalidFileName:
-        pass
-
-    try:
-        key_start, key_end, lsn_start, lsn_end = parse_delta_layer(file_name)
-        return DeltaLayerFileName(
-            lsn_start=Lsn(lsn_start),
-            lsn_end=Lsn(lsn_end),
-            key_start=Key(key_start),
-            key_end=Key(key_end),
-        )
-    except InvalidFileName:
-        pass
-
-    raise ValueError()
-
-
-def is_future_layer(layer_file_name: LayerFileName, disk_consistent_lsn: Lsn):
-    """
-    Determines if this layer file is considered to be in future meaning we will discard these
-    layers during timeline initialization from the given disk_consistent_lsn.
-    """
-    if (
-        isinstance(layer_file_name, ImageLayerFileName)
-        and layer_file_name.lsn > disk_consistent_lsn
-    ):
-        return True
-    elif (
-        isinstance(layer_file_name, DeltaLayerFileName)
-        and layer_file_name.lsn_end > disk_consistent_lsn + 1
-    ):
-        return True
-    else:
-        return False
-
-
-@dataclass
-class IndexPartDump:
-    layer_metadata: Dict[LayerFileName, IndexLayerMetadata]
-    disk_consistent_lsn: Lsn
-
-    @classmethod
-    def from_json(cls, d: Dict[str, Any]) -> "IndexPartDump":
-        return IndexPartDump(
-            layer_metadata={
-                parse_layer_file_name(n): IndexLayerMetadata.from_json(v)
-                for n, v in d["layer_metadata"].items()
-            },
-            disk_consistent_lsn=Lsn(d["disk_consistent_lsn"]),
-        )
--- a/test_runner/fixtures/remote_storage.py
+++ b/test_runner/fixtures/remote_storage.py
@@ -12,7 +12,6 @@ import boto3
 from mypy_boto3_s3 import S3Client

 from fixtures.log_helper import log
-from fixtures.pageserver.types import LayerFileName
 from fixtures.types import TenantId, TimelineId

 TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
@@ -88,11 +87,6 @@ class LocalFsStorage:
    def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
        return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)

-    def layer_path(
-        self, tenant_id: TenantId, timeline_id: TimelineId, layer_file_name: LayerFileName
-    ):
-        return self.timeline_path(tenant_id, timeline_id) / layer_file_name.to_str()
-
    def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
        return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME

--- a/test_runner/fixtures/types.py
+++ b/test_runner/fixtures/types.py
@@ -1,5 +1,4 @@
 import random
-from dataclasses import dataclass
 from functools import total_ordering
 from typing import Any, Type, TypeVar, Union

@@ -37,11 +36,6 @@ class Lsn:
            return NotImplemented
        return self.lsn_int < other.lsn_int

-    def __gt__(self, other: Any) -> bool:
-        if not isinstance(other, Lsn):
-            raise NotImplementedError
-        return self.lsn_int > other.lsn_int
-
    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Lsn):
            return NotImplemented
@@ -53,32 +47,9 @@ class Lsn:
            return NotImplemented
        return self.lsn_int - other.lsn_int

-    def __add__(self, other: Union[int, "Lsn"]) -> "Lsn":
-        if isinstance(other, int):
-            return Lsn(self.lsn_int + other)
-        elif isinstance(other, Lsn):
-            return Lsn(self.lsn_int + other.lsn_int)
-        else:
-            raise NotImplementedError
-
    def __hash__(self) -> int:
        return hash(self.lsn_int)

-    def as_int(self) -> int:
-        return self.lsn_int
-
-
-@dataclass(frozen=True)
-class Key:
-    key_int: int
-
-    def as_int(self) -> int:
-        return self.key_int
-
-
-KEY_MAX = Key(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)
-KEY_MIN = Key(0)
-

@total_ordering
 class Id:
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -6,16 +6,7 @@ import subprocess
 import threading
 import time
 from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Optional,
-    Tuple,
-    TypeVar,
-)
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, TypeVar
 from urllib.parse import urlencode

 import allure
@@ -23,10 +14,6 @@ import zstandard
 from psycopg2.extensions import cursor

 from fixtures.log_helper import log
-from fixtures.pageserver.types import (
-    parse_delta_layer,
-    parse_image_layer,
-)

 if TYPE_CHECKING:
    from fixtures.neon_fixtures import PgBin
@@ -206,6 +193,26 @@ def get_timeline_dir_size(path: Path) -> int:
    return sz


+def parse_image_layer(f_name: str) -> Tuple[int, int, int]:
+    """Parse an image layer file name. Return key start, key end, and snapshot lsn"""
+    parts = f_name.split("__")  # [key range, snapshot lsn]
+    key_parts = parts[0].split("-")  # unvalidated: a malformed name raises IndexError/ValueError
+    return int(key_parts[0], 16), int(key_parts[1], 16), int(parts[1], 16)  # fields are hex
+
+
+def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]:
+    """Parse a delta layer file name. Return key start, key end, lsn start, and lsn end"""
+    parts = f_name.split("__")  # [key range, lsn range]
+    key_parts = parts[0].split("-")  # [key start, key end]
+    lsn_parts = parts[1].split("-")  # [lsn start, lsn end]
+    return (
+        int(key_parts[0], 16),  # every field is parsed as hexadecimal
+        int(key_parts[1], 16),
+        int(lsn_parts[0], 16),
+        int(lsn_parts[1], 16),  # unvalidated: a malformed name raises IndexError/ValueError
+    )
+
+
 def get_scale_for_db(size_mb: int) -> int:
    """Returns pgbench scale factor for given target db size in MB.

--- a/test_runner/regress/test_backpressure.py
+++ b/test_runner/regress/test_backpressure.py
@@ -24,6 +24,8 @@ def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_
    log.info("checks started")

    with pg_cur(endpoint) as cur:
+        cur.execute("CREATE EXTENSION neon")  # TODO move it to neon_fixtures?
+
        cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
        res = cur.fetchone()
        max_replication_write_lag_bytes = res[0]
@@ -100,13 +102,9 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
    # Create a branch for us
    env.neon_cli.create_branch("test_backpressure")

-    endpoint = env.endpoints.create(
+    endpoint = env.endpoints.create_start(
        "test_backpressure", config_lines=["max_replication_write_lag=30MB"]
    )
-    # don't skip pg_catalog updates - it runs CREATE EXTENSION neon
-    # which is needed for backpressure_lsns() to work
-    endpoint.respec(skip_pg_catalog_updates=False)
-    endpoint.start()
    log.info("postgres is running on 'test_backpressure' branch")

    # setup check thread
--- a/test_runner/regress/test_branch_and_gc.py
+++ b/test_runner/regress/test_branch_and_gc.py
@@ -46,10 +46,7 @@ from fixtures.utils import query_scalar
 # Because the delta layer D covering lsn1 is corrupted, creating a branch
 # starting from lsn1 should return an error as follows:
 #     could not find data for key ... at LSN ..., for request at LSN ...
-def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
-    if build_type == "debug":
-        pytest.skip("times out in debug builds")
-
+def test_branch_and_gc(neon_simple_env: NeonEnv):
    env = neon_simple_env
    pageserver_http_client = env.pageserver.http_client()

--- a/test_runner/regress/test_broken_timeline.py
+++ b/test_runner/regress/test_broken_timeline.py
@@ -114,7 +114,6 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
        [
            ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
            ".*Timeline got dropped without initializing, cleaning its files.*",
-            ".*Failed to load index_part from remote storage, failed creation?.*",
        ]
    )

@@ -144,58 +143,6 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
    ), "pageserver should clean its temp timeline files on timeline creation failure"


-def test_timeline_init_break_before_checkpoint_recreate(neon_env_builder: NeonEnvBuilder):
-    env = neon_env_builder.init_start()
-    pageserver_http = env.pageserver.http_client()
-
-    env.pageserver.allowed_errors.extend(
-        [
-            ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
-            ".*Timeline got dropped without initializing, cleaning its files.*",
-            ".*Failed to load index_part from remote storage, failed creation?.*",
-        ]
-    )
-
-    tenant_id = env.initial_tenant
-
-    timelines_dir = env.pageserver.timeline_dir(tenant_id)
-    old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
-    initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
-
-    # Some fixed timeline ID (like control plane does)
-    timeline_id = TimelineId("1080243c1f76fe3c5147266663c9860b")
-
-    # Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
-    pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "return"))
-    with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
-        _ = env.neon_cli.create_timeline(
-            "test_timeline_init_break_before_checkpoint", tenant_id, timeline_id
-        )
-
-    # Restart the page server
-    env.pageserver.restart(immediate=True)
-
-    # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
-    new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
-    assert (
-        new_tenant_timelines == old_tenant_timelines
-    ), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
-
-    timeline_dirs = [d for d in timelines_dir.iterdir()]
-    assert (
-        timeline_dirs == initial_timeline_dirs
-    ), "pageserver should clean its temp timeline files on timeline creation failure"
-
-    # Disable the failpoint again
-    pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "off"))
-    # creating the branch should have worked now
-    new_timeline_id = env.neon_cli.create_timeline(
-        "test_timeline_init_break_before_checkpoint", tenant_id, timeline_id
-    )
-
-    assert timeline_id == new_timeline_id
-
-
 def test_timeline_create_break_after_uninit_mark(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
--- a/test_runner/regress/test_ddl_forwarding.py
+++ b/test_runner/regress/test_ddl_forwarding.py
@@ -245,19 +245,6 @@ def test_ddl_forwarding(ddl: DdlForwardingContext):
        raise AssertionError("Could not count databases")
    assert result[0] == 0, "Database 'failure' still exists after drop"

-    # We don't have compute_ctl, so here, so create neon_superuser here manually
-    cur.execute("CREATE ROLE neon_superuser NOLOGIN CREATEDB CREATEROLE")
-
-    with pytest.raises(psycopg2.InternalError):
-        cur.execute("ALTER ROLE neon_superuser LOGIN")
-
-    with pytest.raises(psycopg2.InternalError):
-        cur.execute("CREATE DATABASE trololobus WITH OWNER neon_superuser")
-
-    cur.execute("CREATE DATABASE trololobus")
-    with pytest.raises(psycopg2.InternalError):
-        cur.execute("ALTER DATABASE trololobus OWNER TO neon_superuser")
-
    conn.close()


--- a/test_runner/regress/test_layer_eviction.py
+++ b/test_runner/regress/test_layer_eviction.py
@@ -1,6 +1,5 @@
 import time

-import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
    NeonEnvBuilder,
@@ -16,11 +15,7 @@ from fixtures.utils import query_scalar
 # and then download them back.
 def test_basic_eviction(
    neon_env_builder: NeonEnvBuilder,
-    build_type: str,
 ):
-    if build_type == "debug":
-        pytest.skip("times out in debug builds")
-
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)

    env = neon_env_builder.init_start(
--- a/test_runner/regress/test_layers_from_future.py
+++ b/test_runner/regress/test_layers_from_future.py
@@ -1,222 +0,0 @@
-import time
-
-from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder
-from fixtures.pageserver.types import (
-    DeltaLayerFileName,
-    ImageLayerFileName,
-    is_future_layer,
-)
-from fixtures.pageserver.utils import (
-    wait_for_last_record_lsn,
-    wait_for_upload_queue_empty,
-    wait_until_tenant_active,
-)
-from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
-from fixtures.types import Lsn
-from fixtures.utils import query_scalar, wait_until
-
-
-def test_issue_5878(neon_env_builder: NeonEnvBuilder):
-    """
-    Regression test for issue https://github.com/neondatabase/neon/issues/5878 .
-
-    Create a situation where IndexPart contains an image layer from a future
-    (i.e., image layer > IndexPart::disk_consistent_lsn).
-    Detach.
-    Attach.
-    Wait for tenant to finish load_layer_map (by waiting for it to become active).
-    Wait for any remote timeline client ops to finish that the attach started.
-    Integrity-check the index part.
-
-    Before fixing the issue, load_layer_map would schedule removal of the future
-    image layer. A compaction run could later re-create the image layer with
-    the same file name, scheduling a PUT.
-    Due to lack of an upload queue barrier, the PUT and DELETE could be re-ordered.
-    The result was IndexPart referencing a non-existent object.
-    """
-    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
-
-    env = neon_env_builder.init_start()
-
-    ps_http = env.pageserver.http_client()
-
-    l0_l1_threshold = 3
-    image_creation_threshold = 1
-
-    tenant_config = {
-        "gc_period": "0s",  # disable GC (shouldn't matter for this test but still)
-        "compaction_period": "0s",  # we want to control when compaction runs
-        "checkpoint_timeout": "24h",  # something we won't reach
-        "checkpoint_distance": f"{50 * (1024**2)}",  # something we won't reach, we checkpoint manually
-        "image_creation_threshold": f"{image_creation_threshold}",
-        "compaction_threshold": f"{l0_l1_threshold}",
-        "compaction_target_size": f"{128 * (1024**3)}",  # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
-    }
-
-    tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
-
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
-
-    def get_index_part():
-        assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
-        ip_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
-        return env.pagectl.dump_index_part(ip_path)
-
-    def get_future_layers():
-        ip = get_index_part()
-        future_layers = [
-            layer_file_name
-            for layer_file_name in ip.layer_metadata.keys()
-            if is_future_layer(layer_file_name, ip.disk_consistent_lsn)
-        ]
-        return future_layers
-
-    assert len(get_future_layers()) == 0
-
-    current = get_index_part()
-    assert len(set(current.layer_metadata.keys())) == 1
-    layer_file_name = list(current.layer_metadata.keys())[0]
-    assert isinstance(layer_file_name, DeltaLayerFileName)
-    assert layer_file_name.is_l0(), f"{layer_file_name}"
-
-    log.info("force image layer creation in the future by writing some data into in-memory layer")
-
-    # Create a number of layers in the tenant
-    with endpoint.cursor() as cur:
-        cur.execute("CREATE TABLE foo (t text)")
-        iters = l0_l1_threshold * image_creation_threshold
-        for i in range(0, iters):
-            cur.execute(
-                f"""
-                INSERT INTO foo
-                SELECT '{i}' || g
-                FROM generate_series(1, 10000) g
-                """
-            )
-            last_record_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
-            wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
-            # 0..iters-1: create a stack of delta layers
-            # iters: leave a non-empty in-memory layer which we'll use for image layer generation
-            if i < iters - 1:
-                ps_http.timeline_checkpoint(tenant_id, timeline_id, force_repartition=True)
-                assert (
-                    len(
-                        [
-                            layer
-                            for layer in ps_http.layer_map_info(
-                                tenant_id, timeline_id
-                            ).historic_layers
-                            if layer.kind == "Image"
-                        ]
-                    )
-                    == 0
-                )
-
-    endpoint.stop()
-
-    wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
-
-    ip = get_index_part()
-    assert len(ip.layer_metadata.keys())
-    assert (
-        ip.disk_consistent_lsn < last_record_lsn
-    ), "sanity check for what above loop is supposed to do"
-
-    # create the image layer from the future
-    ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
-    assert (
-        len(
-            [
-                layer
-                for layer in ps_http.layer_map_info(tenant_id, timeline_id).historic_layers
-                if layer.kind == "Image"
-            ]
-        )
-        == 1
-    )
-    wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
-    future_layers = get_future_layers()
-    assert len(future_layers) == 1
-    future_layer = future_layers[0]
-    assert isinstance(future_layer, ImageLayerFileName)
-    assert future_layer.lsn == last_record_lsn
-    log.info(
-        f"got layer from the future: lsn={future_layer.lsn} disk_consistent_lsn={ip.disk_consistent_lsn} last_record_lsn={last_record_lsn}"
-    )
-    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
-    future_layer_path = env.pageserver_remote_storage.layer_path(
-        tenant_id, timeline_id, future_layer
-    )
-    log.info(f"future layer path: {future_layer_path}")
-    pre_stat = future_layer_path.stat()
-    time.sleep(1.1)  # so that we can use change in pre_stat.st_mtime to detect overwrites
-
-    # force removal of layers from the future
-    tenant_conf = ps_http.tenant_config(tenant_id)
-    ps_http.tenant_detach(tenant_id)
-    failpoint_name = "before-delete-layer-pausable"
-    ps_http.configure_failpoints((failpoint_name, "pause"))
-    ps_http.tenant_attach(tenant_id, tenant_conf.tenant_specific_overrides)
-    wait_until_tenant_active(ps_http, tenant_id)
-
-    # Ensure the IndexPart upload that unlinks the layer file finishes, i.e., doesn't clog the queue.
-    def future_layer_is_gone_from_index_part():
-        future_layers = set(get_future_layers())
-        assert future_layer not in future_layers
-
-    wait_until(10, 0.5, future_layer_is_gone_from_index_part)
-
-    # NB: the layer file is unlinked index part now, but, because we made the delete
-    # operation stuck, the layer file itself is still in the remote_storage
-    def delete_at_pause_point():
-        assert env.pageserver.log_contains(f".*{tenant_id}.*at failpoint.*{failpoint_name}")
-
-    wait_until(10, 0.5, delete_at_pause_point)
-    assert future_layer_path.exists()
-
-    # wait for re-ingestion of the WAL from safekeepers into the in-memory layer
-    # (this happens in parallel to the above)
-    wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
-
-    # re-do image layer generation
-    # This will produce the same image layer and queue an upload.
-    # However, we still have the deletion for the layer queued, stuck on the failpoint.
-    # An incorrect implementation would let the PUT execute before the DELETE.
-    # The later code in this test asserts that this doesn't happen.
-    ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
-
-    # Let things sit for some time; a good implementation makes no progress because
-    # we can't execute the PUT before the DELETE. A bad implementation would do that.
-    max_race_opportunity_window = 4
-    start = time.monotonic()
-    while True:
-        post_stat = future_layer_path.stat()
-        assert (
-            pre_stat.st_mtime == post_stat.st_mtime
-        ), "observed PUT overtake the stucked DELETE => bug isn't fixed yet"
-        if time.monotonic() - start > max_race_opportunity_window:
-            log.info(
-                "a correct implementation would never let the later PUT overtake the earlier DELETE"
-            )
-            break
-        time.sleep(1)
-
-    # Window has passed, unstuck the delete, let upload queue drain.
-    log.info("unstuck the DELETE")
-    ps_http.configure_failpoints(("before-delete-layer-pausable", "off"))
-
-    wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
-
-    # Examine the resulting S3 state.
-    log.info("integrity-check the remote storage")
-    ip = get_index_part()
-    for layer_file_name in ip.layer_metadata.keys():
-        layer_path = env.pageserver_remote_storage.layer_path(
-            tenant_id, timeline_id, layer_file_name
-        )
-        assert layer_path.exists(), f"{layer_file_name.to_str()}"
-
-    log.info("assert that the overwritten layer won")
-    final_stat = future_layer_path.stat()
-    assert final_stat.st_mtime != pre_stat.st_mtime
--- a/test_runner/regress/test_local_file_cache.py
+++ b/test_runner/regress/test_local_file_cache.py
@@ -1,74 +0,0 @@
-import os
-import random
-import threading
-import time
-from typing import List
-
-from fixtures.neon_fixtures import NeonEnv
-from fixtures.utils import query_scalar
-
-
-def test_local_file_cache_unlink(neon_simple_env: NeonEnv):
-    env = neon_simple_env
-
-    cache_dir = os.path.join(env.repo_dir, "file_cache")
-    os.mkdir(cache_dir)
-
-    env.neon_cli.create_branch("test_local_file_cache_unlink", "empty")
-
-    endpoint = env.endpoints.create_start(
-        "test_local_file_cache_unlink",
-        config_lines=[
-            "shared_buffers='1MB'",
-            f"neon.file_cache_path='{cache_dir}/file.cache'",
-            "neon.max_file_cache_size='64MB'",
-            "neon.file_cache_size_limit='10MB'",
-        ],
-    )
-
-    cur = endpoint.connect().cursor()
-
-    n_rows = 100000
-    n_threads = 20
-    n_updates_per_thread = 10000
-    n_updates_per_connection = 1000
-    n_total_updates = n_threads * n_updates_per_thread
-
-    cur.execute("CREATE TABLE lfctest (id int4 PRIMARY KEY, n int) WITH (fillfactor=10)")
-    cur.execute(f"INSERT INTO lfctest SELECT g, 1 FROM generate_series(1, {n_rows}) g")
-
-    # Start threads that will perform random UPDATEs. Each UPDATE
-    # increments the counter on the row, so that we can check at the
-    # end that the sum of all the counters match the number of updates
-    # performed (plus the initial 1 on each row).
-    #
-    # Furthermore, each thread will reconnect between every 1000 updates.
-    def run_updates():
-        n_updates_performed = 0
-        conn = endpoint.connect()
-        cur = conn.cursor()
-        for _ in range(n_updates_per_thread):
-            id = random.randint(1, n_rows)
-            cur.execute(f"UPDATE lfctest SET n = n + 1 WHERE id = {id}")
-            n_updates_performed += 1
-            if n_updates_performed % n_updates_per_connection == 0:
-                cur.close()
-                conn.close()
-                conn = endpoint.connect()
-                cur = conn.cursor()
-
-    threads: List[threading.Thread] = []
-    for _i in range(n_threads):
-        thread = threading.Thread(target=run_updates, args=(), daemon=True)
-        thread.start()
-        threads.append(thread)
-
-    time.sleep(5)
-
-    new_cache_dir = os.path.join(env.repo_dir, "file_cache_new")
-    os.rename(cache_dir, new_cache_dir)
-
-    for thread in threads:
-        thread.join()
-
-    assert query_scalar(cur, "SELECT SUM(n) FROM lfctest") == n_total_updates + n_rows
--- a/test_runner/regress/test_neon_extension.py
+++ b/test_runner/regress/test_neon_extension.py
@@ -1,28 +0,0 @@
-from contextlib import closing
-
-from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder
-
-
-# Verify that the neon extension is installed and has the correct version.
-def test_neon_extension(neon_env_builder: NeonEnvBuilder):
-    env = neon_env_builder.init_start()
-    env.neon_cli.create_branch("test_create_extension_neon")
-
-    endpoint_main = env.endpoints.create("test_create_extension_neon")
-    # don't skip pg_catalog updates - it runs CREATE EXTENSION neon
-    endpoint_main.respec(skip_pg_catalog_updates=False)
-    endpoint_main.start()
-
-    log.info("postgres is running on 'test_create_extension_neon' branch")
-
-    with closing(endpoint_main.connect()) as conn:
-        with conn.cursor() as cur:
-            cur.execute("SELECT extversion from pg_extension where extname='neon'")
-            # If this fails, it means the extension is either not installed
-            # or was updated and the version is different.
-            #
-            # IMPORTANT:
-            # If the version has changed, the test should be updated.
-            # Ensure that the default version is also updated in the neon.control file
-            assert cur.fetchone() == ("1.1",)
--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -6,12 +6,14 @@ from fixtures.pageserver.http import PageserverHttpClient

 def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
    tenant_id, timeline_id = env.neon_cli.create_tenant()
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=[
+                "log_statement=all",
+            ],)
    # we rely upon autocommit after each statement
    res_1 = endpoint.safe_psql_many(
        queries=[
-            "CREATE TABLE t(key int primary key, value text)",
-            "INSERT INTO t SELECT generate_series(1,100000), 'payload'",
+            "CREATE TABLE \nt(key int primary key, value text)",
+            "INSERT INTO \n\nt SELECT generate_series(1,100000), 'payload'",
            "SELECT sum(key) FROM t",
        ]
    )
--- a/test_runner/regress/test_pageserver_restart.py
+++ b/test_runner/regress/test_pageserver_restart.py
@@ -144,10 +144,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder, generations: bool)
 # Test that repeatedly kills and restarts the page server, while the
 # safekeeper and compute node keep running.
@pytest.mark.timeout(540)
-def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, build_type: str):
-    if build_type == "debug":
-        pytest.skip("times out in debug builds")
-
+def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()

--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -1,7 +1,6 @@
 import enum
 import os
 import shutil
-from threading import Thread

 import pytest
 from fixtures.log_helper import log
@@ -28,7 +27,7 @@ from fixtures.remote_storage import (
    available_s3_storages,
 )
 from fixtures.types import TenantId
-from fixtures.utils import run_pg_bench_small, wait_until
+from fixtures.utils import run_pg_bench_small


@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
@@ -400,78 +399,4 @@ def test_tenant_delete_is_resumed_on_attach(
        )


-def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonEnvBuilder):
-    """Reproduction of 2023-11-23 stuck tenants investigation"""
-
-    # do not use default tenant/timeline creation because it would output the failpoint log message too early
-    env = neon_env_builder.init_configs()
-    env.start()
-    pageserver_http = env.pageserver.http_client()
-
-    # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
-    env.pageserver.allowed_errors.append(
-        ".*Timeline got dropped without initializing, cleaning its files"
-    )
-    # the response hit_pausable_failpoint_and_later_fail
-    env.pageserver.allowed_errors.append(
-        f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
-    )
-
-    pageserver_http.tenant_create(env.initial_tenant)
-
-    failpoint = "flush-layer-cancel-after-writing-layer-out-pausable"
-    pageserver_http.configure_failpoints((failpoint, "pause"))
-
-    def hit_pausable_failpoint_and_later_fail():
-        with pytest.raises(
-            PageserverApiException, match="new timeline \\S+ has invalid disk_consistent_lsn"
-        ):
-            pageserver_http.timeline_create(
-                env.pg_version, env.initial_tenant, env.initial_timeline
-            )
-
-    def start_deletion():
-        pageserver_http.tenant_delete(env.initial_tenant)
-
-    def has_hit_failpoint():
-        assert env.pageserver.log_contains(f"at failpoint {failpoint}") is not None
-
-    def deletion_has_started_waiting_for_timelines():
-        assert env.pageserver.log_contains("Waiting for timelines...") is not None
-
-    def tenant_is_deleted():
-        try:
-            pageserver_http.tenant_status(env.initial_tenant)
-        except PageserverApiException as e:
-            assert e.status_code == 404
-        else:
-            raise RuntimeError("tenant was still accessible")
-
-    creation = Thread(target=hit_pausable_failpoint_and_later_fail)
-    creation.start()
-
-    deletion = None
-
-    try:
-        wait_until(10, 1, has_hit_failpoint)
-
-        # it should start ok, sync up with the stuck creation, then fail because disk_consistent_lsn was not updated
-        # then deletion should fail and set the tenant broken
-        deletion = Thread(target=start_deletion)
-        deletion.start()
-
-        wait_until(10, 1, deletion_has_started_waiting_for_timelines)
-
-        pageserver_http.configure_failpoints((failpoint, "off"))
-
-        creation.join()
-        deletion.join()
-
-        wait_until(10, 1, tenant_is_deleted)
-    finally:
-        creation.join()
-        if deletion is not None:
-            deletion.join()
-
-
 # TODO test concurrent deletions with "hang" failpoint
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -307,7 +307,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    )
    gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
    gc_thread.start()
-    time.sleep(5)
+    time.sleep(1)
    # By now the gc task is spawned but in sleep for another second due to the failpoint.

    log.info("detaching tenant")
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -134,11 +134,10 @@ def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, tim
        res = endpoint_main.safe_psql(
            """
            SELECT
-                pg_size_pretty(neon.pg_cluster_size()),
+                pg_size_pretty(pg_cluster_size()),
                pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag
-            FROM neon.backpressure_lsns();
-            """,
-            dbname="postgres",
+            FROM backpressure_lsns();
+            """
        )[0]
        log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
        received_lsn_lag = res[1]
@@ -153,20 +152,17 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):

    wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)

-    endpoint_main = env.endpoints.create(
+    endpoint_main = env.endpoints.create_start(
        "test_timeline_size_quota",
        # Set small limit for the test
        config_lines=["neon.max_cluster_size=30MB"],
    )
-    # don't skip pg_catalog updates - it runs CREATE EXTENSION neon
-    # which is needed for pg_cluster_size() to work
-    endpoint_main.respec(skip_pg_catalog_updates=False)
-    endpoint_main.start()
-
    log.info("postgres is running on 'test_timeline_size_quota' branch")

    with closing(endpoint_main.connect()) as conn:
        with conn.cursor() as cur:
+            cur.execute("CREATE EXTENSION neon")  # TODO move it to neon_fixtures?
+
            cur.execute("CREATE TABLE foo (t text)")

            wait_for_pageserver_catchup(endpoint_main)
@@ -215,7 +211,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):

            wait_for_pageserver_catchup(endpoint_main)

-            cur.execute("SELECT * from pg_size_pretty(neon.pg_cluster_size())")
+            cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
            pg_cluster_size = cur.fetchone()
            log.info(f"pg_cluster_size = {pg_cluster_size}")

--- a/test_runner/regress/test_wal_acceptor_async.py
+++ b/test_runner/regress/test_wal_acceptor_async.py
@@ -602,10 +602,7 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint, test_output_dir: Pat
 # The test takes more than default 5 minutes on Postgres 16,
 # see https://github.com/neondatabase/neon/issues/5305
@pytest.mark.timeout(600)
-def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, build_type: str):
-    if build_type == "debug":
-        pytest.skip("times out in debug builds")
-
+def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

--- a/test_runner/regress/test_wal_restore.py
+++ b/test_runner/regress/test_wal_restore.py
@@ -1,19 +1,14 @@
 import sys
-import tarfile
-import tempfile
 from pathlib import Path

 import pytest
-import zstandard
-from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PgBin,
    VanillaPostgres,
 )
 from fixtures.port_distributor import PortDistributor
-from fixtures.remote_storage import LocalFsStorage
-from fixtures.types import Lsn, TenantId, TimelineId
+from fixtures.types import TenantId, TimelineId


@pytest.mark.skipif(
@@ -58,70 +53,3 @@ def test_wal_restore(
        )
        restored.start()
        assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
-
-
-def decompress_zstd(
-    input_file_name: Path,
-    output_dir: Path,
-):
-    log.info(f"decompressing zstd to: {output_dir}")
-    output_dir.mkdir(mode=0o750, parents=True, exist_ok=True)
-    with tempfile.TemporaryFile(suffix=".tar") as temp:
-        decompressor = zstandard.ZstdDecompressor()
-        with open(input_file_name, "rb") as input_file:
-            decompressor.copy_stream(input_file, temp)
-        temp.seek(0)
-        with tarfile.open(fileobj=temp) as tfile:
-            tfile.extractall(path=output_dir)
-
-
-def test_wal_restore_initdb(
-    neon_env_builder: NeonEnvBuilder,
-    pg_bin: PgBin,
-    test_output_dir: Path,
-    port_distributor: PortDistributor,
-    base_dir: Path,
-    pg_distrib_dir: Path,
-):
-    env = neon_env_builder.init_start()
-    endpoint = env.endpoints.create_start("main")
-    endpoint.safe_psql("create table t as select generate_series(1,300000)")
-    tenant_id = env.initial_tenant
-    timeline_id = env.initial_timeline
-    original_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
-    env.pageserver.stop()
-    port = port_distributor.get_port()
-    data_dir = test_output_dir / "pgsql.restored"
-
-    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
-
-    initdb_zst_path = (
-        env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / "initdb.tar.zst"
-    )
-
-    decompress_zstd(initdb_zst_path, data_dir)
-    with VanillaPostgres(
-        data_dir, PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version), port, init=False
-    ) as restored:
-        pg_bin.run_capture(
-            [
-                str(base_dir / "libs" / "utils" / "scripts" / "restore_from_wal_initdb.sh"),
-                str(pg_distrib_dir / f"v{env.pg_version}/bin"),
-                str(
-                    test_output_dir
-                    / "repo"
-                    / "safekeepers"
-                    / "sk1"
-                    / str(tenant_id)
-                    / str(timeline_id)
-                ),
-                str(data_dir),
-                str(port),
-            ]
-        )
-        restored.start()
-        restored_lsn = Lsn(
-            restored.safe_psql("SELECT pg_current_wal_flush_lsn()", user="cloud_admin")[0][0]
-        )
-        log.info(f"original lsn: {original_lsn}, restored lsn: {restored_lsn}")
-        assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
--- a/vm-image-spec.yaml
+++ b/vm-image-spec.yaml
@@ -4,15 +4,15 @@ commands:
  - name: cgconfigparser
    user: root
    sysvInitAction: sysinit
-    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
+    shell: "cgconfigparser -l /etc/cgconfig.conf -s 1664"
  - name: pgbouncer
    user: nobody
    sysvInitAction: respawn
-    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
+    shell: "/usr/local/bin/pgbouncer /etc/pgbouncer.ini"
  - name: postgres-exporter
    user: nobody
    sysvInitAction: respawn
-    shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres" /bin/postgres_exporter'
+    shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres" /bin/postgres_exporter --extend.query-path /etc/postgres_exporter_queries.yml'
 shutdownHook: |
  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
 files:
@@ -46,6 +46,30 @@ files:
          }
          memory {}
      }
+  - filename: postgres_exporter_queries.yml
+    content: |
+      pg_wait_sampling:
+        query: "select pid, event_type, event, w.queryid as queryid, query, count from pg_wait_sampling_profile w left join pg_stat_statements s on w.queryid = s.queryid;"
+        cache_seconds: 30
+        metrics:
+          - pid:
+              usage: "LABEL"
+              description: "backend pid"
+          - event_type:
+              usage: "LABEL"
+              description: "event type"
+          - event:
+              usage: "LABEL"
+              description: "event"
+          - queryid:
+              usage: "LABEL"
+              description: "queryid"
+          - query:
+              usage: "LABEL"
+              description: "query"
+          - count:
+              usage: "GAUGE"
+              description: "count"
 build: |
  # Build cgroup-tools
  #
@@ -114,10 +138,12 @@ merge: |

  COPY cgconfig.conf /etc/cgconfig.conf
  COPY pgbouncer.ini /etc/pgbouncer.ini
+  COPY postgres_exporter_queries.yml /etc/postgres_exporter_queries.yml
  RUN set -e \
      && chown postgres:postgres /etc/pgbouncer.ini \
      && chmod 0644 /etc/pgbouncer.ini \
-      && chmod 0644 /etc/cgconfig.conf
+      && chmod 0644 /etc/cgconfig.conf \
+      && chmod 0644 /etc/postgres_exporter_queries.yml

  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -68,9 +68,6 @@ tracing-core = { version = "0.1" }
 tungstenite = { version = "0.20" }
 url = { version = "2", features = ["serde"] }
 uuid = { version = "1", features = ["serde", "v4"] }
-zstd = { version = "0.12" }
-zstd-safe = { version = "6", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] }
-zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }

 [build-dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
Author	SHA1	Message	Date
Arseny Sher	32d4e4914a	Add wait events without query to metric.	2023-11-16 23:56:04 +01:00
Arseny Sher	d4d577e7ff	Add query to pg_wait_sampling metric	2023-11-16 22:42:08 +01:00
Arseny Sher	f552aa05fa	Add pg_wait_sampling metric for vms.	2023-11-16 22:04:29 +01:00
Arthur Petukhovsky	779badb7c5	Join postgres multiline logs	2023-11-16 20:54:02 +00:00
Arseny Sher	e6eb548491	create extension pg_wait_sampling in compute_ctl	2023-11-16 20:54:02 +00:00
Arseny Sher	16e9eb2832	Try to enable a custom postgres_exporter query.	2023-11-16 20:54:02 +00:00
Arseny Sher	042686183b	Add pg_wait_sampling extension.	2023-11-16 20:54:02 +00:00