postgres_ffi: bump WAL_SEGMENT_SIZE to 128 MB

2026-04-23 01:10:38 +00:00 · 2024-11-12 18:11:03 +01:00
35 changed files with 214 additions and 1177 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1229,15 +1229,12 @@ dependencies = [
 "flate2",
 "futures",
 "hyper 0.14.30",
- "metrics",
 "nix 0.27.1",
 "notify",
 "num_cpus",
- "once_cell",
 "opentelemetry",
 "opentelemetry_sdk",
 "postgres",
- "prometheus",
 "regex",
 "remote_storage",
 "reqwest 0.12.4",
--- a/2
+++ b/2
@@ -55,7 +55,6 @@ RUN set -e \
      --bin proxy  \
      --bin neon_local \
      --bin storage_scrubber \
-      --bin stream_events \
      --locked --release

 # Build final image
@@ -83,7 +82,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_control
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/stream_events       /usr/local/bin

 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
 COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql
@@ -1 +1 @@
-SELECT (neon.backpressure_throttling_time()::float8 / 1000000) AS throttled;
+SELECT neon.backpressure_throttling_time()::float8 / 1000 AS throttled;
--- a/compute/patches/pg_anon.patch
+++ b/compute/patches/pg_anon.patch
@@ -1,45 +1,3 @@
-commit 00aa659afc9c7336ab81036edec3017168aabf40
-Author: Heikki Linnakangas <heikki@neon.tech>
-Date:   Tue Nov 12 16:59:19 2024 +0200
-
-    Temporarily disable test that depends on timezone
-
-diff --git a/tests/expected/generalization.out b/tests/expected/generalization.out
-index 23ef5fa..9e60deb 100644
--- a/ext-src/pg_anon-src/tests/expected/generalization.out
-+++ b/ext-src/pg_anon-src/tests/expected/generalization.out
-@@ -284,12 +284,9 @@ SELECT anon.generalize_tstzrange('19041107','century');
-  ["Tue Jan 01 00:00:00 1901 PST","Mon Jan 01 00:00:00 2001 PST")
- (1 row)
- 
-SELECT anon.generalize_tstzrange('19041107','millennium');
-                      generalize_tstzrange                       
------------------------------------------------------------------
- ["Thu Jan 01 00:00:00 1001 PST","Mon Jan 01 00:00:00 2001 PST")
-(1 row)
-
-+-- temporarily disabled, see:
-+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
-+--SELECT anon.generalize_tstzrange('19041107','millennium');
- -- generalize_daterange
- SELECT anon.generalize_daterange('19041107');
-   generalize_daterange   
-diff --git a/tests/sql/generalization.sql b/tests/sql/generalization.sql
-index b868344..b4fc977 100644
--- a/ext-src/pg_anon-src/tests/sql/generalization.sql
-+++ b/ext-src/pg_anon-src/tests/sql/generalization.sql
-@@ -61,7 +61,9 @@ SELECT anon.generalize_tstzrange('19041107','month');
- SELECT anon.generalize_tstzrange('19041107','year');
- SELECT anon.generalize_tstzrange('19041107','decade');
- SELECT anon.generalize_tstzrange('19041107','century');
-SELECT anon.generalize_tstzrange('19041107','millennium');
-+-- temporarily disabled, see:
-+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
-+--SELECT anon.generalize_tstzrange('19041107','millennium');
- 
- -- generalize_daterange
- SELECT anon.generalize_daterange('19041107');
-
 commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
 Author: Alexey Masterov <alexeymasterov@neon.tech>
 Date:   Fri May 31 06:34:26 2024 +0000
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -18,11 +18,9 @@ clap.workspace = true
 flate2.workspace = true
 futures.workspace = true
 hyper0 = { workspace = true, features = ["full"] }
-metrics.workspace = true
 nix.workspace = true
 notify.workspace = true
 num_cpus.workspace = true
-once_cell.workspace = true
 opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
 postgres.workspace = true
@@ -41,7 +39,6 @@ tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
 url.workspace = true
-prometheus.workspace = true

 compute_api.workspace = true
 utils.workspace = true
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -9,7 +9,6 @@ use crate::catalog::SchemaDumpError;
 use crate::catalog::{get_database_schema, get_dbs_and_roles};
 use crate::compute::forward_termination_signal;
 use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
-use crate::installed_extensions;
 use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
 use compute_api::responses::{
    ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
@@ -20,8 +19,6 @@ use anyhow::Result;
 use hyper::header::CONTENT_TYPE;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
-use metrics::Encoder;
-use metrics::TextEncoder;
 use tokio::task;
 use tracing::{debug, error, info, warn};
 use tracing_utils::http::OtelName;
@@ -68,28 +65,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
        }

-        // Prometheus metrics
-        (&Method::GET, "/metrics") => {
-            debug!("serving /metrics GET request");
-
-            let mut buffer = vec![];
-            let metrics = installed_extensions::collect();
-            let encoder = TextEncoder::new();
-            encoder.encode(&metrics, &mut buffer).unwrap();
-
-            match Response::builder()
-                .status(StatusCode::OK)
-                .header(CONTENT_TYPE, encoder.format_type())
-                .body(Body::from(buffer))
-            {
-                Ok(response) => response,
-                Err(err) => {
-                    let msg = format!("error handling /metrics request: {err}");
-                    error!(msg);
-                    render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
        // Collect Postgres current usage insights
        (&Method::GET, "/insights") => {
            info!("serving /insights GET request");
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -37,21 +37,6 @@ paths:
              schema:
                $ref: "#/components/schemas/ComputeMetrics"

-  /metrics
-    get:
-      tags:
-      - Info
-      summary: Get compute node metrics in text format.
-      description: ""
-      operationId: getComputeMetrics
-      responses:
-        200:
-          description: ComputeMetrics
-          content:
-            text/plain:
-              schema:
-                type: string
-                description: Metrics in text format.
  /insights:
    get:
      tags:
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -1,5 +1,4 @@
 use compute_api::responses::{InstalledExtension, InstalledExtensions};
-use metrics::proto::MetricFamily;
 use std::collections::HashMap;
 use std::collections::HashSet;
 use tracing::info;
@@ -9,10 +8,6 @@ use anyhow::Result;
 use postgres::{Client, NoTls};
 use tokio::task;

-use metrics::core::Collector;
-use metrics::{register_uint_gauge_vec, UIntGaugeVec};
-use once_cell::sync::Lazy;
-
 /// We don't reuse get_existing_dbs() just for code clarity
 /// and to make database listing query here more explicit.
 ///
@@ -64,12 +59,6 @@ pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtension

            for (extname, v) in extensions.iter() {
                let version = v.to_string();
-
-                // increment the number of databases where the version of extension is installed
-                INSTALLED_EXTENSIONS
-                    .with_label_values(&[extname, &version])
-                    .inc();
-
                extensions_map
                    .entry(extname.to_string())
                    .and_modify(|e| {
@@ -85,11 +74,9 @@ pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtension
            }
        }

-        let res = InstalledExtensions {
+        Ok(InstalledExtensions {
            extensions: extensions_map.values().cloned().collect(),
-        };
-
-        Ok(res)
+        })
    })
    .await?
 }
@@ -110,18 +97,6 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
        "[NEON_EXT_STAT] {}",
        serde_json::to_string(&result).expect("failed to serialize extensions list")
    );
+
    Ok(())
 }
-
-static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
-    register_uint_gauge_vec!(
-        "installed_extensions",
-        "Number of databases where the version of extension is installed",
-        &["extension_name", "version"]
-    )
-    .expect("failed to define a metric")
-});
-
-pub fn collect() -> Vec<MetricFamily> {
-    INSTALLED_EXTENSIONS.collect()
-}
--- a/libs/pageserver_api/src/record.rs
+++ b/libs/pageserver_api/src/record.rs
@@ -80,18 +80,18 @@ impl NeonWalRecord {
    }

    #[cfg(feature = "testing")]
-    pub fn wal_clear(s: impl AsRef<str>) -> Self {
+    pub fn wal_clear() -> Self {
        Self::Test {
-            append: s.as_ref().to_string(),
+            append: "".to_string(),
            clear: true,
            will_init: false,
        }
    }

    #[cfg(feature = "testing")]
-    pub fn wal_init(s: impl AsRef<str>) -> Self {
+    pub fn wal_init() -> Self {
        Self::Test {
-            append: s.as_ref().to_string(),
+            append: "".to_string(),
            clear: true,
            will_init: true,
        }
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -241,7 +241,7 @@ pub use v14::bindings::{CheckPoint, ControlFileData};
 pub const BLCKSZ: u16 = 8192;
 pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
 pub const XLOG_BLCKSZ: usize = 8192;
-pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
+pub const WAL_SEGMENT_SIZE: usize = 128 * 1024 * 1024;

 pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;

--- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
+++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
@@ -151,7 +151,7 @@ fn check_end_of_wal(
    .unwrap();
 }

-const_assert!(WAL_SEGMENT_SIZE == 16 * 1024 * 1024);
+const_assert!(WAL_SEGMENT_SIZE == 128 * 1024 * 1024);

 #[test]
 pub fn test_find_end_of_wal_simple() {
--- a/libs/remote_storage/src/error.rs
+++ b/libs/remote_storage/src/error.rs
@@ -15,9 +15,6 @@ pub enum DownloadError {
    ///
    /// Concurrency control is not timed within timeout.
    Timeout,
-    /// Some integrity/consistency check failed during download. This is used during
-    /// timeline loads to cancel the load of a tenant if some timeline detects fatal corruption.
-    Fatal(String),
    /// The file was found in the remote storage, but the download failed.
    Other(anyhow::Error),
 }
@@ -32,7 +29,6 @@ impl std::fmt::Display for DownloadError {
            DownloadError::Unmodified => write!(f, "File was not modified"),
            DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
            DownloadError::Timeout => write!(f, "timeout"),
-            DownloadError::Fatal(why) => write!(f, "Fatal read error: {why}"),
            DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
        }
    }
@@ -45,7 +41,7 @@ impl DownloadError {
    pub fn is_permanent(&self) -> bool {
        use DownloadError::*;
        match self {
-            BadInput(_) | NotFound | Unmodified | Fatal(_) | Cancelled => true,
+            BadInput(_) | NotFound | Unmodified | Cancelled => true,
            Timeout | Other(_) => false,
        }
    }
--- a/libs/utils/src/crashsafe.rs
+++ b/libs/utils/src/crashsafe.rs
@@ -123,27 +123,15 @@ pub async fn fsync_async_opt(
    Ok(())
 }

-/// Like postgres' durable_rename, renames a file and issues fsyncs to make it durable. After
-/// returning, both the file and rename are guaranteed to be persisted. Both paths must be on the
-/// same file system.
+/// Like postgres' durable_rename, renames file issuing fsyncs do make it
+/// durable. After return, file and rename are guaranteed to be persisted.
 ///
-/// Unlike postgres, it only fsyncs 1) the file to make contents durable, and 2) the directory to
-/// make the rename durable. This sequence ensures the target file will never be incomplete.
-///
-/// Postgres also:
-///
-/// * Fsyncs the target file, if it exists, before the rename, to ensure either the new or existing
-///   file survives a crash. Current callers don't need this as it should already be fsynced if
-///   durability is needed.
-///
-/// * Fsyncs the file after the rename. This can be required with certain OSes or file systems (e.g.
-///   NFS), but not on Linux with most common file systems like ext4 (which we currently use).
-///
-/// An audit of 8 other databases found that none fsynced the file after a rename:
-/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837180535>
-///
-/// eBPF probes confirmed that this is sufficient with ext4, XFS, and ZFS, but possibly not Btrfs:
-/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837926218>
+/// Unlike postgres, it only does fsyncs to 1) file to be renamed to make
+/// contents durable; 2) its directory entry to make rename durable 3) again to
+/// already renamed file, which is not required by standards but postgres does
+/// it, let's stick to that. Postgres additionally fsyncs newpath *before*
+/// rename if it exists to ensure that at least one of the files survives, but
+/// current callers don't need that.
 ///
 /// virtual_file.rs has similar code, but it doesn't use vfs.
 ///
@@ -161,6 +149,9 @@ pub async fn durable_rename(
    // Time to do the real deal.
    tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;

+    // Postgres'ish fsync of renamed file.
+    fsync_async_opt(new_path.as_ref(), do_fsync).await?;
+
    // Now fsync the parent
    let parent = match new_path.as_ref().parent() {
        Some(p) => p,
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1433,12 +1433,6 @@ impl Tenant {
                    info!(%timeline_id, "index_part not found on remote");
                    continue;
                }
-                Err(DownloadError::Fatal(why)) => {
-                    // If, while loading one remote timeline, we saw an indication that our generation
-                    // number is likely invalid, then we should not load the whole tenant.
-                    error!(%timeline_id, "Fatal error loading timeline: {why}");
-                    anyhow::bail!(why.to_string());
-                }
                Err(e) => {
                    // Some (possibly ephemeral) error happened during index_part download.
                    // Pretend the timeline exists to not delete the timeline directory,
@@ -7763,13 +7757,13 @@ mod tests {
            (
                get_key(3),
                Lsn(0x20),
-                Value::WalRecord(NeonWalRecord::wal_clear("c")),
+                Value::WalRecord(NeonWalRecord::wal_clear()),
            ),
            (get_key(4), Lsn(0x10), Value::Image("0x10".into())),
            (
                get_key(4),
                Lsn(0x20),
-                Value::WalRecord(NeonWalRecord::wal_init("i")),
+                Value::WalRecord(NeonWalRecord::wal_init()),
            ),
        ];
        let image1 = vec![(get_key(1), "0x10".into())];
@@ -7918,30 +7912,8 @@ mod tests {

    #[cfg(feature = "testing")]
    #[tokio::test]
-    async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {
-        test_simple_bottom_most_compaction_deltas_helper(
-            "test_simple_bottom_most_compaction_deltas_1",
-            false,
-        )
-        .await
-    }
-
-    #[cfg(feature = "testing")]
-    #[tokio::test]
-    async fn test_simple_bottom_most_compaction_deltas_2() -> anyhow::Result<()> {
-        test_simple_bottom_most_compaction_deltas_helper(
-            "test_simple_bottom_most_compaction_deltas_2",
-            true,
-        )
-        .await
-    }
-
-    #[cfg(feature = "testing")]
-    async fn test_simple_bottom_most_compaction_deltas_helper(
-        test_name: &'static str,
-        use_delta_bottom_layer: bool,
-    ) -> anyhow::Result<()> {
-        let harness = TenantHarness::create(test_name).await?;
+    async fn test_simple_bottom_most_compaction_deltas() -> anyhow::Result<()> {
+        let harness = TenantHarness::create("test_simple_bottom_most_compaction_deltas").await?;
        let (tenant, ctx) = harness.load().await;

        fn get_key(id: u32) -> Key {
@@ -7972,16 +7944,6 @@ mod tests {
        let img_layer = (0..10)
            .map(|id| (get_key(id), Bytes::from(format!("value {id}@0x10"))))
            .collect_vec();
-        // or, delta layer at 0x10 if `use_delta_bottom_layer` is true
-        let delta4 = (0..10)
-            .map(|id| {
-                (
-                    get_key(id),
-                    Lsn(0x08),
-                    Value::WalRecord(NeonWalRecord::wal_init(format!("value {id}@0x10"))),
-                )
-            })
-            .collect_vec();

        let delta1 = vec![
            (
@@ -8035,61 +7997,21 @@ mod tests {
            ),
        ];

-        let tline = if use_delta_bottom_layer {
-            tenant
-                .create_test_timeline_with_layers(
-                    TIMELINE_ID,
-                    Lsn(0x08),
-                    DEFAULT_PG_VERSION,
-                    &ctx,
-                    vec![
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x08)..Lsn(0x10),
-                            delta4,
-                        ),
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x20)..Lsn(0x48),
-                            delta1,
-                        ),
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x20)..Lsn(0x48),
-                            delta2,
-                        ),
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x48)..Lsn(0x50),
-                            delta3,
-                        ),
-                    ], // delta layers
-                    vec![], // image layers
-                    Lsn(0x50),
-                )
-                .await?
-        } else {
-            tenant
-                .create_test_timeline_with_layers(
-                    TIMELINE_ID,
-                    Lsn(0x10),
-                    DEFAULT_PG_VERSION,
-                    &ctx,
-                    vec![
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x10)..Lsn(0x48),
-                            delta1,
-                        ),
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x10)..Lsn(0x48),
-                            delta2,
-                        ),
-                        DeltaLayerTestDesc::new_with_inferred_key_range(
-                            Lsn(0x48)..Lsn(0x50),
-                            delta3,
-                        ),
-                    ], // delta layers
-                    vec![(Lsn(0x10), img_layer)], // image layers
-                    Lsn(0x50),
-                )
-                .await?
-        };
+        let tline = tenant
+            .create_test_timeline_with_layers(
+                TIMELINE_ID,
+                Lsn(0x10),
+                DEFAULT_PG_VERSION,
+                &ctx,
+                vec![
+                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta1),
+                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta2),
+                    DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),
+                ], // delta layers
+                vec![(Lsn(0x10), img_layer)], // image layers
+                Lsn(0x50),
+            )
+            .await?;
        {
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
@@ -8199,7 +8121,7 @@ mod tests {
            (
                key,
                Lsn(0x10),
-                Value::WalRecord(NeonWalRecord::wal_init("0x10")),
+                Value::Image(Bytes::copy_from_slice(b"0x10")),
            ),
            (
                key,
@@ -8261,7 +8183,7 @@ mod tests {
                    Lsn(0x20),
                    KeyLogAtLsn(vec![(
                        Lsn(0x20),
-                        Value::Image(Bytes::from_static(b"0x10;0x20")),
+                        Value::Image(Bytes::copy_from_slice(b"0x10;0x20")),
                    )]),
                ),
                (
@@ -9243,7 +9165,7 @@ mod tests {

            let will_init = will_init_keys.contains(&i);
            if will_init {
-                delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init(""))));
+                delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init())));

                expected_key_values.insert(key, "".to_string());
            } else {
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -574,18 +574,12 @@ impl RemoteTimelineClient {

            if latest_index_generation > index_generation {
                // Unexpected!  Why are we loading such an old index if a more recent one exists?
-                // We will refuse to proceed, as there is no reasonable scenario where this should happen, but
-                // there _is_ a clear bug/corruption scenario where it would happen (controller sets the generation
-                // backwards).
-                tracing::error!(
+                tracing::warn!(
                    ?index_generation,
                    ?latest_index_generation,
                    ?latest_index_mtime,
                    "Found a newer index while loading an old one"
                );
-                return Err(DownloadError::Fatal(
-                    "Index age exceeds threshold and a newer index exists".into(),
-                ));
            }
        }

--- a/pageserver/src/tenant/storage_layer/merge_iterator.rs
+++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs
@@ -562,7 +562,7 @@ mod tests {
            (
                get_key(0),
                Lsn(0x10),
-                Value::WalRecord(NeonWalRecord::wal_init("")),
+                Value::WalRecord(NeonWalRecord::wal_init()),
            ),
            (
                get_key(0),
@@ -572,7 +572,7 @@ mod tests {
            (
                get_key(5),
                Lsn(0x10),
-                Value::WalRecord(NeonWalRecord::wal_init("")),
+                Value::WalRecord(NeonWalRecord::wal_init()),
            ),
            (
                get_key(5),
--- a/pageserver/src/walredo/apply_neon.rs
+++ b/pageserver/src/walredo/apply_neon.rs
@@ -253,10 +253,6 @@ pub(crate) fn apply_in_neon(
            use bytes::BufMut;
            if *will_init {
                assert!(*clear, "init record must be clear to ensure correctness");
-                assert!(
-                    page.is_empty(),
-                    "init record must be the first entry to ensure correctness"
-                );
            }
            if *clear {
                page.clear();
--- a/pgxn/neon/logical_replication_monitor.c
+++ b/pgxn/neon/logical_replication_monitor.c
@@ -1,8 +1,7 @@
-#include <dirent.h>
 #include <limits.h>
 #include <string.h>
+#include <dirent.h>
 #include <signal.h>
-#include <sys/stat.h>

 #include "postgres.h"

@@ -22,35 +21,17 @@

 static int	logical_replication_max_snap_files = 300;

-/*
- * According to Chi (shyzh), the pageserver _should_ be good with 10 MB worth of
- * snapshot files. Let's use 8 MB since 8 is a power of 2.
- */
-static int	logical_replication_max_logicalsnapdir_size = 8000;
-
-/*
- * A primitive description of a logical snapshot file including the LSN of the
- * file and its size.
- */
-typedef struct SnapDesc {
-	XLogRecPtr	lsn;
-	off_t		sz;
-} SnapDesc;
-
 PGDLLEXPORT void LogicalSlotsMonitorMain(Datum main_arg);

-/*
- * Sorts an array of snapshot descriptors by their LSN.
- */
 static int
-SnapDescComparator(const void *a, const void *b)
+LsnDescComparator(const void *a, const void *b)
 {
-	const SnapDesc	*desc1 = a;
-	const SnapDesc	*desc2 = b;
+	XLogRecPtr	lsn1 = *((const XLogRecPtr *) a);
+	XLogRecPtr	lsn2 = *((const XLogRecPtr *) b);

-	if (desc1->lsn < desc2->lsn)
+	if (lsn1 < lsn2)
 		return 1;
-	else if (desc1->lsn == desc2->lsn)
+	else if (lsn1 == lsn2)
 		return 0;
 	else
 		return -1;
@@ -62,39 +43,28 @@ SnapDescComparator(const void *a, const void *b)
 * slots having lower restart_lsn should be dropped.
 */
 static XLogRecPtr
-get_snapshots_cutoff_lsn(void)
+get_num_snap_files_lsn_threshold(void)
 {
-/* PG 18 has a constant defined for this, PG_LOGICAL_SNAPSHOTS_DIR */
-#define SNAPDIR "pg_logical/snapshots"
-
 	DIR		   *dirdesc;
-	int			dirdesc_fd;
 	struct dirent *de;
-	size_t		snapshot_index = 0;
-	SnapDesc   *snapshot_descriptors;
-	size_t		descriptors_allocated = 1024;
-	XLogRecPtr	cutoff = 0;
-	off_t		logicalsnapdir_size = 0;
-	const int	logical_replication_max_logicalsnapdir_size_bytes = logical_replication_max_logicalsnapdir_size * 1000;
+	char	   *snap_path = "pg_logical/snapshots/";
+	int			lsns_allocated = 1024;
+	int			lsns_num = 0;
+	XLogRecPtr *lsns;
+	XLogRecPtr	cutoff;

-	if (logical_replication_max_snap_files < 0 && logical_replication_max_logicalsnapdir_size < 0)
+	if (logical_replication_max_snap_files < 0)
 		return 0;

-	snapshot_descriptors = palloc(sizeof(*snapshot_descriptors) * descriptors_allocated);
-
-	dirdesc = AllocateDir(SNAPDIR);
-	dirdesc_fd = dirfd(dirdesc);
-	if (dirdesc_fd == -1)
-		ereport(ERROR, errmsg("failed to get a file descriptor for " SNAPDIR ": %m"));
+	lsns = palloc(sizeof(XLogRecPtr) * lsns_allocated);

 	/* find all .snap files and get their lsns */
-	while ((de = ReadDir(dirdesc, SNAPDIR)) != NULL)
+	dirdesc = AllocateDir(snap_path);
+	while ((de = ReadDir(dirdesc, snap_path)) != NULL)
 	{
+		XLogRecPtr	lsn;
 		uint32		hi;
 		uint32		lo;
-		struct stat	st;
-		XLogRecPtr	lsn;
-		SnapDesc   *desc;

 		if (strcmp(de->d_name, ".") == 0 ||
 			strcmp(de->d_name, "..") == 0)
@@ -109,69 +79,28 @@ get_snapshots_cutoff_lsn(void)

 		lsn = ((uint64) hi) << 32 | lo;
 		elog(DEBUG5, "found snap file %X/%X", LSN_FORMAT_ARGS(lsn));
-
-		if (fstatat(dirdesc_fd, de->d_name, &st, 0) == -1)
-			ereport(ERROR, errmsg("failed to get the size of " SNAPDIR "/%s: %m", de->d_name));
-
-		if (descriptors_allocated == snapshot_index)
+		if (lsns_allocated == lsns_num)
 		{
-			descriptors_allocated *= 2;
-			snapshot_descriptors = repalloc(snapshot_descriptors, sizeof(*snapshot_descriptors) * descriptors_allocated);
+			lsns_allocated *= 2;
+			lsns = repalloc(lsns, sizeof(XLogRecPtr) * lsns_allocated);
 		}
-
-		desc = &snapshot_descriptors[snapshot_index++];
-		desc->lsn = lsn;
-		desc->sz = st.st_size;
+		lsns[lsns_num++] = lsn;
 	}
-
-	qsort(snapshot_descriptors, snapshot_index, sizeof(*snapshot_descriptors), SnapDescComparator);
-
-	/* Are there more snapshot files than specified? */
-	if (logical_replication_max_snap_files <= snapshot_index)
+	/* sort by lsn desc */
+	qsort(lsns, lsns_num, sizeof(XLogRecPtr), LsnDescComparator);
+	/* and take cutoff at logical_replication_max_snap_files */
+	if (logical_replication_max_snap_files > lsns_num)
+		cutoff = 0;
+	/* have less files than cutoff */
+	else
 	{
-		cutoff = snapshot_descriptors[logical_replication_max_snap_files - 1].lsn;
-		elog(LOG,
-			"ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %zu snapshot files, limit is %d",
-			LSN_FORMAT_ARGS(cutoff), snapshot_index, logical_replication_max_snap_files);
+		cutoff = lsns[logical_replication_max_snap_files - 1];
+		elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %d .snap files, limit is %d",
+			 LSN_FORMAT_ARGS(cutoff), lsns_num, logical_replication_max_snap_files);
 	}
-
-	/* Is the size of the logical snapshots directory larger than specified?
-	 *
-	 * It's possible we could hit both thresholds, so remove any extra files
-	 * first, and then truncate based on size of the remaining files.
-	 */
-	if (logicalsnapdir_size > logical_replication_max_logicalsnapdir_size_bytes)
-	{
-		/* Unfortunately, iterating the directory does not guarantee any order
-		 * so we can't cache an index in the preceding loop.
-		 */
-
-		off_t		sz;
-		const XLogRecPtr original = cutoff;
-
-		sz = snapshot_descriptors[0].sz;
-		for (size_t i = 1; i < logical_replication_max_snap_files; ++i)
-		{
-			if (sz > logical_replication_max_logicalsnapdir_size_bytes)
-			{
-				cutoff = snapshot_descriptors[i - 1].lsn;
-				break;
-			}
-
-			sz += snapshot_descriptors[i].sz;
-		}
-
-		if (cutoff != original)
-			elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower than %X/%X, " SNAPDIR " is larger than %d KB",
-					LSN_FORMAT_ARGS(cutoff), logical_replication_max_logicalsnapdir_size);
-	}
-
-	pfree(snapshot_descriptors);
+	pfree(lsns);
 	FreeDir(dirdesc);
-
 	return cutoff;
-
-#undef SNAPDIR
 }

 void
@@ -189,16 +118,6 @@ InitLogicalReplicationMonitor(void)
 							0,
 							NULL, NULL, NULL);

-	DefineCustomIntVariable(
-							"neon.logical_replication_max_logicalsnapdir_size",
-							"Maximum allowed size of the pg_logical/snapshots directory (KB). When exceeded, slots are dropped until the limit is met. -1 disables the limit.",
-							NULL,
-							&logical_replication_max_logicalsnapdir_size,
-							8000, -1, INT_MAX,
-							PGC_SIGHUP,
-							GUC_UNIT_KB,
-							NULL, NULL, NULL);
-
 	memset(&bgw, 0, sizeof(bgw));
 	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
 	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
@@ -243,7 +162,7 @@ LogicalSlotsMonitorMain(Datum main_arg)
 		 * If there are too many .snap files, just drop all logical slots to
 		 * prevent aux files bloat.
 		 */
-		cutoff_lsn = get_snapshots_cutoff_lsn();
+		cutoff_lsn = get_num_snap_files_lsn_threshold();
 		if (cutoff_lsn > 0)
 		{
 			for (int i = 0; i < max_replication_slots; i++)
--- a/proxy/src/bin/stream_events.rs
+++ b/proxy/src/bin/stream_events.rs
@@ -1,450 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::bail;
-use aws_config::environment::EnvironmentVariableCredentialsProvider;
-use aws_config::imds::credentials::ImdsCredentialsProvider;
-use aws_config::meta::credentials::CredentialsProviderChain;
-use aws_config::meta::region::RegionProviderChain;
-use aws_config::profile::ProfileFileCredentialsProvider;
-use aws_config::provider_config::ProviderConfig;
-use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
-use aws_config::Region;
-use clap::{Parser, ValueEnum};
-use proxy::config::{self, remote_storage_from_toml, ProxyProtocolV2};
-use proxy::context::parquet::ParquetUploadArgs;
-use proxy::rate_limiter::RateBucketInfo;
-use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
-use proxy::redis::elasticache;
-use redis::streams::{StreamReadOptions, StreamReadReply};
-use redis::{AsyncCommands, FromRedisValue, Value};
-use remote_storage::RemoteStorageConfig;
-use serde::{Deserialize, Serialize};
-use tracing::warn;
-
-#[global_allocator]
-static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
-
-#[derive(Clone, Debug, ValueEnum)]
-enum AuthBackendType {
-    #[value(name("console"), alias("cplane"))]
-    ControlPlane,
-
-    #[value(name("link"), alias("control-redirect"))]
-    ConsoleRedirect,
-
-    #[cfg(feature = "testing")]
-    Postgres,
-}
-
-/// Neon proxy/router
-#[derive(Parser)]
-struct ProxyCliArgs {
-    /// Name of the region this proxy is deployed in
-    #[clap(long, default_value_t = String::new())]
-    region: String,
-    /// listen for incoming client connections on ip:port
-    #[clap(short, long, default_value = "127.0.0.1:4432")]
-    proxy: String,
-    #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)]
-    auth_backend: AuthBackendType,
-    /// listen for management callback connection on ip:port
-    #[clap(short, long, default_value = "127.0.0.1:7000")]
-    mgmt: String,
-    /// listen for incoming http connections (metrics, etc) on ip:port
-    #[clap(long, default_value = "127.0.0.1:7001")]
-    http: String,
-    /// listen for incoming wss connections on ip:port
-    #[clap(long)]
-    wss: Option<String>,
-    /// redirect unauthenticated users to the given uri in case of console redirect auth
-    #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
-    uri: String,
-    /// cloud API endpoint for authenticating users
-    #[clap(
-        short,
-        long,
-        default_value = "http://localhost:3000/authenticate_proxy_request/"
-    )]
-    auth_endpoint: String,
-    /// JWT used to connect to control plane.
-    #[clap(
-        long,
-        value_name = "JWT",
-        default_value = "",
-        env = "NEON_PROXY_TO_CONTROLPLANE_TOKEN"
-    )]
-    control_plane_token: Arc<str>,
-    /// if this is not local proxy, this toggles whether we accept jwt or passwords for http
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    is_auth_broker: bool,
-    /// path to TLS key for client postgres connections
-    ///
-    /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
-    #[clap(short = 'k', long, alias = "ssl-key")]
-    tls_key: Option<String>,
-    /// path to TLS cert for client postgres connections
-    ///
-    /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
-    #[clap(short = 'c', long, alias = "ssl-cert")]
-    tls_cert: Option<String>,
-    /// path to directory with TLS certificates for client postgres connections
-    #[clap(long)]
-    certs_dir: Option<String>,
-    /// timeout for the TLS handshake
-    #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
-    handshake_timeout: tokio::time::Duration,
-    /// http endpoint to receive periodic metric updates
-    #[clap(long)]
-    metric_collection_endpoint: Option<String>,
-    /// how often metrics should be sent to a collection endpoint
-    #[clap(long)]
-    metric_collection_interval: Option<String>,
-    /// cache for `wake_compute` api method (use `size=0` to disable)
-    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
-    wake_compute_cache: String,
-    /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
-    wake_compute_lock: String,
-    /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
-    connect_compute_lock: String,
-    /// Allow self-signed certificates for compute nodes (for testing)
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    allow_self_signed_compute: bool,
-    #[clap(flatten)]
-    sql_over_http: SqlOverHttpArgs,
-    /// timeout for scram authentication protocol
-    #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
-    scram_protocol_timeout: tokio::time::Duration,
-    /// size of the threadpool for password hashing
-    #[clap(long, default_value_t = 4)]
-    scram_thread_pool_size: u8,
-    /// Endpoint rate limiter max number of requests per second.
-    ///
-    /// Provided in the form `<Requests Per Second>@<Bucket Duration Size>`.
-    /// Can be given multiple times for different bucket sizes.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
-    endpoint_rps_limit: Vec<RateBucketInfo>,
-    /// Wake compute rate limiter max number of requests per second.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
-    wake_compute_limit: Vec<RateBucketInfo>,
-    /// Whether the auth rate limiter actually takes effect (for testing)
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    auth_rate_limit_enabled: bool,
-    /// Authentication rate limiter max number of hashes per second.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
-    auth_rate_limit: Vec<RateBucketInfo>,
-    /// The IP subnet to use when considering whether two IP addresses are considered the same.
-    #[clap(long, default_value_t = 64)]
-    auth_rate_limit_ip_subnet: u8,
-    /// Redis rate limiter max number of requests per second.
-    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
-    redis_rps_limit: Vec<RateBucketInfo>,
-    /// cache for `allowed_ips` (use `size=0` to disable)
-    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
-    allowed_ips_cache: String,
-    /// cache for `role_secret` (use `size=0` to disable)
-    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
-    role_secret_cache: String,
-    /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections)
-    #[clap(long)]
-    redis_notifications: Option<String>,
-    /// what from the available authentications type to use for the regional redis we have. Supported are "irsa" and "plain".
-    #[clap(long, default_value = "irsa")]
-    redis_auth_type: String,
-    /// redis host for streaming connections (might be different from the notifications host)
-    #[clap(long)]
-    redis_host: Option<String>,
-    /// redis port for streaming connections (might be different from the notifications host)
-    #[clap(long)]
-    redis_port: Option<u16>,
-    /// redis cluster name, used in aws elasticache
-    #[clap(long)]
-    redis_cluster_name: Option<String>,
-    /// redis user_id, used in aws elasticache
-    #[clap(long)]
-    redis_user_id: Option<String>,
-    /// aws region to retrieve credentials
-    #[clap(long, default_value_t = String::new())]
-    aws_region: String,
-    /// cache for `project_info` (use `size=0` to disable)
-    #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
-    project_info_cache: String,
-    /// cache for all valid endpoints
-    #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)]
-    endpoint_cache_config: String,
-    #[clap(flatten)]
-    parquet_upload: ParquetUploadArgs,
-
-    /// interval for backup metric collection
-    #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
-    metric_backup_collection_interval: std::time::Duration,
-    /// remote storage configuration for backup metric collection
-    /// Encoded as toml (same format as pageservers), eg
-    /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`
-    #[clap(long, value_parser = remote_storage_from_toml)]
-    metric_backup_collection_remote_storage: Option<RemoteStorageConfig>,
-    /// chunk size for backup metric collection
-    /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression.
-    #[clap(long, default_value = "4194304")]
-    metric_backup_collection_chunk_size: usize,
-    /// Whether to retry the connection to the compute node
-    #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
-    connect_to_compute_retry: String,
-    /// Whether to retry the wake_compute request
-    #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
-    wake_compute_retry: String,
-
-    /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    is_private_access_proxy: bool,
-
-    /// Configure whether all incoming requests have a Proxy Protocol V2 packet.
-    // TODO(conradludgate): switch default to rejected or required once we've updated all deployments
-    #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Supported)]
-    proxy_protocol_v2: ProxyProtocolV2,
-
-    /// Time the proxy waits for the webauth session to be confirmed by the control plane.
-    // TODO: rename to `console_redirect_confirmation_timeout`.
-    #[clap(long, default_value = "2m", value_parser = humantime::parse_duration)]
-    webauth_confirmation_timeout: std::time::Duration,
-}
-
-#[derive(clap::Args, Clone, Copy, Debug)]
-struct SqlOverHttpArgs {
-    /// timeout for http connection requests
-    #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
-    sql_over_http_timeout: tokio::time::Duration,
-
-    /// Whether the SQL over http pool is opt-in
-    #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    sql_over_http_pool_opt_in: bool,
-
-    /// How many connections to pool for each endpoint. Excess connections are discarded
-    #[clap(long, default_value_t = 20)]
-    sql_over_http_pool_max_conns_per_endpoint: usize,
-
-    /// How many connections to pool for each endpoint. Excess connections are discarded
-    #[clap(long, default_value_t = 20000)]
-    sql_over_http_pool_max_total_conns: usize,
-
-    /// How long pooled connections should remain idle for before closing
-    #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
-    sql_over_http_idle_timeout: tokio::time::Duration,
-
-    /// Duration each shard will wait on average before a GC sweep.
-    /// A longer time will causes sweeps to take longer but will interfere less frequently.
-    #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
-    sql_over_http_pool_gc_epoch: tokio::time::Duration,
-
-    /// How many shards should the global pool have. Must be a power of two.
-    /// More shards will introduce less contention for pool operations, but can
-    /// increase memory used by the pool
-    #[clap(long, default_value_t = 128)]
-    sql_over_http_pool_shards: usize,
-
-    #[clap(long, default_value_t = 10000)]
-    sql_over_http_client_conn_threshold: u64,
-
-    #[clap(long, default_value_t = 64)]
-    sql_over_http_cancel_set_shards: usize,
-
-    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
-    sql_over_http_max_request_size_bytes: u64,
-
-    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
-    sql_over_http_max_response_size_bytes: usize,
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    let _logging_guard = proxy::logging::init().await?;
-    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
-
-    let args = ProxyCliArgs::parse();
-
-    let region_provider =
-        RegionProviderChain::default_provider().or_else(Region::new(args.aws_region.clone()));
-    let provider_conf =
-        ProviderConfig::without_region().with_region(region_provider.region().await);
-    let aws_credentials_provider = {
-        // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
-        CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
-            // uses "AWS_PROFILE" / `aws sso login --profile <profile>`
-            .or_else(
-                "profile-sso",
-                ProfileFileCredentialsProvider::builder()
-                    .configure(&provider_conf)
-                    .build(),
-            )
-            // uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
-            // needed to access remote extensions bucket
-            .or_else(
-                "token",
-                WebIdentityTokenCredentialsProvider::builder()
-                    .configure(&provider_conf)
-                    .build(),
-            )
-            // uses imds v2
-            .or_else("imds", ImdsCredentialsProvider::builder().build())
-    };
-    let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new(
-        elasticache::AWSIRSAConfig::new(
-            args.aws_region.clone(),
-            args.redis_cluster_name,
-            args.redis_user_id,
-        ),
-        aws_credentials_provider,
-    ));
-    let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) {
-        ("plain", redis_url) => match redis_url {
-            None => {
-                bail!("plain auth requires redis_notifications to be set");
-            }
-            Some(url) => Some(
-                ConnectionWithCredentialsProvider::new_with_static_credentials(url.to_string()),
-            ),
-        },
-        ("irsa", _) => match (&args.redis_host, args.redis_port) {
-            (Some(host), Some(port)) => Some(
-                ConnectionWithCredentialsProvider::new_with_credentials_provider(
-                    host.to_string(),
-                    port,
-                    elasticache_credentials_provider.clone(),
-                ),
-            ),
-            (None, None) => {
-                warn!("irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client");
-                None
-            }
-            _ => {
-                bail!("redis-host and redis-port must be specified together");
-            }
-        },
-        _ => {
-            bail!("unknown auth type given");
-        }
-    };
-
-    let endpoint_cache_config: config::EndpointCacheConfig = args.endpoint_cache_config.parse()?;
-
-    let Some(mut regional_redis_client) = regional_redis_client else {
-        bail!("no regional_redis_client");
-    };
-
-    if let Err(e) = regional_redis_client.connect().await {
-        bail!("error connecting to redis: {:?}", e);
-    }
-
-    let mut last_id = "0-0".to_string();
-    batch_read(
-        &mut regional_redis_client,
-        endpoint_cache_config.stream_name,
-        StreamReadOptions::default().count(endpoint_cache_config.default_batch_size),
-        &mut last_id,
-        true,
-        |event| {
-            let json = serde_json::to_string(&event)?;
-            println!("{}", json);
-            Ok(())
-        },
-    )
-    .await?;
-
-    Ok(())
-}
-
-// TODO: this could be an enum, but events in Redis need to be fixed first.
-// ProjectCreated was sent with type:branch_created. So we ignore type.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
-struct CPlaneEvent {
-    id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    endpoint_created: Option<EndpointCreated>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    branch_created: Option<BranchCreated>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    project_created: Option<ProjectCreated>,
-
-    #[serde(rename = "type")]
-    _type: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
-struct EndpointCreated {
-    endpoint_id: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
-struct BranchCreated {
-    branch_id: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
-struct ProjectCreated {
-    project_id: String,
-}
-
-impl TryFrom<&Value> for CPlaneEvent {
-    type Error = anyhow::Error;
-    fn try_from(value: &Value) -> Result<Self, Self::Error> {
-        let json = String::from_redis_value(value)?;
-        Ok(serde_json::from_str(&json)?)
-    }
-}
-
-async fn batch_read(
-    conn: &mut ConnectionWithCredentialsProvider,
-    stream_name: String,
-    opts: StreamReadOptions,
-    last_id: &mut String,
-    return_when_finish: bool,
-    mut insert_event: impl FnMut(CPlaneEvent) -> anyhow::Result<()>,
-) -> anyhow::Result<()> {
-    let mut total: usize = 0;
-    loop {
-        let mut res: StreamReadReply = conn
-            .xread_options(&[&stream_name], &[last_id.as_str()], &opts)
-            .await?;
-
-        if res.keys.is_empty() {
-            if return_when_finish {
-                if total != 0 {
-                    break;
-                }
-                anyhow::bail!(
-                    "Redis stream {} is empty, cannot be used to filter endpoints",
-                    stream_name
-                );
-            }
-            // If we are not returning when finish, we should wait for more data.
-            continue;
-        }
-        if res.keys.len() != 1 {
-            anyhow::bail!("Cannot read from redis stream {}", stream_name);
-        }
-
-        let key = res.keys.pop().expect("Checked length above");
-
-        for stream_id in key.ids {
-            total += 1;
-            for value in stream_id.map.values() {
-                match value.try_into() {
-                    Ok::<CPlaneEvent, _>(mut event) => {
-                        event.id = Some(stream_id.id.clone());
-                        insert_event(event)?;
-                    }
-                    Err(err) => {
-                        tracing::error!("error parsing value {value:?}: {err:?}");
-                    }
-                };
-            }
-            if total.is_power_of_two() {
-                tracing::debug!("endpoints read {}", total);
-            }
-            *last_id = stream_id.id;
-        }
-    }
-    tracing::info!("read {} endpoints/branches/projects from redis", total);
-    Ok(())
-}
--- a/proxy/src/redis/connection_with_credentials_provider.rs
+++ b/proxy/src/redis/connection_with_credentials_provider.rs
@@ -80,7 +80,7 @@ impl ConnectionWithCredentialsProvider {
        redis::cmd("PING").query_async(con).await
    }

-    pub async fn connect(&mut self) -> anyhow::Result<()> {
+    pub(crate) async fn connect(&mut self) -> anyhow::Result<()> {
        let _guard = self.mutex.lock().await;
        if let Some(con) = self.con.as_mut() {
            match Self::ping(con).await {
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -127,29 +127,23 @@ pub struct PhysicalStorage {
    /// - doesn't point to the end of the segment
    file: Option<File>,

-    /// When true, WAL truncation potentially has been interrupted and we need
-    /// to finish it before allowing WAL writes; see truncate_wal for details.
-    /// In this case [`write_lsn`] can be less than actually written WAL on
-    /// disk. In particular, there can be a case with unexpected .partial file.
+    /// When false, we have just initialized storage using the LSN from find_end_of_wal().
+    /// In this case, [`write_lsn`] can be less than actually written WAL on disk. In particular,
+    /// there can be a case with unexpected .partial file.
    ///
    /// Imagine the following:
    /// - 000000010000000000000001
-    ///   - it was fully written, but the last record is split between 2
-    ///     segments
-    ///   - after restart, `find_end_of_wal()` returned 0/1FFFFF0, which is in
-    ///     the end of this segment
-    ///   - `write_lsn`, `write_record_lsn` and `flush_record_lsn` were
-    ///     initialized to 0/1FFFFF0
+    ///   - it was fully written, but the last record is split between 2 segments
+    ///   - after restart, `find_end_of_wal()` returned 0/1FFFFF0, which is in the end of this segment
+    ///   - `write_lsn`, `write_record_lsn` and `flush_record_lsn` were initialized to 0/1FFFFF0
    /// - 000000010000000000000002.partial
-    ///   - it has only 1 byte written, which is not enough to make a full WAL
-    ///     record
+    ///   - it has only 1 byte written, which is not enough to make a full WAL record
    ///
-    /// Partial segment 002 has no WAL records, and it will be removed by the
-    /// next truncate_wal(). This flag will be set to true after the first
-    /// truncate_wal() call.
+    /// Partial segment 002 has no WAL records, and it will be removed by the next truncate_wal().
+    /// This flag will be set to true after the first truncate_wal() call.
    ///
    /// [`write_lsn`]: Self::write_lsn
-    pending_wal_truncation: bool,
+    is_truncated_after_restart: bool,
 }

 impl PhysicalStorage {
@@ -214,7 +208,7 @@ impl PhysicalStorage {
            flush_record_lsn: flush_lsn,
            decoder: WalStreamDecoder::new(write_lsn, state.server.pg_version / 10000),
            file: None,
-            pending_wal_truncation: true,
+            is_truncated_after_restart: false,
        })
    }

@@ -411,13 +405,6 @@ impl Storage for PhysicalStorage {
                startpos
            );
        }
-        if self.pending_wal_truncation {
-            bail!(
-                "write_wal called with pending WAL truncation, write_lsn={}, startpos={}",
-                self.write_lsn,
-                startpos
-            );
-        }

        let write_seconds = time_io_closure(self.write_exact(startpos, buf)).await?;
        // WAL is written, updating write metrics
@@ -492,34 +479,15 @@ impl Storage for PhysicalStorage {
            );
        }

-        // Quick exit if nothing to do and we know that the state is clean to
-        // avoid writing up to 16 MiB of zeros on disk (this happens on each
-        // connect).
-        if !self.pending_wal_truncation
+        // Quick exit if nothing to do to avoid writing up to 16 MiB of zeros on
+        // disk (this happens on each connect).
+        if self.is_truncated_after_restart
            && end_pos == self.write_lsn
            && end_pos == self.flush_record_lsn
        {
            return Ok(());
        }

-        // Atomicity: we start with LSNs reset because once on disk deletion is
-        // started it can't be reversed. However, we might crash/error in the
-        // middle, leaving garbage above the truncation point. In theory,
-        // concatenated with previous records it might form bogus WAL (though
-        // very unlikely in practice because CRC would guard from that). To
-        // protect, set pending_wal_truncation flag before beginning: it means
-        // truncation must be retried and WAL writes are prohibited until it
-        // succeeds. Flag is also set on boot because we don't know if the last
-        // state was clean.
-        //
-        // Protocol (HandleElected before first AppendRequest) ensures we'll
-        // always try to ensure clean truncation before any writes.
-        self.pending_wal_truncation = true;
-
-        self.write_lsn = end_pos;
-        self.write_record_lsn = end_pos;
-        self.flush_record_lsn = end_pos;
-
        // Close previously opened file, if any
        if let Some(unflushed_file) = self.file.take() {
            self.fdatasync_file(&unflushed_file).await?;
@@ -545,7 +513,11 @@ impl Storage for PhysicalStorage {
            fs::rename(wal_file_path, wal_file_partial_path).await?;
        }

-        self.pending_wal_truncation = false;
+        // Update LSNs
+        self.write_lsn = end_pos;
+        self.write_record_lsn = end_pos;
+        self.flush_record_lsn = end_pos;
+        self.is_truncated_after_restart = true;
        Ok(())
    }

--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -46,8 +46,3 @@ class EndpointHttpClient(requests.Session):
        )
        res.raise_for_status()
        return res.json()
-
-    def metrics(self) -> str:
-        res = self.get(f"http://localhost:{self.port}/metrics")
-        res.raise_for_status()
-        return res.text
--- a/test_runner/fixtures/neon_api.py
+++ b/test_runner/fixtures/neon_api.py
@@ -5,8 +5,6 @@ from typing import TYPE_CHECKING, cast, final

 import requests

-from fixtures.log_helper import log
-
 if TYPE_CHECKING:
    from typing import Any, Literal, Optional

@@ -32,11 +30,7 @@ class NeonAPI:
            kwargs["headers"] = {}
        kwargs["headers"]["Authorization"] = f"Bearer {self.__neon_api_key}"

-        resp = requests.request(method, f"{self.__neon_api_base_url}{endpoint}", **kwargs)
-        log.debug("%s %s returned a %d: %s", method, endpoint, resp.status_code, resp.text)
-        resp.raise_for_status()
-
-        return resp
+        return requests.request(method, f"{self.__neon_api_base_url}{endpoint}", **kwargs)

    def create_project(
        self,
@@ -72,6 +66,8 @@ class NeonAPI:
            json=data,
        )

+        assert resp.status_code == 201
+
        return cast("dict[str, Any]", resp.json())

    def get_project_details(self, project_id: str) -> dict[str, Any]:
@@ -83,7 +79,7 @@ class NeonAPI:
                "Content-Type": "application/json",
            },
        )
-
+        assert resp.status_code == 200
        return cast("dict[str, Any]", resp.json())

    def delete_project(
@@ -99,6 +95,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def start_endpoint(
@@ -114,6 +112,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def suspend_endpoint(
@@ -129,6 +129,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def restart_endpoint(
@@ -144,6 +146,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def create_endpoint(
@@ -174,6 +178,8 @@ class NeonAPI:
            json=data,
        )

+        assert resp.status_code == 201
+
        return cast("dict[str, Any]", resp.json())

    def get_connection_uri(
@@ -200,6 +206,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def get_branches(self, project_id: str) -> dict[str, Any]:
@@ -211,6 +219,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def get_endpoints(self, project_id: str) -> dict[str, Any]:
@@ -222,6 +232,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def get_operations(self, project_id: str) -> dict[str, Any]:
@@ -234,6 +246,8 @@ class NeonAPI:
            },
        )

+        assert resp.status_code == 200
+
        return cast("dict[str, Any]", resp.json())

    def wait_for_operation_to_finish(self, project_id: str):
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1065,9 +1065,6 @@ class NeonEnv:
                "http_auth_type": http_auth_type,
                # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override`
                "availability_zone": "us-east-2a",
-                # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
-                # the pageserver taking a long time to start up due to syncfs flushing other tests' data
-                "no_sync": True,
            }
            if self.pageserver_virtual_file_io_engine is not None:
                ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
--- a/test_runner/performance/test_logical_replication.py
+++ b/test_runner/performance/test_logical_replication.py
@@ -149,16 +149,12 @@ def test_subscriber_lag(
                check_pgbench_still_running(pub_workload, "pub")
                check_pgbench_still_running(sub_workload, "sub")

-                pub_conn = psycopg2.connect(pub_connstr)
-                sub_conn = psycopg2.connect(sub_connstr)
-                pub_conn.autocommit = True
-                sub_conn.autocommit = True
-
-                with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
-                    lag = measure_logical_replication_lag(sub_cur, pub_cur)
-
-                pub_conn.close()
-                sub_conn.close()
+                with (
+                    psycopg2.connect(pub_connstr) as pub_conn,
+                    psycopg2.connect(sub_connstr) as sub_conn,
+                ):
+                    with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
+                        lag = measure_logical_replication_lag(sub_cur, pub_cur)

                log.info(f"Replica lagged behind master by {lag} seconds")
                zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
@@ -210,7 +206,6 @@ def test_publisher_restart(
    sub_conn = psycopg2.connect(sub_connstr)
    pub_conn.autocommit = True
    sub_conn.autocommit = True
-
    with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
        pub_cur.execute("SELECT 1 FROM pg_catalog.pg_publication WHERE pubname = 'pub1'")
        pub_exists = len(pub_cur.fetchall()) != 0
@@ -227,7 +222,6 @@ def test_publisher_restart(
            sub_cur.execute(f"create subscription sub1 connection '{pub_connstr}' publication pub1")

        initial_sync_lag = measure_logical_replication_lag(sub_cur, pub_cur)
-
    pub_conn.close()
    sub_conn.close()

@@ -254,17 +248,12 @@ def test_publisher_restart(
                    ["pgbench", "-c10", pgbench_duration, "-Mprepared"],
                    env=pub_env,
                )
-
-                pub_conn = psycopg2.connect(pub_connstr)
-                sub_conn = psycopg2.connect(sub_connstr)
-                pub_conn.autocommit = True
-                sub_conn.autocommit = True
-
-                with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
-                    lag = measure_logical_replication_lag(sub_cur, pub_cur)
-
-                pub_conn.close()
-                sub_conn.close()
+                with (
+                    psycopg2.connect(pub_connstr) as pub_conn,
+                    psycopg2.connect(sub_connstr) as sub_conn,
+                ):
+                    with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
+                        lag = measure_logical_replication_lag(sub_cur, pub_cur)

                log.info(f"Replica lagged behind master by {lag} seconds")
                zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
@@ -299,56 +288,58 @@ def test_snap_files(
    env = benchmark_project_pub.pgbench_env
    connstr = benchmark_project_pub.connstr

-    conn = psycopg2.connect(connstr)
-    conn.autocommit = True
-
-    with conn.cursor() as cur:
-        cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
-        is_super = cast("bool", cur.fetchall()[0][0])
-        assert is_super, "This benchmark won't work if we don't have superuser"
-
-    conn.close()
+    with psycopg2.connect(connstr) as conn:
+        conn.autocommit = True
+        with conn.cursor() as cur:
+            cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
+            is_super = cast("bool", cur.fetchall()[0][0])
+            assert is_super, "This benchmark won't work if we don't have superuser"

    pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=env)

    conn = psycopg2.connect(connstr)
    conn.autocommit = True
+    cur = conn.cursor()
+    cur.execute("ALTER SYSTEM SET neon.logical_replication_max_snap_files = -1")

-    with conn.cursor() as cur:
-        cur.execute(
+    with psycopg2.connect(connstr) as conn:
+        conn.autocommit = True
+        with conn.cursor() as cur:
+            cur.execute("SELECT pg_reload_conf()")
+
+    with psycopg2.connect(connstr) as conn:
+        conn.autocommit = True
+        with conn.cursor() as cur:
+            cur.execute(
+                """
+                DO $$
+                    BEGIN
+                    IF EXISTS (
+                        SELECT 1
+                        FROM pg_replication_slots
+                        WHERE slot_name = 'slotter'
+                    ) THEN
+                        PERFORM pg_drop_replication_slot('slotter');
+                    END IF;
+                END $$;
            """
-            DO $$
-                BEGIN
-                IF EXISTS (
-                    SELECT 1
-                    FROM pg_replication_slots
-                    WHERE slot_name = 'slotter'
-                ) THEN
-                    PERFORM pg_drop_replication_slot('slotter');
-                END IF;
-            END $$;
-        """
-        )
-        cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")
-
-    conn.close()
+            )
+            cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")

    workload = pg_bin.run_nonblocking(["pgbench", "-c10", pgbench_duration, "-Mprepared"], env=env)
    try:
        start = time.time()
        prev_measurement = time.time()
        while time.time() - start < test_duration_min * 60:
-            conn = psycopg2.connect(connstr)
-            conn.autocommit = True
-
-            with conn.cursor() as cur:
-                cur.execute(
-                    "SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
-                )
-                check_pgbench_still_running(workload)
-                cur.execute("SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())")
-
-            conn.close()
+            with psycopg2.connect(connstr) as conn:
+                with conn.cursor() as cur:
+                    cur.execute(
+                        "SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
+                    )
+                    check_pgbench_still_running(workload)
+                    cur.execute(
+                        "SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())"
+                    )

            # Measure storage
            if time.time() - prev_measurement > test_interval_min * 60:
--- a/test_runner/performance/test_physical_replication.py
+++ b/test_runner/performance/test_physical_replication.py
@@ -102,21 +102,15 @@ def test_ro_replica_lag(
                    check_pgbench_still_running(master_workload)
                    check_pgbench_still_running(replica_workload)
                    time.sleep(sync_interval_min * 60)
-
-                    conn_master = psycopg2.connect(master_connstr)
-                    conn_replica = psycopg2.connect(replica_connstr)
-                    conn_master.autocommit = True
-                    conn_replica.autocommit = True
-
                    with (
-                        conn_master.cursor() as cur_master,
-                        conn_replica.cursor() as cur_replica,
+                        psycopg2.connect(master_connstr) as conn_master,
+                        psycopg2.connect(replica_connstr) as conn_replica,
                    ):
-                        lag = measure_replication_lag(cur_master, cur_replica)
-
-                    conn_master.close()
-                    conn_replica.close()
-
+                        with (
+                            conn_master.cursor() as cur_master,
+                            conn_replica.cursor() as cur_replica,
+                        ):
+                            lag = measure_replication_lag(cur_master, cur_replica)
                    log.info(f"Replica lagged behind master by {lag} seconds")
                    zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
            finally:
@@ -225,15 +219,11 @@ def test_replication_start_stop(
        pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10"], env=master_env)

        # Sync replicas
-        conn_master = psycopg2.connect(master_connstr)
-        conn_master.autocommit = True
-
-        with conn_master.cursor() as cur_master:
-            for i in range(num_replicas):
-                conn_replica = psycopg2.connect(replica_connstr[i])
-                measure_replication_lag(cur_master, conn_replica.cursor())
-
-        conn_master.close()
+        with psycopg2.connect(master_connstr) as conn_master:
+            with conn_master.cursor() as cur_master:
+                for i in range(num_replicas):
+                    conn_replica = psycopg2.connect(replica_connstr[i])
+                    measure_replication_lag(cur_master, conn_replica.cursor())

        master_pgbench = pg_bin.run_nonblocking(
            [
@@ -287,22 +277,17 @@ def test_replication_start_stop(

            time.sleep(configuration_test_time_sec)

-            conn_master = psycopg2.connect(master_connstr)
-            conn_master.autocommit = True
-
-            with conn_master.cursor() as cur_master:
-                for ireplica in range(num_replicas):
-                    replica_conn = psycopg2.connect(replica_connstr[ireplica])
-                    lag = measure_replication_lag(cur_master, replica_conn.cursor())
-                    zenbenchmark.record(
-                        f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
-                    )
-                    log.info(
-                        f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
-                    )
-
-            conn_master.close()
-
+            with psycopg2.connect(master_connstr) as conn_master:
+                with conn_master.cursor() as cur_master:
+                    for ireplica in range(num_replicas):
+                        replica_conn = psycopg2.connect(replica_connstr[ireplica])
+                        lag = measure_replication_lag(cur_master, replica_conn.cursor())
+                        zenbenchmark.record(
+                            f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
+                        )
+                        log.info(
+                            f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
+                        )
        master_pgbench.terminate()
    except Exception as e:
        error_occurred = True
--- a/test_runner/regress/test_installed_extensions.py
+++ b/test_runner/regress/test_installed_extensions.py
@@ -1,14 +1,6 @@
-from __future__ import annotations
-
-import time
 from logging import info
-from typing import TYPE_CHECKING

-from fixtures.log_helper import log
-from fixtures.metrics import parse_metrics
-
-if TYPE_CHECKING:
-    from fixtures.neon_fixtures import NeonEnv
+from fixtures.neon_fixtures import NeonEnv


 def test_installed_extensions(neon_simple_env: NeonEnv):
@@ -93,52 +85,3 @@ def test_installed_extensions(neon_simple_env: NeonEnv):
            assert ext["n_databases"] == 2
            ext["versions"].sort()
            assert ext["versions"] == ["1.2", "1.3"]
-
-    # check that /metrics endpoint is available
-    # ensure that we see the metric before and after restart
-    res = client.metrics()
-    info("Metrics: %s", res)
-    m = parse_metrics(res)
-    neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.2"})
-    assert len(neon_m) == 1
-    for sample in neon_m:
-        assert sample.value == 2
-    neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.3"})
-    assert len(neon_m) == 1
-    for sample in neon_m:
-        assert sample.value == 1
-
-    endpoint.stop()
-    endpoint.start()
-
-    timeout = 10
-    while timeout > 0:
-        try:
-            res = client.metrics()
-            timeout = -1
-            if len(parse_metrics(res).query_all("installed_extensions")) < 4:
-                # Assume that not all metrics that are collected yet
-                time.sleep(1)
-                timeout -= 1
-                continue
-        except Exception:
-            log.exception("failed to get metrics, assume they are not collected yet")
-            time.sleep(1)
-            timeout -= 1
-            continue
-
-        assert (
-            len(parse_metrics(res).query_all("installed_extensions")) >= 4
-        ), "Not all metrics are collected"
-
-        info("After restart metrics: %s", res)
-        m = parse_metrics(res)
-        neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.2"})
-        assert len(neon_m) == 1
-        for sample in neon_m:
-            assert sample.value == 1
-
-        neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.3"})
-        assert len(neon_m) == 1
-        for sample in neon_m:
-            assert sample.value == 1
--- a/test_runner/regress/test_pageserver_generations.py
+++ b/test_runner/regress/test_pageserver_generations.py
@@ -35,10 +35,9 @@ from fixtures.pageserver.utils import (
    wait_for_upload,
 )
 from fixtures.remote_storage import (
-    LocalFsStorage,
    RemoteStorageKind,
 )
-from fixtures.utils import run_only_on_default_postgres, wait_until
+from fixtures.utils import wait_until
 from fixtures.workload import Workload

 if TYPE_CHECKING:
@@ -729,68 +728,3 @@ def test_upgrade_generationless_local_file_paths(
    )
    # We should download into the same local path we started with
    assert os.path.exists(victim_path)
-
-
-@run_only_on_default_postgres("Only tests index logic")
-def test_old_index_time_threshold(
-    neon_env_builder: NeonEnvBuilder,
-):
-    """
-    Exercise pageserver's detection of trying to load an ancient non-latest index.
-    (see https://github.com/neondatabase/neon/issues/6951)
-    """
-
-    # Run with local_fs because we will interfere with mtimes by local filesystem access
-    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
-    env = neon_env_builder.init_start()
-    tenant_id = env.initial_tenant
-    timeline_id = env.initial_timeline
-
-    workload = Workload(env, tenant_id, timeline_id)
-    workload.init()
-    workload.write_rows(32)
-
-    # Remember generation 1's index path
-    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
-    index_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
-
-    # Increment generation by detaching+attaching, and write+flush some data to get a new remote index
-    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
-    env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
-    env.storage_controller.reconcile_until_idle()
-    workload.churn_rows(32)
-
-    # A new index should have been written
-    assert env.pageserver_remote_storage.index_path(tenant_id, timeline_id) != index_path
-
-    # Hack the mtime on the generation 1 index
-    log.info(f"Setting old mtime on {index_path}")
-    os.utime(index_path, times=(time.time(), time.time() - 30 * 24 * 3600))
-    env.pageserver.allowed_errors.extend(
-        [
-            ".*Found a newer index while loading an old one.*",
-            ".*Index age exceeds threshold and a newer index exists.*",
-        ]
-    )
-
-    # Detach from storage controller + attach in an old generation directly on the pageserver.
-    workload.stop()
-    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
-    env.storage_controller.reconcile_until_idle()
-    env.storage_controller.tenant_policy_update(tenant_id, {"scheduling": "Stop"})
-    env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy")
-
-    # The controller would not do this (attach in an old generation): we are doing it to simulate
-    # a hypothetical profound bug in the controller.
-    env.pageserver.http_client().tenant_location_conf(
-        tenant_id, {"generation": 1, "mode": "AttachedSingle", "tenant_conf": {}}
-    )
-
-    # The pageserver should react to this situation by refusing to attach the tenant and putting
-    # it into Broken state
-    env.pageserver.allowed_errors.append(".*tenant is broken.*")
-    with pytest.raises(
-        PageserverApiException,
-        match="tenant is broken: Index age exceeds threshold and a newer index exists",
-    ):
-        env.pageserver.http_client().timeline_detail(tenant_id, timeline_id)
--- a/test_runner/regress/test_physical_and_logical_replicaiton.py
+++ b/test_runner/regress/test_physical_and_logical_replicaiton.py
@@ -5,8 +5,7 @@ import time
 from fixtures.neon_fixtures import NeonEnv, logical_replication_sync


-def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonEnv, vanilla_pg):
-    """Test read replica of a primary which has a logical replication publication"""
+def test_physical_and_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
    env = neon_simple_env

    n_records = 100000
@@ -14,6 +13,7 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
+        config_lines=["min_wal_size=32MB", "max_wal_size=64MB"],
    )
    p_con = primary.connect()
    p_cur = p_con.cursor()
@@ -30,6 +30,7 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
+        config_lines=["min_wal_size=32MB", "max_wal_size=64MB"],
    )

    s_con = secondary.connect()
@@ -47,51 +48,3 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE
    # Check that LR slot is not copied to replica
    s_cur.execute("select count(*) from pg_replication_slots")
    assert s_cur.fetchall()[0][0] == 0
-
-
-def test_aux_not_logged_at_replica(neon_simple_env: NeonEnv, vanilla_pg):
-    """Test that AUX files are not saved at replica"""
-    env = neon_simple_env
-
-    n_records = 20000
-
-    primary = env.endpoints.create_start(
-        branch_name="main",
-        endpoint_id="primary",
-    )
-    p_con = primary.connect()
-    p_cur = p_con.cursor()
-    p_cur.execute("CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))")
-    p_cur.execute("create publication pub1 for table t")
-
-    # start subscriber
-    vanilla_pg.start()
-    vanilla_pg.safe_psql("CREATE TABLE t(pk bigint primary key, payload text)")
-    connstr = primary.connstr().replace("'", "''")
-    vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1")
-
-    for pk in range(n_records):
-        p_cur.execute("insert into t (pk) values (%s)", (pk,))
-
-    # LR snapshot is stored each 15 seconds
-    time.sleep(16)
-
-    # start replica
-    secondary = env.endpoints.new_replica_start(
-        origin=primary,
-        endpoint_id="secondary",
-    )
-
-    s_con = secondary.connect()
-    s_cur = s_con.cursor()
-
-    logical_replication_sync(vanilla_pg, primary)
-
-    assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == n_records
-    s_cur.execute("select count(*) from t")
-    assert s_cur.fetchall()[0][0] == n_records
-
-    vanilla_pg.stop()
-    secondary.stop()
-    primary.stop()
-    assert not secondary.log_contains("cannot make new WAL entries during recovery")
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -427,7 +427,7 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder):
            env.pageserver.start()

            for f in futs:
-                f.result(timeout=30)
+                f.result(timeout=10)

    # The tenant should end up active
    wait_until_tenant_active(env.pageserver.http_client(), tenant_id, iterations=10, period=1)
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
-    "17.1",
-    "aa2e29f2b6952140dfe51876bbd11054acae776f"
+    "17.0",
+    "9ad2f3c5c37c08069a01c1e3f6b7cf275437e0cb"
  ],
  "v16": [
-    "16.5",
-    "b0b693ea298454e95e6b154780d1fd586a244dfd"
+    "16.4",
+    "e131a9c027b202ce92bd7b9cf2569d48a6f9948e"
  ],
  "v15": [
-    "15.9",
-    "1feff6b60f07cb71b665d0f5ead71a4320a71743"
+    "15.8",
+    "22e580fe9ffcea7e02592110b1c9bf426d83cada"
  ],
  "v14": [
-    "14.14",
-    "c5e0d642efb02e4bfedc283b0a7707fe6c79cc89"
+    "14.13",
+    "2199b83fb72680001ce0f43bf6187a21dfb8f45d"
  ]
 }