repro

2026-05-25 09:00:37 +00:00 · 2025-02-20 18:56:34 +01:00
51 changed files with 249 additions and 970 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1874,12 +1874,6 @@ dependencies = [
 "syn 2.0.90",
 ]

-[[package]]
-name = "difflib"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
-
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -3337,17 +3331,6 @@ dependencies = [
 "wasm-bindgen",
 ]

-[[package]]
-name = "json-structural-diff"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf"
-dependencies = [
- "difflib",
- "regex",
- "serde_json",
-]
-
 [[package]]
 name = "jsonwebtoken"
 version = "9.2.0"
@@ -6460,7 +6443,6 @@ dependencies = [
 "humantime",
 "hyper 0.14.30",
 "itertools 0.10.5",
- "json-structural-diff",
 "lasso",
 "measured",
 "metrics",
@@ -7634,7 +7616,6 @@ dependencies = [
 "once_cell",
 "pin-project-lite",
 "postgres_connection",
- "pprof",
 "pq_proto",
 "rand 0.8.5",
 "regex",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -210,7 +210,6 @@ rustls-native-certs = "0.8"
 x509-parser = "0.16"
 whoami = "1.5.1"
 zerocopy = { version = "0.7", features = ["derive"] }
-json-structural-diff = { version = "0.2.0" }

 ## TODO replace this with tracing
 env_logger = "0.10"
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -292,7 +292,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.85.0
+ENV RUSTC_VERSION=1.84.1
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1669,11 +1669,7 @@ COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# Disabled temporarily, because it clashed with pg_mooncake. pg_mooncake
-# also depends on libduckdb, but a different version.
-#COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
-
+COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -41,6 +41,7 @@ use std::process::exit;
 use std::str::FromStr;
 use std::sync::atomic::Ordering;
 use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
+use std::time::SystemTime;
 use std::{thread, time::Duration};

 use anyhow::{Context, Result};
@@ -85,6 +86,19 @@ fn parse_remote_ext_config(arg: &str) -> Result<String> {
    }
 }

+/// Generate a `compute-<unix-seconds>` compute ID if one is not supplied. This exists to keep
+/// forward compatibility tests working, but will be removed in a future iteration.
+fn generate_compute_id() -> String {
+    let now = SystemTime::now();
+
+    format!(
+        "compute-{}",
+        now.duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap() // Err (and thus a panic) only if the clock reads earlier than the UNIX epoch
+            .as_secs()
+    )
+}
+
 #[derive(Parser)]
 #[command(rename_all = "kebab-case")]
 struct Cli {
@@ -98,13 +112,16 @@ struct Cli {
    /// outside the compute will talk to the compute through this port. Keep
    /// the previous name for this argument around for a smoother release
    /// with the control plane.
-    #[arg(long, default_value_t = 3080)]
+    ///
+    /// TODO: Remove the alias after the control plane release which teaches the
+    /// control plane about the renamed argument.
+    #[arg(long, alias = "http-port", default_value_t = 3080)]
    pub external_http_port: u16,

-    /// The port to bind the internal listening HTTP server to. Clients include
+    /// The port to bind the internal listening HTTP server to. Clients like
    /// the neon extension (for installing remote extensions) and local_proxy.
-    #[arg(long, default_value_t = 3081)]
-    pub internal_http_port: u16,
+    #[arg(long)]
+    pub internal_http_port: Option<u16>,

    #[arg(short = 'D', long, value_name = "DATADIR")]
    pub pgdata: String,
@@ -139,7 +156,7 @@ struct Cli {
    #[arg(short = 'S', long, group = "spec-path")]
    pub spec_path: Option<OsString>,

-    #[arg(short = 'i', long, group = "compute-id")]
+    #[arg(short = 'i', long, group = "compute-id", default_value = generate_compute_id())]
    pub compute_id: String,

    #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], value_name = "CONTROL_PLANE_API_BASE_URL")]
@@ -342,7 +359,7 @@ fn wait_spec(
        pgbin: cli.pgbin.clone(),
        pgversion: get_pg_version_string(&cli.pgbin),
        external_http_port: cli.external_http_port,
-        internal_http_port: cli.internal_http_port,
+        internal_http_port: cli.internal_http_port.unwrap_or(cli.external_http_port + 1),
        live_config_allowed,
        state: Mutex::new(new_state),
        state_changed: Condvar::new(),
@@ -366,7 +383,7 @@ fn wait_spec(

    // The internal HTTP server could be launched later, but there isn't much
    // sense in waiting.
-    Server::Internal(cli.internal_http_port).launch(&compute);
+    Server::Internal(cli.internal_http_port.unwrap_or(cli.external_http_port + 1)).launch(&compute);

    if !spec_set {
        // No spec provided, hang waiting for it.
--- a/compute_tools/src/sql/drop_subscriptions.sql
+++ b/compute_tools/src/sql/drop_subscriptions.sql
@@ -2,7 +2,6 @@ DO $$
 DECLARE
    subname TEXT;
 BEGIN
-    LOCK TABLE pg_subscription IN ACCESS EXCLUSIVE MODE;
    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -46,8 +46,6 @@ use std::process::Command;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
-use std::time::SystemTime;
-use std::time::UNIX_EPOCH;

 use anyhow::{anyhow, bail, Context, Result};
 use compute_api::requests::ConfigurationRequest;
@@ -61,7 +59,6 @@ use nix::sys::signal::Signal;
 use pageserver_api::shard::ShardStripeSize;
 use reqwest::header::CONTENT_TYPE;
 use serde::{Deserialize, Serialize};
-use tracing::debug;
 use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};

@@ -84,10 +81,8 @@ pub struct EndpointConf {
    internal_http_port: u16,
    pg_version: u32,
    skip_pg_catalog_updates: bool,
-    reconfigure_concurrency: usize,
    drop_subscriptions_before_start: bool,
    features: Vec<ComputeFeature>,
-    cluster: Option<Cluster>,
 }

 //
@@ -184,9 +179,7 @@ impl ComputeControlPlane {
            // we also skip catalog updates in the cloud.
            skip_pg_catalog_updates,
            drop_subscriptions_before_start,
-            reconfigure_concurrency: 1,
            features: vec![],
-            cluster: None,
        });

        ep.create_endpoint_dir()?;
@@ -203,9 +196,7 @@ impl ComputeControlPlane {
                pg_version,
                skip_pg_catalog_updates,
                drop_subscriptions_before_start,
-                reconfigure_concurrency: 1,
                features: vec![],
-                cluster: None,
            })?,
        )?;
        std::fs::write(
@@ -270,11 +261,8 @@ pub struct Endpoint {
    skip_pg_catalog_updates: bool,

    drop_subscriptions_before_start: bool,
-    reconfigure_concurrency: usize,
    // Feature flags
    features: Vec<ComputeFeature>,
-    // Cluster settings
-    cluster: Option<Cluster>,
 }

 #[derive(PartialEq, Eq)]
@@ -314,8 +302,6 @@ impl Endpoint {
        let conf: EndpointConf =
            serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;

-        debug!("serialized endpoint conf: {:?}", conf);
-
        Ok(Endpoint {
            pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port),
            external_http_address: SocketAddr::new(
@@ -333,10 +319,8 @@ impl Endpoint {
            tenant_id: conf.tenant_id,
            pg_version: conf.pg_version,
            skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
-            reconfigure_concurrency: conf.reconfigure_concurrency,
            drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
            features: conf.features,
-            cluster: conf.cluster,
        })
    }

@@ -623,7 +607,7 @@ impl Endpoint {
        };

        // Create spec file
-        let mut spec = ComputeSpec {
+        let spec = ComputeSpec {
            skip_pg_catalog_updates: self.skip_pg_catalog_updates,
            format_version: 1.0,
            operation_uuid: None,
@@ -656,7 +640,7 @@ impl Endpoint {
                    Vec::new()
                },
                settings: None,
-                postgresql_conf: Some(postgresql_conf.clone()),
+                postgresql_conf: Some(postgresql_conf),
            },
            delta_operations: None,
            tenant_id: Some(self.tenant_id),
@@ -669,35 +653,9 @@ impl Endpoint {
            pgbouncer_settings: None,
            shard_stripe_size: Some(shard_stripe_size),
            local_proxy_config: None,
-            reconfigure_concurrency: self.reconfigure_concurrency,
+            reconfigure_concurrency: 1,
            drop_subscriptions_before_start: self.drop_subscriptions_before_start,
        };
-
-        // this strange code is needed to support respec() in tests
-        if self.cluster.is_some() {
-            debug!("Cluster is already set in the endpoint spec, using it");
-            spec.cluster = self.cluster.clone().unwrap();
-
-            debug!("spec.cluster {:?}", spec.cluster);
-
-            // fill missing fields again
-            if create_test_user {
-                spec.cluster.roles.push(Role {
-                    name: PgIdent::from_str("test").unwrap(),
-                    encrypted_password: None,
-                    options: None,
-                });
-                spec.cluster.databases.push(Database {
-                    name: PgIdent::from_str("neondb").unwrap(),
-                    owner: PgIdent::from_str("test").unwrap(),
-                    options: None,
-                    restrict_conn: false,
-                    invalid: false,
-                });
-            }
-            spec.cluster.postgresql_conf = Some(postgresql_conf);
-        }
-
        let spec_path = self.endpoint_path().join("spec.json");
        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

@@ -715,14 +673,18 @@ impl Endpoint {
            println!("Also at '{}'", conn_str);
        }
        let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
+        //cmd.args([
+        //    "--external-http-port",
+        //    &self.external_http_address.port().to_string(),
+        //])
+        //.args([
+        //    "--internal-http-port",
+        //    &self.internal_http_address.port().to_string(),
+        //])
        cmd.args([
-            "--external-http-port",
+            "--http-port",
            &self.external_http_address.port().to_string(),
        ])
-        .args([
-            "--internal-http-port",
-            &self.internal_http_address.port().to_string(),
-        ])
        .args(["--pgdata", self.pgdata().to_str().unwrap()])
        .args(["--connstr", &conn_str])
        .args([
@@ -739,16 +701,20 @@ impl Endpoint {
        ])
        // TODO: It would be nice if we generated compute IDs with the same
        // algorithm as the real control plane.
-        .args([
-            "--compute-id",
-            &format!(
-                "compute-{}",
-                SystemTime::now()
-                    .duration_since(UNIX_EPOCH)
-                    .unwrap()
-                    .as_secs()
-            ),
-        ])
+        //
+        // TODO: Add this back when
+        // https://github.com/neondatabase/neon/pull/10747 is merged.
+        //
+        //.args([
+        //    "--compute-id",
+        //    &format!(
+        //        "compute-{}",
+        //        SystemTime::now()
+        //            .duration_since(UNIX_EPOCH)
+        //            .unwrap()
+        //            .as_secs()
+        //    ),
+        //])
        .stdin(std::process::Stdio::null())
        .stderr(logfile.try_clone()?)
        .stdout(logfile);
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -335,21 +335,13 @@ impl PageServerNode {
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'checkpoint_distance' as an integer")?,
-            checkpoint_timeout: settings
-                .remove("checkpoint_timeout")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'checkpoint_timeout' as duration")?,
+            checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
            compaction_target_size: settings
                .remove("compaction_target_size")
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'compaction_target_size' as an integer")?,
-            compaction_period: settings
-                .remove("compaction_period")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'compaction_period' as duration")?,
+            compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
            compaction_threshold: settings
                .remove("compaction_threshold")
                .map(|x| x.parse::<usize>())
@@ -395,10 +387,7 @@ impl PageServerNode {
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'gc_horizon' as an integer")?,
-            gc_period: settings.remove("gc_period")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'gc_period' as duration")?,
+            gc_period: settings.remove("gc_period").map(|x| x.to_string()),
            image_creation_threshold: settings
                .remove("image_creation_threshold")
                .map(|x| x.parse::<usize>())
@@ -414,20 +403,13 @@ impl PageServerNode {
                .map(|x| x.parse::<usize>())
                .transpose()
                .context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
-            pitr_interval: settings.remove("pitr_interval")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'pitr_interval' as duration")?,
+            pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
            walreceiver_connect_timeout: settings
                .remove("walreceiver_connect_timeout")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'walreceiver_connect_timeout' as duration")?,
+                .map(|x| x.to_string()),
            lagging_wal_timeout: settings
                .remove("lagging_wal_timeout")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'lagging_wal_timeout' as duration")?,
+                .map(|x| x.to_string()),
            max_lsn_wal_lag: settings
                .remove("max_lsn_wal_lag")
                .map(|x| x.parse::<NonZeroU64>())
@@ -445,14 +427,8 @@ impl PageServerNode {
                .context("Failed to parse 'min_resident_size_override' as integer")?,
            evictions_low_residence_duration_metric_threshold: settings
                .remove("evictions_low_residence_duration_metric_threshold")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration")?,
-            heatmap_period: settings
-                .remove("heatmap_period")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'heatmap_period' as duration")?,
+                .map(|x| x.to_string()),
+            heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
            lazy_slru_download: settings
                .remove("lazy_slru_download")
                .map(|x| x.parse::<bool>())
@@ -463,15 +439,10 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("parse `timeline_get_throttle` from json")?,
-            lsn_lease_length: settings.remove("lsn_lease_length")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'lsn_lease_length' as duration")?,
+            lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
            lsn_lease_length_for_ts: settings
                .remove("lsn_lease_length_for_ts")
-                .map(humantime::parse_duration)
-                .transpose()
-                .context("Failed to parse 'lsn_lease_length_for_ts' as duration")?,
+                .map(|x| x.to_string()),
            timeline_offloading: settings
                .remove("timeline_offloading")
                .map(|x| x.parse::<bool>())
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -47,9 +47,6 @@ enum Command {
        listen_http_addr: String,
        #[arg(long)]
        listen_http_port: u16,
-        #[arg(long)]
-        listen_https_port: Option<u16>,
-
        #[arg(long)]
        availability_zone_id: String,
    },
@@ -397,7 +394,6 @@ async fn main() -> anyhow::Result<()> {
            listen_pg_port,
            listen_http_addr,
            listen_http_port,
-            listen_https_port,
            availability_zone_id,
        } => {
            storcon_client
@@ -410,7 +406,6 @@ async fn main() -> anyhow::Result<()> {
                        listen_pg_port,
                        listen_http_addr,
                        listen_http_port,
-                        listen_https_port,
                        availability_zone_id: AvailabilityZone(availability_zone_id),
                    }),
                )
@@ -959,7 +954,7 @@ async fn main() -> anyhow::Result<()> {
                                threshold: threshold.into(),
                            },
                        )),
-                        heatmap_period: Some(Duration::from_secs(300)),
+                        heatmap_period: Some("300s".to_string()),
                        ..Default::default()
                    },
                })
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -77,5 +77,4 @@ echo "Start compute node"
 /usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
     -C "postgresql://cloud_admin@localhost:55433/postgres"  \
     -b /usr/local/bin/postgres                              \
-     --compute-id "compute-$RANDOM"                          \
     -S ${SPEC_FILE}
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -252,7 +252,7 @@ pub enum ComputeMode {
    Replica,
 }

-#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct Cluster {
    pub cluster_id: Option<String>,
    pub name: Option<String>,
@@ -283,7 +283,7 @@ pub struct DeltaOp {

 /// Rust representation of Postgres role info with only those fields
 /// that matter for us.
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Role {
    pub name: PgIdent,
    pub encrypted_password: Option<String>,
@@ -292,7 +292,7 @@ pub struct Role {

 /// Rust representation of Postgres database info with only those fields
 /// that matter for us.
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Database {
    pub name: PgIdent,
    pub owner: PgIdent,
@@ -308,7 +308,7 @@ pub struct Database {
 /// Common type representing both SQL statement params with or without value,
 /// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
 /// options like `wal_level = logical`.
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct GenericOption {
    pub name: String,
    pub value: Option<String>,
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -122,8 +122,6 @@ pub struct ConfigToml {
    pub page_service_pipelining: PageServicePipeliningConfig,
    pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
    pub enable_read_path_debugging: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub validate_wal_contiguity: Option<bool>,
 }

 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -523,7 +521,6 @@ impl Default for ConfigToml {
            } else {
                None
            },
-            validate_wal_contiguity: None,
        }
    }
 }
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -57,7 +57,6 @@ pub struct NodeRegisterRequest {

    pub listen_http_addr: String,
    pub listen_http_port: u16,
-    pub listen_https_port: Option<u16>,

    pub availability_zone_id: AvailabilityZone,
 }
@@ -106,7 +105,6 @@ pub struct TenantLocateResponseShard {

    pub listen_http_addr: String,
    pub listen_http_port: u16,
-    pub listen_https_port: Option<u16>,
 }

 #[derive(Serialize, Deserialize)]
@@ -150,7 +148,6 @@ pub struct NodeDescribeResponse {

    pub listen_http_addr: String,
    pub listen_http_port: u16,
-    pub listen_https_port: Option<u16>,

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -526,13 +526,9 @@ pub struct TenantConfigPatch {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
 pub struct TenantConfig {
    pub checkpoint_distance: Option<u64>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub checkpoint_timeout: Option<Duration>,
+    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub compaction_period: Option<Duration>,
+    pub compaction_period: Option<String>,
    pub compaction_threshold: Option<usize>,
    pub compaction_upper_limit: Option<usize>,
    // defer parsing compaction_algorithm, like eviction_policy
@@ -543,38 +539,22 @@ pub struct TenantConfig {
    pub l0_flush_stall_threshold: Option<usize>,
    pub l0_flush_wait_upload: Option<bool>,
    pub gc_horizon: Option<u64>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub gc_period: Option<Duration>,
+    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub pitr_interval: Option<Duration>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub walreceiver_connect_timeout: Option<Duration>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub lagging_wal_timeout: Option<Duration>,
+    pub pitr_interval: Option<String>,
+    pub walreceiver_connect_timeout: Option<String>,
+    pub lagging_wal_timeout: Option<String>,
    pub max_lsn_wal_lag: Option<NonZeroU64>,
    pub eviction_policy: Option<EvictionPolicy>,
    pub min_resident_size_override: Option<u64>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub evictions_low_residence_duration_metric_threshold: Option<Duration>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub heatmap_period: Option<Duration>,
+    pub evictions_low_residence_duration_metric_threshold: Option<String>,
+    pub heatmap_period: Option<String>,
    pub lazy_slru_download: Option<bool>,
    pub timeline_get_throttle: Option<ThrottleConfig>,
    pub image_layer_creation_check_threshold: Option<u8>,
    pub image_creation_preempt_threshold: Option<usize>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub lsn_lease_length: Option<Duration>,
-    #[serde(default)]
-    #[serde(with = "humantime_serde")]
-    pub lsn_lease_length_for_ts: Option<Duration>,
+    pub lsn_lease_length: Option<String>,
+    pub lsn_lease_length_for_ts: Option<String>,
    pub timeline_offloading: Option<bool>,
    pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
    pub rel_size_v2_enabled: Option<bool>,
@@ -584,10 +564,7 @@ pub struct TenantConfig {
 }

 impl TenantConfig {
-    pub fn apply_patch(
-        self,
-        patch: TenantConfigPatch,
-    ) -> Result<TenantConfig, humantime::DurationError> {
+    pub fn apply_patch(self, patch: TenantConfigPatch) -> TenantConfig {
        let Self {
            mut checkpoint_distance,
            mut checkpoint_timeout,
@@ -627,17 +604,11 @@ impl TenantConfig {
        } = self;

        patch.checkpoint_distance.apply(&mut checkpoint_distance);
-        patch
-            .checkpoint_timeout
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut checkpoint_timeout);
+        patch.checkpoint_timeout.apply(&mut checkpoint_timeout);
        patch
            .compaction_target_size
            .apply(&mut compaction_target_size);
-        patch
-            .compaction_period
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut compaction_period);
+        patch.compaction_period.apply(&mut compaction_period);
        patch.compaction_threshold.apply(&mut compaction_threshold);
        patch
            .compaction_upper_limit
@@ -655,25 +626,15 @@ impl TenantConfig {
            .apply(&mut l0_flush_stall_threshold);
        patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload);
        patch.gc_horizon.apply(&mut gc_horizon);
-        patch
-            .gc_period
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut gc_period);
+        patch.gc_period.apply(&mut gc_period);
        patch
            .image_creation_threshold
            .apply(&mut image_creation_threshold);
-        patch
-            .pitr_interval
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut pitr_interval);
+        patch.pitr_interval.apply(&mut pitr_interval);
        patch
            .walreceiver_connect_timeout
-            .map(|v| humantime::parse_duration(&v))?
            .apply(&mut walreceiver_connect_timeout);
-        patch
-            .lagging_wal_timeout
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut lagging_wal_timeout);
+        patch.lagging_wal_timeout.apply(&mut lagging_wal_timeout);
        patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);
        patch.eviction_policy.apply(&mut eviction_policy);
        patch
@@ -681,12 +642,8 @@ impl TenantConfig {
            .apply(&mut min_resident_size_override);
        patch
            .evictions_low_residence_duration_metric_threshold
-            .map(|v| humantime::parse_duration(&v))?
            .apply(&mut evictions_low_residence_duration_metric_threshold);
-        patch
-            .heatmap_period
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut heatmap_period);
+        patch.heatmap_period.apply(&mut heatmap_period);
        patch.lazy_slru_download.apply(&mut lazy_slru_download);
        patch
            .timeline_get_throttle
@@ -697,13 +654,9 @@ impl TenantConfig {
        patch
            .image_creation_preempt_threshold
            .apply(&mut image_creation_preempt_threshold);
-        patch
-            .lsn_lease_length
-            .map(|v| humantime::parse_duration(&v))?
-            .apply(&mut lsn_lease_length);
+        patch.lsn_lease_length.apply(&mut lsn_lease_length);
        patch
            .lsn_lease_length_for_ts
-            .map(|v| humantime::parse_duration(&v))?
            .apply(&mut lsn_lease_length_for_ts);
        patch.timeline_offloading.apply(&mut timeline_offloading);
        patch
@@ -720,7 +673,7 @@ impl TenantConfig {
            .gc_compaction_ratio_percent
            .apply(&mut gc_compaction_ratio_percent);

-        Ok(Self {
+        Self {
            checkpoint_distance,
            checkpoint_timeout,
            compaction_target_size,
@@ -756,7 +709,7 @@ impl TenantConfig {
            gc_compaction_enabled,
            gc_compaction_initial_threshold_kb,
            gc_compaction_ratio_percent,
-        })
+        }
    }
 }

@@ -2550,7 +2503,7 @@ mod tests {
            ..base.clone()
        };

-        let patched = base.apply_patch(decoded.config).unwrap();
+        let patched = base.apply_patch(decoded.config);

        assert_eq!(patched, expected);
    }
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -27,7 +27,7 @@ humantime.workspace = true
 fail.workspace = true
 futures = { workspace = true }
 jsonwebtoken.workspace = true
-nix = { workspace = true, features = ["ioctl"] }
+nix = {workspace = true, features = [ "ioctl" ] }
 once_cell.workspace = true
 pin-project-lite.workspace = true
 regex.workspace = true
@@ -61,7 +61,6 @@ bytes.workspace = true
 criterion.workspace = true
 hex-literal.workspace = true
 camino-tempfile.workspace = true
-pprof.workspace = true
 serde_assert.workspace = true
 tokio = { workspace = true, features = ["test-util"] }

--- a/libs/utils/benches/README.md
+++ b/libs/utils/benches/README.md
@@ -1,26 +0,0 @@
-## Utils Benchmarks
-
-To run benchmarks:
-
-```sh
-# All benchmarks.
-cargo bench --package utils
-
-# Specific file.
-cargo bench --package utils --bench benchmarks
-
-# Specific benchmark.
-cargo bench --package utils --bench benchmarks warn_slow/enabled=true
-
-# List available benchmarks.
-cargo bench --package utils --benches -- --list
-
-# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
-# Output in target/criterion/*/profile/flamegraph.svg.
-cargo bench --package utils --bench benchmarks warn_slow/enabled=true --profile-time 10
-```
-
-Additional charts and statistics are available in `target/criterion/report/index.html`.
-
-Benchmarks are automatically compared against the previous run. To compare against other runs, see
-`--baseline` and `--save-baseline`.
--- a/libs/utils/benches/benchmarks.rs
+++ b/libs/utils/benches/benchmarks.rs
@@ -1,18 +1,5 @@
-use std::time::Duration;
-
-use criterion::{criterion_group, criterion_main, Bencher, Criterion};
-use pprof::criterion::{Output, PProfProfiler};
+use criterion::{criterion_group, criterion_main, Criterion};
 use utils::id;
-use utils::logging::warn_slow;
-
-// Register benchmarks with Criterion.
-criterion_group!(
-    name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
-    targets = bench_id_stringify,
-    bench_warn_slow,
-);
-criterion_main!(benches);

 pub fn bench_id_stringify(c: &mut Criterion) {
    // Can only use public methods.
@@ -29,31 +16,5 @@ pub fn bench_id_stringify(c: &mut Criterion) {
    });
 }

-pub fn bench_warn_slow(c: &mut Criterion) {
-    for enabled in [false, true] {
-        c.bench_function(&format!("warn_slow/enabled={enabled}"), |b| {
-            run_bench(b, enabled).unwrap()
-        });
-    }
-
-    // The actual benchmark.
-    fn run_bench(b: &mut Bencher, enabled: bool) -> anyhow::Result<()> {
-        const THRESHOLD: Duration = Duration::from_secs(1);
-
-        // Use a multi-threaded runtime to avoid thread parking overhead when yielding.
-        let runtime = tokio::runtime::Builder::new_multi_thread()
-            .enable_all()
-            .build()?;
-
-        // Test both with and without warn_slow, since we're essentially measuring Tokio scheduling
-        // performance too. Use a simple noop future that yields once, to avoid any scheduler fast
-        // paths for a ready future.
-        if enabled {
-            b.iter(|| runtime.block_on(warn_slow("ready", THRESHOLD, tokio::task::yield_now())));
-        } else {
-            b.iter(|| runtime.block_on(tokio::task::yield_now()));
-        }
-
-        Ok(())
-    }
-}
+criterion_group!(benches, bench_id_stringify);
+criterion_main!(benches);
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -1,13 +1,9 @@
-use std::future::Future;
 use std::str::FromStr;
-use std::time::Duration;

 use anyhow::Context;
 use metrics::{IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
 use strum_macros::{EnumString, VariantNames};
-use tokio::time::Instant;
-use tracing::warn;

 /// Logs a critical error, similarly to `tracing::error!`. This will:
 ///
@@ -322,41 +318,6 @@ impl std::fmt::Debug for SecretString {
    }
 }

-/// Logs a periodic warning if a future is slow to complete.
-///
-/// This is performance-sensitive as it's used on the GetPage read path.
-#[inline]
-pub async fn warn_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
-    // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
-    // won't fit on the stack.
-    let mut f = Box::pin(f);
-
-    let started = Instant::now();
-    let mut attempt = 1;
-
-    loop {
-        // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
-        // case where the timeout doesn't fire.
-        let deadline = started + attempt * threshold;
-        if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
-            // NB: we check if we exceeded the threshold even if the timeout never fired, because
-            // scheduling or execution delays may cause the future to succeed even if it exceeds the
-            // timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
-            // false negatives.
-            let elapsed = started.elapsed();
-            if elapsed >= threshold {
-                warn!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
-            }
-            return output;
-        }
-
-        let elapsed = started.elapsed().as_secs_f64();
-        warn!("slow {name} still running after {elapsed:.3}s",);
-
-        attempt += 1;
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use metrics::{core::Opts, IntCounterVec};
--- a/libs/wal_decoder/proto/interpreted_wal.proto
+++ b/libs/wal_decoder/proto/interpreted_wal.proto
@@ -5,7 +5,6 @@ package interpreted_wal;
 message InterpretedWalRecords {
  repeated InterpretedWalRecord records = 1;
  optional uint64 next_record_lsn = 2;
-  optional uint64 raw_wal_start_lsn = 3;
 }

 message InterpretedWalRecord {
--- a/libs/wal_decoder/src/models.rs
+++ b/libs/wal_decoder/src/models.rs
@@ -60,11 +60,7 @@ pub struct InterpretedWalRecords {
    pub records: Vec<InterpretedWalRecord>,
    // Start LSN of the next record after the batch.
    // Note that said record may not belong to the current shard.
-    pub next_record_lsn: Lsn,
-    // Inclusive start LSN of the PG WAL from which the interpreted
-    // WAL records were extracted. Note that this is not necessarily the
-    // start LSN of the first interpreted record in the batch.
-    pub raw_wal_start_lsn: Option<Lsn>,
+    pub next_record_lsn: Option<Lsn>,
 }

 /// An interpreted Postgres WAL record, ready to be handled by the pageserver
--- a/libs/wal_decoder/src/wire_format.rs
+++ b/libs/wal_decoder/src/wire_format.rs
@@ -167,8 +167,7 @@ impl TryFrom<InterpretedWalRecords> for proto::InterpretedWalRecords {
            .collect::<Result<Vec<_>, _>>()?;
        Ok(proto::InterpretedWalRecords {
            records,
-            next_record_lsn: Some(value.next_record_lsn.0),
-            raw_wal_start_lsn: value.raw_wal_start_lsn.map(|l| l.0),
+            next_record_lsn: value.next_record_lsn.map(|l| l.0),
        })
    }
 }
@@ -255,11 +254,7 @@ impl TryFrom<proto::InterpretedWalRecords> for InterpretedWalRecords {

        Ok(InterpretedWalRecords {
            records,
-            next_record_lsn: value
-                .next_record_lsn
-                .map(Lsn::from)
-                .expect("Always provided"),
-            raw_wal_start_lsn: value.raw_wal_start_lsn.map(Lsn::from),
+            next_record_lsn: value.next_record_lsn.map(Lsn::from),
        })
    }
 }
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -134,7 +134,6 @@ fn main() -> anyhow::Result<()> {
    info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
    info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
    info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol");
-    info!(?conf.validate_wal_contiguity, "starting with WAL contiguity validation");
    info!(?conf.page_service_pipelining, "starting with page service pipelining config");
    info!(?conf.get_vectored_concurrent_io, "starting with get_vectored IO concurrency config");

--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -197,10 +197,6 @@ pub struct PageServerConf {
    /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer
    /// files read.
    pub enable_read_path_debugging: bool,
-
-    /// Interpreted protocol feature: if enabled, validate that the logical WAL received from
-    /// safekeepers does not have gaps.
-    pub validate_wal_contiguity: bool,
 }

 /// Token for authentication to safekeepers
@@ -364,7 +360,6 @@ impl PageServerConf {
            page_service_pipelining,
            get_vectored_concurrent_io,
            enable_read_path_debugging,
-            validate_wal_contiguity,
        } = config_toml;

        let mut conf = PageServerConf {
@@ -451,7 +446,6 @@ impl PageServerConf {
            virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
            no_sync: no_sync.unwrap_or(false),
            enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
-            validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
        };

        // ------------------------------------------------------------
--- a/pageserver/src/controller_upcall_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -173,7 +173,6 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
                        listen_pg_port: m.postgres_port,
                        listen_http_addr: m.http_host,
                        listen_http_port: m.http_port,
-                        listen_https_port: None, // TODO: Support https.
                        availability_zone_id: az_id.expect("Checked above"),
                    })
                }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -34,13 +34,11 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::time::SystemTime;
 use std::time::{Duration, Instant};
-use strum_macros::IntoStaticStr;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio::io::{AsyncWriteExt, BufWriter};
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
-use utils::logging::warn_slow;
 use utils::sync::gate::{Gate, GateGuard};
 use utils::sync::spsc_fold;
 use utils::{
@@ -83,9 +81,6 @@ use std::os::fd::AsRawFd;
 /// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
 const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);

-/// Threshold at which to log a warning about slow GetPage requests.
-const WARN_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30);
-
 ///////////////////////////////////////////////////////////////////////////////

 pub struct Listener {
@@ -599,7 +594,6 @@ struct BatchedTestRequest {
 /// NB: we only hold [`timeline::handle::WeakHandle`] inside this enum,
 /// so that we don't keep the [`Timeline::gate`] open while the batch
 /// is being built up inside the [`spsc_fold`] (pagestream pipelining).
-#[derive(IntoStaticStr)]
 enum BatchedFeMessage {
    Exists {
        span: Span,
@@ -644,10 +638,6 @@ enum BatchedFeMessage {
 }

 impl BatchedFeMessage {
-    fn as_static_str(&self) -> &'static str {
-        self.into()
-    }
-
    fn observe_execution_start(&mut self, at: Instant) {
        match self {
            BatchedFeMessage::Exists { timer, .. }
@@ -1473,20 +1463,17 @@ impl PageServerHandler {
                }
            };

-            let result = warn_slow(
-                msg.as_static_str(),
-                WARN_SLOW_GETPAGE_THRESHOLD,
-                self.pagesteam_handle_batched_message(
+            let err = self
+                .pagesteam_handle_batched_message(
                    pgb_writer,
                    msg,
                    io_concurrency.clone(),
                    &cancel,
                    protocol_version,
                    ctx,
-                ),
-            )
-            .await;
-            match result {
+                )
+                .await;
+            match err {
                Ok(()) => {}
                Err(e) => break e,
            }
@@ -1649,17 +1636,13 @@ impl PageServerHandler {
                            return Err(e);
                        }
                    };
-                    warn_slow(
-                        batch.as_static_str(),
-                        WARN_SLOW_GETPAGE_THRESHOLD,
-                        self.pagesteam_handle_batched_message(
-                            pgb_writer,
-                            batch,
-                            io_concurrency.clone(),
-                            &cancel,
-                            protocol_version,
-                            &ctx,
-                        ),
+                    self.pagesteam_handle_batched_message(
+                        pgb_writer,
+                        batch,
+                        io_concurrency.clone(),
+                        &cancel,
+                        protocol_version,
+                        &ctx,
                    )
                    .await?;
                }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -693,15 +693,16 @@ impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
 /// This is a conversion from our internal tenant config object to the one used
 /// in external APIs.
 impl From<TenantConfOpt> for models::TenantConfig {
-    // TODO(vlad): These are now the same, but they have different serialization logic.
-    // Can we merge them?
    fn from(value: TenantConfOpt) -> Self {
+        fn humantime(d: Duration) -> String {
+            format!("{}s", d.as_secs())
+        }
        Self {
            checkpoint_distance: value.checkpoint_distance,
-            checkpoint_timeout: value.checkpoint_timeout,
+            checkpoint_timeout: value.checkpoint_timeout.map(humantime),
            compaction_algorithm: value.compaction_algorithm,
            compaction_target_size: value.compaction_target_size,
-            compaction_period: value.compaction_period,
+            compaction_period: value.compaction_period.map(humantime),
            compaction_threshold: value.compaction_threshold,
            compaction_upper_limit: value.compaction_upper_limit,
            compaction_l0_first: value.compaction_l0_first,
@@ -710,23 +711,24 @@ impl From<TenantConfOpt> for models::TenantConfig {
            l0_flush_stall_threshold: value.l0_flush_stall_threshold,
            l0_flush_wait_upload: value.l0_flush_wait_upload,
            gc_horizon: value.gc_horizon,
-            gc_period: value.gc_period,
+            gc_period: value.gc_period.map(humantime),
            image_creation_threshold: value.image_creation_threshold,
-            pitr_interval: value.pitr_interval,
-            walreceiver_connect_timeout: value.walreceiver_connect_timeout,
-            lagging_wal_timeout: value.lagging_wal_timeout,
+            pitr_interval: value.pitr_interval.map(humantime),
+            walreceiver_connect_timeout: value.walreceiver_connect_timeout.map(humantime),
+            lagging_wal_timeout: value.lagging_wal_timeout.map(humantime),
            max_lsn_wal_lag: value.max_lsn_wal_lag,
            eviction_policy: value.eviction_policy,
            min_resident_size_override: value.min_resident_size_override,
            evictions_low_residence_duration_metric_threshold: value
-                .evictions_low_residence_duration_metric_threshold,
-            heatmap_period: value.heatmap_period,
+                .evictions_low_residence_duration_metric_threshold
+                .map(humantime),
+            heatmap_period: value.heatmap_period.map(humantime),
            lazy_slru_download: value.lazy_slru_download,
            timeline_get_throttle: value.timeline_get_throttle,
            image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
            image_creation_preempt_threshold: value.image_creation_preempt_threshold,
-            lsn_lease_length: value.lsn_lease_length,
-            lsn_lease_length_for_ts: value.lsn_lease_length_for_ts,
+            lsn_lease_length: value.lsn_lease_length.map(humantime),
+            lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
            timeline_offloading: value.timeline_offloading,
            wal_receiver_protocol_override: value.wal_receiver_protocol_override,
            rel_size_v2_enabled: value.rel_size_v2_enabled,
@@ -758,10 +760,29 @@ mod tests {
        assert_eq!(small_conf, serde_json::from_str(&json_form).unwrap());
    }

+    #[test]
+    fn test_try_from_models_tenant_config_err() {
+        let tenant_config = models::TenantConfig {
+            lagging_wal_timeout: Some("5a".to_string()),
+            ..TenantConfig::default()
+        };
+        // "5a" is not a parseable duration, so the conversion below must be rejected.
+        let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config);
+
+        assert!(
+            tenant_conf_opt.is_err(),
+            "Succeeded to convert TenantConfig to TenantConfOpt"
+        );
+        // The error text must name the offending field and echo the rejected value.
+        let expected_error_str =
+            "lagging_wal_timeout: invalid value: string \"5a\", expected a duration";
+        assert_eq!(tenant_conf_opt.unwrap_err().to_string(), expected_error_str);
+    }
+
    #[test]
    fn test_try_from_models_tenant_config_success() {
        let tenant_config = models::TenantConfig {
-            lagging_wal_timeout: Some(Duration::from_secs(5)),
+            lagging_wal_timeout: Some("5s".to_string()),
            ..TenantConfig::default()
        };

@@ -772,4 +793,12 @@ mod tests {
            Some(Duration::from_secs(5))
        );
    }
+
+    #[test]
+    fn test_vlad() {
+        // A "24h0m0s" interval is expected to come back verbatim from the API-model conversion.
+        // NOTE(review): the conversion emits "<secs>s", so this likely sees "86400s"; confirm.
+        let opt: TenantConfOpt = serde_json::from_str(r#"{"pitr_interval": "24h0m0s"}"#).unwrap();
+        assert_eq!(models::TenantConfig::from(opt).pitr_interval.unwrap(), "24h0m0s");
+    }
 }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2874,7 +2874,6 @@ impl Timeline {
                auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),
                availability_zone: self.conf.availability_zone.clone(),
                ingest_batch_size: self.conf.ingest_batch_size,
-                validate_wal_contiguity: self.conf.validate_wal_contiguity,
            },
            broker_client,
            ctx,
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -2212,7 +2212,7 @@ impl Timeline {
        let sub_compaction_max_job_size_mb =
            sub_compaction_max_job_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB);

-        let mut compact_jobs = Vec::<GcCompactJob>::new();
+        let mut compact_jobs = Vec::new();
        // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning
        // by estimating the amount of files read for a compaction job. We should also partition on LSN.
        let ((dense_ks, sparse_ks), _) = self.partitioning.read().as_ref().clone();
@@ -2299,25 +2299,16 @@ impl Timeline {
                } else {
                    end
                };
-                if total_size == 0 && !compact_jobs.is_empty() {
-                    info!(
-                        "splitting compaction job: {}..{}, estimated_size={}, extending the previous job",
-                        start, end, total_size
-                    );
-                    compact_jobs.last_mut().unwrap().compact_key_range.end = end;
-                    current_start = Some(end);
-                } else {
-                    info!(
-                        "splitting compaction job: {}..{}, estimated_size={}",
-                        start, end, total_size
-                    );
-                    compact_jobs.push(GcCompactJob {
-                        dry_run: job.dry_run,
-                        compact_key_range: start..end,
-                        compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
-                    });
-                    current_start = Some(end);
-                }
+                info!(
+                    "splitting compaction job: {}..{}, estimated_size={}",
+                    start, end, total_size
+                );
+                compact_jobs.push(GcCompactJob {
+                    dry_run: job.dry_run,
+                    compact_key_range: start..end,
+                    compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
+                });
+                current_start = Some(end);
            }
        }
        Ok(compact_jobs)
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -56,7 +56,6 @@ pub struct WalReceiverConf {
    pub auth_token: Option<Arc<String>>,
    pub availability_zone: Option<String>,
    pub ingest_batch_size: u64,
-    pub validate_wal_contiguity: bool,
 }

 pub struct WalReceiver {
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -537,7 +537,6 @@ impl ConnectionManagerState {
        let connect_timeout = self.conf.wal_connect_timeout;
        let ingest_batch_size = self.conf.ingest_batch_size;
        let protocol = self.conf.protocol;
-        let validate_wal_contiguity = self.conf.validate_wal_contiguity;
        let timeline = Arc::clone(&self.timeline);
        let ctx = ctx.detached_child(
            TaskKind::WalReceiverConnectionHandler,
@@ -559,7 +558,6 @@ impl ConnectionManagerState {
                    ctx,
                    node_id,
                    ingest_batch_size,
-                    validate_wal_contiguity,
                )
                .await;

@@ -1565,7 +1563,6 @@ mod tests {
                auth_token: None,
                availability_zone: None,
                ingest_batch_size: 1,
-                validate_wal_contiguity: false,
            },
            wal_connection: None,
            wal_stream_candidates: HashMap::new(),
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -120,7 +120,6 @@ pub(super) async fn handle_walreceiver_connection(
    ctx: RequestContext,
    safekeeper_node: NodeId,
    ingest_batch_size: u64,
-    validate_wal_contiguity: bool,
 ) -> Result<(), WalReceiverError> {
    debug_assert_current_span_has_tenant_and_timeline_id();

@@ -275,7 +274,6 @@ pub(super) async fn handle_walreceiver_connection(
        } => Some((format, compression)),
    };

-    let mut expected_wal_start = startpoint;
    while let Some(replication_message) = {
        select! {
            _ = cancellation.cancelled() => {
@@ -342,49 +340,13 @@ pub(super) async fn handle_walreceiver_connection(
                    )
                    })?;

-                // Guard against WAL gaps. If the start LSN of the PG WAL section
-                // from which the interpreted records were extracted, doesn't match
-                // the end of the previous batch (or the starting point for the first batch),
-                // then kill this WAL receiver connection and start a new one.
-                if validate_wal_contiguity {
-                    if let Some(raw_wal_start_lsn) = batch.raw_wal_start_lsn {
-                        match raw_wal_start_lsn.cmp(&expected_wal_start) {
-                            std::cmp::Ordering::Greater => {
-                                let msg = format!(
-                                    "Gap in streamed WAL: [{}, {})",
-                                    expected_wal_start, raw_wal_start_lsn
-                                );
-                                critical!("{msg}");
-                                return Err(WalReceiverError::Other(anyhow!(msg)));
-                            }
-                            std::cmp::Ordering::Less => {
-                                // Other shards are reading WAL behind us.
-                                // This is valid, but check that we received records
-                                // that we haven't seen before.
-                                if let Some(first_rec) = batch.records.first() {
-                                    if first_rec.next_record_lsn < last_rec_lsn {
-                                        let msg = format!(
-                                            "Received record with next_record_lsn multiple times ({} < {})",
-                                            first_rec.next_record_lsn, expected_wal_start
-                                        );
-                                        critical!("{msg}");
-                                        return Err(WalReceiverError::Other(anyhow!(msg)));
-                                    }
-                                }
-                            }
-                            std::cmp::Ordering::Equal => {}
-                        }
-                    }
-                }
-
                let InterpretedWalRecords {
                    records,
                    next_record_lsn,
-                    raw_wal_start_lsn: _,
                } = batch;

                tracing::debug!(
-                    "Received WAL up to {} with next_record_lsn={}",
+                    "Received WAL up to {} with next_record_lsn={:?}",
                    streaming_lsn,
                    next_record_lsn
                );
@@ -461,11 +423,12 @@ pub(super) async fn handle_walreceiver_connection(
                // need to advance last record LSN on all shards. If we've not ingested the latest
                // record, then set the LSN of the modification past it. This way all shards
                // advance their last record LSN at the same time.
-                let needs_last_record_lsn_advance = if next_record_lsn > modification.get_lsn() {
-                    modification.set_lsn(next_record_lsn).unwrap();
-                    true
-                } else {
-                    false
+                let needs_last_record_lsn_advance = match next_record_lsn {
+                    Some(lsn) if lsn > modification.get_lsn() => {
+                        modification.set_lsn(lsn).unwrap();
+                        true
+                    }
+                    _ => false,
                };

                if uncommitted_records > 0 || needs_last_record_lsn_advance {
@@ -483,8 +446,9 @@ pub(super) async fn handle_walreceiver_connection(
                    timeline.get_last_record_lsn()
                );

-                last_rec_lsn = next_record_lsn;
-                expected_wal_start = streaming_lsn;
+                if let Some(lsn) = next_record_lsn {
+                    last_rec_lsn = lsn;
+                }

                Some(streaming_lsn)
            }
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -474,7 +474,8 @@ readahead_buffer_resize(int newsize, void *extra)
 	 */
 	if (MyPState->n_requests_inflight > newsize)
 	{
-		prefetch_wait_for(MyPState->ring_unused - newsize - 1);
+		Assert(MyPState->ring_unused >= MyPState->n_requests_inflight - newsize);
+		prefetch_wait_for(MyPState->ring_unused - (MyPState->n_requests_inflight - newsize));
 		Assert(MyPState->n_requests_inflight <= newsize);
 	}

--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.85.0"
+channel = "1.84.1"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/src/send_interpreted_wal.rs
+++ b/safekeeper/src/send_interpreted_wal.rs
@@ -295,10 +295,6 @@ impl InterpretedWalReader {

        let mut wal_decoder = WalStreamDecoder::new(start_pos, self.pg_version);

-        // Tracks the start of the PG WAL LSN from which the current batch of
-        // interpreted records originated.
-        let mut current_batch_wal_start_lsn: Option<Lsn> = None;
-
        loop {
            tokio::select! {
                // Main branch for reading WAL and forwarding it
@@ -306,7 +302,7 @@ impl InterpretedWalReader {
                    let wal = wal_or_reset.map(|wor| wor.get_wal().expect("reset handled in select branch below"));
                    let WalBytes {
                        wal,
-                        wal_start_lsn,
+                        wal_start_lsn: _,
                        wal_end_lsn,
                        available_wal_end_lsn,
                    } = match wal {
@@ -319,12 +315,6 @@ impl InterpretedWalReader {
                        }
                    };

-                    // We will already have a value if the previous chunks of WAL
-                    // did not decode into anything useful.
-                    if current_batch_wal_start_lsn.is_none() {
-                        current_batch_wal_start_lsn = Some(wal_start_lsn);
-                    }
-
                    wal_decoder.feed_bytes(&wal);

                    // Deserialize and interpret WAL records from this batch of WAL.
@@ -373,9 +363,7 @@ impl InterpretedWalReader {

                    let max_next_record_lsn = match max_next_record_lsn {
                        Some(lsn) => lsn,
-                        None => {
-                            continue;
-                        }
+                        None => continue,
                    };

                    // Update the current position such that new receivers can decide
@@ -389,38 +377,21 @@ impl InterpretedWalReader {
                        }
                    }

-                    let batch_wal_start_lsn = current_batch_wal_start_lsn.take().unwrap();
-
                    // Send interpreted records downstream. Anything that has already been seen
                    // by a shard is filtered out.
                    let mut shard_senders_to_remove = Vec::new();
                    for (shard, states) in &mut self.shard_senders {
                        for state in states {
-                            let shard_sender_id = ShardSenderId::new(*shard, state.sender_id);
-
-                            let batch = if max_next_record_lsn > state.next_record_lsn {
-                                // This batch contains at least one record that this shard has not
-                                // seen yet.
-                                let records = records_by_sender.remove(&shard_sender_id).unwrap_or_default();
-
-                                InterpretedWalRecords {
-                                    records,
-                                    next_record_lsn: max_next_record_lsn,
-                                    raw_wal_start_lsn: Some(batch_wal_start_lsn),
-                                }
-                            } else if wal_end_lsn > state.next_record_lsn {
-                                // All the records in this batch were seen by the shard
-                                // However, the batch maps to a chunk of WAL that the
-                                // shard has not yet seen. Notify it of the start LSN
-                                // of the PG WAL chunk such that it doesn't look like a gap.
-                                InterpretedWalRecords {
-                                    records: Vec::default(),
-                                    next_record_lsn: state.next_record_lsn,
-                                    raw_wal_start_lsn: Some(batch_wal_start_lsn),
-                                }
-                            } else {
-                                // The shard has seen this chunk of WAL before. Skip it.
+                            if max_next_record_lsn <= state.next_record_lsn {
                                continue;
+                            }
+
+                            let shard_sender_id = ShardSenderId::new(*shard, state.sender_id);
+                            let records = records_by_sender.remove(&shard_sender_id).unwrap_or_default();
+
+                            let batch = InterpretedWalRecords {
+                                records,
+                                next_record_lsn: Some(max_next_record_lsn),
                            };

                            let res = state.tx.send(Batch {
@@ -432,7 +403,7 @@ impl InterpretedWalReader {
                            if res.is_err() {
                                shard_senders_to_remove.push(shard_sender_id);
                            } else {
-                                state.next_record_lsn = std::cmp::max(state.next_record_lsn, max_next_record_lsn);
+                                state.next_record_lsn = max_next_record_lsn;
                            }
                        }
                    }
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -24,7 +24,6 @@ hex.workspace = true
 hyper0.workspace = true
 humantime.workspace = true
 itertools.workspace = true
-json-structural-diff.workspace = true
 lasso.workspace = true
 once_cell.workspace = true
 pageserver_api.workspace = true
--- a/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/down.sql
+++ b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/down.sql
@@ -1 +0,0 @@
-ALTER TABLE nodes DROP listen_https_port;
--- a/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/up.sql
+++ b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/up.sql
@@ -1 +0,0 @@
-ALTER TABLE nodes ADD listen_https_port INTEGER;
--- a/storage_controller/src/main.rs
+++ b/storage_controller/src/main.rs
@@ -126,10 +126,6 @@ struct Cli {

    #[arg(long)]
    long_reconcile_threshold: Option<humantime::Duration>,
-
-    // Flag to use https for requests to pageserver API.
-    #[arg(long, default_value = "false")]
-    use_https_pageserver_api: bool,
 }

 enum StrictMode {
@@ -325,7 +321,6 @@ async fn async_main() -> anyhow::Result<()> {
        address_for_peers: args.address_for_peers,
        start_as_candidate: args.start_as_candidate,
        http_service_port: args.listen.port() as i32,
-        use_https_pageserver_api: args.use_https_pageserver_api,
    };

    // Validate that we can connect to the database
--- a/storage_controller/src/node.rs
+++ b/storage_controller/src/node.rs
@@ -1,6 +1,5 @@
 use std::{str::FromStr, time::Duration};

-use anyhow::anyhow;
 use pageserver_api::{
    controller_api::{
        AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest,
@@ -33,16 +32,12 @@ pub(crate) struct Node {

    listen_http_addr: String,
    listen_http_port: u16,
-    listen_https_port: Option<u16>,

    listen_pg_addr: String,
    listen_pg_port: u16,

    availability_zone_id: AvailabilityZone,

-    // Flag from storcon's config to use https for pageserver admin API.
-    // Invariant: if |true|, listen_https_port should contain a value.
-    use_https: bool,
    // This cancellation token means "stop any RPCs in flight to this node, and don't start
    // any more". It is not related to process shutdown.
    #[serde(skip)]
@@ -61,16 +56,7 @@ pub(crate) enum AvailabilityTransition {

 impl Node {
    pub(crate) fn base_url(&self) -> String {
-        if self.use_https {
-            format!(
-                "https://{}:{}",
-                self.listen_http_addr,
-                self.listen_https_port
-                    .expect("https port should be specified if use_https is on")
-            )
-        } else {
-            format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
-        }
+        format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
    }

    pub(crate) fn get_id(&self) -> NodeId {
@@ -96,20 +82,11 @@ impl Node {
        self.id == register_req.node_id
            && self.listen_http_addr == register_req.listen_http_addr
            && self.listen_http_port == register_req.listen_http_port
-            // Note: listen_https_port may change. See [`Self::need_update`] for mode details.
-            // && self.listen_https_port == register_req.listen_https_port
            && self.listen_pg_addr == register_req.listen_pg_addr
            && self.listen_pg_port == register_req.listen_pg_port
            && self.availability_zone_id == register_req.availability_zone_id
    }

-    // Do we need to update an existing record in DB on this registration request?
-    pub(crate) fn need_update(&self, register_req: &NodeRegisterRequest) -> bool {
-        // listen_https_port is checked here because it may change during migration to https.
-        // After migration, this check may be moved to registration_match.
-        self.listen_https_port != register_req.listen_https_port
-    }
-
    /// For a shard located on this node, populate a response object
    /// with this node's address information.
    pub(crate) fn shard_location(&self, shard_id: TenantShardId) -> TenantLocateResponseShard {
@@ -118,7 +95,6 @@ impl Node {
            node_id: self.id,
            listen_http_addr: self.listen_http_addr.clone(),
            listen_http_port: self.listen_http_port,
-            listen_https_port: self.listen_https_port,
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port,
        }
@@ -199,34 +175,25 @@ impl Node {
        }
    }

-    #[allow(clippy::too_many_arguments)]
    pub(crate) fn new(
        id: NodeId,
        listen_http_addr: String,
        listen_http_port: u16,
-        listen_https_port: Option<u16>,
        listen_pg_addr: String,
        listen_pg_port: u16,
        availability_zone_id: AvailabilityZone,
-        use_https: bool,
-    ) -> anyhow::Result<Self> {
-        if use_https && listen_https_port.is_none() {
-            return Err(anyhow!("https is enabled, but node has no https port"));
-        }
-
-        Ok(Self {
+    ) -> Self {
+        Self {
            id,
            listen_http_addr,
            listen_http_port,
-            listen_https_port,
            listen_pg_addr,
            listen_pg_port,
            scheduling: NodeSchedulingPolicy::Active,
            availability: NodeAvailability::Offline,
            availability_zone_id,
-            use_https,
            cancel: CancellationToken::new(),
-        })
+        }
    }

    pub(crate) fn to_persistent(&self) -> NodePersistence {
@@ -235,19 +202,14 @@ impl Node {
            scheduling_policy: self.scheduling.into(),
            listen_http_addr: self.listen_http_addr.clone(),
            listen_http_port: self.listen_http_port as i32,
-            listen_https_port: self.listen_https_port.map(|x| x as i32),
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port as i32,
            availability_zone_id: self.availability_zone_id.0.clone(),
        }
    }

-    pub(crate) fn from_persistent(np: NodePersistence, use_https: bool) -> anyhow::Result<Self> {
-        if use_https && np.listen_https_port.is_none() {
-            return Err(anyhow!("https is enabled, but node has no https port"));
-        }
-
-        Ok(Self {
+    pub(crate) fn from_persistent(np: NodePersistence) -> Self {
+        Self {
            id: NodeId(np.node_id as u64),
            // At startup we consider a node offline until proven otherwise.
            availability: NodeAvailability::Offline,
@@ -255,13 +217,11 @@ impl Node {
                .expect("Bad scheduling policy in DB"),
            listen_http_addr: np.listen_http_addr,
            listen_http_port: np.listen_http_port as u16,
-            listen_https_port: np.listen_https_port.map(|x| x as u16),
            listen_pg_addr: np.listen_pg_addr,
            listen_pg_port: np.listen_pg_port as u16,
            availability_zone_id: AvailabilityZone(np.availability_zone_id),
-            use_https,
            cancel: CancellationToken::new(),
-        })
+        }
    }

    /// Wrapper for issuing requests to pageserver management API: takes care of generic
@@ -325,9 +285,8 @@ impl Node {
            warn_threshold,
            max_retries,
            &format!(
-                "Call to node {} ({}) management API",
-                self.id,
-                self.base_url(),
+                "Call to node {} ({}:{}) management API",
+                self.id, self.listen_http_addr, self.listen_http_port
            ),
            cancel,
        )
@@ -343,7 +302,6 @@ impl Node {
            availability_zone_id: self.availability_zone_id.0.clone(),
            listen_http_addr: self.listen_http_addr.clone(),
            listen_http_port: self.listen_http_port,
-            listen_https_port: self.listen_https_port,
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port,
        }
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -375,23 +375,18 @@ impl Persistence {
        Ok(nodes)
    }

-    pub(crate) async fn update_node<V>(
+    pub(crate) async fn update_node(
        &self,
        input_node_id: NodeId,
-        values: V,
-    ) -> DatabaseResult<()>
-    where
-        V: diesel::AsChangeset<Target = crate::schema::nodes::table> + Clone + Send + Sync,
-        V::Changeset: diesel::query_builder::QueryFragment<diesel::pg::Pg> + Send, // valid Postgres SQL
-    {
+        input_scheduling: NodeSchedulingPolicy,
+    ) -> DatabaseResult<()> {
        use crate::schema::nodes::dsl::*;
        let updated = self
            .with_measured_conn(DatabaseOperation::UpdateNode, move |conn| {
-                let values = values.clone();
                Box::pin(async move {
                    let updated = diesel::update(nodes)
                        .filter(node_id.eq(input_node_id.0 as i64))
-                        .set(values)
+                        .set((scheduling_policy.eq(String::from(input_scheduling)),))
                        .execute(conn)
                        .await?;
                    Ok(updated)
@@ -408,32 +403,6 @@ impl Persistence {
        }
    }

-    pub(crate) async fn update_node_scheduling_policy(
-        &self,
-        input_node_id: NodeId,
-        input_scheduling: NodeSchedulingPolicy,
-    ) -> DatabaseResult<()> {
-        use crate::schema::nodes::dsl::*;
-        self.update_node(
-            input_node_id,
-            scheduling_policy.eq(String::from(input_scheduling)),
-        )
-        .await
-    }
-
-    pub(crate) async fn update_node_on_registration(
-        &self,
-        input_node_id: NodeId,
-        input_https_port: Option<u16>,
-    ) -> DatabaseResult<()> {
-        use crate::schema::nodes::dsl::*;
-        self.update_node(
-            input_node_id,
-            listen_https_port.eq(input_https_port.map(|x| x as i32)),
-        )
-        .await
-    }
-
    /// At startup, load the high level state for shards, such as their config + policy.  This will
    /// be enriched at runtime with state discovered on pageservers.
    ///
@@ -1483,7 +1452,6 @@ pub(crate) struct NodePersistence {
    pub(crate) listen_pg_addr: String,
    pub(crate) listen_pg_port: i32,
    pub(crate) availability_zone_id: String,
-    pub(crate) listen_https_port: Option<i32>,
 }

 /// Tenant metadata health status that are stored durably.
--- a/storage_controller/src/reconciler.rs
+++ b/storage_controller/src/reconciler.rs
@@ -1,7 +1,6 @@
 use crate::pageserver_client::PageserverClient;
 use crate::persistence::Persistence;
 use crate::{compute_hook, service};
-use json_structural_diff::JsonDiff;
 use pageserver_api::controller_api::{AvailabilityZone, MigrationConfig, PlacementPolicy};
 use pageserver_api::models::{
    LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, TenantWaitLsnRequest,
@@ -25,7 +24,7 @@ use crate::compute_hook::{ComputeHook, NotifyError};
 use crate::node::Node;
 use crate::tenant_shard::{IntentState, ObservedState, ObservedStateDelta, ObservedStateLocation};

-const DEFAULT_HEATMAP_PERIOD: Duration = Duration::from_secs(60);
+const DEFAULT_HEATMAP_PERIOD: &str = "60s";

 /// Object with the lifetime of the background reconcile task that is created
 /// for tenants which have a difference between their intent and observed states.
@@ -881,27 +880,7 @@ impl Reconciler {
                        self.generation = Some(generation);
                        wanted_conf.generation = generation.into();
                    }
-
-                    let diff = match observed {
-                        Some(ObservedStateLocation {
-                            conf: Some(observed),
-                        }) => {
-                            let diff = JsonDiff::diff(
-                                &serde_json::to_value(observed.clone()).unwrap(),
-                                &serde_json::to_value(wanted_conf.clone()).unwrap(),
-                                false,
-                            );
-
-                            if let Some(json_diff) = diff.diff {
-                                serde_json::to_string(&json_diff).unwrap_or("diff err".to_string())
-                            } else {
-                                "unknown".to_string()
-                            }
-                        }
-                        _ => "full".to_string(),
-                    };
-
-                    tracing::info!(node_id=%node.get_id(), "Observed configuration requires update: {diff}");
+                    tracing::info!(node_id=%node.get_id(), "Observed configuration requires update.");

                    // Because `node` comes from a ref to &self, clone it before calling into a &mut self
                    // function: this could be avoided by refactoring the state mutated by location_config into
@@ -1201,7 +1180,7 @@ fn ha_aware_config(config: &TenantConfig, has_secondaries: bool) -> TenantConfig
    let mut config = config.clone();
    if has_secondaries {
        if config.heatmap_period.is_none() {
-            config.heatmap_period = Some(DEFAULT_HEATMAP_PERIOD);
+            config.heatmap_period = Some(DEFAULT_HEATMAP_PERIOD.to_string());
        }
    } else {
        config.heatmap_period = None;
--- a/storage_controller/src/scheduler.rs
+++ b/storage_controller/src/scheduler.rs
@@ -930,16 +930,13 @@ pub(crate) mod test_utils {
                        NodeId(i),
                        format!("httphost-{i}"),
                        80 + i as u16,
-                        None,
                        format!("pghost-{i}"),
                        5432 + i as u16,
                        az_iter
                            .next()
                            .cloned()
                            .unwrap_or(AvailabilityZone("test-az".to_string())),
-                        false,
-                    )
-                    .unwrap();
+                    );
                    node.set_availability(NodeAvailability::Active(test_utilization::simple(0, 0)));
                    assert!(node.is_available());
                    node
--- a/storage_controller/src/schema.rs
+++ b/storage_controller/src/schema.rs
@@ -26,7 +26,6 @@ diesel::table! {
        listen_pg_addr -> Varchar,
        listen_pg_port -> Int4,
        availability_zone_id -> Varchar,
-        listen_https_port -> Nullable<Int4>,
    }
 }

--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -399,8 +399,6 @@ pub struct Config {
    pub http_service_port: i32,

    pub long_reconcile_threshold: Duration,
-
-    pub use_https_pageserver_api: bool,
 }

 impl From<DatabaseError> for ApiError {
@@ -1403,8 +1401,8 @@ impl Service {
            .list_nodes()
            .await?
            .into_iter()
-            .map(|x| Node::from_persistent(x, config.use_https_pageserver_api))
-            .collect::<anyhow::Result<Vec<Node>>>()?;
+            .map(Node::from_persistent)
+            .collect::<Vec<_>>();
        let nodes: HashMap<NodeId, Node> = nodes.into_iter().map(|n| (n.get_id(), n)).collect();
        tracing::info!("Loaded {} nodes from database.", nodes.len());
        metrics::METRICS_REGISTRY
@@ -1503,13 +1501,10 @@ impl Service {
                    NodeId(node_id as u64),
                    "".to_string(),
                    123,
-                    None,
                    "".to_string(),
                    123,
                    AvailabilityZone("test_az".to_string()),
-                    false,
-                )
-                .unwrap();
+                );

                scheduler.node_upsert(&node);
            }
@@ -2921,9 +2916,7 @@ impl Service {
            first
        };

-        let updated_config = base
-            .apply_patch(patch)
-            .map_err(|err| ApiError::BadRequest(anyhow::anyhow!(err)))?;
+        let updated_config = base.apply_patch(patch);
        self.set_tenant_config_and_reconcile(tenant_id, updated_config)
            .await
    }
@@ -5914,10 +5907,8 @@ impl Service {
        )
        .await;

-        #[derive(PartialEq)]
        enum RegistrationStatus {
-            UpToDate,
-            NeedUpdate,
+            Matched,
            Mismatched,
            New,
        }
@@ -5926,11 +5917,7 @@ impl Service {
            let locked = self.inner.read().unwrap();
            if let Some(node) = locked.nodes.get(&register_req.node_id) {
                if node.registration_match(&register_req) {
-                    if node.need_update(&register_req) {
-                        RegistrationStatus::NeedUpdate
-                    } else {
-                        RegistrationStatus::UpToDate
-                    }
+                    RegistrationStatus::Matched
                } else {
                    RegistrationStatus::Mismatched
                }
@@ -5940,9 +5927,9 @@ impl Service {
        };

        match registration_status {
-            RegistrationStatus::UpToDate => {
+            RegistrationStatus::Matched => {
                tracing::info!(
-                    "Node {} re-registered with matching address and is up to date",
+                    "Node {} re-registered with matching address",
                    register_req.node_id
                );

@@ -5960,7 +5947,7 @@ impl Service {
                    "Node is already registered with different address".to_string(),
                ));
            }
-            RegistrationStatus::New | RegistrationStatus::NeedUpdate => {
+            RegistrationStatus::New => {
                // fallthrough
            }
        }
@@ -5989,16 +5976,6 @@ impl Service {
            ));
        }

-        if self.config.use_https_pageserver_api && register_req.listen_https_port.is_none() {
-            return Err(ApiError::PreconditionFailed(
-                format!(
-                    "Node {} has no https port, but use_https is enabled",
-                    register_req.node_id
-                )
-                .into(),
-            ));
-        }
-
        // Ordering: we must persist the new node _before_ adding it to in-memory state.
        // This ensures that before we use it for anything or expose it via any external
        // API, it is guaranteed to be available after a restart.
@@ -6006,29 +5983,13 @@ impl Service {
            register_req.node_id,
            register_req.listen_http_addr,
            register_req.listen_http_port,
-            register_req.listen_https_port,
            register_req.listen_pg_addr,
            register_req.listen_pg_port,
            register_req.availability_zone_id.clone(),
-            self.config.use_https_pageserver_api,
        );
-        let new_node = match new_node {
-            Ok(new_node) => new_node,
-            Err(error) => return Err(ApiError::InternalServerError(error)),
-        };

-        match registration_status {
-            RegistrationStatus::New => self.persistence.insert_node(&new_node).await?,
-            RegistrationStatus::NeedUpdate => {
-                self.persistence
-                    .update_node_on_registration(
-                        register_req.node_id,
-                        register_req.listen_https_port,
-                    )
-                    .await?
-            }
-            _ => unreachable!("Other statuses have been processed earlier"),
-        }
+        // TODO: idempotency if the node already exists in the database
+        self.persistence.insert_node(&new_node).await?;

        let mut locked = self.inner.write().unwrap();
        let mut new_nodes = (*locked.nodes).clone();
@@ -6043,24 +6004,12 @@ impl Service {
            .storage_controller_pageserver_nodes
            .set(locked.nodes.len() as i64);

-        match registration_status {
-            RegistrationStatus::New => {
-                tracing::info!(
-                    "Registered pageserver {} ({}), now have {} pageservers",
-                    register_req.node_id,
-                    register_req.availability_zone_id,
-                    locked.nodes.len()
-                );
-            }
-            RegistrationStatus::NeedUpdate => {
-                tracing::info!(
-                    "Re-registered and updated node {} ({})",
-                    register_req.node_id,
-                    register_req.availability_zone_id,
-                );
-            }
-            _ => unreachable!("Other statuses have been processed earlier"),
-        }
+        tracing::info!(
+            "Registered pageserver {} ({}), now have {} pageservers",
+            register_req.node_id,
+            register_req.availability_zone_id,
+            locked.nodes.len()
+        );
        Ok(())
    }

@@ -6078,9 +6027,7 @@ impl Service {
        if let Some(scheduling) = scheduling {
            // Scheduling is a persistent part of Node: we must write updates to the database before
            // applying them in memory
-            self.persistence
-                .update_node_scheduling_policy(node_id, scheduling)
-                .await?;
+            self.persistence.update_node(node_id, scheduling).await?;
        }

        // If we're activating a node, then before setting it active we must reconcile any shard locations
@@ -6651,12 +6598,11 @@ impl Service {
    ) -> Option<ReconcilerWaiter> {
        let reconcile_needed = shard.get_reconcile_needed(nodes);

-        let reconcile_reason = match reconcile_needed {
+        match reconcile_needed {
            ReconcileNeeded::No => return None,
            ReconcileNeeded::WaitExisting(waiter) => return Some(waiter),
-            ReconcileNeeded::Yes(reason) => {
+            ReconcileNeeded::Yes => {
                // Fall through to try and acquire units for spawning reconciler
-                reason
            }
        };

@@ -6695,7 +6641,6 @@ impl Service {
        };

        shard.spawn_reconciler(
-            reconcile_reason,
            &self.result_tx,
            nodes,
            &self.compute_hook,
--- a/storage_controller/src/tenant_shard.rs
+++ b/storage_controller/src/tenant_shard.rs
@@ -481,14 +481,7 @@ pub(crate) enum ReconcileNeeded {
    /// spawned: wait for the existing reconciler rather than spawning a new one.
    WaitExisting(ReconcilerWaiter),
    /// shard needs reconciliation: call into [`TenantShard::spawn_reconciler`]
-    Yes(ReconcileReason),
-}
-
-#[derive(Debug)]
-pub(crate) enum ReconcileReason {
-    ActiveNodesDirty,
-    UnknownLocation,
-    PendingComputeNotification,
+    Yes,
 }

 /// Pending modification to the observed state of a tenant shard.
@@ -1294,7 +1287,13 @@ impl TenantShard {
                attached_location_conf(generation, &self.shard, &self.config, &self.policy);
            match self.observed.locations.get(&node_id) {
                Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
-                Some(_) | None => {
+                Some(conf) => {
+                    tracing::info!("Wanted: {wanted_conf:?}");
+                    tracing::info!("Observed: {conf:?}");
+
+                    dirty_nodes.insert(node_id);
+                }
+                None => {
                    dirty_nodes.insert(node_id);
                }
            }
@@ -1348,18 +1347,12 @@ impl TenantShard {

        let active_nodes_dirty = self.dirty(pageservers);

-        let reconcile_needed = match (
-            active_nodes_dirty,
-            dirty_observed,
-            self.pending_compute_notification,
-        ) {
-            (true, _, _) => ReconcileNeeded::Yes(ReconcileReason::ActiveNodesDirty),
-            (_, true, _) => ReconcileNeeded::Yes(ReconcileReason::UnknownLocation),
-            (_, _, true) => ReconcileNeeded::Yes(ReconcileReason::PendingComputeNotification),
-            _ => ReconcileNeeded::No,
-        };
+        // Even if there is no pageserver work to be done, if we have a pending notification to computes,
+        // wake up a reconciler to send it.
+        let do_reconcile =
+            active_nodes_dirty || dirty_observed || self.pending_compute_notification;

-        if matches!(reconcile_needed, ReconcileNeeded::No) {
+        if !do_reconcile {
            tracing::debug!("Not dirty, no reconciliation needed.");
            return ReconcileNeeded::No;
        }
@@ -1402,7 +1395,7 @@ impl TenantShard {
            }
        }

-        reconcile_needed
+        ReconcileNeeded::Yes
    }

    /// Ensure the sequence number is set to a value where waiting for this value will make us wait
@@ -1492,7 +1485,6 @@ impl TenantShard {
    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
    pub(crate) fn spawn_reconciler(
        &mut self,
-        reason: ReconcileReason,
        result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResultRequest>,
        pageservers: &Arc<HashMap<NodeId, Node>>,
        compute_hook: &Arc<ComputeHook>,
@@ -1552,7 +1544,7 @@ impl TenantShard {
        let reconcile_seq = self.sequence;
        let long_reconcile_threshold = service_config.long_reconcile_threshold;

-        tracing::info!(seq=%reconcile_seq, "Spawning Reconciler ({reason:?})");
+        tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
        let must_notify = self.pending_compute_notification;
        let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
                                                        tenant_id=%reconciler.tenant_shard_id.tenant_id,
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1167,15 +1167,15 @@ class NeonEnv:
                "max_batch_size": 32,
            }

+            # Concurrent IO (https://github.com/neondatabase/neon/issues/9378):
+            # enable concurrent IO by default in tests and benchmarks.
+            # Compat tests are exempt because old versions fail to parse the new config.
+            get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io
            if config.test_may_use_compatibility_snapshot_binaries:
                log.info(
-                    "Skipping WAL contiguity validation to avoid forward-compatibility related test failures"
+                    "Forcing use of binary-built-in default to avoid forward-compatibility related test failures"
                )
-            else:
-                # Look for gaps in WAL received from safekeepeers
-                ps_cfg["validate_wal_contiguity"] = True
-
-            get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io
+                get_vectored_concurrent_io = None
            if get_vectored_concurrent_io is not None:
                ps_cfg["get_vectored_concurrent_io"] = {
                    "mode": self.pageserver_get_vectored_concurrent_io,
@@ -1630,7 +1630,6 @@ def neon_env_builder(
 class PageserverPort:
    pg: int
    http: int
-    https: int | None = None


 class LogUtils:
@@ -1887,7 +1886,6 @@ class NeonStorageController(MetricsGetter, LogUtils):
            "node_id": int(node.id),
            "listen_http_addr": "localhost",
            "listen_http_port": node.service_port.http,
-            "listen_https_port": node.service_port.https,
            "listen_pg_addr": "localhost",
            "listen_pg_port": node.service_port.pg,
            "availability_zone_id": node.az_id,
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3766,66 +3766,7 @@ def test_storage_controller_node_flap_detach_race(
    wait_until(validate_locations, timeout=10)


-def test_update_node_on_registration(neon_env_builder: NeonEnvBuilder):
-    """
-    Check that storage controller handles node_register requests with updated fields correctly.
-    1. Run storage controller and register 1 pageserver without https port.
-    2. Register the same pageserver with https port. Check that port has been updated.
-    3. Restart the storage controller. Check that https port is persistent.
-    4. Register the same pageserver without https port again (rollback). Check that port has been removed.
-    """
-    neon_env_builder.num_pageservers = 1
-    env = neon_env_builder.init_configs()
-
-    env.storage_controller.start()
-    env.storage_controller.wait_until_ready()
-
-    pageserver = env.pageservers[0]
-
-    # Step 1. Register pageserver without https port.
-    env.storage_controller.node_register(pageserver)
-    env.storage_controller.consistency_check()
-
-    nodes = env.storage_controller.node_list()
-    assert len(nodes) == 1
-    assert nodes[0]["listen_https_port"] is None
-
-    # Step 2. Register pageserver with https port.
-    pageserver.service_port.https = 1234
-    env.storage_controller.node_register(pageserver)
-    env.storage_controller.consistency_check()
-
-    nodes = env.storage_controller.node_list()
-    assert len(nodes) == 1
-    assert nodes[0]["listen_https_port"] == 1234
-
-    # Step 3. Restart storage controller.
-    env.storage_controller.stop()
-    env.storage_controller.start()
-    env.storage_controller.wait_until_ready()
-    env.storage_controller.consistency_check()
-
-    nodes = env.storage_controller.node_list()
-    assert len(nodes) == 1
-    assert nodes[0]["listen_https_port"] == 1234
-
-    # Step 4. Register pageserver with no https port again.
-    pageserver.service_port.https = None
-    env.storage_controller.node_register(pageserver)
-    env.storage_controller.consistency_check()
-
-    nodes = env.storage_controller.node_list()
-    assert len(nodes) == 1
-    assert nodes[0]["listen_https_port"] is None
-
-
-def test_storage_controller_location_conf_equivalence(neon_env_builder: NeonEnvBuilder):
-    """
-    Validate that a storage controller restart with no shards in a transient state
-    performs zero reconciliations at start-up. Implicitly, this means that the location
-    configs returned by the pageserver are identical to the persisted state in the
-    storage controller database.
-    """
+def test_storage_controller_config_equivalence(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_pageservers = 1
    neon_env_builder.storage_controller_config = {
        "start_as_candidate": False,
--- a/test_runner/regress/test_subscriber_branching.py
+++ b/test_runner/regress/test_subscriber_branching.py
@@ -1,10 +1,9 @@
 from __future__ import annotations

-import threading
 import time

 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, logical_replication_sync
+from fixtures.neon_fixtures import NeonEnv
 from fixtures.utils import query_scalar, wait_until


@@ -240,173 +239,3 @@ def test_subscriber_branching(neon_simple_env: NeonEnv):
            res = scur_postgres.fetchall()
            assert len(res) == 1
            assert str(sub_child_2_timeline_id) == res[0][0]
-
-
-def test_multiple_subscription_branching(neon_simple_env: NeonEnv):
-    """
-    Test that compute_ctl can handle concurrent deletion of subscriptions in a multiple databases
-    """
-    env = neon_simple_env
-
-    NUMBER_OF_DBS = 5
-
-    # Create and start endpoint so that neon_local put all the generated
-    # stuff into the spec.json file.
-    endpoint = env.endpoints.create_start(
-        "main",
-        config_lines=[
-            "max_replication_slots = 10",
-            "max_logical_replication_workers=10",
-            "max_worker_processes=10",
-        ],
-    )
-
-    TEST_DB_NAMES = [
-        {
-            "name": "neondb",
-            "owner": "cloud_admin",
-        },
-        {
-            "name": "publisher_db",
-            "owner": "cloud_admin",
-        },
-    ]
-
-    for i in range(NUMBER_OF_DBS):
-        TEST_DB_NAMES.append(
-            {
-                "name": f"db{i}",
-                "owner": "cloud_admin",
-            }
-        )
-
-    # Update the spec.json file to create the databases
-    # and reconfigure the endpoint to apply the changes.
-    endpoint.respec_deep(
-        **{
-            "skip_pg_catalog_updates": False,
-            "cluster": {
-                "databases": TEST_DB_NAMES,
-            },
-        }
-    )
-    endpoint.reconfigure()
-
-    connstr = endpoint.connstr(dbname="publisher_db").replace("'", "''")
-
-    # create table, replication and subscription for each of the databases
-    with endpoint.cursor(dbname="publisher_db") as publisher_cursor:
-        for i in range(NUMBER_OF_DBS):
-            publisher_cursor.execute(f"CREATE TABLE t{i}(a int)")
-            publisher_cursor.execute(f"CREATE PUBLICATION mypub{i} FOR TABLE t{i}")
-            publisher_cursor.execute(
-                f"select pg_catalog.pg_create_logical_replication_slot('mysub{i}', 'pgoutput');"
-            )
-            publisher_cursor.execute(f"INSERT INTO t{i} VALUES ({i})")
-
-            with endpoint.cursor(dbname=f"db{i}") as cursor:
-                cursor.execute(f"CREATE TABLE t{i}(a int)")
-                cursor.execute(
-                    f"CREATE SUBSCRIPTION mysub{i} CONNECTION '{connstr}' PUBLICATION mypub{i}  WITH (create_slot = false) "
-                )
-
-    # wait for the subscription to be active
-    for i in range(NUMBER_OF_DBS):
-        logical_replication_sync(
-            endpoint,
-            endpoint,
-            f"mysub{i}",
-            sub_dbname=f"db{i}",
-            pub_dbname="publisher_db",
-        )
-
-    # Check that replication is working
-    for i in range(NUMBER_OF_DBS):
-        with endpoint.cursor(dbname=f"db{i}") as cursor:
-            cursor.execute(f"SELECT * FROM t{i}")
-            rows = cursor.fetchall()
-            assert len(rows) == 1
-            assert rows[0][0] == i
-
-            last_insert_lsn = query_scalar(cursor, "select pg_current_wal_insert_lsn();")
-
-    def start_publisher_workload(table_num: int, duration: int):
-        start = time.time()
-        with endpoint.cursor(dbname="publisher_db") as cur:
-            while time.time() - start < duration:
-                cur.execute(f"INSERT INTO t{i} SELECT FROM generate_series(1,1000)")
-
-    LOAD_DURATION = 5
-    threads = [
-        threading.Thread(target=start_publisher_workload, args=(i, LOAD_DURATION))
-        for i in range(NUMBER_OF_DBS)
-    ]
-
-    for thread in threads:
-        thread.start()
-
-    sub_child_1_timeline_id = env.create_branch(
-        "subscriber_child_1",
-        ancestor_branch_name="main",
-        ancestor_start_lsn=last_insert_lsn,
-    )
-
-    sub_child_1 = env.endpoints.create("subscriber_child_1")
-
-    sub_child_1.respec(
-        skip_pg_catalog_updates=False,
-        reconfigure_concurrency=5,
-        drop_subscriptions_before_start=True,
-        cluster={
-            "databases": TEST_DB_NAMES,
-            "roles": [],
-        },
-    )
-
-    sub_child_1.start()
-
-    # ensure that subscription deletion happened on this timeline
-    with sub_child_1.cursor() as scur_postgres:
-        scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done")
-        res = scur_postgres.fetchall()
-        log.info(f"res = {res}")
-        assert len(res) == 1
-        assert str(sub_child_1_timeline_id) == res[0][0]
-
-    # ensure that there are no subscriptions in the databases
-    for i in range(NUMBER_OF_DBS):
-        with sub_child_1.cursor(dbname=f"db{i}") as cursor:
-            cursor.execute("SELECT * FROM pg_catalog.pg_subscription")
-            res = cursor.fetchall()
-            assert len(res) == 0
-
-            # ensure that there are no unexpected rows in the tables
-            cursor.execute(f"SELECT * FROM t{i}")
-            rows = cursor.fetchall()
-            assert len(rows) == 1
-            assert rows[0][0] == i
-
-    for thread in threads:
-        thread.join()
-
-    # ensure that logical replication is still working in main endpoint
-    # wait for it to catch up
-    for i in range(NUMBER_OF_DBS):
-        logical_replication_sync(
-            endpoint,
-            endpoint,
-            f"mysub{i}",
-            sub_dbname=f"db{i}",
-            pub_dbname="publisher_db",
-        )
-
-    # verify that the data is the same in publisher and subscriber tables
-    with endpoint.cursor(dbname="publisher_db") as publisher_cursor:
-        for i in range(NUMBER_OF_DBS):
-            with endpoint.cursor(dbname=f"db{i}") as cursor:
-                publisher_cursor.execute(f"SELECT count(*) FROM t{i}")
-                cursor.execute(f"SELECT count(*) FROM t{i}")
-                pub_res = publisher_cursor.fetchone()
-                sub_res = cursor.fetchone()
-                log.info(f"for table t{i}: pub_res = {pub_res}, sub_res = {sub_res}")
-                assert pub_res == sub_res
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -5,11 +5,11 @@
  ],
  "v16": [
    "16.8",
-    "261ed10e9b8c8dda01ad7aefb18e944e30aa161d"
+    "6cb8d22079570b50fcaff29124d40807c1e63a82"
  ],
  "v15": [
    "15.12",
-    "6ff50443773b69749e16da6db9d4f4b19064b4b7"
+    "023f1020ecb07af3bb0ddbf4622e1a3c3fa276a4"
  ],
  "v14": [
    "14.17",
				`@@ -1 +0,0 @@`
				`ALTER TABLE nodes ADD listen_https_port INTEGER;`