Merge pull request #11023 from neondatabase/rc/release-proxy/2025-02-27

Proxy release 2025-02-27
2026-06-19 13:20:37 +00:00 · 2025-02-27 19:10:58 +01:00 · 2025-02-27 16:18:42 +00:00 · 2025-02-27 13:57:49 +02:00 · 2025-02-27 00:17:57 +00:00 · 2025-02-20 10:37:47 +01:00
16 changed files with 489 additions and 586 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -831,7 +831,7 @@ jobs:
              || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag
              || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release
            }}
-          TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }}
+          TEST_EXTENSIONS_TAG: latest
          NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }}
          OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }}
        run: ./docker-compose/test_extensions_upgrade.sh
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,4 +1,5 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
+use std::iter::once;
 use std::os::unix::fs::{PermissionsExt, symlink};
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -12,7 +13,9 @@ use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{ComputeMetrics, ComputeStatus};
-use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent};
+use compute_api::spec::{
+    ComputeFeature, ComputeMode, ComputeSpec, Database, ExtVersion, PgIdent, Role,
+};
 use futures::StreamExt;
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
@@ -31,6 +34,16 @@ use utils::measured_stream::MeasuredReader;
 use crate::installed_extensions::get_installed_extensions;
 use crate::pg_helpers::*;
 use crate::spec::*;
+use crate::spec_apply::ApplySpecPhase::{
+    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
+    CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
+    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
+    RunInEachDatabase,
+};
+use crate::spec_apply::PerDatabasePhase::{
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
+};
+use crate::spec_apply::{DB, MutableApplyContext, PerDatabasePhase, apply_operations};
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
 use crate::{config, extension_server, local_proxy};

@@ -915,6 +928,388 @@ impl ComputeNode {
        Ok(client)
    }

+    /// Apply the spec to the running PostgreSQL instance.
+    /// The caller can decide to run with multiple clients in parallel, or
+    /// single mode.  Either way, the commands executed will be the same, and
+    /// only commands run in different databases are parallelized.
+    #[instrument(skip_all)]
+    pub fn apply_spec_sql(
+        &self,
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        concurrency: usize,
+    ) -> Result<()> {
+        info!("Applying config with max {} concurrency", concurrency);
+        debug!("Config: {:?}", spec);
+
+        let rt = tokio::runtime::Handle::current();
+        rt.block_on(async {
+            // Proceed with post-startup configuration. Note that the order of operations is important.
+            let client = Self::get_maintenance_client(&conf).await?;
+            let spec = spec.clone();
+
+            let databases = get_existing_dbs_async(&client).await?;
+            let roles = get_existing_roles_async(&client)
+                .await?
+                .into_iter()
+                .map(|role| (role.name.clone(), role))
+                .collect::<HashMap<String, Role>>();
+
+            // Check if we need to drop subscriptions before starting the endpoint.
+            //
+            // It is important to do this operation exactly once when endpoint starts on a new branch.
+            // Otherwise, we may drop newly created subscriptions instead of only the inherited ones.
+            //
+            // We cannot rely only on spec.drop_subscriptions_before_start flag,
+            // because if for some reason compute restarts inside VM,
+            // it will start again with the same spec and flag value.
+            //
+            // To handle this, we save the fact of the operation in the database
+            // in the neon.drop_subscriptions_done table.
+            // If the table does not exist, we assume that the operation was never performed, so we must do it.
+            // If the table exists, we check if the operation was performed on the current timeline.
+            //
+            let mut drop_subscriptions_done = false;
+
+            if spec.drop_subscriptions_before_start {
+                let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
+                let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
+
+                info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
+
+                drop_subscriptions_done =  match
+                    client.simple_query(&query).await {
+                    Ok(result) => {
+                        matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
+                    },
+                    Err(e) =>
+                    {
+                        match e.code() {
+                            Some(&SqlState::UNDEFINED_TABLE) => false,
+                            _ => {
+                                // We don't expect any other error here, except for the schema/table not existing
+                                error!("Error checking if drop subscription operation was already performed: {}", e);
+                                return Err(e.into());
+                            }
+                        }
+                    }
+                }
+            };
+
+
+            let jwks_roles = Arc::new(
+                spec.as_ref()
+                    .local_proxy_config
+                    .iter()
+                    .flat_map(|it| &it.jwks)
+                    .flatten()
+                    .flat_map(|setting| &setting.role_names)
+                    .cloned()
+                    .collect::<HashSet<_>>(),
+            );
+
+            let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
+                roles,
+                dbs: databases,
+            }));
+
+            // Apply special pre drop database phase.
+            // NOTE: we use the code of RunInEachDatabase phase for parallelism
+            // and connection management, but we don't really run it in *each* database,
+            // only in the databases we're about to drop.
+            info!("Applying PerDatabase (pre-dropdb) phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            // Run the phase for each database that we're about to drop.
+            let db_processes = spec
+                .delta_operations
+                .iter()
+                .flatten()
+                .filter_map(move |op| {
+                    if op.action.as_str() == "delete_db" {
+                        Some(op.name.clone())
+                    } else {
+                        None
+                    }
+                })
+                .map(|dbname| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    // We only need dbname field for this phase, so set other fields to dummy values
+                    let db = DB::UserDB(Database {
+                        name: dbname.clone(),
+                        owner: "cloud_admin".to_string(),
+                        options: None,
+                        restrict_conn: false,
+                        invalid: false,
+                    });
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        [DropLogicalSubscriptions].to_vec(),
+                    );
+
+                    Ok(spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                if let Err(e) = handle.await? {
+                    // Handle the error case where the database does not exist
+                    // We do not check whether the DB exists or not in the deletion phase,
+                    // so we shouldn't be strict about it in pre-deletion cleanup as well.
+                    if e.to_string().contains("does not exist") {
+                        warn!("Error dropping subscription: {}", e);
+                    } else {
+                        return Err(e);
+                    }
+                };
+            }
+
+            for phase in [
+                CreateSuperUser,
+                DropInvalidDatabases,
+                RenameRoles,
+                CreateAndAlterRoles,
+                RenameAndDeleteDatabases,
+                CreateAndAlterDatabases,
+                CreateSchemaNeon,
+            ] {
+                info!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            info!("Applying RunInEachDatabase2 phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            let db_processes = spec
+                .cluster
+                .databases
+                .iter()
+                .map(|db| DB::new(db.clone()))
+                // also run the per-database phases for DB::SystemDB
+                .chain(once(DB::SystemDB))
+                .map(|db| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    let db = db.clone();
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let mut phases = vec![
+                        DeleteDBRoleReferences,
+                        ChangeSchemaPerms,
+                        HandleAnonExtension,
+                    ];
+
+                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                        info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                        phases.push(DropLogicalSubscriptions);
+                    }
+
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        phases,
+                    );
+
+                    Ok(spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                handle.await??;
+            }
+
+            let mut phases = vec![
+                HandleOtherExtensions,
+                HandleNeonExtension, // This step depends on CreateSchemaNeon
+                CreateAvailabilityCheck,
+                DropRoles,
+            ];
+
+            // This step depends on CreateSchemaNeon
+            if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                phases.push(FinalizeDropLogicalSubscriptions);
+            }
+
+            for phase in phases {
+                debug!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            Ok::<(), anyhow::Error>(())
+        })?;
+
+        Ok(())
+    }
+
+    /// Apply SQL migrations of the RunInEachDatabase phase.
+    ///
+    /// May opt to not connect to databases that don't have any scheduled
+    /// operations.  The function is concurrency-controlled with the provided
+    /// semaphore.  The caller has to make sure the semaphore isn't exhausted.
+    async fn apply_spec_sql_db(
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
+        jwks_roles: Arc<HashSet<String>>,
+        concurrency_token: Arc<tokio::sync::Semaphore>,
+        db: DB,
+        subphases: Vec<PerDatabasePhase>,
+    ) -> Result<()> {
+        let _permit = concurrency_token.acquire().await?;
+
+        let mut client_conn = None;
+
+        for subphase in subphases {
+            apply_operations(
+                spec.clone(),
+                ctx.clone(),
+                jwks_roles.clone(),
+                RunInEachDatabase {
+                    db: db.clone(),
+                    subphase,
+                },
+                // Only connect if apply_operations actually wants a connection.
+                // It's quite possible this database doesn't need any queries,
+                // so by not connecting we save time and effort connecting to
+                // that database.
+                || async {
+                    if client_conn.is_none() {
+                        let db_client = Self::get_maintenance_client(&conf).await?;
+                        client_conn.replace(db_client);
+                    }
+                    let client = client_conn.as_ref().unwrap();
+                    Ok(client)
+                },
+            )
+            .await?;
+        }
+
+        drop(client_conn);
+
+        Ok::<(), anyhow::Error>(())
+    }
+
+    /// Choose how many concurrent connections to use for applying the spec changes.
+    pub fn max_service_connections(
+        &self,
+        compute_state: &ComputeState,
+        spec: &ComputeSpec,
+    ) -> usize {
+        // If the cluster is in Init state we don't have to deal with user connections,
+        // and can thus use all `max_connections` connection slots. However, that's generally not
+        // very efficient, so we generally still limit it to a smaller number.
+        if compute_state.status == ComputeStatus::Init {
+            // If the settings contain 'max_connections', use that as template
+            if let Some(config) = spec.cluster.settings.find("max_connections") {
+                config.parse::<usize>().ok()
+            } else {
+                // Otherwise, try to find the setting in the postgresql_conf string
+                spec.cluster
+                    .postgresql_conf
+                    .iter()
+                    .flat_map(|conf| conf.split("\n"))
+                    .filter_map(|line| {
+                        if !line.contains("max_connections") {
+                            return None;
+                        }
+
+                        let (key, value) = line.split_once("=")?;
+                        let key = key
+                            .trim_start_matches(char::is_whitespace)
+                            .trim_end_matches(char::is_whitespace);
+
+                        let value = value
+                            .trim_start_matches(char::is_whitespace)
+                            .trim_end_matches(char::is_whitespace);
+
+                        if key != "max_connections" {
+                            return None;
+                        }
+
+                        value.parse::<usize>().ok()
+                    })
+                    .next()
+            }
+            // If max_connections is present, use at most 1/3rd of that.
+            // When max_connections is lower than 30, try to use at least 10 connections, but
+            // never more than max_connections.
+            .map(|limit| match limit {
+                0..10 => limit,
+                10..30 => 10,
+                30.. => limit / 3,
+            })
+            // If we didn't find max_connections, default to 10 concurrent connections.
+            .unwrap_or(10)
+        } else {
+            // status != Init; the reasoning below assumes the Running state.
+            // Because the cluster is already in the Running state, we should assume users are
+            // already connected to the cluster, and high concurrency could negatively
+            // impact user connectivity. Therefore, we can limit concurrency to the number of
+            // reserved superuser connections, which users wouldn't be able to use anyway.
+            spec.cluster
+                .settings
+                .find("superuser_reserved_connections")
+                .iter()
+                .filter_map(|val| val.parse::<usize>().ok())
+                .map(|val| if val > 1 { val - 1 } else { 1 })
+                .last()
+                .unwrap_or(3)
+        }
+    }
+
    /// Do initial configuration of the already started Postgres.
    #[instrument(skip_all)]
    pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -4,413 +4,15 @@ use std::future::Future;
 use std::iter::{empty, once};
 use std::sync::Arc;

-use anyhow::{Context, Result};
-use compute_api::responses::ComputeStatus;
+use anyhow::Result;
 use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
 use futures::future::join_all;
 use tokio::sync::RwLock;
 use tokio_postgres::Client;
-use tokio_postgres::error::SqlState;
-use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
+use tracing::{Instrument, debug, info_span, warn};

-use crate::compute::{ComputeNode, ComputeState, construct_superuser_query};
-use crate::pg_helpers::{
-    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async,
-    get_existing_roles_async,
-};
-use crate::spec_apply::ApplySpecPhase::{
-    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
-    CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
-    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
-    RunInEachDatabase,
-};
-use crate::spec_apply::PerDatabasePhase::{
-    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
-};
-
-impl ComputeNode {
-    /// Apply the spec to the running PostgreSQL instance.
-    /// The caller can decide to run with multiple clients in parallel, or
-    /// single mode.  Either way, the commands executed will be the same, and
-    /// only commands run in different databases are parallelized.
-    #[instrument(skip_all)]
-    pub fn apply_spec_sql(
-        &self,
-        spec: Arc<ComputeSpec>,
-        conf: Arc<tokio_postgres::Config>,
-        concurrency: usize,
-    ) -> Result<()> {
-        info!("Applying config with max {} concurrency", concurrency);
-        debug!("Config: {:?}", spec);
-
-        let rt = tokio::runtime::Handle::current();
-        rt.block_on(async {
-            // Proceed with post-startup configuration. Note, that order of operations is important.
-            let client = Self::get_maintenance_client(&conf).await?;
-            let spec = spec.clone();
-
-            let databases = get_existing_dbs_async(&client).await?;
-            let roles = get_existing_roles_async(&client)
-                .await?
-                .into_iter()
-                .map(|role| (role.name.clone(), role))
-                .collect::<HashMap<String, Role>>();
-
-            // Check if we need to drop subscriptions before starting the endpoint.
-            //
-            // It is important to do this operation exactly once when endpoint starts on a new branch.
-            // Otherwise, we may drop not inherited, but newly created subscriptions.
-            //
-            // We cannot rely only on spec.drop_subscriptions_before_start flag,
-            // because if for some reason compute restarts inside VM,
-            // it will start again with the same spec and flag value.
-            //
-            // To handle this, we save the fact of the operation in the database
-            // in the neon.drop_subscriptions_done table.
-            // If the table does not exist, we assume that the operation was never performed, so we must do it.
-            // If table exists, we check if the operation was performed on the current timelilne.
-            //
-            let mut drop_subscriptions_done = false;
-
-            if spec.drop_subscriptions_before_start {
-                let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
-                let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
-
-                info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
-
-                drop_subscriptions_done =  match
-                    client.simple_query(&query).await {
-                    Ok(result) => {
-                        matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
-                    },
-                    Err(e) =>
-                    {
-                        match e.code() {
-                            Some(&SqlState::UNDEFINED_TABLE) => false,
-                            _ => {
-                                // We don't expect any other error here, except for the schema/table not existing
-                                error!("Error checking if drop subscription operation was already performed: {}", e);
-                                return Err(e.into());
-                            }
-                        }
-                    }
-                }
-            };
-
-
-            let jwks_roles = Arc::new(
-                spec.as_ref()
-                    .local_proxy_config
-                    .iter()
-                    .flat_map(|it| &it.jwks)
-                    .flatten()
-                    .flat_map(|setting| &setting.role_names)
-                    .cloned()
-                    .collect::<HashSet<_>>(),
-            );
-
-            let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
-                roles,
-                dbs: databases,
-            }));
-
-            // Apply special pre drop database phase.
-            // NOTE: we use the code of RunInEachDatabase phase for parallelism
-            // and connection management, but we don't really run it in *each* database,
-            // only in databases, we're about to drop.
-            info!("Applying PerDatabase (pre-dropdb) phase");
-            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
-
-            // Run the phase for each database that we're about to drop.
-            let db_processes = spec
-                .delta_operations
-                .iter()
-                .flatten()
-                .filter_map(move |op| {
-                    if op.action.as_str() == "delete_db" {
-                        Some(op.name.clone())
-                    } else {
-                        None
-                    }
-                })
-                .map(|dbname| {
-                    let spec = spec.clone();
-                    let ctx = ctx.clone();
-                    let jwks_roles = jwks_roles.clone();
-                    let mut conf = conf.as_ref().clone();
-                    let concurrency_token = concurrency_token.clone();
-                    // We only need dbname field for this phase, so set other fields to dummy values
-                    let db = DB::UserDB(Database {
-                        name: dbname.clone(),
-                        owner: "cloud_admin".to_string(),
-                        options: None,
-                        restrict_conn: false,
-                        invalid: false,
-                    });
-
-                    debug!("Applying per-database phases for Database {:?}", &db);
-
-                    match &db {
-                        DB::SystemDB => {}
-                        DB::UserDB(db) => {
-                            conf.dbname(db.name.as_str());
-                        }
-                    }
-
-                    let conf = Arc::new(conf);
-                    let fut = Self::apply_spec_sql_db(
-                        spec.clone(),
-                        conf,
-                        ctx.clone(),
-                        jwks_roles.clone(),
-                        concurrency_token.clone(),
-                        db,
-                        [DropLogicalSubscriptions].to_vec(),
-                    );
-
-                    Ok(tokio::spawn(fut))
-                })
-                .collect::<Vec<Result<_, anyhow::Error>>>();
-
-            for process in db_processes.into_iter() {
-                let handle = process?;
-                if let Err(e) = handle.await? {
-                    // Handle the error case where the database does not exist
-                    // We do not check whether the DB exists or not in the deletion phase,
-                    // so we shouldn't be strict about it in pre-deletion cleanup as well.
-                    if e.to_string().contains("does not exist") {
-                        warn!("Error dropping subscription: {}", e);
-                    } else {
-                        return Err(e);
-                    }
-                };
-            }
-
-            for phase in [
-                CreateSuperUser,
-                DropInvalidDatabases,
-                RenameRoles,
-                CreateAndAlterRoles,
-                RenameAndDeleteDatabases,
-                CreateAndAlterDatabases,
-                CreateSchemaNeon,
-            ] {
-                info!("Applying phase {:?}", &phase);
-                apply_operations(
-                    spec.clone(),
-                    ctx.clone(),
-                    jwks_roles.clone(),
-                    phase,
-                    || async { Ok(&client) },
-                )
-                .await?;
-            }
-
-            info!("Applying RunInEachDatabase2 phase");
-            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
-
-            let db_processes = spec
-                .cluster
-                .databases
-                .iter()
-                .map(|db| DB::new(db.clone()))
-                // include
-                .chain(once(DB::SystemDB))
-                .map(|db| {
-                    let spec = spec.clone();
-                    let ctx = ctx.clone();
-                    let jwks_roles = jwks_roles.clone();
-                    let mut conf = conf.as_ref().clone();
-                    let concurrency_token = concurrency_token.clone();
-                    let db = db.clone();
-
-                    debug!("Applying per-database phases for Database {:?}", &db);
-
-                    match &db {
-                        DB::SystemDB => {}
-                        DB::UserDB(db) => {
-                            conf.dbname(db.name.as_str());
-                        }
-                    }
-
-                    let conf = Arc::new(conf);
-                    let mut phases = vec![
-                        DeleteDBRoleReferences,
-                        ChangeSchemaPerms,
-                        HandleAnonExtension,
-                    ];
-
-                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
-                        info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
-                        phases.push(DropLogicalSubscriptions);
-                    }
-
-                    let fut = Self::apply_spec_sql_db(
-                        spec.clone(),
-                        conf,
-                        ctx.clone(),
-                        jwks_roles.clone(),
-                        concurrency_token.clone(),
-                        db,
-                        phases,
-                    );
-
-                    Ok(tokio::spawn(fut))
-                })
-                .collect::<Vec<Result<_, anyhow::Error>>>();
-
-            for process in db_processes.into_iter() {
-                let handle = process?;
-                handle.await??;
-            }
-
-            let mut phases = vec![
-                HandleOtherExtensions,
-                HandleNeonExtension, // This step depends on CreateSchemaNeon
-                CreateAvailabilityCheck,
-                DropRoles,
-            ];
-
-            // This step depends on CreateSchemaNeon
-            if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
-                info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
-                phases.push(FinalizeDropLogicalSubscriptions);
-            }
-
-            for phase in phases {
-                debug!("Applying phase {:?}", &phase);
-                apply_operations(
-                    spec.clone(),
-                    ctx.clone(),
-                    jwks_roles.clone(),
-                    phase,
-                    || async { Ok(&client) },
-                )
-                .await?;
-            }
-
-            Ok::<(), anyhow::Error>(())
-        })?;
-
-        Ok(())
-    }
-
-    /// Apply SQL migrations of the RunInEachDatabase phase.
-    ///
-    /// May opt to not connect to databases that don't have any scheduled
-    /// operations.  The function is concurrency-controlled with the provided
-    /// semaphore.  The caller has to make sure the semaphore isn't exhausted.
-    async fn apply_spec_sql_db(
-        spec: Arc<ComputeSpec>,
-        conf: Arc<tokio_postgres::Config>,
-        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
-        jwks_roles: Arc<HashSet<String>>,
-        concurrency_token: Arc<tokio::sync::Semaphore>,
-        db: DB,
-        subphases: Vec<PerDatabasePhase>,
-    ) -> Result<()> {
-        let _permit = concurrency_token.acquire().await?;
-
-        let mut client_conn = None;
-
-        for subphase in subphases {
-            apply_operations(
-                spec.clone(),
-                ctx.clone(),
-                jwks_roles.clone(),
-                RunInEachDatabase {
-                    db: db.clone(),
-                    subphase,
-                },
-                // Only connect if apply_operation actually wants a connection.
-                // It's quite possible this database doesn't need any queries,
-                // so by not connecting we save time and effort connecting to
-                // that database.
-                || async {
-                    if client_conn.is_none() {
-                        let db_client = Self::get_maintenance_client(&conf).await?;
-                        client_conn.replace(db_client);
-                    }
-                    let client = client_conn.as_ref().unwrap();
-                    Ok(client)
-                },
-            )
-            .await?;
-        }
-
-        drop(client_conn);
-
-        Ok::<(), anyhow::Error>(())
-    }
-
-    /// Choose how many concurrent connections to use for applying the spec changes.
-    pub fn max_service_connections(
-        &self,
-        compute_state: &ComputeState,
-        spec: &ComputeSpec,
-    ) -> usize {
-        // If the cluster is in Init state we don't have to deal with user connections,
-        // and can thus use all `max_connections` connection slots. However, that's generally not
-        // very efficient, so we generally still limit it to a smaller number.
-        if compute_state.status == ComputeStatus::Init {
-            // If the settings contain 'max_connections', use that as template
-            if let Some(config) = spec.cluster.settings.find("max_connections") {
-                config.parse::<usize>().ok()
-            } else {
-                // Otherwise, try to find the setting in the postgresql_conf string
-                spec.cluster
-                    .postgresql_conf
-                    .iter()
-                    .flat_map(|conf| conf.split("\n"))
-                    .filter_map(|line| {
-                        if !line.contains("max_connections") {
-                            return None;
-                        }
-
-                        let (key, value) = line.split_once("=")?;
-                        let key = key
-                            .trim_start_matches(char::is_whitespace)
-                            .trim_end_matches(char::is_whitespace);
-
-                        let value = value
-                            .trim_start_matches(char::is_whitespace)
-                            .trim_end_matches(char::is_whitespace);
-
-                        if key != "max_connections" {
-                            return None;
-                        }
-
-                        value.parse::<usize>().ok()
-                    })
-                    .next()
-            }
-            // If max_connections is present, use at most 1/3rd of that.
-            // When max_connections is lower than 30, try to use at least 10 connections, but
-            // never more than max_connections.
-            .map(|limit| match limit {
-                0..10 => limit,
-                10..30 => 10,
-                30.. => limit / 3,
-            })
-            // If we didn't find max_connections, default to 10 concurrent connections.
-            .unwrap_or(10)
-        } else {
-            // state == Running
-            // Because the cluster is already in the Running state, we should assume users are
-            // already connected to the cluster, and high concurrency could negatively
-            // impact user connectivity. Therefore, we can limit concurrency to the number of
-            // reserved superuser connections, which users wouldn't be able to use anyway.
-            spec.cluster
-                .settings
-                .find("superuser_reserved_connections")
-                .iter()
-                .filter_map(|val| val.parse::<usize>().ok())
-                .map(|val| if val > 1 { val - 1 } else { 1 })
-                .last()
-                .unwrap_or(3)
-        }
-    }
-}
+use crate::compute::construct_superuser_query;
+use crate::pg_helpers::{DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal};

 #[derive(Clone)]
 pub enum DB {
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -58,7 +58,7 @@ function check_timeline() {
 # Accepts the tag for the compute node and the timeline as parameters.
 function restart_compute() {
  docker compose down compute compute_is_ready
-  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+  COMPUTE_TAG=${1} TAG=${OLD_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
  wait_for_ready
  check_timeline ${2}
 }
@@ -82,7 +82,7 @@ EXTENSIONS='[
 {"extname": "pg_repack", "extdir": "pg_repack-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
-COMPUTE_TAG=${NEW_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
+TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
 wait_for_ready
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
@@ -90,7 +90,7 @@ create_extensions "${EXTNAMES}"
 query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
 new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
 docker compose --profile test-extensions down
-COMPUTE_TAG=${OLD_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
+TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
 wait_for_ready
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -480,7 +480,6 @@ impl Client {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        concurrency: Option<usize>,
-        recurse: bool,
    ) -> Result<()> {
        let mut path = reqwest::Url::parse(&format!(
            "{}/v1/tenant/{}/timeline/{}/download_heatmap_layers",
@@ -488,9 +487,6 @@ impl Client {
        ))
        .expect("Cannot build URL");

-        path.query_pairs_mut()
-            .append_pair("recurse", &format!("{}", recurse));
-
        if let Some(concurrency) = concurrency {
            path.query_pairs_mut()
                .append_pair("concurrency", &format!("{}", concurrency));
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1435,7 +1435,6 @@ async fn timeline_download_heatmap_layers_handler(

    let desired_concurrency =
        parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY);
-    let recurse = parse_query_param(&request, "recurse")?.unwrap_or(false);

    check_permission(&request, Some(tenant_shard_id.tenant_id))?;

@@ -1452,7 +1451,9 @@ async fn timeline_download_heatmap_layers_handler(
        .unwrap_or(DEFAULT_MAX_CONCURRENCY);
    let concurrency = std::cmp::min(max_concurrency, desired_concurrency);

-    timeline.start_heatmap_layers_download(concurrency, recurse, &ctx)?;
+    timeline
+        .start_heatmap_layers_download(concurrency, &ctx)
+        .await?;

    json_response(StatusCode::ACCEPTED, ())
 }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -1052,8 +1052,6 @@ impl Timeline {
    ) -> Result<u64, CalculateLogicalSizeError> {
        debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();

-        fail::fail_point!("skip-logical-size-calculation", |_| { Ok(0) });
-
        // Fetch list of database dirs and iterate them
        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
        let dbdir = DbDirectory::des(&buf)?;
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1153,15 +1153,12 @@ impl Tenant {
            let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn()));
            while let Some((tline, end_lsn)) = tline_ending_at {
                let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await;
-                // Another unearchived timeline might have generated a heatmap for this ancestor.
-                // If the current branch point greater than the previous one use the the heatmap
-                // we just generated - it should include more layers.
-                if !tline.should_keep_previous_heatmap(end_lsn) {
+                if !tline.is_previous_heatmap_active() {
                    tline
                        .previous_heatmap
                        .store(Some(Arc::new(unarchival_heatmap)));
                } else {
-                    tracing::info!("Previous heatmap preferred. Dropping unarchival heatmap.")
+                    tracing::info!("Previous heatmap still active. Dropping unarchival heatmap.")
                }

                match tline.ancestor_timeline() {
@@ -1942,7 +1939,6 @@ impl Tenant {
                hs.0.remove(&timeline_id).map(|h| PreviousHeatmap::Active {
                    heatmap: h,
                    read_at: hs.1,
-                    end_lsn: None,
                })
            });
            part_downloads.spawn(
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -442,8 +442,6 @@ pub(crate) enum PreviousHeatmap {
    Active {
        heatmap: HeatMapTimeline,
        read_at: std::time::Instant,
-        // End LSN covered by the heatmap if known
-        end_lsn: Option<Lsn>,
    },
    Obsolete,
 }
@@ -3572,16 +3570,12 @@ impl Timeline {
        Ok(layer)
    }

-    pub(super) fn should_keep_previous_heatmap(&self, new_heatmap_end_lsn: Lsn) -> bool {
-        let crnt = self.previous_heatmap.load();
-        match crnt.as_deref() {
-            Some(PreviousHeatmap::Active { end_lsn, .. }) => match end_lsn {
-                Some(crnt_end_lsn) => *crnt_end_lsn > new_heatmap_end_lsn,
-                None => true,
-            },
-            Some(PreviousHeatmap::Obsolete) => false,
-            None => false,
-        }
+    pub(super) fn is_previous_heatmap_active(&self) -> bool {
+        self.previous_heatmap
+            .load()
+            .as_ref()
+            .map(|prev| matches!(**prev, PreviousHeatmap::Active { .. }))
+            .unwrap_or(false)
    }

    /// The timeline heatmap is a hint to secondary locations from the primary location,
@@ -3609,26 +3603,26 @@ impl Timeline {
        // heatamp.
        let previous_heatmap = self.previous_heatmap.load();
        let visible_non_resident = match previous_heatmap.as_deref() {
-            Some(PreviousHeatmap::Active {
-                heatmap, read_at, ..
-            }) => Some(heatmap.layers.iter().filter_map(|hl| {
-                let desc: PersistentLayerDesc = hl.name.clone().into();
-                let layer = guard.try_get_from_key(&desc.key())?;
+            Some(PreviousHeatmap::Active { heatmap, read_at }) => {
+                Some(heatmap.layers.iter().filter_map(|hl| {
+                    let desc: PersistentLayerDesc = hl.name.clone().into();
+                    let layer = guard.try_get_from_key(&desc.key())?;

-                if layer.visibility() == LayerVisibilityHint::Covered {
-                    return None;
-                }
+                    if layer.visibility() == LayerVisibilityHint::Covered {
+                        return None;
+                    }

-                if layer.is_likely_resident() {
-                    return None;
-                }
+                    if layer.is_likely_resident() {
+                        return None;
+                    }

-                if layer.last_evicted_at().happened_after(*read_at) {
-                    return None;
-                }
+                    if layer.last_evicted_at().happened_after(*read_at) {
+                        return None;
+                    }

-                Some((desc, hl.metadata.clone(), hl.access_time))
-            })),
+                    Some((desc, hl.metadata.clone(), hl.access_time))
+                }))
+            }
            Some(PreviousHeatmap::Obsolete) => None,
            None => None,
        };
@@ -3715,7 +3709,6 @@ impl Timeline {
        PreviousHeatmap::Active {
            heatmap,
            read_at: Instant::now(),
-            end_lsn: Some(end_lsn),
        }
    }

@@ -7053,7 +7046,6 @@ mod tests {
            .store(Some(Arc::new(PreviousHeatmap::Active {
                heatmap: heatmap.clone(),
                read_at: std::time::Instant::now(),
-                end_lsn: None,
            })));

        // Generate a new heatmap and assert that it contains the same layers as the old one.
@@ -7156,7 +7148,6 @@ mod tests {
            .store(Some(Arc::new(PreviousHeatmap::Active {
                heatmap: heatmap.clone(),
                read_at: std::time::Instant::now(),
-                end_lsn: None,
            })));

        // Evict all the layers in the previous heatmap
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -213,33 +213,30 @@ impl GcCompactionQueue {
    }

    /// Trigger an auto compaction.
-    pub async fn trigger_auto_compaction(
-        &self,
-        timeline: &Arc<Timeline>,
-    ) -> Result<(), CompactionError> {
+    pub async fn trigger_auto_compaction(&self, timeline: &Arc<Timeline>) {
        let GcCompactionCombinedSettings {
            gc_compaction_enabled,
            gc_compaction_initial_threshold_kb,
            gc_compaction_ratio_percent,
        } = timeline.get_gc_compaction_settings();
        if !gc_compaction_enabled {
-            return Ok(());
+            return;
        }
        if self.remaining_jobs_num() > 0 {
            // Only schedule auto compaction when the queue is empty
-            return Ok(());
+            return;
        }
        if timeline.ancestor_timeline().is_some() {
            // Do not trigger auto compaction for child timelines. We haven't tested
            // it enough in staging yet.
-            return Ok(());
+            return;
        }

        let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else {
            // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure
            // the fairness of the lock across timelines. We should listen for both `acquire` and `l0_compaction_trigger`
            // to ensure the fairness while avoid starving other tasks.
-            return Ok(());
+            return;
        };

        let gc_compaction_state = timeline.get_gc_compaction_state();
@@ -249,7 +246,7 @@ impl GcCompactionQueue {

        let layers = {
            let guard = timeline.layers.read().await;
-            let layer_map = guard.layer_map()?;
+            let layer_map = guard.layer_map().unwrap();
            layer_map.iter_historic_layers().collect_vec()
        };
        let mut l2_size: u64 = 0;
@@ -326,7 +323,6 @@ impl GcCompactionQueue {
                l1_size, l2_size, l2_lsn, gc_cutoff
            );
        }
-        Ok(())
    }

    /// Notify the caller the job has finished and unblock GC.
@@ -448,7 +444,7 @@ impl GcCompactionQueue {
                None
            }
        }) else {
-            self.trigger_auto_compaction(timeline).await?;
+            self.trigger_auto_compaction(timeline).await;
            // Always yield after triggering auto-compaction. Gc-compaction is a low-priority task and we
            // have not implemented preemption mechanism yet. We always want to yield it to more important
            // tasks if there is one.
--- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
+++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
@@ -32,7 +32,6 @@ impl HeatmapLayersDownloader {
    fn new(
        timeline: Arc<Timeline>,
        concurrency: usize,
-        recurse: bool,
        ctx: RequestContext,
    ) -> Result<HeatmapLayersDownloader, ApiError> {
        let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?;
@@ -99,20 +98,6 @@ impl HeatmapLayersDownloader {
                    },
                    _ = cancel.cancelled() => {
                        tracing::info!("Heatmap layers download cancelled");
-                        return;
-                    }
-                }
-
-                if recurse {
-                    if let Some(ancestor) = timeline.ancestor_timeline() {
-                        let ctx = ctx.attached_child();
-                        let res =
-                            ancestor.start_heatmap_layers_download(concurrency, recurse, &ctx);
-                        if let Err(err) = res {
-                            tracing::info!(
-                                "Failed to start heatmap layers download for ancestor: {err}"
-                            );
-                        }
                    }
                }
            }
@@ -155,20 +140,14 @@ impl HeatmapLayersDownloader {
 }

 impl Timeline {
-    pub(crate) fn start_heatmap_layers_download(
+    pub(crate) async fn start_heatmap_layers_download(
        self: &Arc<Self>,
        concurrency: usize,
-        recurse: bool,
        ctx: &RequestContext,
    ) -> Result<(), ApiError> {
        let mut locked = self.heatmap_layers_downloader.lock().unwrap();
        if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) {
-            let dl = HeatmapLayersDownloader::new(
-                self.clone(),
-                concurrency,
-                recurse,
-                ctx.attached_child(),
-            )?;
+            let dl = HeatmapLayersDownloader::new(self.clone(), concurrency, ctx.attached_child())?;
            *locked = Some(dl);
            Ok(())
        } else {
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -524,10 +524,9 @@ async fn handle_tenant_timeline_download_heatmap_layers(

    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
    let concurrency: Option<usize> = parse_query_param(&req, "concurrency")?;
-    let recurse = parse_query_param(&req, "recurse")?.unwrap_or(false);

    service
-        .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency, recurse)
+        .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
        .await?;

    json_response(StatusCode::OK, ())
@@ -548,7 +547,7 @@ async fn handle_tenant_timeline_passthrough(
    service: Arc<Service>,
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_or_shard_id: TenantShardId = parse_request_param(&req, "tenant_id")?;
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

    let req = match maybe_forward(req).await {
@@ -563,28 +562,15 @@ async fn handle_tenant_timeline_passthrough(
        return Err(ApiError::BadRequest(anyhow::anyhow!("Missing path")));
    };

-    tracing::info!(
-        "Proxying request for tenant {} ({})",
-        tenant_or_shard_id.tenant_id,
-        path
-    );
+    tracing::info!("Proxying request for tenant {} ({})", tenant_id, path);

    // Find the node that holds shard zero
-    let (node, tenant_shard_id) = if tenant_or_shard_id.is_unsharded() {
-        service
-            .tenant_shard0_node(tenant_or_shard_id.tenant_id)
-            .await?
-    } else {
-        (
-            service.tenant_shard_node(tenant_or_shard_id).await?,
-            tenant_or_shard_id,
-        )
-    };
+    let (node, tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?;

    // Callers will always pass an unsharded tenant ID.  Before proxying, we must
    // rewrite this to a shard-aware shard zero ID.
    let path = format!("{}", path);
-    let tenant_str = tenant_or_shard_id.tenant_id.to_string();
+    let tenant_str = tenant_id.to_string();
    let tenant_shard_str = format!("{}", tenant_shard_id);
    let path = path.replace(&tenant_str, &tenant_shard_str);

@@ -624,7 +610,7 @@ async fn handle_tenant_timeline_passthrough(
    // Transform 404 into 503 if we raced with a migration
    if resp.status() == reqwest::StatusCode::NOT_FOUND {
        // Look up node again: if we migrated it will be different
-        let new_node = service.tenant_shard_node(tenant_shard_id).await?;
+        let (new_node, _tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?;
        if new_node.get_id() != node.get_id() {
            // Rather than retry here, send the client a 503 to prompt a retry: this matches
            // the pageserver's use of 503, and all clients calling this API should retry on 503.
--- a/storage_controller/src/pageserver_client.rs
+++ b/storage_controller/src/pageserver_client.rs
@@ -281,19 +281,13 @@ impl PageserverClient {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        concurrency: Option<usize>,
-        recurse: bool,
    ) -> Result<()> {
        measured_request!(
            "download_heatmap_layers",
            crate::metrics::Method::Post,
            &self.node_id_label,
            self.inner
-                .timeline_download_heatmap_layers(
-                    tenant_shard_id,
-                    timeline_id,
-                    concurrency,
-                    recurse
-                )
+                .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
                .await
        )
    }
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -3774,7 +3774,6 @@ impl Service {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        concurrency: Option<usize>,
-        recurse: bool,
    ) -> Result<(), ApiError> {
        let _tenant_lock = trace_shared_lock(
            &self.tenant_op_locks,
@@ -3812,12 +3811,7 @@ impl Service {
            targets,
            |tenant_shard_id, client| async move {
                client
-                    .timeline_download_heatmap_layers(
-                        tenant_shard_id,
-                        timeline_id,
-                        concurrency,
-                        recurse,
-                    )
+                    .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
                    .await
            },
            1,
@@ -4164,14 +4158,16 @@ impl Service {
        }).await?
    }

-    /// When you know the TenantId but not a specific shard, and would like to get the node holding shard 0.
+    /// When you need to send an HTTP request to the pageserver that holds shard0 of a tenant, this
+    /// function looks up and returns the node. If the tenant isn't found, returns Err(ApiError::NotFound)
    pub(crate) async fn tenant_shard0_node(
        &self,
        tenant_id: TenantId,
    ) -> Result<(Node, TenantShardId), ApiError> {
-        let tenant_shard_id = {
+        // Look up in-memory state and maybe use the node from there.
+        {
            let locked = self.inner.read().unwrap();
-            let Some((tenant_shard_id, _shard)) = locked
+            let Some((tenant_shard_id, shard)) = locked
                .tenants
                .range(TenantShardId::tenant_range(tenant_id))
                .next()
@@ -4181,29 +4177,6 @@ impl Service {
                ));
            };

-            *tenant_shard_id
-        };
-
-        self.tenant_shard_node(tenant_shard_id)
-            .await
-            .map(|node| (node, tenant_shard_id))
-    }
-
-    /// When you need to send an HTTP request to the pageserver that holds a shard of a tenant, this
-    /// function looks up and returns node. If the shard isn't found, returns Err(ApiError::NotFound)
-    pub(crate) async fn tenant_shard_node(
-        &self,
-        tenant_shard_id: TenantShardId,
-    ) -> Result<Node, ApiError> {
-        // Look up in-memory state and maybe use the node from there.
-        {
-            let locked = self.inner.read().unwrap();
-            let Some(shard) = locked.tenants.get(&tenant_shard_id) else {
-                return Err(ApiError::NotFound(
-                    anyhow::anyhow!("Tenant shard {tenant_shard_id} not found").into(),
-                ));
-            };
-
            let Some(intent_node_id) = shard.intent.get_attached() else {
                tracing::warn!(
                    tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
@@ -4224,7 +4197,7 @@ impl Service {
                        "Shard refers to nonexistent node"
                    )));
                };
-                return Ok(node.clone());
+                return Ok((node.clone(), *tenant_shard_id));
            }
        };

@@ -4232,34 +4205,29 @@ impl Service {
        // generation state: this will reflect the progress of any ongoing migration.
        // Note that it is not guaranteed to _stay_ here, our caller must still handle
        // the case where they call through to the pageserver and get a 404.
-        let db_result = self
-            .persistence
-            .tenant_generations(tenant_shard_id.tenant_id)
-            .await?;
+        let db_result = self.persistence.tenant_generations(tenant_id).await?;
        let Some(ShardGenerationState {
-            tenant_shard_id: _,
+            tenant_shard_id,
            generation: _,
            generation_pageserver: Some(node_id),
-        }) = db_result
-            .into_iter()
-            .find(|s| s.tenant_shard_id == tenant_shard_id)
+        }) = db_result.first()
        else {
            // This can happen if we raced with a tenant deletion or a shard split.  On a retry
            // the caller will either succeed (shard split case), get a proper 404 (deletion case),
            // or a conflict response (case where tenant was detached in background)
            return Err(ApiError::ResourceUnavailable(
-                format!("Shard {tenant_shard_id} not found in database, or is not attached").into(),
+                "Shard {} not found in database, or is not attached".into(),
            ));
        };
        let locked = self.inner.read().unwrap();
-        let Some(node) = locked.nodes.get(&node_id) else {
+        let Some(node) = locked.nodes.get(node_id) else {
            // This should never happen
            return Err(ApiError::InternalServerError(anyhow::anyhow!(
                "Shard refers to nonexistent node"
            )));
        };

-        Ok(node.clone())
+        Ok((node.clone(), *tenant_shard_id))
    }

    pub(crate) fn tenant_locate(
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2469,21 +2469,12 @@ class NeonStorageController(MetricsGetter, LogUtils):
        response.raise_for_status()
        return [TenantShardId.parse(tid) for tid in response.json()["updated"]]

-    def download_heatmap_layers(
-        self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, recurse: bool | None = None
-    ):
-        url = (
-            f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers"
-        )
-        if recurse is not None:
-            url = url + f"?recurse={str(recurse).lower()}"
-
+    def download_heatmap_layers(self, tenant_shard_id: TenantShardId, timeline_id: TimelineId):
        response = self.request(
            "POST",
-            url,
+            f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
            headers=self.headers(TokenScope.ADMIN),
        )
-
        response.raise_for_status()

    def __enter__(self) -> Self:
--- a/test_runner/regress/test_pageserver_secondary.py
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -938,12 +938,9 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
    # Expect lots of layers
    assert len(ps_attached.list_layers(tenant_id, timeline_id)) > 10

+    # Simulate large data by making layer downloads artificially slow
    for ps in env.pageservers:
-        # Simulate large data by making layer downloads artifically slow
        ps.http_client().configure_failpoints([("secondary-layer-download-sleep", "return(1000)")])
-        # Make the initial logical size calculation lie. Otherwise it on demand downloads
-        # layers and makes accounting difficult.
-        ps.http_client().configure_failpoints(("skip-logical-size-calculation", "return"))

    def timeline_heatmap(tlid):
        assert env.pageserver_remote_storage is not None
@@ -955,6 +952,21 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):

        raise RuntimeError(f"No heatmap for timeline: {tlid}")

+    # Upload a heatmap, so that secondaries have something to download
+    ps_attached.http_client().tenant_heatmap_upload(tenant_id)
+    heatmap_before_migration = timeline_heatmap(timeline_id)
+
+    # This has no chance to succeed: we have lots of layers and each one takes at least 1000ms.
+    # However, it pulls the heatmap, which will be important later.
+    http_client = env.storage_controller.pageserver_api()
+    (status, progress) = http_client.tenant_secondary_download(tenant_id, wait_ms=4000)
+    assert status == 202
+    assert progress["heatmap_mtime"] is not None
+    assert progress["layers_downloaded"] > 0
+    assert progress["bytes_downloaded"] > 0
+    assert progress["layers_total"] > progress["layers_downloaded"]
+    assert progress["bytes_total"] > progress["bytes_downloaded"]
+
    env.storage_controller.allowed_errors.extend(
        [
            ".*Timed out.*downloading layers.*",
@@ -963,7 +975,6 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):

    # Use a custom configuration that gives up earlier than usual.
    # We can't hydrate everything anyway because of the failpoints.
-    # Implicitly, this also uploads a heatmap from the current attached location.
    config = StorageControllerMigrationConfig(
        secondary_warmup_timeout="5s", secondary_download_request_timeout="2s"
    )
@@ -977,17 +988,22 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
    ps_secondary.http_client().tenant_heatmap_upload(tenant_id)
    heatmap_after_migration = timeline_heatmap(timeline_id)

-    local_layers = ps_secondary.list_layers(tenant_id, timeline_id)
-    # We download 1 layer per second and give up within 5 seconds.
-    assert len(local_layers) < 10
+    assert len(heatmap_before_migration["layers"]) > 0

    after_migration_heatmap_layers_count = len(heatmap_after_migration["layers"])
+    assert len(heatmap_before_migration["layers"]) <= after_migration_heatmap_layers_count
+
    log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}")

    env.storage_controller.download_heatmap_layers(
        TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id
    )

+    # Now simulate the case where a child timeline is archived, parent layers
+    # are evicted and the child is unarchived. When the child is unarchived,
+    # itself and the parent update their heatmaps to contain layers needed by the
+    # child. One can warm up the timeline hierarchy since the heatmaps are ready.
+
    def all_layers_downloaded(expected_layer_count: int):
        local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id))

@@ -995,9 +1011,8 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
        assert local_layers_count >= expected_layer_count

    wait_until(lambda: all_layers_downloaded(after_migration_heatmap_layers_count))
+    ps_secondary.http_client().tenant_heatmap_upload(tenant_id)

-    # Read everything and make sure that we're not downloading anything extra.
-    # All hot layers should be available locally now.
    before = (
        ps_secondary.http_client()
        .get_metrics()
@@ -1015,11 +1030,6 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
    workload.stop()
    assert before == after

-    # Now simulate the case where a child timeline is archived, parent layers
-    # are evicted and the child is unarchived. When the child is unarchived,
-    # itself and the parent update their heatmaps to contain layers needed by the
-    # child. One can warm up the timeline hierarchy since the heatmaps are ready.
-
    def check_archival_state(state: TimelineArchivalState, tline):
        timelines = (
            timeline["timeline_id"]
@@ -1054,6 +1064,6 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
    assert expected_locally > 0

    env.storage_controller.download_heatmap_layers(
-        TenantShardId(tenant_id, shard_number=0, shard_count=0), child_timeline_id, recurse=True
+        TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id
    )
    wait_until(lambda: all_layers_downloaded(expected_locally))