Compare commits

...

33 Commits

Author SHA1 Message Date
Christian Schwarz
cabf452fa7 avoid traversing the anyhow Cause chain
Observation: there was only a thin layer of anyhow between the
types for which log_compaction_error chased down the cause chain
and the conversion to CompactionError::Other.

So, remove the implicit #[from] conversion generated by thiserror,
and de-`anyhow`ify / explicitly opt into `::Other` in all the places
that used it previously.
2024-01-23 20:08:31 +01:00
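For illustration, a minimal sketch of the pattern this commit removes, using a simplified stand-in for the real CompactionError (the actual enum lives in the pageserver timeline module, see the diff below):

use anyhow::Context;

#[derive(thiserror::Error, Debug)]
#[allow(dead_code)]
enum CompactionError {
    #[error("tenant is shutting down")]
    ShuttingDown,
    // Before this commit: `Other(#[from] anyhow::Error)`, so any
    // anyhow::Error converted implicitly via `?`. With `#[from]`
    // removed, every call site must opt into `::Other` explicitly.
    #[error(transparent)]
    Other(anyhow::Error),
}

fn fsync_layers() -> anyhow::Result<()> {
    Ok(()) // placeholder for real work
}

fn compact() -> Result<(), CompactionError> {
    // Previously just `fsync_layers()?;` -- now the conversion is visible:
    fsync_layers()
        .context("fsync all new layers")
        .map_err(CompactionError::Other)?;
    Ok(())
}

fn main() {
    compact().unwrap();
}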
Joonas Koivunen
7c9f4c270e driveby: align error stringifications 2024-01-23 09:54:42 +00:00
Joonas Koivunen
2404106586 fix: log some errors at info, error or disregard 2024-01-23 09:54:42 +00:00
Joonas Koivunen
b45c1b5965 feat: add similar is_stopping to PageReconstructError 2024-01-23 09:54:42 +00:00
Joonas Koivunen
82e97e0c59 feat: add new root cause for RTC stopping 2024-01-23 09:54:42 +00:00
Konstantin Knizhnik
00d9bf5b61 Implement lockless update of pageserver_connstring GUC in shared memory (#6314)
## Problem

There is "neon.pageserver_connstring" GUC with PGC_SIGHUP option,
allowing to change it using
pg_reload_conf(). It is used by control plane to update pageserver
connection string if page server is crashed,
relocated or new shards are added.
It is copied to shared memory because config can not be loaded during
query execution and we need to
reestablish connection to page server.

## Summary of changes

Copying the connection string into shared memory is done by the postmaster,
and other backends check an update counter to determine whether the
connection URL has changed and the connection needs to be re-established.
We cannot use standard Postgres LWLocks, because the postmaster has no
PGPROC entry and therefore cannot wait on that primitive. This is why a
lockless access algorithm is implemented, using two atomic counters to
enforce consistent reads of the connection string value from shared memory.
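As a rough illustration, here is the same two-counter (seqlock-style) scheme sketched in Rust; the actual patch is C, using pg_atomic_uint64 with explicit pg_write_barrier()/pg_read_barrier() calls (SeqCst stands in for those below), and the only writer is the postmaster:

use std::cell::UnsafeCell;
use std::sync::atomic::{AtomicU64, Ordering};

const BUF: usize = 256;

// One writer (the postmaster), many readers (backends).
struct SeqLockString {
    begin: AtomicU64,
    end: AtomicU64,
    buf: UnsafeCell<[u8; BUF]>,
}

// Shareable because readers validate with the counters and the
// single writer serializes all mutations.
unsafe impl Sync for SeqLockString {}

impl SeqLockString {
    // Writer: bump `begin`, overwrite the value, bump `end`.
    fn publish(&self, value: &[u8]) {
        assert!(value.len() <= BUF);
        self.begin.fetch_add(1, Ordering::SeqCst);
        unsafe { (*self.buf.get())[..value.len()].copy_from_slice(value) };
        self.end.fetch_add(1, Ordering::SeqCst);
    }

    // Reader: copy optimistically, then retry if a write overlapped.
    fn snapshot(&self) -> [u8; BUF] {
        loop {
            let begin = self.begin.load(Ordering::SeqCst);
            let end = self.end.load(Ordering::SeqCst);
            // May copy a garbled mix if the writer races us; detected
            // below because the counters won't line up, so we retry.
            let copy = unsafe { std::ptr::read_volatile(self.buf.get()) };
            if begin == end
                && begin == self.begin.load(Ordering::SeqCst)
                && end == self.end.load(Ordering::SeqCst)
            {
                return copy;
            }
        }
    }
}

fn main() {
    let s = SeqLockString {
        begin: AtomicU64::new(0),
        end: AtomicU64::new(0),
        buf: UnsafeCell::new([0; BUF]),
    };
    s.publish(b"host=pageserver-0 port=6400"); // hypothetical connstring
    assert!(s.snapshot().starts_with(b"host=pageserver-0"));
}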


---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
2024-01-23 07:55:05 +02:00
Sasha Krassovsky
71f495c7f7 Gate it behind feature flags 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
0a7e050144 Fix test one last time 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
55bfa91bd7 Fix test again again 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
d90b2b99df Fix test again 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
27587e155d Fix test 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
55aede2762 Prevent duplicate insertions 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
9f186b4d3e Fix query 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
585687d563 Fix syntax error 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
65a98e425d Switch to bigint 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
b2e7249979 Sleep 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
844303255a Cargo fmt 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
6d8df2579b Fix dumb thing 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
3c3b53f8ad Update test 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
30064eb197 Add scary comment 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
869acfe29b Make migrations transactional 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
11a91eaf7b Uncomment the thread 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
394ef013d0 Push the migrations test 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
a718287902 Make migrations happen on a separate thread 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
2eac1adcb9 Make clippy happy 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
3f90b2d337 Fix test_ddl_forwarding 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
a40ed86d87 Add test for migrations, add initial migration 2024-01-22 14:53:29 -08:00
Sasha Krassovsky
1bf8bb88c5 Add support for migrations within compute_ctl 2024-01-22 14:53:29 -08:00
Vlad Lazar
f1901833a6 pageserver_api: migrate keyspace related functions from pgdatadir_mapping (#6406)
The idea is to achieve separation between keyspace layout definition
and operating on said keyspace. I've inlined all these functions since
they're small and we don't use LTO in the storage release builds
at the moment.

Closes https://github.com/neondatabase/neon/issues/6347
2024-01-22 19:16:38 +00:00
Arthur Petukhovsky
b41ee81308 Log warning on slow WAL removal (#6432)
Also add `safekeeper_active_timelines` metric.
Should help with investigating #6403
2024-01-22 18:38:05 +00:00
Christian Schwarz
205b6111e6 attachment_service: /attach-hook: correctly handle detach (#6433)
Before this patch, we would update the `tenant_state.intent` in memory
but not persist the detachment to disk.

I noticed this in https://github.com/neondatabase/neon/pull/6214 where
we stop, then restart, the attachment service.
2024-01-22 18:27:05 +00:00
John Spray
93572a3e99 pageserver: mark tenant broken when cancelling attach (#6430)
## Problem

When a tenant is in Attaching state, and waiting for the
`concurrent_tenant_warmup` semaphore, it also listens for the tenant
cancellation token. When that token fires, Tenant::attach drops out.
Meanwhile, Tenant::set_stopping waits forever for the tenant to exit
Attaching state.

Fixes: https://github.com/neondatabase/neon/issues/6423

## Summary of changes

- In the absence of a valid state for the tenant, it is set to Broken in
this path. A more elegant solution will require more refactoring, beyond
this minimal fix.
2024-01-22 15:50:32 +00:00
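A toy Rust sketch of the hang described above, with hypothetical, much-simplified stand-ins for the real pageserver types: attach() races the warmup semaphore against the cancellation token, and unless the bail-out path moves the tenant out of Attaching, set_stopping() waits forever:

use std::sync::Arc;
use tokio::sync::{watch, Semaphore};
use tokio_util::sync::CancellationToken;

#[derive(PartialEq)]
enum TenantState {
    Attaching,
    Active,
    Broken,
}

async fn attach(
    warmup: Arc<Semaphore>,
    cancel: CancellationToken,
    state_tx: watch::Sender<TenantState>,
) {
    tokio::select! {
        _ = warmup.acquire_owned() => {
            state_tx.send(TenantState::Active).ok();
        }
        _ = cancel.cancelled() => {
            // The bug: bailing out here used to leave the state at
            // Attaching, and set_stopping() then waited forever.
            // The minimal fix: move to a terminal state before bailing.
            state_tx.send(TenantState::Broken).ok();
        }
    }
}

// Waits until the tenant leaves Attaching, like Tenant::set_stopping.
async fn set_stopping(mut state_rx: watch::Receiver<TenantState>) {
    while *state_rx.borrow() == TenantState::Attaching {
        if state_rx.changed().await.is_err() {
            break;
        }
    }
}

#[tokio::main]
async fn main() {
    let warmup = Arc::new(Semaphore::new(0)); // 0 permits: warmup never grants
    let cancel = CancellationToken::new();
    let (tx, rx) = watch::channel(TenantState::Attaching);
    let attaching = tokio::spawn(attach(warmup, cancel.clone(), tx));
    cancel.cancel(); // shutdown arrives while still waiting for warmup
    attaching.await.unwrap();
    set_stopping(rx).await; // returns only because attach() set Broken
}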
Christian Schwarz
15c0df4de7 fixup(#6037): actually fix the issue, #6388 failed to do so (#6429)
Before this patch, the select! still returned immediately if `futs` was
empty. Must have tested a stale build in my manual testing of #6388.
2024-01-22 14:27:29 +00:00
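The underlying gotcha, as a standalone sketch: polling an empty FuturesUnordered yields Ready(None) immediately, so a select! arm over futs.next() completes at once instead of pending; the fix in the diff below folds the shutdown watcher into the same set so it is never empty:

use futures::stream::{FuturesUnordered, StreamExt};

#[tokio::main]
async fn main() {
    // No timelines held yet -> the set is empty.
    let mut futs: FuturesUnordered<std::future::Pending<()>> = FuturesUnordered::new();

    tokio::select! {
        // An empty FuturesUnordered is an exhausted stream: next()
        // resolves to None immediately, so this arm wins right away.
        res = futs.next() => assert!(res.is_none()),
        _ = std::future::pending::<()>() => unreachable!(),
    }
    println!("select! returned immediately on the empty set");
}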
27 changed files with 908 additions and 463 deletions

View File

@@ -700,13 +700,14 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
let connstr = self.connstr.clone();
let mut client = match Client::connect(connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
let mut zenith_admin_connstr = connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
@@ -719,8 +720,8 @@ impl ComputeNode {
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
@@ -734,8 +735,8 @@ impl ComputeNode {
cleanup_instance(&mut client)?;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, self.connstr.as_str())?;
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, connstr.as_str())?;
handle_extensions(spec, &mut client)?;
handle_extension_neon(&mut client)?;
create_availability_check_data(&mut client)?;
@@ -743,6 +744,12 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if self.has_feature(ComputeFeature::Migrations) {
thread::spawn(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client)
});
}
Ok(())
}
@@ -807,6 +814,10 @@ impl ComputeNode {
handle_grants(&spec, &mut client, self.connstr.as_str())?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection

View File

@@ -727,3 +727,79 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
Ok(())
}
#[instrument(skip_all)]
pub fn handle_migrations(client: &mut Client) -> Result<()> {
info!("handle migrations");
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
let migrations = [
"ALTER ROLE neon_superuser BYPASSRLS",
r#"
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
];
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
query = "BEGIN";
client.simple_query(query)?;
while current_migration < migrations.len() {
info!("Running migration:\n{}\n", migrations[current_migration]);
client.simple_query(migrations[current_migration])?;
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(())
}
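Given that versioning scheme -- neon_migration.migration_id stores the index of the next migration to run -- extending the list is append-only. A hypothetical illustration (the GRANT entry is made up, not a real Neon migration):

// A compute whose stored migration_id is 2 runs exactly the new entry
// and then sets the id to 3; inserting or editing earlier entries would
// desynchronize the stored id from what actually ran -- hence the
// scary comment above.
fn migrations() -> &'static [&'static str] {
    &[
        "ALTER ROLE neon_superuser BYPASSRLS",
        "/* the DO $$ ... $$ INHERIT / NOBYPASSRLS block above */",
        "GRANT pg_monitor TO neon_superuser", // appended, never inserted
    ]
}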

View File

@@ -184,7 +184,7 @@ impl Persistence {
pub(crate) async fn increment_generation(
&self,
tenant_shard_id: TenantShardId,
node_id: Option<NodeId>,
node_id: NodeId,
) -> anyhow::Result<Generation> {
let (write, gen) = {
let mut locked = self.state.lock().unwrap();
@@ -192,14 +192,9 @@ impl Persistence {
anyhow::bail!("Tried to increment generation of unknown shard");
};
// If we're called with a None pageserver, we need only update the generation
// record to disassociate it with this pageserver, not actually increment the number, as
// the increment is guaranteed to happen the next time this tenant is attached.
if node_id.is_some() {
shard.generation += 1;
}
shard.generation += 1;
shard.generation_pageserver = Some(node_id);
shard.generation_pageserver = node_id;
let gen = Generation::new(shard.generation);
(locked.save(), gen)
};
@@ -208,6 +203,19 @@ impl Persistence {
Ok(gen)
}
pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
let write = {
let mut locked = self.state.lock().unwrap();
let Some(shard) = locked.tenants.get_mut(&tenant_shard_id) else {
anyhow::bail!("Tried to increment generation of unknown shard");
};
shard.generation_pageserver = None;
locked.save()
};
write.commit().await?;
Ok(())
}
pub(crate) async fn re_attach(
&self,
node_id: NodeId,

View File

@@ -296,7 +296,7 @@ impl Reconciler {
// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(dest_ps_id))
.increment_generation(self.tenant_shard_id, dest_ps_id)
.await?;
let dest_conf = build_location_config(
@@ -395,7 +395,7 @@ impl Reconciler {
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(node_id))
.increment_generation(self.tenant_shard_id, node_id)
.await?;
wanted_conf.generation = self.generation.into();
tracing::info!("Observed configuration requires update.");

View File

@@ -362,13 +362,14 @@ impl Service {
);
}
let new_generation = if attach_req.node_id.is_some() {
let new_generation = if let Some(req_node_id) = attach_req.node_id {
Some(
self.persistence
.increment_generation(attach_req.tenant_shard_id, attach_req.node_id)
.increment_generation(attach_req.tenant_shard_id, req_node_id)
.await?,
)
} else {
self.persistence.detach(attach_req.tenant_shard_id).await?;
None
};
@@ -407,6 +408,7 @@ impl Service {
"attach_hook: tenant {} set generation {:?}, pageserver {}",
attach_req.tenant_shard_id,
tenant_state.generation,
// TODO: this is an odd number of 0xf's
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);

View File

@@ -57,7 +57,7 @@ use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
// contents of a endpoint.json file
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -70,6 +70,7 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
features: Vec<ComputeFeature>,
}
//
@@ -140,6 +141,7 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates: true,
features: vec![],
});
ep.create_endpoint_dir()?;
@@ -154,6 +156,7 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates: true,
features: vec![],
})?,
)?;
std::fs::write(
@@ -215,6 +218,9 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
impl Endpoint {
@@ -244,6 +250,7 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
features: conf.features,
})
}
@@ -519,7 +526,7 @@ impl Endpoint {
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
format_version: 1.0,
operation_uuid: None,
features: vec![],
features: self.features.clone(),
cluster: Cluster {
cluster_id: None, // project ID: not used
name: None, // project name: not used

View File

@@ -90,6 +90,9 @@ pub enum ComputeFeature {
/// track short-lived connections as user activity.
ActivityMonitorExperimental,
/// Enable running migrations
Migrations,
/// This is a special feature flag that is used to represent unknown feature flags.
/// Basically all unknown to enum flags are represented as this one. See unit test
/// `parse_unknown_features()` for more details.

View File

@@ -1,9 +1,11 @@
use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::{Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::{fmt, ops::Range};
use crate::reltag::{BlockNumber, RelTag};
use crate::reltag::{BlockNumber, RelTag, SlruKind};
/// Key used in the Repository kv-store.
///
@@ -143,12 +145,390 @@ impl Key {
}
}
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together, should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into four major sections, identified by the first
// byte, and they form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
//-- Section 01: relation data and metadata
pub const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
#[inline(always)]
pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
#[inline(always)]
pub fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
#[inline(always)]
pub fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
#[inline(always)]
pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
pub const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
#[inline(always)]
pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
//-- Section 03: Control file
pub const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
pub const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
pub const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
#[inline(always)]
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
#[inline(always)]
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
#[inline(always)]
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
#[inline(always)]
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
#[inline(always)]
pub fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
#[inline(always)]
pub fn is_rel_block_key(key: &Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
}
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
#[inline(always)]
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (

View File

@@ -61,7 +61,7 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::pgdatadir_mapping::{rel_block_to_key, Version};
use crate::pgdatadir_mapping::Version;
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -75,6 +75,7 @@ use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::trace::Tracer;
use pageserver_api::key::rel_block_to_key;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
@@ -386,12 +387,18 @@ impl PageServerHandler {
/// Future that completes when we need to shut down the connection.
///
/// Reasons for need to shut down are:
/// - any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// - task_mgr requests shutdown of the connection
/// We currently need to shut down when any of the following happens:
/// 1. any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// 2. task_mgr requests shutdown of the connection
///
/// The need to check for `task_mgr` cancellation arises mainly from `handle_pagerequests`
/// where, at first, `shard_timelines` is empty, see <https://github.com/neondatabase/neon/pull/6388>
/// NB on (1): the connection's lifecycle is not actually tied to any of the
/// `shard_timelines`s' lifecycles. But it's _necessary_ in the current
/// implementation to be responsive to timeline cancellation because
/// the connection holds their `GateGuards` open (stored in `shard_timelines`).
/// We currently do the easy thing and terminate the connection if any of the
/// shard_timelines gets cancelled. But really, we could spend more effort
/// and simply remove the cancelled timeline from the `shard_timelines`, thereby
/// dropping the guard.
///
/// NB: keep in sync with [`Self::is_connection_cancelled`]
async fn await_connection_cancelled(&self) {
@@ -404,16 +411,17 @@ impl PageServerHandler {
// immutable &self). So it's fine to evaluate shard_timelines after the sleep, we don't risk
// missing any inserts to the map.
let mut futs = self
.shard_timelines
.values()
.map(|ht| ht.timeline.cancel.cancelled())
.collect::<FuturesUnordered<_>>();
tokio::select! {
_ = task_mgr::shutdown_watcher() => { }
_ = futs.next() => {}
}
let mut cancellation_sources = Vec::with_capacity(1 + self.shard_timelines.len());
use futures::future::Either;
cancellation_sources.push(Either::Left(task_mgr::shutdown_watcher()));
cancellation_sources.extend(
self.shard_timelines
.values()
.map(|ht| Either::Right(ht.timeline.cancel.cancelled())),
);
FuturesUnordered::from_iter(cancellation_sources)
.next()
.await;
}
/// Checking variant of [`Self::await_connection_cancelled`].

View File

@@ -13,7 +13,12 @@ use crate::repository::*;
use crate::walrecord::NeonWalRecord;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes};
use pageserver_api::key::is_rel_block_key;
use pageserver_api::key::{
dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
};
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
@@ -1535,366 +1540,6 @@ struct SlruSegmentDirectory {
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together, should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into four major sections, identified by the first
// byte, and the form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
//-- Section 01: relation data and metadata
const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
pub(crate) fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
//-- Section 03: Control file
const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {

View File

@@ -716,6 +716,10 @@ impl Tenant {
// stayed in Activating for such a long time that shutdown found it in
// that state.
tracing::info!(state=%tenant_clone.current_state(), "Tenant shut down before activation");
// Make the tenant broken so that set_stopping will not hang waiting for it to leave
// the Attaching state. This is an over-reaction (nothing really broke, the tenant is
// just shutting down), but ensures progress.
make_broken(&tenant_clone, anyhow::anyhow!("Shut down while Attaching"));
return Ok(());
},
)
@@ -1877,7 +1881,7 @@ impl Tenant {
&self,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> anyhow::Result<(), timeline::CompactionError> {
) -> Result<(), timeline::CompactionError> {
// Don't start doing work during shutdown, or when broken, we do not need those in the logs
if !self.is_active() {
return Ok(());

View File

@@ -619,8 +619,8 @@ impl LayerMap {
}
/// Return all L0 delta layers
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<PersistentLayerDesc>>> {
Ok(self.l0_delta_layers.to_vec())
pub fn get_level0_deltas(&self) -> Vec<Arc<PersistentLayerDesc>> {
self.l0_delta_layers.to_vec()
}
/// debugging function to print out the contents of the layer map

View File

@@ -237,7 +237,7 @@ use utils::id::{TenantId, TimelineId};
use self::index::IndexPart;
use super::storage_layer::{Layer, LayerFileName, ResidentLayer};
use super::upload_queue::SetDeletedFlagProgress;
use super::upload_queue::{self, SetDeletedFlagProgress};
use super::Generation;
pub(crate) use download::{is_temp_download_file, list_remote_timelines};
@@ -621,7 +621,9 @@ impl RemoteTimelineClient {
///
/// Like schedule_index_upload_for_metadata_update(), this merely adds
/// the upload to the upload queue and returns quickly.
pub fn schedule_index_upload_for_file_changes(self: &Arc<Self>) -> anyhow::Result<()> {
pub(crate) fn schedule_index_upload_for_file_changes(
self: &Arc<Self>,
) -> Result<(), upload_queue::NotInitialized> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
@@ -666,7 +668,7 @@ impl RemoteTimelineClient {
pub(crate) fn schedule_layer_file_upload(
self: &Arc<Self>,
layer: ResidentLayer,
) -> anyhow::Result<()> {
) -> Result<(), upload_queue::NotInitialized> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
@@ -875,7 +877,7 @@ impl RemoteTimelineClient {
self: &Arc<Self>,
compacted_from: &[Layer],
compacted_to: &[ResidentLayer],
) -> anyhow::Result<()> {
) -> Result<(), upload_queue::NotInitialized> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;

View File

@@ -290,7 +290,7 @@ impl Layer {
}
/// Downloads if necessary and creates a guard, which will keep this layer from being evicted.
pub(crate) async fn download_and_keep_resident(&self) -> anyhow::Result<ResidentLayer> {
pub(crate) async fn download_and_keep_resident(&self) -> Result<ResidentLayer, DownloadError> {
let downloaded = self.0.get_or_maybe_download(true, None).await?;
Ok(ResidentLayer {
@@ -1174,7 +1174,7 @@ pub(crate) enum EvictionError {
/// Error internal to the [`LayerInner::get_or_maybe_download`]
#[derive(Debug, thiserror::Error)]
enum DownloadError {
pub(crate) enum DownloadError {
#[error("timeline has already shutdown")]
TimelineShutdown,
#[error("no remote storage configured")]
@@ -1197,6 +1197,15 @@ enum DownloadError {
PostStatFailed(#[source] std::io::Error),
}
impl DownloadError {
pub fn is_cancelled(&self) -> bool {
match self {
Self::TimelineShutdown | Self::DownloadCancelled => true,
_ => false,
}
}
}
#[derive(Debug, PartialEq)]
pub(crate) enum NeedsDownload {
NotFound,

View File

@@ -9,6 +9,7 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::TENANT_TASK_EVENTS;
use crate::task_mgr;
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -181,8 +182,11 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
);
error_run_count += 1;
let wait_duration = Duration::from_secs_f64(wait_duration);
error!(
"Compaction failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
log_compaction_error(
&e,
error_run_count,
&wait_duration,
cancel.is_cancelled(),
);
wait_duration
} else {
@@ -210,6 +214,38 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
}
fn log_compaction_error(
e: &CompactionError,
error_run_count: u32,
sleep_duration: &std::time::Duration,
task_cancelled: bool,
) {
use crate::tenant::upload_queue::NotInitialized;
use crate::tenant::PageReconstructError;
use CompactionError::*;
enum LooksLike {
Info,
Error,
}
let decision = match e {
ShuttingDown => None,
_ if task_cancelled => Some(LooksLike::Info),
Other(_) => Some(LooksLike::Error),
};
match decision {
Some(LooksLike::Info) => info!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:#}",
),
Some(LooksLike::Error) => error!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:?}",
),
None => {}
}
}
///
/// GC task's main loop
///

View File

@@ -73,8 +73,8 @@ use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::tenant::config::TenantConfOpt;
use pageserver_api::key::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use pageserver_api::reltag::RelTag;
use pageserver_api::shard::ShardIndex;
@@ -103,11 +103,14 @@ use self::layer_manager::LayerManager;
use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::config::TenantConf;
use super::remote_timeline_client::index::{IndexLayerMetadata, IndexPart};
use super::remote_timeline_client::RemoteTimelineClient;
use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
use super::{config::TenantConf, upload_queue::NotInitialized};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{
remote_timeline_client::index::{IndexLayerMetadata, IndexPart},
storage_layer::layer,
};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(super) enum FlushLoopState {
@@ -391,8 +394,7 @@ pub(crate) enum PageReconstructError {
#[error("Ancestor LSN wait error: {0}")]
AncestorLsnTimeout(#[from] WaitLsnError),
/// The operation was cancelled
#[error("Cancelled")]
#[error("timeline shutting down")]
Cancelled,
/// The ancestor of this is being stopped
@@ -404,6 +406,19 @@ pub(crate) enum PageReconstructError {
WalRedo(anyhow::Error),
}
impl PageReconstructError {
/// Returns true if this error indicates a tenant/timeline shutdown alike situation
pub(crate) fn is_stopping(&self) -> bool {
use PageReconstructError::*;
match self {
Other(_) => false,
AncestorLsnTimeout(_) => false,
Cancelled | AncestorStopping(_) => true,
WalRedo(_) => false,
}
}
}
#[derive(thiserror::Error, Debug)]
enum FlushLayerError {
/// Timeline cancellation token was cancelled
@@ -832,8 +847,7 @@ impl Timeline {
// "enough".
let layers = self
.create_image_layers(&partitioning, lsn, false, &image_ctx)
.await
.map_err(anyhow::Error::from)?;
.await?;
if let Some(remote_client) = &self.remote_client {
for layer in layers {
remote_client.schedule_layer_file_upload(layer)?;
@@ -3200,7 +3214,46 @@ pub(crate) enum CompactionError {
ShuttingDown,
/// Compaction cannot be done right now; page reconstruction and so on.
#[error(transparent)]
Other(#[from] anyhow::Error),
Other(anyhow::Error),
}
impl CompactionError {
fn other<E>(err: E) -> Self
where
E: std::error::Error + Send + Sync + 'static,
{
CompactionError::Other(anyhow::Error::new(err))
}
}
impl From<PageReconstructError> for CompactionError {
fn from(value: PageReconstructError) -> Self {
if value.is_stopping() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
}
impl From<NotInitialized> for CompactionError {
fn from(value: NotInitialized) -> Self {
if value.is_stopping() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
}
impl From<layer::DownloadError> for CompactionError {
fn from(value: layer::DownloadError) -> Self {
if value.is_cancelled() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
}
#[serde_as]
@@ -3333,7 +3386,7 @@ impl Timeline {
stats.read_lock_held_spawn_blocking_startup_micros =
stats.read_lock_acquisition_micros.till_now(); // set by caller
let layers = guard.layer_map();
let level0_deltas = layers.get_level0_deltas()?;
let level0_deltas = layers.get_level0_deltas();
let mut level0_deltas = level0_deltas
.into_iter()
.map(|x| guard.get_from_desc(&x))
@@ -3380,7 +3433,8 @@ impl Timeline {
delta
.download_and_keep_resident()
.await
.context("download layer for failpoint")?,
.context("download layer for failpoint")
.map_err(CompactionError::Other)?,
);
}
tracing::info!("compact-level0-phase1-return-same"); // so that we can check if we hit the failpoint
@@ -3464,7 +3518,7 @@ impl Timeline {
let mut all_keys = Vec::new();
for l in deltas_to_compact.iter() {
all_keys.extend(l.load_keys(ctx).await?);
all_keys.extend(l.load_keys(ctx).await.map_err(CompactionError::Other)?);
}
// FIXME: should spawn_blocking the rest of this function
@@ -3484,7 +3538,10 @@ impl Timeline {
// has not so much sense, because largest holes will corresponds field1/field2 changes.
// But we are mostly interested to eliminate holes which cause generation of excessive image layers.
// That is why it is better to measure size of hole as number of covering image layers.
let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
let coverage_size = layers
.image_coverage(&key_range, last_record_lsn)
.map_err(CompactionError::Other)?
.len();
if coverage_size >= min_hole_coverage_size {
heap.push(Hole {
key_range,
@@ -3583,7 +3640,7 @@ impl Timeline {
key, lsn, ref val, ..
} in all_values_iter
{
let value = val.load(ctx).await?;
let value = val.load(ctx).await.map_err(CompactionError::Other)?;
let same_key = prev_key.map_or(false, |prev_key| prev_key == key);
// We need to check key boundaries once we reach next key or end of layer with the same key
if !same_key || lsn == dup_end_lsn {
@@ -3640,7 +3697,8 @@ impl Timeline {
.take()
.unwrap()
.finish(prev_key.unwrap().next(), self)
.await?,
.await
.map_err(CompactionError::Other)?,
);
writer = None;
@@ -3670,7 +3728,8 @@ impl Timeline {
lsn_range.clone()
},
)
.await?,
.await
.map_err(CompactionError::Other)?,
);
}
@@ -3681,7 +3740,12 @@ impl Timeline {
});
if !self.shard_identity.is_key_disposable(&key) {
writer.as_mut().unwrap().put_value(key, lsn, value).await?;
writer
.as_mut()
.unwrap()
.put_value(key, lsn, value)
.await
.map_err(CompactionError::Other)?;
} else {
debug!(
"Dropping key {} during compaction (it belongs on shard {:?})",
@@ -3697,7 +3761,12 @@ impl Timeline {
prev_key = Some(key);
}
if let Some(writer) = writer {
new_layers.push(writer.finish(prev_key.unwrap().next(), self).await?);
new_layers.push(
writer
.finish(prev_key.unwrap().next(), self)
.await
.map_err(CompactionError::Other)?,
);
}
// Sync layers
@@ -3726,7 +3795,8 @@ impl Timeline {
// minimize latency.
par_fsync::par_fsync_async(&layer_paths)
.await
.context("fsync all new layers")?;
.context("fsync all new layers")
.map_err(CompactionError::Other)?;
let timeline_dir = self
.conf
@@ -3734,7 +3804,8 @@ impl Timeline {
par_fsync::par_fsync_async(&[timeline_dir])
.await
.context("fsync of timeline dir")?;
.context("fsync of timeline dir")
.map_err(CompactionError::Other)?;
}
stats.write_layer_files_micros = stats.read_lock_drop_micros.till_now();

View File

@@ -126,6 +126,27 @@ pub(super) struct UploadQueueStopped {
pub(super) deleted_at: SetDeletedFlagProgress,
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotInitialized {
#[error("queue is in state Uninitialized")]
Uninitialized,
#[error("queue is in state Stopping")]
Stopped,
#[error("queue is shutting down")]
ShuttingDown,
}
impl NotInitialized {
pub(crate) fn is_stopping(&self) -> bool {
use NotInitialized::*;
match self {
Uninitialized => false,
Stopped => true,
ShuttingDown => true,
}
}
}
impl UploadQueue {
pub(crate) fn initialize_empty_remote(
&mut self,
@@ -213,18 +234,20 @@ impl UploadQueue {
Ok(self.initialized_mut().expect("we just set it"))
}
pub(crate) fn initialized_mut(&mut self) -> anyhow::Result<&mut UploadQueueInitialized> {
pub(crate) fn initialized_mut(
&mut self,
) -> Result<&mut UploadQueueInitialized, NotInitialized> {
use UploadQueue::*;
match self {
UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {
anyhow::bail!("queue is in state {}", self.as_str())
}
UploadQueue::Initialized(x) => {
if !x.shutting_down {
Ok(x)
Uninitialized => Err(NotInitialized::Uninitialized.into()),
Initialized(x) => {
if x.shutting_down {
Err(NotInitialized::ShuttingDown.into())
} else {
anyhow::bail!("queue is shutting down")
Ok(x)
}
}
Stopped(_) => Err(NotInitialized::Stopped.into()),
}
}

View File

@@ -33,11 +33,12 @@ use utils::failpoint_support;
use crate::context::RequestContext;
use crate::metrics::WAL_INGEST;
use crate::pgdatadir_mapping::*;
use crate::pgdatadir_mapping::{DatadirModification, Version};
use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::walrecord::*;
use crate::ZERO_PAGE;
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};

View File

@@ -47,11 +47,10 @@ use crate::metrics::{
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
};
use crate::pgdatadir_mapping::key_to_slru_block;
use crate::repository::Key;
use crate::walrecord::NeonWalRecord;
use pageserver_api::key::key_to_rel_block;
use pageserver_api::key::{key_to_rel_block, key_to_slru_block};
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;

View File

@@ -637,7 +637,7 @@ HandleAlterRole(AlterRoleStmt *stmt)
ListCell *option;
const char *role_name = stmt->role->rolename;
if (RoleIsNeonSuperuser(role_name))
if (RoleIsNeonSuperuser(role_name) && !superuser())
elog(ERROR, "can't ALTER neon_superuser");
foreach(option, stmt->options)

View File

@@ -64,10 +64,26 @@ static int max_reconnect_attempts = 60;
#define MAX_PAGESERVER_CONNSTRING_SIZE 256
/*
* The "neon.pageserver_connstring" GUC is marked with the PGC_SIGHUP option,
* allowing it to be changed using pg_reload_conf(). The control plane can
* update the connection string if the pageserver crashes, is relocated, or
* new shards are added. A copy of the current value of the GUC is kept in
* shared memory, updated by the postmaster, because regular backends don't
* reload the config during query execution, but we might need to re-establish
* the pageserver connection with the new connection string even in the middle
* of a query.
*
* The shared memory copy is protected by a lockless algorithm using two
* atomic counters. The counters allow a backend to quickly check if the value
* has changed since last access, and to detect and retry copying the value if
* the postmaster changes the value concurrently. (Postmaster doesn't have a
* PGPROC entry and therefore cannot use LWLocks.)
*/
typedef struct
{
LWLockId lock;
pg_atomic_uint64 update_counter;
pg_atomic_uint64 begin_update_counter;
pg_atomic_uint64 end_update_counter;
char pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
} PagestoreShmemState;
@@ -84,7 +100,7 @@ static bool pageserver_flush(void);
static void pageserver_disconnect(void);
static bool
PagestoreShmemIsValid()
PagestoreShmemIsValid(void)
{
return pagestore_shared && UsedShmemSegAddr;
}
@@ -98,31 +114,58 @@ CheckPageserverConnstring(char **newval, void **extra, GucSource source)
static void
AssignPageserverConnstring(const char *newval, void *extra)
{
if (!PagestoreShmemIsValid())
/*
* Only postmaster updates the copy in shared memory.
*/
if (!PagestoreShmemIsValid() || IsUnderPostmaster)
return;
LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE);
pg_atomic_add_fetch_u64(&pagestore_shared->begin_update_counter, 1);
pg_write_barrier();
strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE);
pg_atomic_fetch_add_u64(&pagestore_shared->update_counter, 1);
LWLockRelease(pagestore_shared->lock);
pg_write_barrier();
pg_atomic_add_fetch_u64(&pagestore_shared->end_update_counter, 1);
}
static bool
CheckConnstringUpdated()
CheckConnstringUpdated(void)
{
if (!PagestoreShmemIsValid())
return false;
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter);
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
}
static void
ReloadConnstring()
ReloadConnstring(void)
{
uint64 begin_update_counter;
uint64 end_update_counter;
if (!PagestoreShmemIsValid())
return;
LWLockAcquire(pagestore_shared->lock, LW_SHARED);
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
pagestore_local_counter = pg_atomic_read_u64(&pagestore_shared->update_counter);
LWLockRelease(pagestore_shared->lock);
/*
* Copy the current setting from shared to local memory. Postmaster can
* update the value concurrently, in which case we would copy a garbled
* mix of the old and new values. We will detect it because the counters
* won't match, and retry. But it's important that we don't do anything
* within the retry-loop that would depend on the string having valid
* contents.
*/
do
{
begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
pg_read_barrier();
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
pg_read_barrier();
}
while (begin_update_counter != end_update_counter
|| begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
|| end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
pagestore_local_counter = end_update_counter;
}
static bool
@@ -137,7 +180,7 @@ pageserver_connect(int elevel)
static TimestampTz last_connect_time = 0;
static uint64_t delay_us = MIN_RECONNECT_INTERVAL_USEC;
TimestampTz now;
uint64_t us_since_last_connect;
uint64_t us_since_last_connect;
Assert(!connected);
@@ -147,7 +190,7 @@ pageserver_connect(int elevel)
}
now = GetCurrentTimestamp();
us_since_last_connect = now - last_connect_time;
us_since_last_connect = now - last_connect_time;
if (us_since_last_connect < delay_us)
{
pg_usleep(delay_us - us_since_last_connect);
@@ -505,8 +548,8 @@ PagestoreShmemInit(void)
&found);
if (!found)
{
pagestore_shared->lock = &(GetNamedLWLockTranche("neon_libpagestore")->lock);
pg_atomic_init_u64(&pagestore_shared->update_counter, 0);
pg_atomic_init_u64(&pagestore_shared->begin_update_counter, 0);
pg_atomic_init_u64(&pagestore_shared->end_update_counter, 0);
AssignPageserverConnstring(page_server_connstring, NULL);
}
LWLockRelease(AddinShmemInitLock);
@@ -531,7 +574,6 @@ pagestore_shmem_request(void)
#endif
RequestAddinShmemSpace(PagestoreShmemSize());
RequestNamedLWLockTranche("neon_libpagestore", 1);
}
static void

View File

@@ -110,7 +110,7 @@ pub static REMOVED_WAL_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
pub static BACKED_UP_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"safekeeper_backed_up_segments_total",
"Number of WAL segments backed up to the broker"
"Number of WAL segments backed up to the S3"
)
.expect("Failed to register safekeeper_backed_up_segments_total counter")
});
@@ -337,6 +337,7 @@ pub struct TimelineCollector {
flushed_wal_seconds: GaugeVec,
collect_timeline_metrics: Gauge,
timelines_count: IntGauge,
active_timelines_count: IntGauge,
}
impl Default for TimelineCollector {
@@ -520,6 +521,13 @@ impl TimelineCollector {
.unwrap();
descs.extend(timelines_count.desc().into_iter().cloned());
let active_timelines_count = IntGauge::new(
"safekeeper_active_timelines",
"Total number of active timelines",
)
.unwrap();
descs.extend(active_timelines_count.desc().into_iter().cloned());
TimelineCollector {
descs,
commit_lsn,
@@ -540,6 +548,7 @@ impl TimelineCollector {
flushed_wal_seconds,
collect_timeline_metrics,
timelines_count,
active_timelines_count,
}
}
}
@@ -572,6 +581,7 @@ impl Collector for TimelineCollector {
let timelines = GlobalTimelines::get_all();
let timelines_count = timelines.len();
let mut active_timelines_count = 0;
// Prometheus Collector is sync, and data is stored under async lock. To
// bridge the gap with a crutch, collect data in spawned thread with
@@ -590,6 +600,10 @@ impl Collector for TimelineCollector {
let timeline_id = tli.ttid.timeline_id.to_string();
let labels = &[tenant_id.as_str(), timeline_id.as_str()];
if tli.timeline_is_active {
active_timelines_count += 1;
}
self.commit_lsn
.with_label_values(labels)
.set(tli.mem_state.commit_lsn.into());
@@ -681,6 +695,8 @@ impl Collector for TimelineCollector {
// report total number of timelines
self.timelines_count.set(timelines_count as i64);
self.active_timelines_count
.set(active_timelines_count as i64);
mfs.extend(self.timelines_count.collect());
mfs

View File

@@ -10,11 +10,15 @@ use crate::{GlobalTimelines, SafeKeeperConf};
pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
let wal_removal_interval = Duration::from_millis(5000);
loop {
let now = tokio::time::Instant::now();
let mut active_timelines = 0;
let tlis = GlobalTimelines::get_all();
for tli in &tlis {
if !tli.is_active().await {
continue;
}
active_timelines += 1;
let ttid = tli.ttid;
async {
if let Err(e) = tli.maybe_persist_control_file().await {
@@ -27,6 +31,17 @@ pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
.instrument(info_span!("WAL removal", ttid = %ttid))
.await;
}
let elapsed = now.elapsed();
let total_timelines = tlis.len();
if elapsed > wal_removal_interval {
info!(
"WAL removal is too long, processed {} active timelines ({} total) in {:?}",
active_timelines, total_timelines, elapsed
);
}
sleep(wal_removal_interval).await;
}
}

View File

@@ -2914,6 +2914,7 @@ class Endpoint(PgProtocol):
# Write it back updated
with open(config_path, "w") as file:
log.info(json.dumps(dict(data_dict, **kwargs)))
json.dump(dict(data_dict, **kwargs), file, indent=4)
# Mock the extension part of spec passed from control plane for local testing

View File

@@ -248,8 +248,15 @@ def test_ddl_forwarding(ddl: DdlForwardingContext):
# We don't have compute_ctl, so here, so create neon_superuser here manually
cur.execute("CREATE ROLE neon_superuser NOLOGIN CREATEDB CREATEROLE")
with pytest.raises(psycopg2.InternalError):
cur.execute("ALTER ROLE neon_superuser LOGIN")
# Contrary to popular belief, being superman does not make you superuser
cur.execute("CREATE ROLE superman LOGIN NOSUPERUSER PASSWORD 'jungle_man'")
with ddl.pg.cursor(user="superman", password="jungle_man") as superman_cur:
# We allow real SUPERUSERs to ALTER neon_superuser
with pytest.raises(psycopg2.InternalError):
superman_cur.execute("ALTER ROLE neon_superuser LOGIN")
cur.execute("ALTER ROLE neon_superuser LOGIN")
with pytest.raises(psycopg2.InternalError):
cur.execute("CREATE DATABASE trololobus WITH OWNER neon_superuser")

View File

@@ -0,0 +1,37 @@
import time
from fixtures.neon_fixtures import NeonEnv
def test_migrations(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_migrations", "empty")
endpoint = env.endpoints.create("test_migrations")
log_path = endpoint.endpoint_path() / "compute.log"
endpoint.respec(skip_pg_catalog_updates=False, features=["migrations"])
endpoint.start()
time.sleep(1) # Sleep to let migrations run
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()
assert "INFO handle_migrations: Ran 2 migrations" in logs
endpoint.stop()
endpoint.start()
time.sleep(1) # Sleep to let migrations run
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()
assert "INFO handle_migrations: Ran 0 migrations" in logs

View File

@@ -0,0 +1,42 @@
import threading
import time
from contextlib import closing
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin
# Test updating neon.pageserver_connstring setting on the fly.
#
# This merely changes some whitespace in the connection string, so
# this doesn't prove that the new string actually takes effect. But at
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
scale = 10
def run_pgbench(connstr: str):
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", f"-T{int(n_reconnects*timeout)}", connstr])
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
thread.start()
with closing(endpoint.connect()) as con:
with con.cursor() as c:
c.execute("SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'")
connstring = c.fetchall()[0][0]
for i in range(n_reconnects):
time.sleep(timeout)
c.execute(
"alter system set neon.pageserver_connstring=%s",
(connstring + (" " * (i % 2)),),
)
c.execute("select pg_reload_conf()")
thread.join()