From 19f7d40c1d89691c9d1d43ed2616e8f1cadc2c19 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 15 Nov 2024 11:41:43 +0100 Subject: [PATCH 01/43] deny.toml: allow CDDL-1.0 license (#9766) #9764, which adds profiling support to Safekeeper, pulls in the dependency [`inferno`](https://crates.io/crates/inferno) via [`pprof-rs`](https://crates.io/crates/pprof). This is licenced under the [Common Development and Distribution License 1.0](https://spdx.org/licenses/CDDL-1.0.html), which is not allowed by `cargo-deny`. This patch allows the CDDL-1.0 license. It is a derivative of the Mozilla Public License, which we already allow, but avoids some issues around European copyright law that the MPL had. As such, I don't expect this to be problematic. --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index 327ac58db7..8bf643f4ba 100644 --- a/deny.toml +++ b/deny.toml @@ -37,6 +37,7 @@ allow = [ "BSD-2-Clause", "BSD-3-Clause", "CC0-1.0", + "CDDL-1.0", "ISC", "MIT", "MPL-2.0", From 04938d9d559d6e5968ce7e3b71a5a86ac8f87f57 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 15 Nov 2024 13:22:05 +0000 Subject: [PATCH 02/43] tests: tolerate pageserver 500s in test_timeline_archival_chaos (#9769) ## Problem Test exposes cases where pageserver gives 500 responses, causing failures like https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9766/11844529470/index.html#suites/d1acc79950edeb0563fc86236c620898/3546be2ffed99ba6 ## Summary of changes - Tolerate such messages, and link an issue for cleaning up the pageserver not to return such 500s. --- test_runner/regress/test_timeline_archive.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index c447535e10..83631405ab 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -406,7 +406,13 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): tenant_shard_id = TenantShardId(tenant_id, 0, 0) # Unavailable pageservers during timeline CRUD operations can be logged as errors on the storage controller - env.storage_controller.allowed_errors.append(".*error sending request.*") + env.storage_controller.allowed_errors.extend( + [ + ".*error sending request.*", + # FIXME: the pageserver should not return 500s on cancellation (https://github.com/neondatabase/neon/issues/97680) + ".*InternalServerError(Error deleting timeline .* on .* on .*: pageserver API: error: Cancelled", + ] + ) for ps in env.pageservers: # We will do unclean restarts, which results in these messages when cleaning up files @@ -415,10 +421,10 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): ".*removing local file.*because it has unexpected length.*", ".*__temp.*", # FIXME: there are still anyhow::Error paths in timeline creation/deletion which - # generate 500 results when called during shutdown + # generate 500 results when called during shutdown (https://github.com/neondatabase/neon/issues/9768) ".*InternalServerError.*", # FIXME: there are still anyhow::Error paths in timeline deletion that generate - # log lines at error severity + # log lines at error severity (https://github.com/neondatabase/neon/issues/9768) ".*delete_timeline.*Error", ] ) From 7880c246f1b2b14a8d316fa757df8d2a85894414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 15 Nov 2024 14:22:29 +0100 Subject: [PATCH 03/43] Correct mistakes in offloaded 
timeline retain_lsn management (#9760) PR #9308 has modified tenant activation code to take offloaded child timelines into account for populating the list of `retain_lsn` values. However, there is more places than just tenant activation where one needs to update the `retain_lsn`s. This PR fixes some bugs of the current code that could lead to corruption in the worst case: 1. Deleting of an offloaded timeline would not get its `retain_lsn` purged from its parent. With the patch we now do it, but as the parent can be offloaded as well, the situatoin is a bit trickier than for non-offloaded timelines which can just keep a pointer to their parent. Here we can't keep a pointer because the parent might get offloaded, then unoffloaded again, creating a dangling pointer situation. Keeping a pointer to the *tenant* is not good either, because we might drop the offloaded timeline in a context where a `offloaded_timelines` lock is already held: so we don't want to acquire a lock in the drop code of OffloadedTimeline. 2. Unoffloading a timeline would not get its `retain_lsn` values populated, leading to it maybe garbage collecting values that its children might need. We now call `initialize_gc_info` on the parent. 3. Offloading of a timeline would not get its `retain_lsn` values registered as offloaded at the parent. So if we drop the `Timeline` object, and its registration is removed, the parent would not have any of the child's `retain_lsn`s around. Also, before, the `Timeline` object would delete anything related to its timeline ID, now it only deletes `retain_lsn`s that have `MaybeOffloaded::No` set. Incorporates Chi's reproducer from #9753. cc https://github.com/neondatabase/cloud/issues/20199 The `test_timeline_retain_lsn` test is extended: 1. it gains a new dimension, duplicating each mode, to either have the "main" branch be the direct parent of the timeline we archive, or the "test_archived_parent" branch intermediary, creating a three timeline structure. This doesn't test anything fixed by this PR in particular, just explores the vast space of possible configurations a little bit more. 2. it gains two new modes, `offload-parent`, which tests the second point, and `offload-no-restart` which tests the third point. It's easy to verify the test actually is "sharp" by removing one of the respective `self.initialize_gc_info()`, `gc_info.insert_child()` or `ancestor_children.push()`. Part of #8088 --------- Signed-off-by: Alex Chi Z Co-authored-by: Alex Chi Z --- pageserver/src/tenant.rs | 152 +++++++++++++++++-- pageserver/src/tenant/timeline.rs | 31 ++-- pageserver/src/tenant/timeline/delete.rs | 3 +- pageserver/src/tenant/timeline/offload.rs | 12 +- test_runner/regress/test_timeline_archive.py | 93 +++++++++--- 5 files changed, 244 insertions(+), 47 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c6fc3bfe6c..909f99ea9d 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -39,6 +39,7 @@ use remote_timeline_client::UploadQueueNotReadyError; use std::collections::BTreeMap; use std::fmt; use std::future::Future; +use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; @@ -524,6 +525,9 @@ pub struct OffloadedTimeline { /// Prevent two tasks from deleting the timeline at the same time. If held, the /// timeline is being deleted. If 'true', the timeline has already been deleted. 
pub delete_progress: TimelineDeleteProgress, + + /// Part of the `OffloadedTimeline` object's lifecycle: this needs to be set before we drop it + pub deleted_from_ancestor: AtomicBool, } impl OffloadedTimeline { @@ -533,9 +537,16 @@ impl OffloadedTimeline { /// the timeline is not in a stopped state. /// Panics if the timeline is not archived. fn from_timeline(timeline: &Timeline) -> Result { - let ancestor_retain_lsn = timeline - .get_ancestor_timeline_id() - .map(|_timeline_id| timeline.get_ancestor_lsn()); + let (ancestor_retain_lsn, ancestor_timeline_id) = + if let Some(ancestor_timeline) = timeline.ancestor_timeline() { + let ancestor_lsn = timeline.get_ancestor_lsn(); + let ancestor_timeline_id = ancestor_timeline.timeline_id; + let mut gc_info = ancestor_timeline.gc_info.write().unwrap(); + gc_info.insert_child(timeline.timeline_id, ancestor_lsn, MaybeOffloaded::Yes); + (Some(ancestor_lsn), Some(ancestor_timeline_id)) + } else { + (None, None) + }; let archived_at = timeline .remote_client .archived_at_stopped_queue()? @@ -543,14 +554,17 @@ impl OffloadedTimeline { Ok(Self { tenant_shard_id: timeline.tenant_shard_id, timeline_id: timeline.timeline_id, - ancestor_timeline_id: timeline.get_ancestor_timeline_id(), + ancestor_timeline_id, ancestor_retain_lsn, archived_at, delete_progress: timeline.delete_progress.clone(), + deleted_from_ancestor: AtomicBool::new(false), }) } fn from_manifest(tenant_shard_id: TenantShardId, manifest: &OffloadedTimelineManifest) -> Self { + // We expect to reach this case in tenant loading, where the `retain_lsn` is populated in the parent's `gc_info` + // by the `initialize_gc_info` function. let OffloadedTimelineManifest { timeline_id, ancestor_timeline_id, @@ -564,6 +578,7 @@ impl OffloadedTimeline { ancestor_retain_lsn, archived_at, delete_progress: TimelineDeleteProgress::default(), + deleted_from_ancestor: AtomicBool::new(false), } } fn manifest(&self) -> OffloadedTimelineManifest { @@ -581,6 +596,33 @@ impl OffloadedTimeline { archived_at: *archived_at, } } + /// Delete this timeline's retain_lsn from its ancestor, if present in the given tenant + fn delete_from_ancestor_with_timelines( + &self, + timelines: &std::sync::MutexGuard<'_, HashMap>>, + ) { + if let (Some(_retain_lsn), Some(ancestor_timeline_id)) = + (self.ancestor_retain_lsn, self.ancestor_timeline_id) + { + if let Some((_, ancestor_timeline)) = timelines + .iter() + .find(|(tid, _tl)| **tid == ancestor_timeline_id) + { + ancestor_timeline + .gc_info + .write() + .unwrap() + .remove_child_offloaded(self.timeline_id); + } + } + self.deleted_from_ancestor.store(true, Ordering::Release); + } + /// Call [`Self::delete_from_ancestor_with_timelines`] instead if possible. + /// + /// As the entire tenant is being dropped, don't bother deregistering the `retain_lsn` from the ancestor. + fn defuse_for_tenant_drop(&self) { + self.deleted_from_ancestor.store(true, Ordering::Release); + } } impl fmt::Debug for OffloadedTimeline { @@ -589,6 +631,17 @@ impl fmt::Debug for OffloadedTimeline { } } +impl Drop for OffloadedTimeline { + fn drop(&mut self) { + if !self.deleted_from_ancestor.load(Ordering::Acquire) { + tracing::warn!( + "offloaded timeline {} was dropped without having cleaned it up at the ancestor", + self.timeline_id + ); + } + } +} + #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub enum MaybeOffloaded { Yes, @@ -1531,7 +1584,7 @@ impl Tenant { } // Complete deletions for offloaded timeline id's. 
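The retain loop below is one of the cleanup paths that must call `defuse_for_tenant_drop` before an `OffloadedTimeline` is dropped. As an aside, a minimal sketch of that defuse-before-drop pattern, using plain std types rather than the actual pageserver structs:

use std::sync::atomic::{AtomicBool, Ordering};

struct DefuseGuardSketch {
    cleaned_up_at_ancestor: AtomicBool,
}

impl DefuseGuardSketch {
    // Every cleanup path (deletion, tenant shutdown/drop) marks the guard as defused.
    fn defuse(&self) {
        self.cleaned_up_at_ancestor.store(true, Ordering::Release);
    }
}

impl Drop for DefuseGuardSketch {
    fn drop(&mut self) {
        // Dropping without defusing means the ancestor may still hold a stale retain_lsn.
        if !self.cleaned_up_at_ancestor.load(Ordering::Acquire) {
            eprintln!("dropped without having been cleaned up at the ancestor");
        }
    }
}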
offloaded_timelines_list - .retain(|(offloaded_id, _offloaded)| { + .retain(|(offloaded_id, offloaded)| { // At this point, offloaded_timeline_ids has the list of all offloaded timelines // without a prefix in S3, so they are inexistent. // In the end, existence of a timeline is finally determined by the existence of an index-part.json in remote storage. @@ -1539,6 +1592,7 @@ impl Tenant { let delete = offloaded_timeline_ids.contains(offloaded_id); if delete { tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); + offloaded.defuse_for_tenant_drop(); } !delete }); @@ -1927,9 +1981,15 @@ impl Tenant { ))); }; let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap(); - if offloaded_timelines.remove(&timeline_id).is_none() { - warn!("timeline already removed from offloaded timelines"); + match offloaded_timelines.remove(&timeline_id) { + Some(offloaded) => { + offloaded.delete_from_ancestor_with_timelines(&timelines); + } + None => warn!("timeline already removed from offloaded timelines"), } + + self.initialize_gc_info(&timelines, &offloaded_timelines, Some(timeline_id)); + Arc::clone(timeline) }; @@ -2667,7 +2727,7 @@ impl Tenant { .filter(|timeline| !(timeline.is_broken() || timeline.is_stopping())); // Before activation, populate each Timeline's GcInfo with information about its children - self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor); + self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor, None); // Spawn gc and compaction loops. The loops will shut themselves // down when they notice that the tenant is inactive. @@ -2782,8 +2842,14 @@ impl Tenant { let timeline_id = timeline.timeline_id; let span = tracing::info_span!("timeline_shutdown", %timeline_id, ?shutdown_mode); js.spawn(async move { timeline.shutdown(shutdown_mode).instrument(span).await }); - }) - }; + }); + } + { + let timelines_offloaded = self.timelines_offloaded.lock().unwrap(); + timelines_offloaded.values().for_each(|timeline| { + timeline.defuse_for_tenant_drop(); + }); + } // test_long_timeline_create_then_tenant_delete is leaning on this message tracing::info!("Waiting for timelines..."); while let Some(res) = js.join_next().await { @@ -3767,10 +3833,13 @@ impl Tenant { &self, timelines: &std::sync::MutexGuard>>, timelines_offloaded: &std::sync::MutexGuard>>, + restrict_to_timeline: Option, ) { - // This function must be called before activation: after activation timeline create/delete operations - // might happen, and this function is not safe to run concurrently with those. - assert!(!self.is_active()); + if restrict_to_timeline.is_none() { + // This function must be called before activation: after activation timeline create/delete operations + // might happen, and this function is not safe to run concurrently with those. + assert!(!self.is_active()); + } // Scan all timelines. For each timeline, remember the timeline ID and // the branch point where it was created. 
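For context on the bookkeeping that the hunks below (and `GcInfo::insert_child`/`remove_child_maybe_offloaded` later in this patch) maintain, a simplified sketch with illustrative types only, not the real `GcInfo`:

#[derive(Clone, Copy, PartialEq, Eq)]
enum OffloadState { Yes, No }

#[derive(Default)]
struct GcInfoSketch {
    // (retain_lsn, child timeline id, offload state of the child)
    retain_lsns: Vec<(u64, u128, OffloadState)>,
}

impl GcInfoSketch {
    fn insert_child(&mut self, child: u128, at_lsn: u64, state: OffloadState) {
        self.retain_lsns.push((at_lsn, child, state));
        self.retain_lsns.sort_by_key(|entry| entry.0);
    }
    // Each cleanup path removes only the entry whose offload state it owns, so
    // offloading, unoffloading and deletion cannot clobber each other's entries.
    fn remove_child(&mut self, child: u128, state: OffloadState) {
        self.retain_lsns
            .retain(|entry| !(entry.1 == child && entry.2 == state));
    }
}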
@@ -3803,7 +3872,12 @@ impl Tenant { let horizon = self.get_gc_horizon(); // Populate each timeline's GcInfo with information about its child branches - for timeline in timelines.values() { + let timelines_to_write = if let Some(timeline_id) = restrict_to_timeline { + itertools::Either::Left(timelines.get(&timeline_id).into_iter()) + } else { + itertools::Either::Right(timelines.values()) + }; + for timeline in timelines_to_write { let mut branchpoints: Vec<(Lsn, TimelineId, MaybeOffloaded)> = all_branchpoints .remove(&timeline.timeline_id) .unwrap_or_default(); @@ -9650,4 +9724,54 @@ mod tests { Ok(()) } + + #[cfg(feature = "testing")] + #[tokio::test] + async fn test_timeline_offload_retain_lsn() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_timeline_offload_retain_lsn") + .await + .unwrap(); + let (tenant, ctx) = harness.load().await; + let tline_parent = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + let tline_child = tenant + .branch_timeline_test(&tline_parent, NEW_TIMELINE_ID, Some(Lsn(0x20)), &ctx) + .await + .unwrap(); + { + let gc_info_parent = tline_parent.gc_info.read().unwrap(); + assert_eq!( + gc_info_parent.retain_lsns, + vec![(Lsn(0x20), tline_child.timeline_id, MaybeOffloaded::No)] + ); + } + // We have to directly call the remote_client instead of using the archive function to avoid constructing broker client... + tline_child + .remote_client + .schedule_index_upload_for_timeline_archival_state(TimelineArchivalState::Archived) + .unwrap(); + tline_child.remote_client.wait_completion().await.unwrap(); + offload_timeline(&tenant, &tline_child) + .instrument(tracing::info_span!(parent: None, "offload_test", tenant_id=%"test", shard_id=%"test", timeline_id=%"test")) + .await.unwrap(); + let child_timeline_id = tline_child.timeline_id; + Arc::try_unwrap(tline_child).unwrap(); + + { + let gc_info_parent = tline_parent.gc_info.read().unwrap(); + assert_eq!( + gc_info_parent.retain_lsns, + vec![(Lsn(0x20), child_timeline_id, MaybeOffloaded::Yes)] + ); + } + + tenant + .get_offloaded_timeline(child_timeline_id) + .unwrap() + .defuse_for_tenant_drop(); + + Ok(()) + } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 09ddb19765..2bc14ec317 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -477,8 +477,21 @@ impl GcInfo { self.retain_lsns.sort_by_key(|i| i.0); } - pub(super) fn remove_child(&mut self, child_id: TimelineId) { - self.retain_lsns.retain(|i| i.1 != child_id); + pub(super) fn remove_child_maybe_offloaded( + &mut self, + child_id: TimelineId, + maybe_offloaded: MaybeOffloaded, + ) { + self.retain_lsns + .retain(|i| !(i.1 == child_id && i.2 == maybe_offloaded)); + } + + pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No); + } + + pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes); } } @@ -4501,7 +4514,7 @@ impl Drop for Timeline { // This lock should never be poisoned, but in case it is we do a .map() instead of // an unwrap(), to avoid panicking in a destructor and thereby aborting the process. if let Ok(mut gc_info) = ancestor.gc_info.write() { - gc_info.remove_child(self.timeline_id) + gc_info.remove_child_not_offloaded(self.timeline_id) } } } @@ -5030,7 +5043,7 @@ impl Timeline { // 1. Is it newer than GC horizon cutoff point? 
if l.get_lsn_range().end > space_cutoff { - debug!( + info!( "keeping {} because it's newer than space_cutoff {}", l.layer_name(), space_cutoff, @@ -5041,7 +5054,7 @@ impl Timeline { // 2. It is newer than PiTR cutoff point? if l.get_lsn_range().end > time_cutoff { - debug!( + info!( "keeping {} because it's newer than time_cutoff {}", l.layer_name(), time_cutoff, @@ -5060,7 +5073,7 @@ impl Timeline { for retain_lsn in &retain_lsns { // start_lsn is inclusive if &l.get_lsn_range().start <= retain_lsn { - debug!( + info!( "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}", l.layer_name(), retain_lsn, @@ -5075,7 +5088,7 @@ impl Timeline { if let Some(lsn) = &max_lsn_with_valid_lease { // keep if layer start <= any of the lease if &l.get_lsn_range().start <= lsn { - debug!( + info!( "keeping {} because there is a valid lease preventing GC at {}", l.layer_name(), lsn, @@ -5107,13 +5120,13 @@ impl Timeline { if !layers .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff)) { - debug!("keeping {} because it is the latest layer", l.layer_name()); + info!("keeping {} because it is the latest layer", l.layer_name()); result.layers_not_updated += 1; continue 'outer; } // We didn't find any reason to keep this file, so remove it. - debug!( + info!( "garbage collecting {} is_dropped: xx is_incremental: {}", l.layer_name(), l.is_incremental(), diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 69001a6c40..13a8dfa51a 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -141,9 +141,10 @@ async fn remove_maybe_offloaded_timeline_from_tenant( ); } TimelineOrOffloaded::Offloaded(timeline) => { - timelines_offloaded + let offloaded_timeline = timelines_offloaded .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map"); + offloaded_timeline.delete_from_ancestor_with_timelines(&timelines); } } diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 1394843467..3595d743bc 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -66,7 +66,7 @@ pub(crate) async fn offload_timeline( let conf = &tenant.conf; delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await; - remove_timeline_from_tenant(tenant, &timeline, &guard); + let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard); { let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap(); @@ -87,16 +87,20 @@ pub(crate) async fn offload_timeline( // not our actual state of offloaded timelines. tenant.store_tenant_manifest().await?; + tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})"); + Ok(()) } /// It is important that this gets called when DeletionGuard is being held. /// For more context see comments in [`DeleteTimelineFlow::prepare`] +/// +/// Returns the strong count of the timeline `Arc` fn remove_timeline_from_tenant( tenant: &Tenant, timeline: &Timeline, _: &DeletionGuard, // using it as a witness -) { +) -> usize { // Remove the timeline from the map. 
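The `remaining_refcount` logged above is simply `Arc::strong_count`, observed on the timeline handle this function takes out of the map; a tiny, self-contained illustration of what that count means (plain std types, hypothetical values):

use std::sync::Arc;

fn main() {
    let timeline = Arc::new("timeline state");
    let map_handle = Arc::clone(&timeline);  // stand-in for the entry in the timelines map
    let task_handle = Arc::clone(&timeline); // e.g. a background task still holding the Arc

    // Three strong references exist: `timeline`, `map_handle`, `task_handle`.
    assert_eq!(Arc::strong_count(&timeline), 3);

    drop(map_handle); // removing the map entry drops one of them
    assert_eq!(Arc::strong_count(&timeline), 2);

    drop(task_handle);
    assert_eq!(Arc::strong_count(&timeline), 1);
}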
let mut timelines = tenant.timelines.lock().unwrap(); let children_exist = timelines @@ -109,7 +113,9 @@ fn remove_timeline_from_tenant( panic!("Timeline grew children while we removed layer files"); } - timelines + let timeline = timelines .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines' map"); + + Arc::strong_count(&timeline) } diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 83631405ab..ba4e79c343 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -15,13 +15,19 @@ from fixtures.neon_fixtures import ( last_flush_lsn_upload, ) from fixtures.pageserver.http import PageserverApiException -from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty, list_prefix +from fixtures.pageserver.utils import ( + assert_prefix_empty, + assert_prefix_not_empty, + list_prefix, + wait_until_tenant_active, +) from fixtures.pg_version import PgVersion from fixtures.remote_storage import S3Storage, s3_storage from fixtures.utils import run_only_on_default_postgres, wait_until from mypy_boto3_s3.type_defs import ( ObjectTypeDef, ) +from psycopg2.errors import IoError, UndefinedTable @pytest.mark.parametrize("shard_count", [0, 4]) @@ -641,8 +647,21 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): assert violations == [] -@pytest.mark.parametrize("offload_child", ["offload", "offload-corrupt", "archive", None]) -def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Optional[str]): +@pytest.mark.parametrize("with_intermediary", [False, True]) +@pytest.mark.parametrize( + "offload_child", + [ + "offload", + "offload-corrupt", + "offload-no-restart", + "offload-parent", + "archive", + None, + ], +) +def test_timeline_retain_lsn( + neon_env_builder: NeonEnvBuilder, with_intermediary: bool, offload_child: Optional[str] +): """ Ensure that retain_lsn functionality for timelines works, both for offloaded and non-offloaded ones """ @@ -650,6 +669,7 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op # Our corruption code only works with S3 compatible storage neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.rust_log_override = "info,[gc_timeline]=debug" env = neon_env_builder.init_start() ps_http = env.pageserver.http_client() @@ -657,22 +677,30 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op tenant_id, root_timeline_id = env.create_tenant( conf={ # small checkpointing and compaction targets to ensure we generate many upload operations - "checkpoint_distance": 128 * 1024, + "checkpoint_distance": 32 * 1024, "compaction_threshold": 1, - "compaction_target_size": 128 * 1024, + "compaction_target_size": 32 * 1024, # set small image creation thresholds so that gc deletes data - "image_creation_threshold": 2, + "image_creation_threshold": 1, # disable background compaction and GC. We invoke it manually when we want it to happen. 
"gc_period": "0s", "compaction_period": "0s", # Disable pitr, we only want the latest lsn "pitr_interval": "0s", + "gc_horizon": 0, # Don't rely on endpoint lsn leases "lsn_lease_length": "0s", } ) - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + if with_intermediary: + parent_branch_name = "test_archived_parent" + parent_timeline_id = env.create_branch("test_archived_parent", tenant_id) + else: + parent_branch_name = "main" + parent_timeline_id = root_timeline_id + + with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint: endpoint.safe_psql_many( [ "CREATE TABLE foo(v int, key serial primary key, t text default 'data_content')", @@ -682,14 +710,16 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op ) pre_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") log.info(f"Pre branch sum: {pre_branch_sum}") - last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id) + last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id) # Create a branch and write some additional data to the parent - child_timeline_id = env.create_branch("test_archived_branch", tenant_id) + child_timeline_id = env.create_branch( + "test_archived_branch", tenant_id, ancestor_branch_name=parent_branch_name + ) - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: - # Do some churn of the data. This is important so that we can overwrite image layers. - for i in range(10): + with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint: + # Do some overwriting churn with compactions in between. This is important so that we can overwrite image layers. + for i in range(5): endpoint.safe_psql_many( [ f"SELECT setseed(0.23{i})", @@ -698,9 +728,9 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op "UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 0", ] ) + last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id) post_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") log.info(f"Post branch sum: {post_branch_sum}") - last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id) if offload_child is not None: ps_http.timeline_archival_config( @@ -715,9 +745,19 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op assert leaf_detail["is_archived"] is True if "offload" in offload_child: ps_http.timeline_offload(tenant_id, child_timeline_id) + if "offload-parent" in offload_child: + # Also offload the parent to ensure the retain_lsn of the child + # is entered in the parent at unoffloading + ps_http.timeline_archival_config( + tenant_id, + parent_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + ps_http.timeline_offload(tenant_id, parent_timeline_id) # Do a restart to get rid of any in-memory objects (we only init gc info once, at attach) - env.pageserver.stop() + if offload_child is None or "no-restart" not in offload_child: + env.pageserver.stop() if offload_child == "offload-corrupt": assert isinstance(env.pageserver_remote_storage, S3Storage) listing = list_prefix( @@ -752,13 +792,21 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op ".*page_service_conn_main.*could not find data for key.*", ] ) - env.pageserver.start() + if offload_child is None or "no-restart" not in offload_child: + env.pageserver.start() + if offload_child == "offload-parent": + wait_until_tenant_active(ps_http, tenant_id=tenant_id) + 
ps_http.timeline_archival_config( + tenant_id, + parent_timeline_id, + state=TimelineArchivalState.UNARCHIVED, + ) # Do an agressive gc and compaction of the parent branch - ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=root_timeline_id, gc_horizon=0) + ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=parent_timeline_id, gc_horizon=0) ps_http.timeline_checkpoint( tenant_id, - root_timeline_id, + parent_timeline_id, force_l0_compaction=True, force_repartition=True, wait_until_uploaded=True, @@ -774,10 +822,15 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op # Now, after unarchival, the child timeline should still have its data accessible (or corrupted) if offload_child == "offload-corrupt": - with pytest.raises(RuntimeError, match=".*failed to get basebackup.*"): - env.endpoints.create_start( + if with_intermediary: + error_regex = "(.*could not read .* from page server.*|.*relation .* does not exist)" + else: + error_regex = ".*failed to get basebackup.*" + with pytest.raises((RuntimeError, IoError, UndefinedTable), match=error_regex): + with env.endpoints.create_start( "test_archived_branch", tenant_id=tenant_id, basebackup_request_tries=1 - ) + ) as endpoint: + endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") else: with env.endpoints.create_start("test_archived_branch", tenant_id=tenant_id) as endpoint: sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") From e12628fe936769217dd28f537e834d10d5901289 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Fri, 15 Nov 2024 17:42:41 +0000 Subject: [PATCH 04/43] Collect max_connections metric (#9770) This will further allow us to expose this metric to users --- compute/etc/neon_collector.jsonnet | 1 + .../etc/sql_exporter/compute_max_connections.libsonnet | 10 ++++++++++ compute/etc/sql_exporter/compute_max_connections.sql | 1 + 3 files changed, 12 insertions(+) create mode 100644 compute/etc/sql_exporter/compute_max_connections.libsonnet create mode 100644 compute/etc/sql_exporter/compute_max_connections.sql diff --git a/compute/etc/neon_collector.jsonnet b/compute/etc/neon_collector.jsonnet index c6fa645b41..75d69c7b68 100644 --- a/compute/etc/neon_collector.jsonnet +++ b/compute/etc/neon_collector.jsonnet @@ -6,6 +6,7 @@ import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet', import 'sql_exporter/compute_current_lsn.libsonnet', import 'sql_exporter/compute_logical_snapshot_files.libsonnet', + import 'sql_exporter/compute_max_connections.libsonnet', import 'sql_exporter/compute_receive_lsn.libsonnet', import 'sql_exporter/compute_subscriptions_count.libsonnet', import 'sql_exporter/connection_counts.libsonnet', diff --git a/compute/etc/sql_exporter/compute_max_connections.libsonnet b/compute/etc/sql_exporter/compute_max_connections.libsonnet new file mode 100644 index 0000000000..69cfa1f19c --- /dev/null +++ b/compute/etc/sql_exporter/compute_max_connections.libsonnet @@ -0,0 +1,10 @@ +{ + metric_name: 'compute_max_connections', + type: 'gauge', + help: 'Max connections allowed for Postgres', + key_labels: null, + values: [ + 'max_connections', + ], + query: importstr 'sql_exporter/compute_max_connections.sql', +} diff --git a/compute/etc/sql_exporter/compute_max_connections.sql b/compute/etc/sql_exporter/compute_max_connections.sql new file mode 100644 index 0000000000..99a49483a6 --- /dev/null +++ b/compute/etc/sql_exporter/compute_max_connections.sql @@ -0,0 +1 @@ +SELECT current_setting('max_connections') as max_connections; From 
2af791ba83ee402c2ebb9b1f436f9edb9351d468 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 15 Nov 2024 20:34:48 +0000 Subject: [PATCH 05/43] wal_decoder: make InterpretedWalRecord serde (#9775) ## Problem We want to serialize interpreted records to send them over the wire from safekeeper to pageserver. ## Summary of changes Make `InterpretedWalRecord` ser/de. This is a temporary change to get the bulk of the lift merged in https://github.com/neondatabase/neon/pull/9746. For going to prod, we don't want to use bincode since we can't evolve the schema. Questions on serialization will be tackled separately. --- libs/pageserver_api/src/key.rs | 2 +- libs/pageserver_api/src/reltag.rs | 2 +- libs/postgres_ffi/src/walrecord.rs | 16 ++++++------- libs/wal_decoder/src/models.rs | 29 ++++++++++++++++++++++++ libs/wal_decoder/src/serialized_batch.rs | 5 ++++ 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index b3fcaae62f..4505101ea6 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -24,7 +24,7 @@ pub struct Key { /// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as /// a struct of fields. -#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] pub struct CompactKey(i128); /// The storage key size. diff --git a/libs/pageserver_api/src/reltag.rs b/libs/pageserver_api/src/reltag.rs index 010a9c2932..09d1fae221 100644 --- a/libs/pageserver_api/src/reltag.rs +++ b/libs/pageserver_api/src/reltag.rs @@ -24,7 +24,7 @@ use postgres_ffi::Oid; // FIXME: should move 'forknum' as last field to keep this consistent with Postgres. // Then we could replace the custom Ord and PartialOrd implementations below with // deriving them. This will require changes in walredoproc.c. 
-#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize)] +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)] pub struct RelTag { pub forknum: u8, pub spcnode: Oid, diff --git a/libs/postgres_ffi/src/walrecord.rs b/libs/postgres_ffi/src/walrecord.rs index dedbaef64d..b32106632a 100644 --- a/libs/postgres_ffi/src/walrecord.rs +++ b/libs/postgres_ffi/src/walrecord.rs @@ -16,7 +16,7 @@ use utils::bin_ser::DeserializeError; use utils::lsn::Lsn; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlMultiXactCreate { pub mid: MultiXactId, /* new MultiXact's ID */ @@ -46,7 +46,7 @@ impl XlMultiXactCreate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlMultiXactTruncate { pub oldest_multi_db: Oid, /* to-be-truncated range of multixact offsets */ @@ -72,7 +72,7 @@ impl XlMultiXactTruncate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlRelmapUpdate { pub dbid: Oid, /* database ID, or 0 for shared map */ pub tsid: Oid, /* database's tablespace, or pg_global */ @@ -90,7 +90,7 @@ impl XlRelmapUpdate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlReploriginDrop { pub node_id: RepOriginId, } @@ -104,7 +104,7 @@ impl XlReploriginDrop { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlReploriginSet { pub remote_lsn: Lsn, pub node_id: RepOriginId, @@ -120,7 +120,7 @@ impl XlReploriginSet { } #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct RelFileNode { pub spcnode: Oid, /* tablespace */ pub dbnode: Oid, /* database */ @@ -911,7 +911,7 @@ impl XlSmgrCreate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlSmgrTruncate { pub blkno: BlockNumber, pub rnode: RelFileNode, @@ -984,7 +984,7 @@ impl XlDropDatabase { /// xl_xact_parsed_abort structs in PostgreSQL, but we use the same /// struct for commits and aborts. /// -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlXactParsedRecord { pub xid: TransactionId, pub info: u8, diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs index 5d90eeb69c..88371fe51e 100644 --- a/libs/wal_decoder/src/models.rs +++ b/libs/wal_decoder/src/models.rs @@ -32,16 +32,19 @@ use postgres_ffi::walrecord::{ XlSmgrTruncate, XlXactParsedRecord, }; use postgres_ffi::{Oid, TransactionId}; +use serde::{Deserialize, Serialize}; use utils::lsn::Lsn; use crate::serialized_batch::SerializedValueBatch; +#[derive(Serialize, Deserialize)] pub enum FlushUncommittedRecords { Yes, No, } /// An interpreted Postgres WAL record, ready to be handled by the pageserver +#[derive(Serialize, Deserialize)] pub struct InterpretedWalRecord { /// Optional metadata record - may cause writes to metadata keys /// in the storage engine @@ -62,6 +65,7 @@ pub struct InterpretedWalRecord { /// The interpreted part of the Postgres WAL record which requires metadata /// writes to the underlying storage engine. 
+#[derive(Serialize, Deserialize)] pub enum MetadataRecord { Heapam(HeapamRecord), Neonrmgr(NeonrmgrRecord), @@ -77,10 +81,12 @@ pub enum MetadataRecord { Replorigin(ReploriginRecord), } +#[derive(Serialize, Deserialize)] pub enum HeapamRecord { ClearVmBits(ClearVmBits), } +#[derive(Serialize, Deserialize)] pub struct ClearVmBits { pub new_heap_blkno: Option, pub old_heap_blkno: Option, @@ -88,24 +94,29 @@ pub struct ClearVmBits { pub flags: u8, } +#[derive(Serialize, Deserialize)] pub enum NeonrmgrRecord { ClearVmBits(ClearVmBits), } +#[derive(Serialize, Deserialize)] pub enum SmgrRecord { Create(SmgrCreate), Truncate(XlSmgrTruncate), } +#[derive(Serialize, Deserialize)] pub struct SmgrCreate { pub rel: RelTag, } +#[derive(Serialize, Deserialize)] pub enum DbaseRecord { Create(DbaseCreate), Drop(DbaseDrop), } +#[derive(Serialize, Deserialize)] pub struct DbaseCreate { pub db_id: Oid, pub tablespace_id: Oid, @@ -113,27 +124,32 @@ pub struct DbaseCreate { pub src_tablespace_id: Oid, } +#[derive(Serialize, Deserialize)] pub struct DbaseDrop { pub db_id: Oid, pub tablespace_ids: Vec, } +#[derive(Serialize, Deserialize)] pub enum ClogRecord { ZeroPage(ClogZeroPage), Truncate(ClogTruncate), } +#[derive(Serialize, Deserialize)] pub struct ClogZeroPage { pub segno: u32, pub rpageno: u32, } +#[derive(Serialize, Deserialize)] pub struct ClogTruncate { pub pageno: u32, pub oldest_xid: TransactionId, pub oldest_xid_db: Oid, } +#[derive(Serialize, Deserialize)] pub enum XactRecord { Commit(XactCommon), Abort(XactCommon), @@ -142,6 +158,7 @@ pub enum XactRecord { Prepare(XactPrepare), } +#[derive(Serialize, Deserialize)] pub struct XactCommon { pub parsed: XlXactParsedRecord, pub origin_id: u16, @@ -150,61 +167,73 @@ pub struct XactCommon { pub lsn: Lsn, } +#[derive(Serialize, Deserialize)] pub struct XactPrepare { pub xl_xid: TransactionId, pub data: Bytes, } +#[derive(Serialize, Deserialize)] pub enum MultiXactRecord { ZeroPage(MultiXactZeroPage), Create(XlMultiXactCreate), Truncate(XlMultiXactTruncate), } +#[derive(Serialize, Deserialize)] pub struct MultiXactZeroPage { pub slru_kind: SlruKind, pub segno: u32, pub rpageno: u32, } +#[derive(Serialize, Deserialize)] pub enum RelmapRecord { Update(RelmapUpdate), } +#[derive(Serialize, Deserialize)] pub struct RelmapUpdate { pub update: XlRelmapUpdate, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum XlogRecord { Raw(RawXlogRecord), } +#[derive(Serialize, Deserialize)] pub struct RawXlogRecord { pub info: u8, pub lsn: Lsn, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum LogicalMessageRecord { Put(PutLogicalMessage), #[cfg(feature = "testing")] Failpoint, } +#[derive(Serialize, Deserialize)] pub struct PutLogicalMessage { pub path: String, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum StandbyRecord { RunningXacts(StandbyRunningXacts), } +#[derive(Serialize, Deserialize)] pub struct StandbyRunningXacts { pub oldest_running_xid: TransactionId, } +#[derive(Serialize, Deserialize)] pub enum ReploriginRecord { Set(XlReploriginSet), Drop(XlReploriginDrop), diff --git a/libs/wal_decoder/src/serialized_batch.rs b/libs/wal_decoder/src/serialized_batch.rs index 8f33291023..632603cc8b 100644 --- a/libs/wal_decoder/src/serialized_batch.rs +++ b/libs/wal_decoder/src/serialized_batch.rs @@ -16,6 +16,7 @@ use pageserver_api::shard::ShardIdentity; use pageserver_api::{key::CompactKey, value::Value}; use postgres_ffi::walrecord::{DecodedBkpBlock, DecodedWALRecord}; use postgres_ffi::{page_is_new, page_set_lsn, pg_constants, 
BLCKSZ}; +use serde::{Deserialize, Serialize}; use utils::bin_ser::BeSer; use utils::lsn::Lsn; @@ -29,6 +30,7 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]); /// relation sizes. In the case of "observed" values, we only need to know /// the key and LSN, so two types of metadata are supported to save on network /// bandwidth. +#[derive(Serialize, Deserialize)] pub enum ValueMeta { Serialized(SerializedValueMeta), Observed(ObservedValueMeta), @@ -75,6 +77,7 @@ impl PartialEq for OrderedValueMeta { impl Eq for OrderedValueMeta {} /// Metadata for a [`Value`] serialized into the batch. +#[derive(Serialize, Deserialize)] pub struct SerializedValueMeta { pub key: CompactKey, pub lsn: Lsn, @@ -86,12 +89,14 @@ pub struct SerializedValueMeta { } /// Metadata for a [`Value`] observed by the batch +#[derive(Serialize, Deserialize)] pub struct ObservedValueMeta { pub key: CompactKey, pub lsn: Lsn, } /// Batch of serialized [`Value`]s. +#[derive(Serialize, Deserialize)] pub struct SerializedValueBatch { /// [`Value`]s serialized in EphemeralFile's native format, /// ready for disk write by the pageserver From 23eabb9919a0457253455bfa844b1c6503b3f8d6 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Fri, 15 Nov 2024 15:17:23 -0600 Subject: [PATCH 06/43] Fix PG_MAJORVERSION_NUM typo In ea32f1d0a36a4d77c1181d623f14a91f2a06d6dd, Matthias added a feature to our extension to expose more granular wait events. However, due to the typo, those wait events were never registered, so we used the more generic wait events instead. Signed-off-by: Tristan Partin --- pgxn/neon/neon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index dc87d79e87..f207ed61f9 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -512,7 +512,7 @@ neon_shmem_startup_hook(void) if (prev_shmem_startup_hook) prev_shmem_startup_hook(); -#if PG_PG_MAJORVERSION_NUM >= 17 +#if PG_MAJORVERSION_NUM >= 17 WAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew("Neon/FileCache_Maintenance"); WAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew("Neon/FileCache_Read"); WAIT_EVENT_NEON_LFC_TRUNCATE = WaitEventExtensionNew("Neon/FileCache_Truncate"); From ac689ab01406a797f5347a6799d809f375468d52 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 15 Nov 2024 21:53:11 +0000 Subject: [PATCH 07/43] wal_decoder: rename end_lsn to next_record_lsn (#9776) ## Problem It turns out that `WalStreamDecoder::poll_decode` returns the start LSN of the next record and not the end LSN of the current record. They are not always equal. For example, they're not equal when the record in question is an XLOG SWITCH record. ## Summary of changes Rename things to reflect that. 
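For illustration, consider an XLOG SWITCH record: the rest of its WAL segment is padded, so the following record begins in the next segment (just past that segment's page header) rather than at the switch record's end LSN. A simplified arithmetic sketch, assuming the default 16 MiB segment size and plain integer LSNs rather than the decoder's actual types:

const WAL_SEG_SIZE: u64 = 16 * 1024 * 1024;

// Start of the segment in which the record following an XLOG SWITCH lives:
// round the switch record's end LSN up to the next segment boundary.
fn next_segment_start(switch_record_end: u64) -> u64 {
    ((switch_record_end + WAL_SEG_SIZE - 1) / WAL_SEG_SIZE) * WAL_SEG_SIZE
}

// e.g. a switch record ending at LSN 0/1000008 is followed by a record in the
// segment starting at 0/2000000, so "end of current record" and "start of the
// next record" genuinely differ here.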
--- libs/wal_decoder/src/decoder.rs | 18 +++++++++++------- libs/wal_decoder/src/models.rs | 6 ++++-- libs/wal_decoder/src/serialized_batch.rs | 18 +++++++++++------- .../walreceiver/walreceiver_connection.rs | 12 ++++++------ pageserver/src/walingest.rs | 2 +- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/libs/wal_decoder/src/decoder.rs b/libs/wal_decoder/src/decoder.rs index 684718d220..1895f25bfc 100644 --- a/libs/wal_decoder/src/decoder.rs +++ b/libs/wal_decoder/src/decoder.rs @@ -19,7 +19,7 @@ impl InterpretedWalRecord { pub fn from_bytes_filtered( buf: Bytes, shard: &ShardIdentity, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result { let mut decoded = DecodedWALRecord::default(); @@ -32,18 +32,18 @@ impl InterpretedWalRecord { FlushUncommittedRecords::No }; - let metadata_record = MetadataRecord::from_decoded(&decoded, record_end_lsn, pg_version)?; + let metadata_record = MetadataRecord::from_decoded(&decoded, next_record_lsn, pg_version)?; let batch = SerializedValueBatch::from_decoded_filtered( decoded, shard, - record_end_lsn, + next_record_lsn, pg_version, )?; Ok(InterpretedWalRecord { metadata_record, batch, - end_lsn: record_end_lsn, + next_record_lsn, flush_uncommitted, xid, }) @@ -53,7 +53,7 @@ impl InterpretedWalRecord { impl MetadataRecord { fn from_decoded( decoded: &DecodedWALRecord, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result> { // Note: this doesn't actually copy the bytes since @@ -74,7 +74,9 @@ impl MetadataRecord { Ok(None) } pg_constants::RM_CLOG_ID => Self::decode_clog_record(&mut buf, decoded, pg_version), - pg_constants::RM_XACT_ID => Self::decode_xact_record(&mut buf, decoded, record_end_lsn), + pg_constants::RM_XACT_ID => { + Self::decode_xact_record(&mut buf, decoded, next_record_lsn) + } pg_constants::RM_MULTIXACT_ID => { Self::decode_multixact_record(&mut buf, decoded, pg_version) } @@ -86,7 +88,9 @@ impl MetadataRecord { // // Alternatively, one can make the checkpoint part of the subscription protocol // to the pageserver. This should work fine, but can be done at a later point. - pg_constants::RM_XLOG_ID => Self::decode_xlog_record(&mut buf, decoded, record_end_lsn), + pg_constants::RM_XLOG_ID => { + Self::decode_xlog_record(&mut buf, decoded, next_record_lsn) + } pg_constants::RM_LOGICALMSG_ID => { Self::decode_logical_message_record(&mut buf, decoded) } diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs index 88371fe51e..c69f8c869a 100644 --- a/libs/wal_decoder/src/models.rs +++ b/libs/wal_decoder/src/models.rs @@ -52,8 +52,10 @@ pub struct InterpretedWalRecord { /// A pre-serialized batch along with the required metadata for ingestion /// by the pageserver pub batch: SerializedValueBatch, - /// Byte offset within WAL for the end of the original PG WAL record - pub end_lsn: Lsn, + /// Byte offset within WAL for the start of the next PG WAL record. + /// Usually this is the end LSN of the current record, but in case of + /// XLOG SWITCH records it will be within the next segment. + pub next_record_lsn: Lsn, /// Whether to flush all uncommitted modifications to the storage engine /// before ingesting this record. This is currently only used for legacy PG /// database creations which read pages from a template database. 
Such WAL diff --git a/libs/wal_decoder/src/serialized_batch.rs b/libs/wal_decoder/src/serialized_batch.rs index 632603cc8b..9c0708ebbe 100644 --- a/libs/wal_decoder/src/serialized_batch.rs +++ b/libs/wal_decoder/src/serialized_batch.rs @@ -137,7 +137,7 @@ impl SerializedValueBatch { pub(crate) fn from_decoded_filtered( decoded: DecodedWALRecord, shard: &ShardIdentity, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result { // First determine how big the buffer needs to be and allocate it up-front. @@ -161,13 +161,17 @@ impl SerializedValueBatch { let key = rel_block_to_key(rel, blk.blkno); if !key.is_valid_key_on_write_path() { - anyhow::bail!("Unsupported key decoded at LSN {}: {}", record_end_lsn, key); + anyhow::bail!( + "Unsupported key decoded at LSN {}: {}", + next_record_lsn, + key + ); } let key_is_local = shard.is_key_local(&key); tracing::debug!( - lsn=%record_end_lsn, + lsn=%next_record_lsn, key=%key, "ingest: shard decision {}", if !key_is_local { "drop" } else { "keep" }, @@ -179,7 +183,7 @@ impl SerializedValueBatch { // its blkno in case it implicitly extends a relation. metadata.push(ValueMeta::Observed(ObservedValueMeta { key: key.to_compact(), - lsn: record_end_lsn, + lsn: next_record_lsn, })) } @@ -210,7 +214,7 @@ impl SerializedValueBatch { // that would corrupt the page. // if !page_is_new(&image) { - page_set_lsn(&mut image, record_end_lsn) + page_set_lsn(&mut image, next_record_lsn) } assert_eq!(image.len(), BLCKSZ as usize); @@ -229,12 +233,12 @@ impl SerializedValueBatch { metadata.push(ValueMeta::Serialized(SerializedValueMeta { key: key.to_compact(), - lsn: record_end_lsn, + lsn: next_record_lsn, batch_offset: relative_off, len: val_ser_size, will_init: val.will_init(), })); - max_lsn = std::cmp::max(max_lsn, record_end_lsn); + max_lsn = std::cmp::max(max_lsn, next_record_lsn); len += 1; } diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 34bf959058..6ac6920d47 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -331,11 +331,11 @@ pub(super) async fn handle_walreceiver_connection( Ok(()) } - while let Some((record_end_lsn, recdata)) = waldecoder.poll_decode()? { + while let Some((next_record_lsn, recdata)) = waldecoder.poll_decode()? { // It is important to deal with the aligned records as lsn in getPage@LSN is // aligned and can be several bytes bigger. Without this alignment we are // at risk of hitting a deadlock. 
- if !record_end_lsn.is_aligned() { + if !next_record_lsn.is_aligned() { return Err(WalReceiverError::Other(anyhow!("LSN not aligned"))); } @@ -343,7 +343,7 @@ pub(super) async fn handle_walreceiver_connection( let interpreted = InterpretedWalRecord::from_bytes_filtered( recdata, modification.tline.get_shard_identity(), - record_end_lsn, + next_record_lsn, modification.tline.pg_version, )?; @@ -367,10 +367,10 @@ pub(super) async fn handle_walreceiver_connection( .ingest_record(interpreted, &mut modification, &ctx) .await .with_context(|| { - format!("could not ingest record at {record_end_lsn}") + format!("could not ingest record at {next_record_lsn}") })?; if !ingested { - tracing::debug!("ingest: filtered out record @ LSN {record_end_lsn}"); + tracing::debug!("ingest: filtered out record @ LSN {next_record_lsn}"); WAL_INGEST.records_filtered.inc(); filtered_records += 1; } @@ -380,7 +380,7 @@ pub(super) async fn handle_walreceiver_connection( // to timeout the tests. fail_point!("walreceiver-after-ingest"); - last_rec_lsn = record_end_lsn; + last_rec_lsn = next_record_lsn; // Commit every ingest_batch_size records. Even if we filtered out // all records, we still need to call commit to advance the LSN. diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 84e553f330..38d69760f2 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -154,7 +154,7 @@ impl WalIngest { WAL_INGEST.records_received.inc(); let prev_len = modification.len(); - modification.set_lsn(interpreted.end_lsn)?; + modification.set_lsn(interpreted.next_record_lsn)?; if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) { // Records of this type should always be preceded by a commit(), as they From de7e4a34ca61cd1dc671b08239c2042d676e4043 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 17 Nov 2024 18:19:14 +0100 Subject: [PATCH 08/43] safekeeper: send `AppendResponse` on segment flush (#9692) ## Problem When processing pipelined `AppendRequest`s, we explicitly flush the WAL every second and return an `AppendResponse`. However, the WAL is also implicitly flushed on segment bounds, but this does not result in an `AppendResponse`. Because of this, concurrent transactions may take up to 1 second to commit and writes may take up to 1 second before sending to the pageserver. ## Summary of changes Advance `flush_lsn` when a WAL segment is closed and flushed, and emit an `AppendResponse`. To accommodate this, track the `flush_lsn` in addition to the `flush_record_lsn`. --- safekeeper/src/receive_wal.rs | 3 ++ safekeeper/src/safekeeper.rs | 5 ++- safekeeper/src/wal_storage.rs | 71 +++++++++++++++++++++++------------ 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index a0a96c6e99..2edcc4ef6f 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -562,6 +562,9 @@ impl WalAcceptor { // Don't flush the WAL on every append, only periodically via flush_ticker. // This batches multiple appends per fsync. If the channel is empty after // sending the reply, we'll schedule an immediate flush. + // + // Note that a flush can still happen on segment bounds, which will result + // in an AppendResponse. 
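// Context for the note above: PhysicalStorage (see the wal_storage.rs hunks
// further down in this patch) now tracks both `flush_lsn` (last byte durably
// on disk, advanced when a full segment is closed and fsynced) and
// `flush_record_lsn` (last *record* boundary known to be durable). When a
// segment close bumps `flush_lsn`, `flush_record_lsn` catches up to the newest
// decoded record boundary at or below it, and that changed flush position is
// what causes an AppendResponse to be sent even though no explicit periodic
// flush was requested.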
if let ProposerAcceptorMessage::AppendRequest(append_request) = msg { msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); dirty = true; diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f4983d44d0..6eb69f0b7c 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -947,6 +947,7 @@ where // while first connection still gets some packets later. It might be // better to not log this as error! above. let write_lsn = self.wal_store.write_lsn(); + let flush_lsn = self.wal_store.flush_lsn(); if write_lsn > msg.h.begin_lsn { bail!( "append request rewrites WAL written before, write_lsn={}, msg lsn={}", @@ -1004,7 +1005,9 @@ where ); // If flush_lsn hasn't updated, AppendResponse is not very useful. - if !require_flush { + // This is the common case for !require_flush, but a flush can still + // happen on segment bounds. + if !require_flush && flush_lsn == self.flush_lsn() { return Ok(None); } diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index c3bb6cd12c..e338d70731 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -113,6 +113,13 @@ pub struct PhysicalStorage { /// non-aligned chunks of data. write_record_lsn: Lsn, + /// The last LSN flushed to disk. May be in the middle of a record. + /// + /// NB: when the rest of the system refers to `flush_lsn`, it usually + /// actually refers to `flush_record_lsn`. This ambiguity can be dangerous + /// and should be resolved. + flush_lsn: Lsn, + /// The LSN of the last WAL record flushed to disk. flush_record_lsn: Lsn, @@ -211,6 +218,7 @@ impl PhysicalStorage { system_id: state.server.system_id, write_lsn, write_record_lsn: write_lsn, + flush_lsn, flush_record_lsn: flush_lsn, decoder: WalStreamDecoder::new(write_lsn, state.server.pg_version / 10000), file: None, @@ -295,8 +303,9 @@ impl PhysicalStorage { } } - /// Write WAL bytes, which are known to be located in a single WAL segment. - async fn write_in_segment(&mut self, segno: u64, xlogoff: usize, buf: &[u8]) -> Result<()> { + /// Write WAL bytes, which are known to be located in a single WAL segment. Returns true if the + /// segment was completed, closed, and flushed to disk. + async fn write_in_segment(&mut self, segno: u64, xlogoff: usize, buf: &[u8]) -> Result { let mut file = if let Some(file) = self.file.take() { file } else { @@ -320,20 +329,24 @@ impl PhysicalStorage { let (wal_file_path, wal_file_partial_path) = wal_file_paths(&self.timeline_dir, segno, self.wal_seg_size); fs::rename(wal_file_partial_path, wal_file_path).await?; + Ok(true) } else { // otherwise, file can be reused later self.file = Some(file); + Ok(false) } - - Ok(()) } /// Writes WAL to the segment files, until everything is writed. If some segments /// are fully written, they are flushed to disk. The last (partial) segment can /// be flushed separately later. /// - /// Updates `write_lsn`. + /// Updates `write_lsn` and `flush_lsn`. async fn write_exact(&mut self, pos: Lsn, mut buf: &[u8]) -> Result<()> { + // TODO: this shouldn't be possible, except possibly with write_lsn == 0. + // Rename this method to `append_exact`, and make it append-only, removing + // the `pos` parameter and this check. For this reason, we don't update + // `flush_lsn` here. 
if self.write_lsn != pos { // need to flush the file before discarding it if let Some(file) = self.file.take() { @@ -355,9 +368,13 @@ impl PhysicalStorage { buf.len() }; - self.write_in_segment(segno, xlogoff, &buf[..bytes_write]) + let flushed = self + .write_in_segment(segno, xlogoff, &buf[..bytes_write]) .await?; self.write_lsn += bytes_write as u64; + if flushed { + self.flush_lsn = self.write_lsn; + } buf = &buf[bytes_write..]; } @@ -371,6 +388,9 @@ impl Storage for PhysicalStorage { self.write_lsn } /// flush_lsn returns LSN of last durably stored WAL record. + /// + /// TODO: flush_lsn() returns flush_record_lsn, but write_lsn() returns write_lsn: confusing. + #[allow(clippy::misnamed_getters)] fn flush_lsn(&self) -> Lsn { self.flush_record_lsn } @@ -424,8 +444,9 @@ impl Storage for PhysicalStorage { self.metrics.observe_write_seconds(write_seconds); self.metrics.observe_write_bytes(buf.len()); - // figure out last record's end lsn for reporting (if we got the - // whole record) + // Figure out the last record's end LSN and update `write_record_lsn` + // (if we got a whole record). The write may also have closed and + // flushed a segment, so update `flush_record_lsn` as well. if self.decoder.available() != startpos { info!( "restart decoder from {} to {}", @@ -436,12 +457,15 @@ impl Storage for PhysicalStorage { self.decoder = WalStreamDecoder::new(startpos, pg_version); } self.decoder.feed_bytes(buf); - loop { - match self.decoder.poll_decode()? { - None => break, // no full record yet - Some((lsn, _rec)) => { - self.write_record_lsn = lsn; - } + + if self.write_record_lsn <= self.flush_lsn { + // We may have flushed a previously written record. + self.flush_record_lsn = self.write_record_lsn; + } + while let Some((lsn, _rec)) = self.decoder.poll_decode()? { + self.write_record_lsn = lsn; + if lsn <= self.flush_lsn { + self.flush_record_lsn = lsn; } } @@ -458,19 +482,17 @@ impl Storage for PhysicalStorage { self.fdatasync_file(&unflushed_file).await?; self.file = Some(unflushed_file); } else { - // We have unflushed data (write_lsn != flush_lsn), but no file. - // This should only happen if last file was fully written and flushed, - // but haven't updated flush_lsn yet. - if self.write_lsn.segment_offset(self.wal_seg_size) != 0 { - bail!( - "unexpected unflushed data with no open file, write_lsn={}, flush_lsn={}", - self.write_lsn, - self.flush_record_lsn - ); - } + // We have unflushed data (write_lsn != flush_lsn), but no file. This + // shouldn't happen, since the segment is flushed on close. 
+ bail!( + "unexpected unflushed data with no open file, write_lsn={}, flush_lsn={}", + self.write_lsn, + self.flush_record_lsn + ); } // everything is flushed now, let's update flush_lsn + self.flush_lsn = self.write_lsn; self.flush_record_lsn = self.write_record_lsn; Ok(()) } @@ -517,6 +539,7 @@ impl Storage for PhysicalStorage { self.pending_wal_truncation = true; self.write_lsn = end_pos; + self.flush_lsn = end_pos; self.write_record_lsn = end_pos; self.flush_record_lsn = end_pos; From 88801341717bb51ca779ba5c9866cccf5f7fbd58 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 17 Nov 2024 19:52:05 +0100 Subject: [PATCH 09/43] Cargo.toml: upgrade tikv-jemallocator to 0.6.0 (#9779) --- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- workspace_hack/Cargo.toml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6e3f9ddb1..954bac1c24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6074,9 +6074,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-ctl" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c" +checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b" dependencies = [ "libc", "paste", @@ -6085,9 +6085,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-sys" -version = "0.5.4+5.3.0-patched" +version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" +checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d" dependencies = [ "cc", "libc", @@ -6095,9 +6095,9 @@ dependencies = [ [[package]] name = "tikv-jemallocator" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" +checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865" dependencies = [ "libc", "tikv-jemalloc-sys", diff --git a/Cargo.toml b/Cargo.toml index 706d742f1b..dbda930535 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,8 +168,8 @@ sync_wrapper = "0.1.2" tar = "0.4" test-context = "0.3" thiserror = "1.0" -tikv-jemallocator = "0.5" -tikv-jemalloc-ctl = "0.5" +tikv-jemallocator = { version = "0.6", features = ["stats"] } +tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tokio = { version = "1.17", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } tokio-io-timeout = "1.2.0" diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index d6773987ea..53d3a7364b 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -75,7 +75,7 @@ smallvec = { version = "1", default-features = false, features = ["const_new", " spki = { version = "0.7", default-features = false, features = ["pem", "std"] } subtle = { version = "2" } sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] } -tikv-jemalloc-sys = { version = "0.5" } +tikv-jemalloc-sys = { version = "0.6", features = ["stats"] } time = { version = "0.3", features = ["macros", "serde-well-known"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = 
"neon", features = ["with-serde_json-1"] } From b6154b03f4b5a04e0dc364019c9d463c324df312 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Sun, 17 Nov 2024 21:25:16 +0100 Subject: [PATCH 10/43] build(deps): bump smallvec to 1.13.2 to get UB fix (#9781) Smallvec 1.13.2 contains [an UB fix](https://github.com/servo/rust-smallvec/pull/345). Upstream opened [a request](https://github.com/rustsec/advisory-db/issues/1960) for this in the advisory-db but it never got acted upon. Found while working on https://github.com/neondatabase/neon/pull/9321. --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 954bac1c24..f92da5ec51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5663,9 +5663,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smol_str" From 261d065e6fd491046e47d819c39a1fb4993c8219 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 18 Nov 2024 08:59:05 +0000 Subject: [PATCH 11/43] pageserver: respect no_sync in `VirtualFile` (#9772) ## Problem `no_sync` initially just skipped syncfs on startup (#9677). I'm also interested in flaky tests that time out during pageserver shutdown while flushing l0s, so to eliminate disk throughput as a source of issues there, ## Summary of changes - Drive-by change for test timeouts: add a couple more ::info logs during pageserver startup so it's obvious which part got stuck. - Add a SyncMode enum to configure VirtualFile and respect it in sync_all and sync_data functions - During pageserver startup, set SyncMode according to `no_sync` --- pageserver/benches/bench_ingest.rs | 1 + pageserver/ctl/src/layer_map_analyzer.rs | 1 + pageserver/ctl/src/layers.rs | 3 +++ pageserver/ctl/src/main.rs | 1 + pageserver/src/bin/pageserver.rs | 7 +++++ pageserver/src/virtual_file.rs | 33 +++++++++++++++++++++++- 6 files changed, 45 insertions(+), 1 deletion(-) diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index f6b2a8e031..caacd365b3 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -167,6 +167,7 @@ fn criterion_benchmark(c: &mut Criterion) { 16384, virtual_file::io_engine_for_bench(), conf.virtual_file_io_mode, + virtual_file::SyncMode::Sync, ); page_cache::init(conf.page_cache_size); diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 11b8e98f57..2c350d6d86 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -138,6 +138,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); pageserver::page_cache::init(100); diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 6f543dcaa9..4c2c3ab30e 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -51,6 +51,7 @@ async fn read_delta_file(path: impl AsRef, ctx: &RequestContext) -> Result 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); @@ -65,6 +66,7 @@ async fn read_image_file(path: impl AsRef, ctx: 
&RequestContext) -> Result 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); @@ -171,6 +173,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); pageserver::page_cache::init(100); diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index f506caec5b..92e766d2fb 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -209,6 +209,7 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index fe2a31167d..033a9a4619 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -171,11 +171,18 @@ fn main() -> anyhow::Result<()> { let scenario = failpoint_support::init(); // Basic initialization of things that don't change after startup + tracing::info!("Initializing virtual_file..."); virtual_file::init( conf.max_file_descriptors, conf.virtual_file_io_engine, conf.virtual_file_io_mode, + if conf.no_sync { + virtual_file::SyncMode::UnsafeNoSync + } else { + virtual_file::SyncMode::Sync + }, ); + tracing::info!("Initializing page_cache..."); page_cache::init(conf.page_cache_size); start_pageserver(launch_ts, conf).context("Failed to start pageserver")?; diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index daa8b99ab0..b9f8c7ea20 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -175,10 +175,16 @@ impl VirtualFile { } pub async fn sync_all(&self) -> Result<(), Error> { + if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 { + return Ok(()); + } self.inner.sync_all().await } pub async fn sync_data(&self) -> Result<(), Error> { + if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 { + return Ok(()); + } self.inner.sync_data().await } @@ -233,6 +239,27 @@ impl VirtualFile { } } +/// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing +/// files. Switching this off is unsafe and only used for testing on machines +/// with slow drives. +#[repr(u8)] +pub enum SyncMode { + Sync, + UnsafeNoSync, +} + +impl TryFrom for SyncMode { + type Error = u8; + + fn try_from(value: u8) -> Result { + Ok(match value { + v if v == (SyncMode::Sync as u8) => SyncMode::Sync, + v if v == (SyncMode::UnsafeNoSync as u8) => SyncMode::UnsafeNoSync, + x => return Err(x), + }) + } +} + /// /// A virtual file descriptor. You can use this just like std::fs::File, but internally /// the underlying file is closed if the system is low on file descriptors, @@ -1332,12 +1359,13 @@ impl OpenFiles { /// server startup. 
/// #[cfg(not(test))] -pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) { +pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode, sync_mode: SyncMode) { if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() { panic!("virtual_file::init called twice"); } set_io_mode(mode); io_engine::init(engine); + SYNC_MODE.store(sync_mode as u8, std::sync::atomic::Ordering::Relaxed); crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64); } @@ -1379,6 +1407,9 @@ pub(crate) fn set_io_mode(mode: IoMode) { pub(crate) fn get_io_mode() -> IoMode { IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap() } + +static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8); + #[cfg(test)] mod tests { use crate::context::DownloadBehavior; From 10bc1903e126367e33ea9185b9df1525cd484cde Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Mon, 18 Nov 2024 10:30:50 +0100 Subject: [PATCH 12/43] Fix the regression test running against the staging instance (#9773) ## Problem The Postgres version was updated. The patch has to be updated accordingly. ## Summary of changes The patch of the regression test was updated. --- compute/patches/cloud_regress_pg16.patch | 104 ++++++++++++++--------- 1 file changed, 65 insertions(+), 39 deletions(-) diff --git a/compute/patches/cloud_regress_pg16.patch b/compute/patches/cloud_regress_pg16.patch index d15d0cffeb..a4b93d0260 100644 --- a/compute/patches/cloud_regress_pg16.patch +++ b/compute/patches/cloud_regress_pg16.patch @@ -147,7 +147,7 @@ index 542c2e098c..0062d3024f 100644 ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; ALTER TABLE ptnowner OWNER TO regress_ptnowner; diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out -index 97bbe53b64..eac3d42a79 100644 +index 3f9a8f539c..0a51b52940 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC'; @@ -309,7 +309,7 @@ index b48365ec98..a6ef910055 100644 -- the wrong partition. This test is *not* guaranteed to trigger that bug, but -- does so when shared_buffers is small enough. To test if we encountered the diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out -index faf1a4d1b0..a44c97db52 100644 +index 9a74820ee8..22400a5551 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -553,8 +553,8 @@ select * from check_con_tbl; @@ -573,7 +573,7 @@ index 93302a07ef..1a73f083ac 100644 -- that does not match with what's expected. -- This checks all the object types that include schema qualifications. 
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out -index f3f8c7b5a2..3e3e54ff4c 100644 +index f551624afb..57f1e432d4 100644 --- a/src/test/regress/expected/create_view.out +++ b/src/test/regress/expected/create_view.out @@ -18,7 +18,8 @@ CREATE TABLE real_city ( @@ -700,12 +700,12 @@ index 6ed50fdcfa..caa00a345d 100644 COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator; diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out -index 12e523c737..8872e23935 100644 +index 6b8c2f2414..8e13b7fa46 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out -@@ -1968,7 +1968,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 - FOR VALUES IN (1600); - -- leave these tables around intentionally +@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES + ERROR: cannot ALTER TABLE "fk_partitioned_pk_61" because it is being used by active queries in this session + DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6; -- test the case when the referenced table is owned by a different user -create role regress_other_partitioned_fk_owner; +create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; @@ -713,7 +713,7 @@ index 12e523c737..8872e23935 100644 set role regress_other_partitioned_fk_owner; create table other_partitioned_fk(a int, b int) partition by list (a); diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out -index 0f623f7119..b48588a54e 100644 +index 5881420388..4ae21aa43c 100644 --- a/src/test/regress/expected/generated.out +++ b/src/test/regress/expected/generated.out @@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR @@ -762,7 +762,7 @@ index a2036a1597..805d73b9d2 100644 -- fields, leading to long bucket chains and lots of table expansion. 
-- this is therefore a stress test of the bucket overflow code (unlike diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out -index cc7772349f..98a08eb48d 100644 +index 1b74958de9..078187b542 100644 --- a/src/test/regress/expected/identity.out +++ b/src/test/regress/expected/identity.out @@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT; @@ -775,10 +775,10 @@ index cc7772349f..98a08eb48d 100644 GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; SET ROLE regress_identity_user1; diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out -index 4943429e9b..0257f22b15 100644 +index 8f831c95c3..ec681b52af 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out -@@ -2606,7 +2606,7 @@ create index on permtest_parent (left(c, 3)); +@@ -2636,7 +2636,7 @@ create index on permtest_parent (left(c, 3)); insert into permtest_parent select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; analyze permtest_parent; @@ -1133,7 +1133,7 @@ index 8475231735..1afae5395f 100644 SELECT rolname, rolpassword FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out -index fbb0489a4f..2905194e2c 100644 +index 5b9dba7b32..cc408dad42 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3 @@ -1185,7 +1185,7 @@ index fbb0489a4f..2905194e2c 100644 GRANT pg_read_all_data TO regress_priv_user6; GRANT pg_write_all_data TO regress_priv_user7; GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION; -@@ -145,8 +145,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8; +@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8; DROP USER regress_priv_user10; DROP USER regress_priv_user9; DROP USER regress_priv_user8; @@ -1196,7 +1196,7 @@ index fbb0489a4f..2905194e2c 100644 ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1; SET SESSION AUTHORIZATION regress_priv_user1; -@@ -172,12 +172,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre +@@ -239,12 +239,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre ERROR: permission denied to grant privileges as role "regress_priv_role" DETAIL: The grantor must have the ADMIN option on role "regress_priv_role". 
GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE; @@ -1213,7 +1213,7 @@ index fbb0489a4f..2905194e2c 100644 DROP ROLE regress_priv_role; SET SESSION AUTHORIZATION regress_priv_user1; SELECT session_user, current_user; -@@ -1709,7 +1713,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP +@@ -1776,7 +1780,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP -- security-restricted operations \c - @@ -1222,7 +1222,7 @@ index fbb0489a4f..2905194e2c 100644 -- Check that index expressions and predicates are run as the table's owner -- A dummy index function checking current_user CREATE FUNCTION sro_ifun(int) RETURNS int AS $$ -@@ -2601,8 +2605,8 @@ drop cascades to function testns.priv_testagg(integer) +@@ -2668,8 +2672,8 @@ drop cascades to function testns.priv_testagg(integer) drop cascades to function testns.priv_testproc(integer) -- Change owner of the schema & and rename of new schema owner \c - @@ -1233,7 +1233,7 @@ index fbb0489a4f..2905194e2c 100644 SET SESSION ROLE regress_schemauser1; CREATE SCHEMA testns; SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid; -@@ -2725,7 +2729,7 @@ DROP USER regress_priv_user7; +@@ -2792,7 +2796,7 @@ DROP USER regress_priv_user7; DROP USER regress_priv_user8; -- does not exist ERROR: role "regress_priv_user8" does not exist -- permissions with LOCK TABLE @@ -1242,7 +1242,7 @@ index fbb0489a4f..2905194e2c 100644 CREATE TABLE lock_table (a int); -- LOCK TABLE and SELECT permission GRANT SELECT ON lock_table TO regress_locktable_user; -@@ -2807,7 +2811,7 @@ DROP USER regress_locktable_user; +@@ -2874,7 +2878,7 @@ DROP USER regress_locktable_user; -- pg_backend_memory_contexts. 
-- switch to superuser \c - @@ -1251,7 +1251,7 @@ index fbb0489a4f..2905194e2c 100644 SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no has_table_privilege --------------------- -@@ -2851,10 +2855,10 @@ RESET ROLE; +@@ -2918,10 +2922,10 @@ RESET ROLE; -- clean up DROP ROLE regress_readallstats; -- test role grantor machinery @@ -1266,7 +1266,7 @@ index fbb0489a4f..2905194e2c 100644 GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; GRANT regress_group_direct_manager TO regress_group_indirect_manager; SET SESSION AUTHORIZATION regress_group_direct_manager; -@@ -2883,9 +2887,9 @@ DROP ROLE regress_group_direct_manager; +@@ -2950,9 +2954,9 @@ DROP ROLE regress_group_direct_manager; DROP ROLE regress_group_indirect_manager; DROP ROLE regress_group_member; -- test SET and INHERIT options with object ownership changes @@ -1813,7 +1813,7 @@ index 5e6969b173..2c4d52237f 100644 -- clean up roles diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out -index 97ca9bf72c..b2a7a6f710 100644 +index 218c0c2863..f7af0cfb12 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2; @@ -1917,6 +1917,19 @@ index b79fe9a1c0..e29fab88ab 100644 ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user REVOKE INSERT ON TABLES FROM regress_selinto_user; GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out +index afc6ab08c2..dfcd891af3 100644 +--- a/src/test/regress/expected/select_parallel.out ++++ b/src/test/regress/expected/select_parallel.out +@@ -1220,7 +1220,7 @@ SELECT 1 FROM tenk1_vw_sec + + rollback; + -- test that function option SET ROLE works in parallel workers. +-create role regress_parallel_worker; ++create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER; + create function set_and_report_role() returns text as + $$ select current_setting('role') $$ language sql parallel safe + set role = regress_parallel_worker; diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452b..7d9427d070 100644 --- a/src/test/regress/expected/select_views.out @@ -2369,7 +2382,7 @@ index 6cb9c926c0..5e689e4062 100644 ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; ALTER TABLE ptnowner OWNER TO regress_ptnowner; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql -index 3db9e25913..c66d5aa2c2 100644 +index 8aa902d5ab..24bb823b86 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -353,7 +353,7 @@ reset enable_seqscan; @@ -2532,7 +2545,7 @@ index 43d2e906dd..6c993d70f0 100644 -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from -- the wrong partition. This test is *not* guaranteed to trigger that bug, but diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql -index d759635068..d58e50dcc5 100644 +index cf3828c16e..cf3ca38175 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -365,8 +365,8 @@ copy check_con_tbl from stdin; @@ -2774,7 +2787,7 @@ index 1b7064247a..be5b662ce1 100644 -- Cases where schema creation fails as objects are qualified with a schema -- that does not match with what's expected. 
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql -index 3a78be1b0c..617d2dc8d6 100644 +index ae6841308b..47bc792e30 100644 --- a/src/test/regress/sql/create_view.sql +++ b/src/test/regress/sql/create_view.sql @@ -23,7 +23,8 @@ CREATE TABLE real_city ( @@ -2901,11 +2914,11 @@ index aa147b14a9..370e0dd570 100644 CREATE FOREIGN DATA WRAPPER dummy; COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql -index 22e177f89b..7138d5e1d4 100644 +index 45c7a534cb..32dd26b8cd 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql -@@ -1418,7 +1418,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 - -- leave these tables around intentionally +@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES + DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6; -- test the case when the referenced table is owned by a different user -create role regress_other_partitioned_fk_owner; @@ -2963,7 +2976,7 @@ index 527024f710..de49c0b85f 100644 -- the data in this file has a lot of duplicates in the index key -- fields, leading to long bucket chains and lots of table expansion. diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql -index 91d2e443b4..241c93f373 100644 +index 7537258a75..9041e35e34 100644 --- a/src/test/regress/sql/identity.sql +++ b/src/test/regress/sql/identity.sql @@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART; @@ -2976,10 +2989,10 @@ index 91d2e443b4..241c93f373 100644 GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; SET ROLE regress_identity_user1; diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql -index fe699c54d5..bdd5993f45 100644 +index b5b554a125..109889ad24 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql -@@ -950,7 +950,7 @@ create index on permtest_parent (left(c, 3)); +@@ -958,7 +958,7 @@ create index on permtest_parent (left(c, 3)); insert into permtest_parent select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; analyze permtest_parent; @@ -3218,7 +3231,7 @@ index 53e86b0b6c..f07cf1ec54 100644 CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql -index 3f68cafcd1..004b26831d 100644 +index 249df17a58..b258e7f26a 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -24,18 +24,18 @@ RESET client_min_messages; @@ -3269,7 +3282,7 @@ index 3f68cafcd1..004b26831d 100644 GRANT pg_read_all_data TO regress_priv_user6; GRANT pg_write_all_data TO regress_priv_user7; -@@ -130,8 +130,8 @@ DROP USER regress_priv_user10; +@@ -163,8 +163,8 @@ DROP USER regress_priv_user10; DROP USER regress_priv_user9; DROP USER regress_priv_user8; @@ -3280,7 +3293,7 @@ index 3f68cafcd1..004b26831d 100644 ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; -@@ -1124,7 +1124,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP +@@ -1157,7 +1157,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP -- security-restricted operations \c - @@ -3289,7 +3302,7 @@ index 3f68cafcd1..004b26831d 100644 -- Check that index expressions and predicates are run as the table's owner -@@ -1620,8 +1620,8 @@ DROP SCHEMA testns CASCADE; +@@ -1653,8 +1653,8 
@@ DROP SCHEMA testns CASCADE; -- Change owner of the schema & and rename of new schema owner \c - @@ -3300,7 +3313,7 @@ index 3f68cafcd1..004b26831d 100644 SET SESSION ROLE regress_schemauser1; CREATE SCHEMA testns; -@@ -1715,7 +1715,7 @@ DROP USER regress_priv_user8; -- does not exist +@@ -1748,7 +1748,7 @@ DROP USER regress_priv_user8; -- does not exist -- permissions with LOCK TABLE @@ -3309,7 +3322,7 @@ index 3f68cafcd1..004b26831d 100644 CREATE TABLE lock_table (a int); -- LOCK TABLE and SELECT permission -@@ -1803,7 +1803,7 @@ DROP USER regress_locktable_user; +@@ -1836,7 +1836,7 @@ DROP USER regress_locktable_user; -- switch to superuser \c - @@ -3318,7 +3331,7 @@ index 3f68cafcd1..004b26831d 100644 SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no -@@ -1823,10 +1823,10 @@ RESET ROLE; +@@ -1856,10 +1856,10 @@ RESET ROLE; DROP ROLE regress_readallstats; -- test role grantor machinery @@ -3333,7 +3346,7 @@ index 3f68cafcd1..004b26831d 100644 GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; GRANT regress_group_direct_manager TO regress_group_indirect_manager; -@@ -1848,9 +1848,9 @@ DROP ROLE regress_group_indirect_manager; +@@ -1881,9 +1881,9 @@ DROP ROLE regress_group_indirect_manager; DROP ROLE regress_group_member; -- test SET and INHERIT options with object ownership changes @@ -3625,7 +3638,7 @@ index c961b2d730..0859b89c4f 100644 -- clean up roles DROP ROLE regress_test_def_superuser; diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql -index dec7340538..cdbc03a5cc 100644 +index d3bfd53e23..919ce1d0c6 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE; @@ -3701,6 +3714,19 @@ index 689c448cc2..223ceb1d75 100644 ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user REVOKE INSERT ON TABLES FROM regress_selinto_user; GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql +index 33d78e16dc..cb193c9b27 100644 +--- a/src/test/regress/sql/select_parallel.sql ++++ b/src/test/regress/sql/select_parallel.sql +@@ -464,7 +464,7 @@ SELECT 1 FROM tenk1_vw_sec + rollback; + + -- test that function option SET ROLE works in parallel workers. +-create role regress_parallel_worker; ++create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER; + + create function set_and_report_role() returns text as + $$ select current_setting('role') $$ language sql parallel safe diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f13699..7bd0255df8 100644 --- a/src/test/regress/sql/select_views.sql From 6fa9b0cd8c8779a369c58a2493e21e62b65dc87f Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 18 Nov 2024 11:55:38 +0200 Subject: [PATCH 13/43] Use DATA_DIR instead of current workign directory in restore_from_wal script (#9729) ## Problem See https://github.com/neondatabase/neon/issues/7750 test_wal_restore.sh is copying file to current working directory which can cause interfere of test_wa_restore.py tests spawned of different configurations. 
## Summary of changes Copy file to $DATA_DIR Co-authored-by: Konstantin Knizhnik --- libs/utils/scripts/restore_from_wal.sh | 6 +++--- libs/utils/scripts/restore_from_wal_initdb.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/utils/scripts/restore_from_wal.sh b/libs/utils/scripts/restore_from_wal.sh index 93448369a0..a8615c2337 100755 --- a/libs/utils/scripts/restore_from_wal.sh +++ b/libs/utils/scripts/restore_from_wal.sh @@ -50,8 +50,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| declare -i WAL_SIZE=$REDO_POS+114 "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate -cp "$DATA_DIR"/pg_wal/000000010000000000000001 . +cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR" cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/ for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done -dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc -rm -f 000000010000000000000001 +dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc +rm -f "$DATA_DIR"/000000010000000000000001 diff --git a/libs/utils/scripts/restore_from_wal_initdb.sh b/libs/utils/scripts/restore_from_wal_initdb.sh index c6277ebc60..e7b0432505 100755 --- a/libs/utils/scripts/restore_from_wal_initdb.sh +++ b/libs/utils/scripts/restore_from_wal_initdb.sh @@ -14,8 +14,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| declare -i WAL_SIZE=$REDO_POS+114 "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate -cp "$DATA_DIR"/pg_wal/000000010000000000000001 . 
+cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR" cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/ for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done -dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc -rm -f 000000010000000000000001 +dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc +rm -f "$DATA_DIR"/000000010000000000000001 From c3eecf6763876431dbca4e14e58a2a0b86101beb Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Mon, 18 Nov 2024 11:47:43 +0100 Subject: [PATCH 14/43] adapt pgvector bench to minor version upgrades of PostgreSql (#9784) ## Problem pgvector benchmark is failing because after PostgreSQL minor version upgrade previous version packages are no longer available in deb repository [example failure](https://github.com/neondatabase/neon/actions/runs/11875503070/job/33092787149#step:4:40) ## Summary of changes Update postgres minor version of packages to current version [Example run after this change](https://github.com/neondatabase/neon/actions/runs/11888978279/job/33124614605) --- .github/workflows/benchmarking.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 69b8bc5d70..35b2a03fff 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -545,12 +545,12 @@ jobs: arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g') cd /home/nonroot - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb" - dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg - dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg - dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.1-1.pgdg110+1_${arch}.deb" + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb" + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.5-1.pgdg110+1_${arch}.deb" + dpkg -x libpq5_17.1-1.pgdg110+1_${arch}.deb pg + dpkg -x postgresql-16_16.5-1.pgdg110+1_${arch}.deb pg + dpkg -x postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb pg mkdir -p /tmp/neon/pg_install/v16/bin ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench From 3f401a328f62fcdf58424de6a2b7324068426485 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 18 Nov 2024 11:33:27 +0000 Subject: [PATCH 15/43] tests: mitigate bug to stabilize test_storage_controller_many_tenants (#9771) ## Problem Due to #9471 , the scale test occasionally gets 404s while trying to modify the config of a timeline that belongs to a tenant being migrated. We rarely see this narrow race in the field, but the test is quite good at reproducing it. ## Summary of changes - Ignore 404 errors in this test. 
--- .../performance/test_storage_controller_scale.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index d2eba751f8..dc051483f8 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -16,7 +16,7 @@ from fixtures.neon_fixtures import ( PageserverAvailability, PageserverSchedulingPolicy, ) -from fixtures.pageserver.http import PageserverHttpClient +from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pg_version import PgVersion @@ -273,7 +273,17 @@ def test_storage_controller_many_tenants( archival_state = rng.choice( [TimelineArchivalState.ARCHIVED, TimelineArchivalState.UNARCHIVED] ) - virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + try: + virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + except PageserverApiException as e: + if e.status_code == 404: + # FIXME: there is an edge case where timeline ops can encounter a 404 during + # a very short time window between generating a new generation number and + # attaching this tenant to its new pageserver. + # See https://github.com/neondatabase/neon/issues/9471 + pass + else: + raise # Generate a mixture of operations and dispatch them all concurrently futs = [] From 913b5b7027c827217fa2b5df3ab9d0cdb94fae5c Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 18 Nov 2024 13:14:28 +0000 Subject: [PATCH 16/43] CI: remove separate check-build-tools-image workflow (#9708) ## Problem We call `check-build-tools-image` twice for each workflow whenever we use it, along with `build-build-tools-image`, once as a workflow itself, and the second time from `build-build-tools-image`. This is not necessary. 
## Summary of changes - Inline `check-build-tools-image` into `build-build-tools-image` - Remove separate `check-build-tools-image` workflow --- .github/workflows/build-build-tools-image.yml | 59 +++++++++++-------- .github/workflows/build_and_test.yml | 8 +-- .github/workflows/check-build-tools-image.yml | 51 ---------------- .github/workflows/neon_extra_builds.yml | 8 +-- .github/workflows/pg-clients.yml | 8 +-- .github/workflows/pre-merge-checks.yml | 8 +-- .github/workflows/report-workflow-stats.yml | 1 - 7 files changed, 39 insertions(+), 104 deletions(-) delete mode 100644 .github/workflows/check-build-tools-image.yml diff --git a/.github/workflows/build-build-tools-image.yml b/.github/workflows/build-build-tools-image.yml index 82b065c524..9e7be76901 100644 --- a/.github/workflows/build-build-tools-image.yml +++ b/.github/workflows/build-build-tools-image.yml @@ -2,18 +2,13 @@ name: Build build-tools image on: workflow_call: - inputs: - image-tag: - description: "build-tools image tag" - required: true - type: string outputs: image-tag: description: "build-tools tag" - value: ${{ inputs.image-tag }} + value: ${{ jobs.check-image.outputs.tag }} image: description: "build-tools image" - value: neondatabase/build-tools:${{ inputs.image-tag }} + value: neondatabase/build-tools:${{ jobs.check-image.outputs.tag }} defaults: run: @@ -35,7 +30,36 @@ permissions: {} jobs: check-image: - uses: ./.github/workflows/check-build-tools-image.yml + runs-on: ubuntu-22.04 + outputs: + tag: ${{ steps.get-build-tools-tag.outputs.image-tag }} + found: ${{ steps.check-image.outputs.found }} + + steps: + - uses: actions/checkout@v4 + + - name: Get build-tools image tag for the current commit + id: get-build-tools-tag + env: + IMAGE_TAG: | + ${{ hashFiles('build-tools.Dockerfile', + '.github/workflows/build-build-tools-image.yml') }} + run: | + echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT + + - name: Check if such tag found in the registry + id: check-image + env: + IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }} + run: | + if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then + found=true + else + found=false + fi + + echo "found=${found}" | tee -a $GITHUB_OUTPUT + build-image: needs: [ check-image ] @@ -48,20 +72,7 @@ jobs: runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} - env: - IMAGE_TAG: ${{ inputs.image-tag }} - steps: - - name: Check `input.tag` is correct - env: - INPUTS_IMAGE_TAG: ${{ inputs.image-tag }} - CHECK_IMAGE_TAG : ${{ needs.check-image.outputs.image-tag }} - run: | - if [ "${INPUTS_IMAGE_TAG}" != "${CHECK_IMAGE_TAG}" ]; then - echo "'inputs.image-tag' (${INPUTS_IMAGE_TAG}) does not match the tag of the latest build-tools image 'inputs.image-tag' (${CHECK_IMAGE_TAG})" - exit 1 - fi - - uses: actions/checkout@v4 - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193 @@ -92,10 +103,10 @@ jobs: cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian-version }}-${{ matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian-version, matrix.arch) || '' }} tags: | - neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.debian-version }}-${{ matrix.arch }} + neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian-version }}-${{ matrix.arch }} merge-images: - needs: [ build-image ] + needs: [ check-image, 
build-image ] runs-on: ubuntu-22.04 steps: @@ -107,7 +118,7 @@ jobs: - name: Create multi-arch image env: DEFAULT_DEBIAN_VERSION: bullseye - IMAGE_TAG: ${{ inputs.image-tag }} + IMAGE_TAG: ${{ needs.check-image.outputs.tag }} run: | for debian_version in bullseye bookworm; do tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian_version}") diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cc6f91d28e..89fd2d0d17 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -77,15 +77,9 @@ jobs: shell: bash id: build-tag - check-build-tools-image: - needs: [ check-permissions ] - uses: ./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit check-codestyle-python: diff --git a/.github/workflows/check-build-tools-image.yml b/.github/workflows/check-build-tools-image.yml deleted file mode 100644 index a7a15ad58b..0000000000 --- a/.github/workflows/check-build-tools-image.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Check build-tools image - -on: - workflow_call: - outputs: - image-tag: - description: "build-tools image tag" - value: ${{ jobs.check-image.outputs.tag }} - found: - description: "Whether the image is found in the registry" - value: ${{ jobs.check-image.outputs.found }} - -defaults: - run: - shell: bash -euo pipefail {0} - -# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job. -permissions: {} - -jobs: - check-image: - runs-on: ubuntu-22.04 - outputs: - tag: ${{ steps.get-build-tools-tag.outputs.image-tag }} - found: ${{ steps.check-image.outputs.found }} - - steps: - - uses: actions/checkout@v4 - - - name: Get build-tools image tag for the current commit - id: get-build-tools-tag - env: - IMAGE_TAG: | - ${{ hashFiles('build-tools.Dockerfile', - '.github/workflows/check-build-tools-image.yml', - '.github/workflows/build-build-tools-image.yml') }} - run: | - echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT - - - name: Check if such tag found in the registry - id: check-image - env: - IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }} - run: | - if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then - found=true - else - found=false - fi - - echo "found=${found}" | tee -a $GITHUB_OUTPUT diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index cd5a665402..e827539c80 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -26,15 +26,9 @@ jobs: with: github-event-name: ${{ github.event_name}} - check-build-tools-image: - needs: [ check-permissions ] - uses: ./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit check-macos-build: diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index df40b5beda..4f5495cbe2 100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -39,15 +39,9 @@ jobs: with: github-event-name: ${{ github.event_name }} - check-build-tools-image: - needs: [ check-permissions ] - uses: 
./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit test-logical-replication: diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index 137faa7abc..e1cec6d33d 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -34,16 +34,10 @@ jobs: run: | echo "${PYTHON_CHANGED_FILES}" - check-build-tools-image: + build-build-tools-image: if: needs.get-changed-files.outputs.python-changed == 'true' needs: [ get-changed-files ] - uses: ./.github/workflows/check-build-tools-image.yml - - build-build-tools-image: - needs: [ check-build-tools-image ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit check-codestyle-python: diff --git a/.github/workflows/report-workflow-stats.yml b/.github/workflows/report-workflow-stats.yml index 0d135a257c..15e446bcd7 100644 --- a/.github/workflows/report-workflow-stats.yml +++ b/.github/workflows/report-workflow-stats.yml @@ -9,7 +9,6 @@ on: - Build and Test Locally - Build build-tools image - Check Permissions - - Check build-tools image - Check neon with extra platform builds - Cloud Regression Test - Create Release Branch From 44f33b2bd60c1d17476376aba93e0e768deda869 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 18 Nov 2024 15:06:24 +0000 Subject: [PATCH 17/43] Bump default Postgres version for tests to v17 (#9777) ## Problem Tests that are marked with `run_only_on_default_postgres` do not run on debug builds on CI because we run debug builds only for the latest Postgres version (which is 17) ## Summary of changes - Bump `PgVersion.DEFAULT` to `v17` - Skip `test_timeline_archival_chaos` in debug builds --- test_runner/fixtures/pg_version.py | 4 ++-- test_runner/regress/test_timeline_archive.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test_runner/fixtures/pg_version.py b/test_runner/fixtures/pg_version.py index 4feab52c43..798db1e8d9 100644 --- a/test_runner/fixtures/pg_version.py +++ b/test_runner/fixtures/pg_version.py @@ -22,8 +22,8 @@ class PgVersion(str, enum.Enum): V16 = "16" V17 = "17" - # Default Postgres Version for tests that don't really depend on Postgres itself - DEFAULT = V16 + # Postgres Version for tests that uses `fixtures.utils.run_only_on_default_postgres` + DEFAULT = V17 # Instead of making version an optional parameter in methods, we can use this fake entry # to explicitly rely on the default server version (could be different from pg_version fixture value) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index ba4e79c343..0650f12cd1 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -23,7 +23,7 @@ from fixtures.pageserver.utils import ( ) from fixtures.pg_version import PgVersion from fixtures.remote_storage import S3Storage, s3_storage -from fixtures.utils import run_only_on_default_postgres, wait_until +from fixtures.utils import run_only_on_default_postgres, skip_in_debug_build, wait_until from mypy_boto3_s3.type_defs import ( ObjectTypeDef, ) @@ -390,6 +390,7 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timel 
@run_only_on_default_postgres("this test isn't sensitive to the contents of timelines") +@skip_in_debug_build("times out in debug builds") def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): """ A general consistency check on archival/offload timeline state, and its intersection @@ -416,7 +417,7 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): [ ".*error sending request.*", # FIXME: the pageserver should not return 500s on cancellation (https://github.com/neondatabase/neon/issues/97680) - ".*InternalServerError(Error deleting timeline .* on .* on .*: pageserver API: error: Cancelled", + ".*InternalServerError\\(Error deleting timeline .* on .* on .*: pageserver API: error: Cancelled", ] ) From 5f0e9c9a94c86d3ca808ee16dc5900a268c0a041 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Mon, 18 Nov 2024 17:05:09 +0100 Subject: [PATCH 18/43] feat(compute/tests): Report successful replication test runs as well (#9787) It should increase the visibility of whether they run and pass. --- .github/workflows/benchmarking.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 35b2a03fff..68bc555982 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -240,8 +240,9 @@ jobs: env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} + # Post both success and failure to the Slack channel - name: Post to a Slack channel - if: ${{ github.event.schedule && failure() }} + if: ${{ github.event.schedule }} uses: slackapi/slack-github-action@v1 with: channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream From e5c89f3da3bdb71d35e70a488becf99bf82158ec Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 18 Nov 2024 14:27:52 -0500 Subject: [PATCH 19/43] feat(pageserver): drop disposable keys during gc-compaction (#9765) close https://github.com/neondatabase/neon/issues/9552, close https://github.com/neondatabase/neon/issues/8920, part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes * Drop keys not belonging to this shard during gc-compaction to avoid constructing history that might have been truncated during shard compaction. * Run gc-compaction at the end of shard compaction test. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 8 +++++++ test_runner/regress/test_compaction.py | 24 ++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index e6ef1aae2b..b30e380de5 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -2021,6 +2021,14 @@ impl Timeline { if cancel.is_cancelled() { return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error } + if self.shard_identity.is_key_disposable(&key) { + // If this shard does not need to store this key, simply skip it. + // + // This is not handled in the filter iterator because shard is determined by hash. + // Therefore, it does not give us any performance benefit to do things like skip + // a whole layer file as handling key spaces (ranges). 
+ continue; + } if !job_desc.compaction_key_range.contains(&key) { if !desc.is_delta { continue; diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 370df3c379..a02d0f6b98 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -122,10 +122,19 @@ LARGE_STRIPES = 32768 @pytest.mark.parametrize( - "shard_count,stripe_size", [(None, None), (4, TINY_STRIPES), (4, LARGE_STRIPES)] + "shard_count,stripe_size,gc_compaction", + [ + (None, None, False), + (4, TINY_STRIPES, False), + (4, LARGE_STRIPES, False), + (4, LARGE_STRIPES, True), + ], ) def test_sharding_compaction( - neon_env_builder: NeonEnvBuilder, stripe_size: int, shard_count: Optional[int] + neon_env_builder: NeonEnvBuilder, + stripe_size: int, + shard_count: Optional[int], + gc_compaction: bool, ): """ Use small stripes, small layers, and small compaction thresholds to exercise how compaction @@ -217,6 +226,17 @@ def test_sharding_compaction( # Assert that everything is still readable workload.validate() + if gc_compaction: + # trigger gc compaction to get more coverage for that, piggyback on the existing workload + for shard in env.storage_controller.locate(tenant_id): + pageserver = env.get_pageserver(shard["node_id"]) + tenant_shard_id = shard["shard_id"] + pageserver.http_client().timeline_compact( + tenant_shard_id, + timeline_id, + enhanced_gc_bottom_most_compaction=True, + ) + class CompactionAlgorithm(str, enum.Enum): LEGACY = "legacy" From d7662fdc7bd3bd6b89ee27fcb0dbd468b63ee276 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 18 Nov 2024 20:24:03 +0000 Subject: [PATCH 20/43] feat(page_service): timeout-based batching of requests (#9321) ## Problem We don't take advantage of queue depth generated by the compute on the pageserver. We can process getpage requests more efficiently by batching them. ## Summary of changes Batch up incoming getpage requests that arrive within a configurable time window (`server_side_batch_timeout`). Then process the entire batch via one `get_vectored` timeline operation. By default, no merging takes place. 
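For illustration only, a minimal sketch of the batching idea (not the code in this PR): requests that arrive while a batch is open are collected until a deadline fires, then the whole batch is handed to a single processing step. The mpsc channel, the `u32` request type, and the final `println!` are stand-ins for the page service's message stream and its vectored timeline read; `batch_timeout` plays the role of `server_side_batch_timeout`.

```rust
// Sketch only: assumes the `tokio` crate with the "full" feature set.
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::time::{sleep_until, Instant};

async fn batch_loop(mut rx: mpsc::Receiver<u32>, batch_timeout: Duration) {
    loop {
        // Block until the first request of the next batch arrives.
        let Some(first) = rx.recv().await else { break };
        let mut batch = vec![first];
        // The deadline is armed only once a batch has been started, so an
        // idle connection pays no extra latency.
        let deadline = Instant::now() + batch_timeout;
        loop {
            tokio::select! {
                _ = sleep_until(deadline) => break,
                next = rx.recv() => match next {
                    Some(req) => batch.push(req),
                    None => break, // sender closed; flush what we have
                },
            }
        }
        // Stand-in for processing the whole batch via one vectored read.
        println!("processing batch of {} requests", batch.len());
    }
}

#[tokio::main]
async fn main() {
    let (tx, rx) = mpsc::channel(64);
    tokio::spawn(async move {
        for i in 0..10u32 {
            tx.send(i).await.unwrap();
        }
    });
    batch_loop(rx, Duration::from_millis(5)).await;
}
```

With the timeout unset (the default), no batching window is armed and each request is handled on its own, matching the "no merging by default" behavior described above.
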
## Testing * **Functional**: https://github.com/neondatabase/neon/pull/9792 * **Performance**: will be done in staging/pre-prod # Refs * https://github.com/neondatabase/neon/issues/9377 * https://github.com/neondatabase/neon/issues/9376 Co-authored-by: Christian Schwarz --- Cargo.lock | 1 + libs/pageserver_api/src/config.rs | 6 + libs/postgres_backend/src/lib.rs | 3 + pageserver/Cargo.toml | 1 + pageserver/src/config.rs | 6 + pageserver/src/metrics.rs | 21 +- pageserver/src/page_service.rs | 626 ++++++++++++++++++++-------- pageserver/src/pgdatadir_mapping.rs | 210 +++++++++- pageserver/src/walingest.rs | 28 ++ 9 files changed, 706 insertions(+), 196 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f92da5ec51..da8cefb219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3642,6 +3642,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_with", + "smallvec", "storage_broker", "strum", "strum_macros", diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index f48c1febb5..ee20613d6d 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -109,6 +109,8 @@ pub struct ConfigToml { pub virtual_file_io_mode: Option, #[serde(skip_serializing_if = "Option::is_none")] pub no_sync: Option, + #[serde(with = "humantime_serde")] + pub server_side_batch_timeout: Option, } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] @@ -317,6 +319,8 @@ pub mod defaults { pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0; pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512; + + pub const DEFAULT_SERVER_SIDE_BATCH_TIMEOUT: Option<&str> = None; } impl Default for ConfigToml { @@ -397,6 +401,8 @@ impl Default for ConfigToml { ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: None, virtual_file_io_mode: None, + server_side_batch_timeout: DEFAULT_SERVER_SIDE_BATCH_TIMEOUT + .map(|duration| humantime::parse_duration(duration).unwrap()), tenant_config: TenantConfigToml::default(), no_sync: None, } diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 7419798a60..9075a019b4 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -716,6 +716,9 @@ impl PostgresBackend { Ok(()) } + // Proto looks like this: + // FeMessage::Query("pagestream_v2{FeMessage::CopyData(PagesetreamFeMessage::GetPage(..))}") + async fn process_message( &mut self, handler: &mut impl Handler, diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index ecb8fa7491..143d8236df 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -84,6 +84,7 @@ enumset = { workspace = true, features = ["serde"]} strum.workspace = true strum_macros.workspace = true wal_decoder.workspace = true +smallvec.workspace = true [target.'cfg(target_os = "linux")'.dependencies] procfs.workspace = true diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index b694a43599..f7be6ecaab 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -182,6 +182,10 @@ pub struct PageServerConf { /// Optionally disable disk syncs (unsafe!) pub no_sync: bool, + + /// Maximum amount of time for which a get page request request + /// might be held up for request merging. 
+ pub server_side_batch_timeout: Option, } /// Token for authentication to safekeepers @@ -336,6 +340,7 @@ impl PageServerConf { concurrent_tenant_warmup, concurrent_tenant_size_logical_size_queries, virtual_file_io_engine, + server_side_batch_timeout, tenant_config, no_sync, } = config_toml; @@ -377,6 +382,7 @@ impl PageServerConf { image_compression, timeline_offloading, ephemeral_bytes_per_memory_kb, + server_side_batch_timeout, // ------------------------------------------------------------ // fields that require additional validation or custom handling diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1473729186..3cdc2a761e 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1187,6 +1187,7 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { ctx: &'c RequestContext, start: std::time::Instant, op: SmgrQueryType, + count: usize, } impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { @@ -1214,10 +1215,13 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { elapsed } }; - self.global_latency_histo - .observe(ex_throttled.as_secs_f64()); - if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { - per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); + + for _ in 0..self.count { + self.global_latency_histo + .observe(ex_throttled.as_secs_f64()); + if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { + per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); + } } } } @@ -1385,6 +1389,14 @@ impl SmgrQueryTimePerTimeline { &'a self, op: SmgrQueryType, ctx: &'c RequestContext, + ) -> Option { + self.start_timer_many(op, 1, ctx) + } + pub(crate) fn start_timer_many<'c: 'a, 'a>( + &'a self, + op: SmgrQueryType, + count: usize, + ctx: &'c RequestContext, ) -> Option { let start = Instant::now(); @@ -1422,6 +1434,7 @@ impl SmgrQueryTimePerTimeline { ctx, start, op, + count, }) } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index f07474df6a..a429dff1fd 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -7,13 +7,13 @@ use bytes::Buf; use futures::FutureExt; use itertools::Itertools; use once_cell::sync::OnceCell; -use pageserver_api::models::TenantState; +use pageserver_api::models::{self, TenantState}; use pageserver_api::models::{ PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse, - PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse, - PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, - PagestreamNblocksResponse, PagestreamProtocolVersion, + PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest, + PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, + PagestreamProtocolVersion, }; use pageserver_api::shard::TenantShardId; use postgres_backend::{is_expected_io_error, AuthType, PostgresBackend, QueryError}; @@ -44,7 +44,7 @@ use crate::basebackup; use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics; +use crate::metrics::{self}; use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; use crate::pgdatadir_mapping::Version; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; @@ -59,7 +59,7 @@ use crate::tenant::GetTimelineError; use 
crate::tenant::PageReconstructError; use crate::tenant::Timeline; use pageserver_api::key::rel_block_to_key; -use pageserver_api::reltag::SlruKind; +use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; @@ -105,6 +105,7 @@ pub fn spawn( pg_auth, tcp_listener, conf.pg_auth_type, + conf.server_side_batch_timeout, libpq_ctx, cancel.clone(), ) @@ -153,6 +154,7 @@ pub async fn libpq_listener_main( auth: Option>, listener: tokio::net::TcpListener, auth_type: AuthType, + server_side_batch_timeout: Option, listener_ctx: RequestContext, listener_cancel: CancellationToken, ) -> Connections { @@ -183,6 +185,7 @@ pub async fn libpq_listener_main( local_auth, socket, auth_type, + server_side_batch_timeout, connection_ctx, connections_cancel.child_token(), )); @@ -210,6 +213,7 @@ async fn page_service_conn_main( auth: Option>, socket: tokio::net::TcpStream, auth_type: AuthType, + server_side_batch_timeout: Option, connection_ctx: RequestContext, cancel: CancellationToken, ) -> ConnectionHandlerResult { @@ -260,8 +264,13 @@ async fn page_service_conn_main( // and create a child per-query context when it invokes process_query. // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler // and create the per-query context in process_query ourselves. - let mut conn_handler = - PageServerHandler::new(tenant_manager, auth, connection_ctx, cancel.clone()); + let mut conn_handler = PageServerHandler::new( + tenant_manager, + auth, + server_side_batch_timeout, + connection_ctx, + cancel.clone(), + ); let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?; match pgbackend.run(&mut conn_handler, &cancel).await { @@ -304,6 +313,12 @@ struct PageServerHandler { cancel: CancellationToken, timeline_handles: TimelineHandles, + + /// Messages queued up for the next processing batch + next_batch: Option, + + /// See [`PageServerConf::server_side_batch_timeout`] + server_side_batch_timeout: Option, } struct TimelineHandles { @@ -517,10 +532,47 @@ impl From for QueryError { } } +enum BatchedFeMessage { + Exists { + span: Span, + req: models::PagestreamExistsRequest, + }, + Nblocks { + span: Span, + req: models::PagestreamNblocksRequest, + }, + GetPage { + span: Span, + shard: timeline::handle::Handle, + effective_request_lsn: Lsn, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + }, + DbSize { + span: Span, + req: models::PagestreamDbSizeRequest, + }, + GetSlruSegment { + span: Span, + req: models::PagestreamGetSlruSegmentRequest, + }, + RespondError { + span: Span, + error: PageStreamError, + }, +} + +enum BatchOrEof { + /// In the common case, this has one entry. + /// At most, it has two entries: the first is the leftover batch, the second is an error. 
+ Batch(smallvec::SmallVec<[BatchedFeMessage; 1]>), + Eof, +} + impl PageServerHandler { pub fn new( tenant_manager: Arc, auth: Option>, + server_side_batch_timeout: Option, connection_ctx: RequestContext, cancel: CancellationToken, ) -> Self { @@ -530,6 +582,8 @@ impl PageServerHandler { connection_ctx, timeline_handles: TimelineHandles::new(tenant_manager), cancel, + next_batch: None, + server_side_batch_timeout, } } @@ -557,6 +611,221 @@ impl PageServerHandler { ) } + async fn read_batch_from_connection( + &mut self, + pgb: &mut PostgresBackend, + tenant_id: &TenantId, + timeline_id: &TimelineId, + ctx: &RequestContext, + ) -> Result, QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { + let mut batch = self.next_batch.take(); + let mut batch_started_at: Option = None; + + let next_batch: Option = loop { + let sleep_fut = match (self.server_side_batch_timeout, batch_started_at) { + (Some(batch_timeout), Some(started_at)) => futures::future::Either::Left( + tokio::time::sleep_until((started_at + batch_timeout).into()), + ), + _ => futures::future::Either::Right(futures::future::pending()), + }; + + let msg = tokio::select! { + biased; + _ = self.cancel.cancelled() => { + return Err(QueryError::Shutdown) + } + msg = pgb.read_message() => { + msg + } + _ = sleep_fut => { + assert!(batch.is_some()); + break None; + } + }; + let copy_data_bytes = match msg? { + Some(FeMessage::CopyData(bytes)) => bytes, + Some(FeMessage::Terminate) => { + return Ok(Some(BatchOrEof::Eof)); + } + Some(m) => { + return Err(QueryError::Other(anyhow::anyhow!( + "unexpected message: {m:?} during COPY" + ))); + } + None => { + return Ok(Some(BatchOrEof::Eof)); + } // client disconnected + }; + trace!("query: {copy_data_bytes:?}"); + fail::fail_point!("ps::handle-pagerequest-message"); + + // parse request + let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?; + + let this_msg = match neon_fe_msg { + PagestreamFeMessage::Exists(req) => BatchedFeMessage::Exists { + span: tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::Nblocks(req) => BatchedFeMessage::Nblocks { + span: tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::DbSize(req) => BatchedFeMessage::DbSize { + span: tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::GetSlruSegment(req) => BatchedFeMessage::GetSlruSegment { + span: tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::GetPage(PagestreamGetPageRequest { + request_lsn, + not_modified_since, + rel, + blkno, + }) => { + // shard_id is filled in by the handler + let span = tracing::info_span!( + "handle_get_page_at_lsn_request_batched", + %tenant_id, %timeline_id, shard_id = tracing::field::Empty, req_lsn = %request_lsn, + batch_size = tracing::field::Empty, batch_id = tracing::field::Empty + ); + + macro_rules! 
current_batch_and_error { + ($error:expr) => {{ + let error = BatchedFeMessage::RespondError { + span, + error: $error, + }; + let batch_and_error = match batch { + Some(b) => smallvec::smallvec![b, error], + None => smallvec::smallvec![error], + }; + Ok(Some(BatchOrEof::Batch(batch_and_error))) + }}; + } + + let key = rel_block_to_key(rel, blkno); + let shard = match self + .timeline_handles + .get(*tenant_id, *timeline_id, ShardSelector::Page(key)) + .instrument(span.clone()) + .await + { + Ok(tl) => tl, + Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { + // We already know this tenant exists in general, because we resolved it at + // start of connection. Getting a NotFound here indicates that the shard containing + // the requested page is not present on this node: the client's knowledge of shard->pageserver + // mapping is out of date. + // + // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via + // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration + // and talk to a different pageserver. + return current_batch_and_error!(PageStreamError::Reconnect( + "getpage@lsn request routed to wrong shard".into() + )); + } + Err(e) => { + return current_batch_and_error!(e.into()); + } + }; + let effective_request_lsn = match Self::wait_or_get_last_lsn( + &shard, + request_lsn, + not_modified_since, + &shard.get_latest_gc_cutoff_lsn(), + ctx, + ) + // TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait + .await + { + Ok(lsn) => lsn, + Err(e) => { + return current_batch_and_error!(e); + } + }; + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages: smallvec::smallvec![(rel, blkno)], + } + } + }; + + let batch_timeout = match self.server_side_batch_timeout { + Some(value) => value, + None => { + // Batching is not enabled - stop on the first message. + return Ok(Some(BatchOrEof::Batch(smallvec::smallvec![this_msg]))); + } + }; + + // check if we can batch + match (&mut batch, this_msg) { + (None, this_msg) => { + batch = Some(this_msg); + } + ( + Some(BatchedFeMessage::GetPage { + span: _, + shard: accum_shard, + pages: accum_pages, + effective_request_lsn: accum_lsn, + }), + BatchedFeMessage::GetPage { + span: _, + shard: this_shard, + pages: this_pages, + effective_request_lsn: this_lsn, + }, + ) if async { + assert_eq!(this_pages.len(), 1); + if accum_pages.len() >= Timeline::MAX_GET_VECTORED_KEYS as usize { + assert_eq!(accum_pages.len(), Timeline::MAX_GET_VECTORED_KEYS as usize); + return false; + } + if (accum_shard.tenant_shard_id, accum_shard.timeline_id) + != (this_shard.tenant_shard_id, this_shard.timeline_id) + { + // TODO: we _could_ batch & execute each shard seperately (and in parallel). + // But the current logic for keeping responses in order does not support that. 
+ return false; + } + // the vectored get currently only supports a single LSN, so, bounce as soon + // as the effective request_lsn changes + if *accum_lsn != this_lsn { + return false; + } + true + } + .await => + { + // ok to batch + accum_pages.extend(this_pages); + } + (Some(_), this_msg) => { + // by default, don't continue batching + break Some(this_msg); + } + } + + // batching impl piece + let started_at = batch_started_at.get_or_insert_with(Instant::now); + if started_at.elapsed() > batch_timeout { + break None; + } + }; + + self.next_batch = next_batch; + Ok(batch.map(|b| BatchOrEof::Batch(smallvec::smallvec![b]))) + } + /// Pagestream sub-protocol handler. /// /// It is a simple request-response protocol inside a COPYBOTH session. @@ -592,133 +861,165 @@ impl PageServerHandler { } } + // If [`PageServerHandler`] is reused for multiple pagestreams, + // then make sure to not process requests from the previous ones. + self.next_batch = None; + loop { - // read request bytes (it's exactly 1 PagestreamFeMessage per CopyData) - let msg = tokio::select! { - biased; - _ = self.cancel.cancelled() => { - return Err(QueryError::Shutdown) + let maybe_batched = self + .read_batch_from_connection(pgb, &tenant_id, &timeline_id, &ctx) + .await?; + let batched = match maybe_batched { + Some(BatchOrEof::Batch(b)) => b, + Some(BatchOrEof::Eof) => { + break; } - msg = pgb.read_message() => { msg } - }; - let copy_data_bytes = match msg? { - Some(FeMessage::CopyData(bytes)) => bytes, - Some(FeMessage::Terminate) => break, - Some(m) => { - return Err(QueryError::Other(anyhow::anyhow!( - "unexpected message: {m:?} during COPY" - ))); - } - None => break, // client disconnected - }; - - trace!("query: {copy_data_bytes:?}"); - fail::fail_point!("ps::handle-pagerequest-message"); - - // parse request - let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?; - - // invoke handler function - let (handler_result, span) = match neon_fe_msg { - PagestreamFeMessage::Exists(req) => { - fail::fail_point!("ps::handle-pagerequest-message::exists"); - let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn); - ( - self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::Nblocks(req) => { - fail::fail_point!("ps::handle-pagerequest-message::nblocks"); - let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn); - ( - self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::GetPage(req) => { - fail::fail_point!("ps::handle-pagerequest-message::getpage"); - // shard_id is filled in by the handler - let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn); - ( - self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::DbSize(req) => { - fail::fail_point!("ps::handle-pagerequest-message::dbsize"); - let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn); - ( - self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::GetSlruSegment(req) => { - fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); - let span = 
tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn); - ( - self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) + None => { + continue; } }; - // Map handler result to protocol behavior. - // Some handler errors cause exit from pagestream protocol. - // Other handler errors are sent back as an error message and we stay in pagestream protocol. - let response_msg = match handler_result { - Err(e) => match &e { - PageStreamError::Shutdown => { - // If we fail to fulfil a request during shutdown, which may be _because_ of - // shutdown, then do not send the error to the client. Instead just drop the - // connection. - span.in_scope(|| info!("dropping connection due to shutdown")); - return Err(QueryError::Shutdown); + for batch in batched { + // invoke handler function + let (handler_results, span): ( + Vec>, + _, + ) = match batch { + BatchedFeMessage::Exists { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::exists"); + ( + vec![ + self.handle_get_rel_exists_request( + tenant_id, + timeline_id, + &req, + &ctx, + ) + .instrument(span.clone()) + .await, + ], + span, + ) } - PageStreamError::Reconnect(reason) => { - span.in_scope(|| info!("handler requested reconnect: {reason}")); - return Err(QueryError::Reconnect); + BatchedFeMessage::Nblocks { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::nblocks"); + ( + vec![ + self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx) + .instrument(span.clone()) + .await, + ], + span, + ) } - PageStreamError::Read(_) - | PageStreamError::LsnTimeout(_) - | PageStreamError::NotFound(_) - | PageStreamError::BadRequest(_) => { - // print the all details to the log with {:#}, but for the client the - // error message is enough. Do not log if shutting down, as the anyhow::Error - // here includes cancellation which is not an error. - let full = utils::error::report_compact_sources(&e); - span.in_scope(|| { - error!("error reading relation or page version: {full:#}") - }); - PagestreamBeMessage::Error(PagestreamErrorResponse { - message: e.to_string(), - }) + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages, + } => { + fail::fail_point!("ps::handle-pagerequest-message::getpage"); + ( + { + let npages = pages.len(); + let res = self + .handle_get_page_at_lsn_request_batched( + &shard, + effective_request_lsn, + pages, + &ctx, + ) + .instrument(span.clone()) + .await; + assert_eq!(res.len(), npages); + res + }, + span, + ) } - }, - Ok(response_msg) => response_msg, - }; + BatchedFeMessage::DbSize { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::dbsize"); + ( + vec![ + self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx) + .instrument(span.clone()) + .await, + ], + span, + ) + } + BatchedFeMessage::GetSlruSegment { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); + ( + vec![ + self.handle_get_slru_segment_request( + tenant_id, + timeline_id, + &req, + &ctx, + ) + .instrument(span.clone()) + .await, + ], + span, + ) + } + BatchedFeMessage::RespondError { span, error } => { + // We've already decided to respond with an error, so we don't need to + // call the handler. + (vec![Err(error)], span) + } + }; - // marshal & transmit response message - pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; - tokio::select! 
{ - biased; - _ = self.cancel.cancelled() => { - // We were requested to shut down. - info!("shutdown request received in page handler"); - return Err(QueryError::Shutdown) + // Map handler result to protocol behavior. + // Some handler errors cause exit from pagestream protocol. + // Other handler errors are sent back as an error message and we stay in pagestream protocol. + for handler_result in handler_results { + let response_msg = match handler_result { + Err(e) => match &e { + PageStreamError::Shutdown => { + // If we fail to fulfil a request during shutdown, which may be _because_ of + // shutdown, then do not send the error to the client. Instead just drop the + // connection. + span.in_scope(|| info!("dropping connection due to shutdown")); + return Err(QueryError::Shutdown); + } + PageStreamError::Reconnect(reason) => { + span.in_scope(|| info!("handler requested reconnect: {reason}")); + return Err(QueryError::Reconnect); + } + PageStreamError::Read(_) + | PageStreamError::LsnTimeout(_) + | PageStreamError::NotFound(_) + | PageStreamError::BadRequest(_) => { + // print the all details to the log with {:#}, but for the client the + // error message is enough. Do not log if shutting down, as the anyhow::Error + // here includes cancellation which is not an error. + let full = utils::error::report_compact_sources(&e); + span.in_scope(|| { + error!("error reading relation or page version: {full:#}") + }); + PagestreamBeMessage::Error(PagestreamErrorResponse { + message: e.to_string(), + }) + } + }, + Ok(response_msg) => response_msg, + }; + + // marshal & transmit response message + pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; } - res = pgb.flush() => { - res?; + tokio::select! { + biased; + _ = self.cancel.cancelled() => { + // We were requested to shut down. + info!("shutdown request received in page handler"); + return Err(QueryError::Shutdown) + } + res = pgb.flush() => { + res?; + } } } } @@ -964,60 +1265,30 @@ impl PageServerHandler { })) } - #[instrument(skip_all, fields(shard_id))] - async fn handle_get_page_at_lsn_request( + #[instrument(skip_all)] + async fn handle_get_page_at_lsn_request_batched( &mut self, - tenant_id: TenantId, - timeline_id: TimelineId, - req: &PagestreamGetPageRequest, + timeline: &Timeline, + effective_lsn: Lsn, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, ctx: &RequestContext, - ) -> Result { - let timeline = match self - .timeline_handles - .get( - tenant_id, - timeline_id, - ShardSelector::Page(rel_block_to_key(req.rel, req.blkno)), - ) - .await - { - Ok(tl) => tl, - Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { - // We already know this tenant exists in general, because we resolved it at - // start of connection. Getting a NotFound here indicates that the shard containing - // the requested page is not present on this node: the client's knowledge of shard->pageserver - // mapping is out of date. - // - // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via - // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration - // and talk to a different pageserver. 
- return Err(PageStreamError::Reconnect( - "getpage@lsn request routed to wrong shard".into(), - )); - } - Err(e) => return Err(e.into()), - }; - - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx); - - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); - let lsn = Self::wait_or_get_last_lsn( - &timeline, - req.request_lsn, - req.not_modified_since, - &latest_gc_cutoff_lsn, + ) -> Vec> { + debug_assert_current_span_has_tenant_and_timeline_id(); + let _timer = timeline.query_metrics.start_timer_many( + metrics::SmgrQueryType::GetPageAtLsn, + pages.len(), ctx, - ) - .await?; + ); - let page = timeline - .get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), ctx) - .await?; + let pages = timeline + .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) + .await; - Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse { - page, + Vec::from_iter(pages.into_iter().map(|page| { + page.map(|page| { + PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page }) + }) + .map_err(PageStreamError::from) })) } @@ -1674,6 +1945,13 @@ fn set_tracing_field_shard_id(timeline: &Timeline) { debug_assert_current_span_has_tenant_and_timeline_id(); } +struct WaitedForLsn(Lsn); +impl From for Lsn { + fn from(WaitedForLsn(lsn): WaitedForLsn) -> Self { + lsn + } +} + #[cfg(test)] mod tests { use utils::shard::ShardCount; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 7c1abbf3e2..5995d1cc57 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -10,10 +10,15 @@ use super::tenant::{PageReconstructError, Timeline}; use crate::aux_file; use crate::context::RequestContext; use crate::keyspace::{KeySpace, KeySpaceAccum}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; +use crate::span::{ + debug_assert_current_span_has_tenant_and_timeline_id, + debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id, +}; +use crate::tenant::timeline::GetVectoredError; use anyhow::{ensure, Context}; use bytes::{Buf, Bytes, BytesMut}; use enum_map::Enum; +use itertools::Itertools; use pageserver_api::key::Key; use pageserver_api::key::{ dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key, @@ -30,7 +35,7 @@ use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::BLCKSZ; use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId}; use serde::{Deserialize, Serialize}; -use std::collections::{hash_map, HashMap, HashSet}; +use std::collections::{hash_map, BTreeMap, HashMap, HashSet}; use std::ops::ControlFlow; use std::ops::Range; use strum::IntoEnumIterator; @@ -193,26 +198,195 @@ impl Timeline { version: Version<'_>, ctx: &RequestContext, ) -> Result { - if tag.relnode == 0 { - return Err(PageReconstructError::Other( - RelationError::InvalidRelnode.into(), - )); + match version { + Version::Lsn(effective_lsn) => { + let pages = smallvec::smallvec![(tag, blknum)]; + let res = self + .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) + .await; + assert_eq!(res.len(), 1); + res.into_iter().next().unwrap() + } + Version::Modified(modification) => { + if tag.relnode == 0 { + return Err(PageReconstructError::Other( + RelationError::InvalidRelnode.into(), + )); + } + + let nblocks = self.get_rel_size(tag, version, ctx).await?; + if blknum >= nblocks { + debug!( + "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", + tag, + blknum, + 
version.get_lsn(), + nblocks + ); + return Ok(ZERO_PAGE.clone()); + } + + let key = rel_block_to_key(tag, blknum); + modification.get(key, ctx).await + } + } + } + + /// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages. + /// + /// The ordering of the returned vec corresponds to the ordering of `pages`. + pub(crate) async fn get_rel_page_at_lsn_batched( + &self, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + effective_lsn: Lsn, + ctx: &RequestContext, + ) -> Vec> { + debug_assert_current_span_has_tenant_and_timeline_id(); + + let mut slots_filled = 0; + let page_count = pages.len(); + + // Would be nice to use smallvec here but it doesn't provide the spare_capacity_mut() API. + let mut result = Vec::with_capacity(pages.len()); + let result_slots = result.spare_capacity_mut(); + + let mut keys_slots: BTreeMap> = BTreeMap::default(); + for (response_slot_idx, (tag, blknum)) in pages.into_iter().enumerate() { + if tag.relnode == 0 { + result_slots[response_slot_idx].write(Err(PageReconstructError::Other( + RelationError::InvalidRelnode.into(), + ))); + + slots_filled += 1; + continue; + } + + let nblocks = match self + .get_rel_size(tag, Version::Lsn(effective_lsn), ctx) + .await + { + Ok(nblocks) => nblocks, + Err(err) => { + result_slots[response_slot_idx].write(Err(err)); + slots_filled += 1; + continue; + } + }; + + if blknum >= nblocks { + debug!( + "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", + tag, blknum, effective_lsn, nblocks + ); + result_slots[response_slot_idx].write(Ok(ZERO_PAGE.clone())); + slots_filled += 1; + continue; + } + + let key = rel_block_to_key(tag, blknum); + + let key_slots = keys_slots.entry(key).or_default(); + key_slots.push(response_slot_idx); } - let nblocks = self.get_rel_size(tag, version, ctx).await?; - if blknum >= nblocks { - debug!( - "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", - tag, - blknum, - version.get_lsn(), - nblocks - ); - return Ok(ZERO_PAGE.clone()); + let keyspace = { + // add_key requires monotonicity + let mut acc = KeySpaceAccum::new(); + for key in keys_slots + .keys() + // in fact it requires strong monotonicity + .dedup() + { + acc.add_key(*key); + } + acc.to_keyspace() + }; + + match self.get_vectored(keyspace, effective_lsn, ctx).await { + Ok(results) => { + for (key, res) in results { + let mut key_slots = keys_slots.remove(&key).unwrap().into_iter(); + let first_slot = key_slots.next().unwrap(); + + for slot in key_slots { + let clone = match &res { + Ok(buf) => Ok(buf.clone()), + Err(err) => Err(match err { + PageReconstructError::Cancelled => { + PageReconstructError::Cancelled + } + + x @ PageReconstructError::Other(_) | + x @ PageReconstructError::AncestorLsnTimeout(_) | + x @ PageReconstructError::WalRedo(_) | + x @ PageReconstructError::MissingKey(_) => { + PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}")) + }, + }), + }; + + result_slots[slot].write(clone); + slots_filled += 1; + } + + result_slots[first_slot].write(res); + slots_filled += 1; + } + } + Err(err) => { + // this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size + // (We enforce the max batch size outside of this function, in the code that constructs the batch request.) 
+ for slot in keys_slots.values().flatten() { + // this whole `match` is a lot like `From for PageReconstructError` + // but without taking ownership of the GetVectoredError + let err = match &err { + GetVectoredError::Cancelled => { + Err(PageReconstructError::Cancelled) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::MissingKey(err) => { + Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}"))) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::GetReadyAncestorError(err) => { + Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"))) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::Other(err) => { + Err(PageReconstructError::Other( + anyhow::anyhow!("whole vectored get request failed: {err:?}"), + )) + } + // TODO: we can prevent this error class by moving this check into the type system + GetVectoredError::InvalidLsn(e) => { + Err(anyhow::anyhow!("invalid LSN: {e:?}").into()) + } + // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS + // TODO: we can prevent this error class by moving this check into the type system + GetVectoredError::Oversized(err) => { + Err(anyhow::anyhow!( + "batching oversized: {err:?}" + ) + .into()) + } + }; + + result_slots[*slot].write(err); + } + + slots_filled += keys_slots.values().map(|slots| slots.len()).sum::(); + } + }; + + assert_eq!(slots_filled, page_count); + // SAFETY: + // 1. `result` and any of its uninint members are not read from until this point + // 2. The length below is tracked at run-time and matches the number of requested pages. + unsafe { + result.set_len(page_count); } - let key = rel_block_to_key(tag, blknum); - version.get(self, key, ctx).await + result } // Get size of a database in blocks diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 38d69760f2..ad6ccbc854 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -1528,6 +1528,11 @@ mod tests { assert_current_logical_size(&tline, Lsn(0x50)); + let test_span = tracing::info_span!(parent: None, "test", + tenant_id=%tline.tenant_shard_id.tenant_id, + shard_id=%tline.tenant_shard_id.shard_slug(), + timeline_id=%tline.timeline_id); + // The relation was created at LSN 2, not visible at LSN 1 yet. 
assert_eq!( tline @@ -1562,6 +1567,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x20)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 2") ); @@ -1569,6 +1575,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x30)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); @@ -1576,12 +1583,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x40)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x40)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); @@ -1589,18 +1598,21 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 2 at 5") ); @@ -1623,12 +1635,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); @@ -1643,6 +1657,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 2 at 5") ); @@ -1675,12 +1690,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x70)), &ctx) + .instrument(test_span.clone()) .await?, ZERO_PAGE ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x70)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1") ); @@ -1701,6 +1718,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blk, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, ZERO_PAGE ); @@ -1708,6 +1726,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1500, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1500") ); @@ -1815,6 +1834,11 @@ mod tests { } m.commit(&ctx).await?; + let test_span = tracing::info_span!(parent: None, "test", + tenant_id=%tline.tenant_shard_id.tenant_id, + shard_id=%tline.tenant_shard_id.shard_slug(), + timeline_id=%tline.timeline_id); + // The relation was created at LSN 20, not visible at LSN 1 yet. 
assert_eq!( tline @@ -1847,6 +1871,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(lsn), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1874,6 +1899,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1892,6 +1918,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1928,6 +1955,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); From 4fc3af15ddae7058bb38335dd92674d277c10e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Mon, 18 Nov 2024 21:42:19 +0100 Subject: [PATCH 21/43] Remove at most one retain_lsn entry from (possibly offloaded) timelne's parent (#9791) There is a potential data corruption issue, not one I've encountered, but it's still not hard to hit with some correct looking code given our current architecture. It has to do with the timeline's memory object storage via reference counted `Arc`s, and the removal of `retain_lsn` entries at the drop of the last `Arc` reference. The corruption steps are as follows: 1. timeline gets offloaded. timeline object A doesn't get dropped though, because some long-running task accesses it 2. the same timeline gets unoffloaded again. timeline object B gets created for it, timeline object A still referenced. both point to the same timeline. 3. the task keeping the reference to timeline object A exits. destructor for object A runs, removing `retain_lsn` in the timeline's parent. 4. the timeline's parent runs gc without the `retain_lsn` of the still exant timleine's child, leading to data corruption. In general we are susceptible each time when we recreate a `Timeline` object in the same process, which happens both during a timeline offload/unoffload cycle, as well as during an ancestor detach operation. The solution this PR implements is to make the destructor for a timeline as well as an offloaded timeline remove at most one `retain_lsn`. PR #9760 has added a log line to print the refcounts at timeline offload, but this only detects one of the places where we do such a recycle operation. Plus it doesn't prevent the actual issue. I doubt that this occurs in practice. It is more a defense in depth measure. Usually I'd assume that the timeline gets dropped immediately in step 1, as there is no background tasks referencing it after its shutdown. But one never knows, and reducing the stakes of step 1 actually occurring is a really good idea, from potential data corruption to waste of CPU time. 
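To make the intended semantics concrete, here is a minimal, self-contained sketch of the "remove at most one matching entry" idiom this patch applies to the parent's `retain_lsns`. The type and function names below are simplified placeholders for illustration only, not the actual pageserver types:

```rust
/// Remove at most one entry for `child_id`, returning whether anything was removed.
/// This mirrors the retain-with-flag approach used for `retain_lsns`: if two live
/// `Timeline` objects registered an entry for the same child, each destructor
/// removes exactly one of them instead of wiping both.
fn remove_child_at_most_once(retain_lsns: &mut Vec<(u64, u32)>, child_id: u32) -> bool {
    let mut removed = false;
    retain_lsns.retain(|&(_lsn, id)| {
        if removed {
            // Already removed one entry; keep everything else.
            return true;
        }
        let matches = id == child_id;
        removed |= matches;
        !matches
    });
    removed
}

fn main() {
    let mut retain_lsns = vec![(0x10, 7), (0x20, 7), (0x30, 8)];
    assert!(remove_child_at_most_once(&mut retain_lsns, 7));
    assert_eq!(retain_lsns, vec![(0x20, 7), (0x30, 8)]);
    assert!(remove_child_at_most_once(&mut retain_lsns, 7));
    // A further call finds nothing left to remove; the real code logs an error then.
    assert!(!remove_child_at_most_once(&mut retain_lsns, 7));
}
```
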
Part of #8088 --- pageserver/src/tenant.rs | 6 +++++- pageserver/src/tenant/timeline.rs | 29 +++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 909f99ea9d..e88dee7c6c 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -608,11 +608,15 @@ impl OffloadedTimeline { .iter() .find(|(tid, _tl)| **tid == ancestor_timeline_id) { - ancestor_timeline + let removal_happened = ancestor_timeline .gc_info .write() .unwrap() .remove_child_offloaded(self.timeline_id); + if !removal_happened { + tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id, + "Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed"); + } } } self.deleted_from_ancestor.store(true, Ordering::Release); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2bc14ec317..5547bc2c7a 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -481,17 +481,27 @@ impl GcInfo { &mut self, child_id: TimelineId, maybe_offloaded: MaybeOffloaded, - ) { - self.retain_lsns - .retain(|i| !(i.1 == child_id && i.2 == maybe_offloaded)); + ) -> bool { + // Remove at most one element. Needed for correctness if there is two live `Timeline` objects referencing + // the same timeline. Shouldn't but maybe can occur when Arc's live longer than intended. + let mut removed = false; + self.retain_lsns.retain(|i| { + if removed { + return true; + } + let remove = i.1 == child_id && i.2 == maybe_offloaded; + removed |= remove; + !remove + }); + removed } - pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) { - self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No); + pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) -> bool { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No) } - pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) { - self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes); + pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes) } } @@ -4514,7 +4524,10 @@ impl Drop for Timeline { // This lock should never be poisoned, but in case it is we do a .map() instead of // an unwrap(), to avoid panicking in a destructor and thereby aborting the process. if let Ok(mut gc_info) = ancestor.gc_info.write() { - gc_info.remove_child_not_offloaded(self.timeline_id) + if !gc_info.remove_child_not_offloaded(self.timeline_id) { + tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id, + "Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed"); + } } } } From 9b6af2bcaddad49f59809c69717c23ced725dfa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Mon, 18 Nov 2024 22:01:48 +0100 Subject: [PATCH 22/43] Add the ability to configure GenericRemoteStorage for the scrubber (#9652) Earlier work (#7547) has made the scrubber internally generic, but one could only configure it to use S3 storage. This is the final piece to make (most of, snapshotting still requires S3) the scrubber be able to be configured via GenericRemoteStorage. I.e. 
you can now set an env var like: ``` REMOTE_STORAGE_CONFIG='remote_storage = { bucket_name = "neon-dev-safekeeper-us-east-2d", bucket_region = "us-east-2" } ``` and the scrubber will read it instead. --- Cargo.lock | 1 - libs/remote_storage/src/config.rs | 21 ++- pageserver/ctl/Cargo.toml | 1 - pageserver/ctl/src/main.rs | 6 +- storage_scrubber/src/find_large_objects.rs | 4 +- storage_scrubber/src/garbage.rs | 4 +- storage_scrubber/src/lib.rs | 129 +++++++++++------- storage_scrubber/src/main.rs | 18 +-- .../src/scan_safekeeper_metadata.rs | 5 +- storage_scrubber/src/tenant_snapshot.rs | 14 +- 10 files changed, 116 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da8cefb219..c7af140f7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3578,7 +3578,6 @@ dependencies = [ "thiserror", "tokio", "tokio-util", - "toml_edit", "utils", "workspace_hack", ] diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index d0e92411da..e99ae4f747 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -26,6 +26,16 @@ pub struct RemoteStorageConfig { pub timeout: Duration, } +impl RemoteStorageKind { + pub fn bucket_name(&self) -> Option<&str> { + match self { + RemoteStorageKind::LocalFs { .. } => None, + RemoteStorageKind::AwsS3(config) => Some(&config.bucket_name), + RemoteStorageKind::AzureContainer(config) => Some(&config.container_name), + } + } +} + fn default_timeout() -> Duration { RemoteStorageConfig::DEFAULT_TIMEOUT } @@ -178,6 +188,14 @@ impl RemoteStorageConfig { pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result { Ok(utils::toml_edit_ext::deserialize_item(toml)?) } + + pub fn from_toml_str(input: &str) -> anyhow::Result { + let toml_document = toml_edit::DocumentMut::from_str(input)?; + if let Some(item) = toml_document.get("remote_storage") { + return Self::from_toml(item); + } + Self::from_toml(toml_document.as_item()) + } } #[cfg(test)] @@ -185,8 +203,7 @@ mod tests { use super::*; fn parse(input: &str) -> anyhow::Result { - let toml = input.parse::().unwrap(); - RemoteStorageConfig::from_toml(toml.as_item()) + RemoteStorageConfig::from_toml_str(input) } #[test] diff --git a/pageserver/ctl/Cargo.toml b/pageserver/ctl/Cargo.toml index a753f806a0..39ca47568c 100644 --- a/pageserver/ctl/Cargo.toml +++ b/pageserver/ctl/Cargo.toml @@ -18,7 +18,6 @@ postgres_ffi.workspace = true thiserror.workspace = true tokio.workspace = true tokio-util.workspace = true -toml_edit.workspace = true utils.workspace = true svg_fmt.workspace = true workspace_hack.workspace = true diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index 92e766d2fb..a0aac89dc8 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -174,11 +174,7 @@ async fn main() -> anyhow::Result<()> { println!("specified prefix '{}' failed validation", cmd.prefix); return Ok(()); }; - let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?; - let toml_item = toml_document - .get("remote_storage") - .expect("need remote_storage"); - let config = RemoteStorageConfig::from_toml(toml_item)?; + let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?; let storage = remote_storage::GenericRemoteStorage::from_config(&config).await; let cancel = CancellationToken::new(); storage diff --git a/storage_scrubber/src/find_large_objects.rs b/storage_scrubber/src/find_large_objects.rs index 88e36af560..95d3af1453 100644 --- a/storage_scrubber/src/find_large_objects.rs +++ 
b/storage_scrubber/src/find_large_objects.rs @@ -106,9 +106,9 @@ pub async fn find_large_objects( } } - let bucket_name = target.bucket_name(); + let desc_str = target.desc_str(); tracing::info!( - "Scan of {bucket_name} finished. Scanned {tenant_ctr} shards. objects={object_ctr}, found={}.", + "Scan of {desc_str} finished. Scanned {tenant_ctr} shards. objects={object_ctr}, found={}.", objects.len() ); Ok(LargeObjectListing { objects }) diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 863dbf960d..91668a42a7 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -177,7 +177,7 @@ async fn find_garbage_inner( })); // Enumerate Tenants in S3, and check if each one exists in Console - tracing::info!("Finding all tenants in bucket {}...", bucket_config.bucket); + tracing::info!("Finding all tenants in {}...", bucket_config.desc_str()); let tenants = stream_tenants(&remote_client, &target); let tenants_checked = tenants.map_ok(|t| { let api_client = cloud_admin_api_client.clone(); @@ -524,7 +524,7 @@ pub async fn purge_garbage( init_remote(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?; assert_eq!( - &garbage_list.bucket_config.bucket, + garbage_list.bucket_config.bucket_name().unwrap(), remote_client.bucket_name().unwrap() ); diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index de0857cb5f..1fe4fc58cd 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -29,8 +29,7 @@ use pageserver::tenant::TENANTS_SEGMENT_NAME; use pageserver_api::shard::TenantShardId; use remote_storage::{ DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, - RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + RemoteStorageKind, S3Config, }; use reqwest::Url; use serde::{Deserialize, Serialize}; @@ -48,7 +47,7 @@ const CLOUD_ADMIN_API_TOKEN_ENV_VAR: &str = "CLOUD_ADMIN_API_TOKEN"; #[derive(Debug, Clone)] pub struct S3Target { - pub bucket_name: String, + pub desc_str: String, /// This `prefix_in_bucket` is only equal to the PS/SK config of the same /// name for the RootTarget: other instances of S3Target will have prefix_in_bucket /// with extra parts. 
@@ -172,7 +171,7 @@ impl RootTarget { }; S3Target { - bucket_name: root.bucket_name.clone(), + desc_str: root.desc_str.clone(), prefix_in_bucket: format!( "{}/{TENANTS_SEGMENT_NAME}/{tenant_id}", root.prefix_in_bucket @@ -209,10 +208,10 @@ impl RootTarget { } } - pub fn bucket_name(&self) -> &str { + pub fn desc_str(&self) -> &str { match self { - Self::Pageserver(root) => &root.bucket_name, - Self::Safekeeper(root) => &root.bucket_name, + Self::Pageserver(root) => &root.desc_str, + Self::Safekeeper(root) => &root.desc_str, } } @@ -230,24 +229,61 @@ pub fn remote_timeline_path_id(id: &TenantShardTimelineId) -> RemotePath { #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] -pub struct BucketConfig { - pub region: String, - pub bucket: String, - pub prefix_in_bucket: Option, -} +pub struct BucketConfig(RemoteStorageConfig); impl BucketConfig { pub fn from_env() -> anyhow::Result { - let region = env::var("REGION").context("'REGION' param retrieval")?; - let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?; - let prefix_in_bucket = env::var("BUCKET_PREFIX").ok(); - - Ok(Self { - region, - bucket, - prefix_in_bucket, - }) + if let Ok(legacy) = Self::from_env_legacy() { + return Ok(legacy); + } + let config_toml = + env::var("REMOTE_STORAGE_CONFIG").context("'REMOTE_STORAGE_CONFIG' retrieval")?; + let remote_config = RemoteStorageConfig::from_toml_str(&config_toml)?; + Ok(BucketConfig(remote_config)) } + + fn from_env_legacy() -> anyhow::Result { + let bucket_region = env::var("REGION").context("'REGION' param retrieval")?; + let bucket_name = env::var("BUCKET").context("'BUCKET' param retrieval")?; + let prefix_in_bucket = env::var("BUCKET_PREFIX").ok(); + let endpoint = env::var("AWS_ENDPOINT_URL").ok(); + // Create a json object which we then deserialize so that we don't + // have to repeat all of the S3Config fields. 
+ let s3_config_json = serde_json::json!({ + "bucket_name": bucket_name, + "bucket_region": bucket_region, + "prefix_in_bucket": prefix_in_bucket, + "endpoint": endpoint, + }); + let config: RemoteStorageConfig = serde_json::from_value(s3_config_json)?; + Ok(BucketConfig(config)) + } + pub fn desc_str(&self) -> String { + match &self.0.storage { + RemoteStorageKind::LocalFs { local_path } => { + format!("local path {local_path}") + } + RemoteStorageKind::AwsS3(config) => format!( + "bucket {}, region {}", + config.bucket_name, config.bucket_region + ), + RemoteStorageKind::AzureContainer(config) => format!( + "bucket {}, storage account {:?}, region {}", + config.container_name, config.storage_account, config.container_region + ), + } + } + pub fn bucket_name(&self) -> Option<&str> { + self.0.storage.bucket_name() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BucketConfigLegacy { + pub region: String, + pub bucket: String, + pub prefix_in_bucket: Option, } pub struct ControllerClientConfig { @@ -337,13 +373,9 @@ fn default_prefix_in_bucket(node_kind: NodeKind) -> &'static str { } } -fn make_root_target( - bucket_name: String, - prefix_in_bucket: String, - node_kind: NodeKind, -) -> RootTarget { +fn make_root_target(desc_str: String, prefix_in_bucket: String, node_kind: NodeKind) -> RootTarget { let s3_target = S3Target { - bucket_name, + desc_str, prefix_in_bucket, delimiter: "/".to_string(), }; @@ -354,15 +386,15 @@ fn make_root_target( } async fn init_remote_s3( - bucket_config: BucketConfig, + bucket_config: S3Config, node_kind: NodeKind, ) -> anyhow::Result<(Arc, RootTarget)> { - let bucket_region = Region::new(bucket_config.region); + let bucket_region = Region::new(bucket_config.bucket_region); let s3_client = Arc::new(init_s3_client(bucket_region).await); let default_prefix = default_prefix_in_bucket(node_kind).to_string(); let s3_root = make_root_target( - bucket_config.bucket, + bucket_config.bucket_name, bucket_config.prefix_in_bucket.unwrap_or(default_prefix), node_kind, ); @@ -371,33 +403,28 @@ async fn init_remote_s3( } async fn init_remote( - bucket_config: BucketConfig, + mut storage_config: BucketConfig, node_kind: NodeKind, ) -> anyhow::Result<(GenericRemoteStorage, RootTarget)> { - let endpoint = env::var("AWS_ENDPOINT_URL").ok(); + let desc_str = storage_config.desc_str(); + let default_prefix = default_prefix_in_bucket(node_kind).to_string(); - let prefix_in_bucket = Some(bucket_config.prefix_in_bucket.unwrap_or(default_prefix)); - let storage = S3Config { - bucket_name: bucket_config.bucket.clone(), - bucket_region: bucket_config.region, - prefix_in_bucket, - endpoint, - concurrency_limit: DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT - .try_into() - .unwrap(), - max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - upload_storage_class: None, - }; - let storage_config = RemoteStorageConfig { - storage: RemoteStorageKind::AwsS3(storage), - timeout: RemoteStorageConfig::DEFAULT_TIMEOUT, - }; + + match &mut storage_config.0.storage { + RemoteStorageKind::AwsS3(ref mut config) => { + config.prefix_in_bucket.get_or_insert(default_prefix); + } + RemoteStorageKind::AzureContainer(ref mut config) => { + config.prefix_in_container.get_or_insert(default_prefix); + } + RemoteStorageKind::LocalFs { .. 
} => (), + } // We already pass the prefix to the remote client above let prefix_in_root_target = String::new(); - let root_target = make_root_target(bucket_config.bucket, prefix_in_root_target, node_kind); + let root_target = make_root_target(desc_str, prefix_in_root_target, node_kind); - let client = GenericRemoteStorage::from_config(&storage_config).await?; + let client = GenericRemoteStorage::from_config(&storage_config.0).await?; Ok((client, root_target)) } @@ -469,7 +496,7 @@ async fn list_objects_with_retries( } warn!( "list_objects_v2 query failed: bucket_name={}, prefix={}, delimiter={}, error={}", - s3_target.bucket_name, + remote_client.bucket_name().unwrap_or_default(), s3_target.prefix_in_bucket, s3_target.delimiter, DisplayErrorContext(e), diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index ee816534c6..0ffb570984 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -140,7 +140,7 @@ async fn main() -> anyhow::Result<()> { "{}_{}_{}_{}.log", std::env::args().next().unwrap(), command_log_name, - bucket_config.bucket, + bucket_config.bucket_name().unwrap_or("nobucket"), chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S") )); @@ -191,13 +191,7 @@ async fn main() -> anyhow::Result<()> { // Strictly speaking an empty bucket is a valid bucket, but if someone ran the // scrubber they were likely expecting to scan something, and if we see no timelines // at all then it's likely due to some configuration issues like a bad prefix - bail!( - "No timelines found in bucket {} prefix {}", - bucket_config.bucket, - bucket_config - .prefix_in_bucket - .unwrap_or("".to_string()) - ); + bail!("No timelines found in {}", bucket_config.desc_str()); } Ok(()) } else { @@ -396,13 +390,7 @@ pub async fn scan_pageserver_metadata_cmd( // Strictly speaking an empty bucket is a valid bucket, but if someone ran the // scrubber they were likely expecting to scan something, and if we see no timelines // at all then it's likely due to some configuration issues like a bad prefix - tracing::error!( - "No timelines found in bucket {} prefix {}", - bucket_config.bucket, - bucket_config - .prefix_in_bucket - .unwrap_or("".to_string()) - ); + tracing::error!("No timelines found in {}", bucket_config.desc_str()); if exit_code { std::process::exit(1); } diff --git a/storage_scrubber/src/scan_safekeeper_metadata.rs b/storage_scrubber/src/scan_safekeeper_metadata.rs index 403b4590a8..0a4d4266a0 100644 --- a/storage_scrubber/src/scan_safekeeper_metadata.rs +++ b/storage_scrubber/src/scan_safekeeper_metadata.rs @@ -84,10 +84,7 @@ pub async fn scan_safekeeper_metadata( bucket_config: BucketConfig, db_or_list: DatabaseOrList, ) -> anyhow::Result { - info!( - "checking bucket {}, region {}", - bucket_config.bucket, bucket_config.region - ); + info!("checking {}", bucket_config.desc_str()); let (remote_client, target) = init_remote(bucket_config, NodeKind::Safekeeper).await?; let console_config = ConsoleConfig::from_env()?; diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index bb4079b5f4..39e0b5c9b4 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -16,7 +16,7 @@ use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::IndexPart; use pageserver_api::shard::TenantShardId; -use remote_storage::GenericRemoteStorage; +use remote_storage::{GenericRemoteStorage, S3Config}; use 
utils::generation::Generation; use utils::id::TenantId; @@ -24,6 +24,7 @@ pub struct SnapshotDownloader { s3_client: Arc, s3_root: RootTarget, bucket_config: BucketConfig, + bucket_config_s3: S3Config, tenant_id: TenantId, output_path: Utf8PathBuf, concurrency: usize, @@ -36,12 +37,17 @@ impl SnapshotDownloader { output_path: Utf8PathBuf, concurrency: usize, ) -> anyhow::Result { + let bucket_config_s3 = match &bucket_config.0.storage { + remote_storage::RemoteStorageKind::AwsS3(config) => config.clone(), + _ => panic!("only S3 configuration is supported for snapshot downloading"), + }; let (s3_client, s3_root) = - init_remote_s3(bucket_config.clone(), NodeKind::Pageserver).await?; + init_remote_s3(bucket_config_s3.clone(), NodeKind::Pageserver).await?; Ok(Self { s3_client, s3_root, bucket_config, + bucket_config_s3, tenant_id, output_path, concurrency, @@ -87,7 +93,7 @@ impl SnapshotDownloader { let versions = self .s3_client .list_object_versions() - .bucket(self.bucket_config.bucket.clone()) + .bucket(self.bucket_config_s3.bucket_name.clone()) .prefix(&remote_layer_path) .send() .await?; @@ -96,7 +102,7 @@ impl SnapshotDownloader { }; download_object_to_file_s3( &self.s3_client, - &self.bucket_config.bucket, + &self.bucket_config_s3.bucket_name, &remote_layer_path, version.version_id.as_deref(), &local_path, From 982cb1c15da7d7002dfb2eafbb2d45c665f3bf76 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Tue, 19 Nov 2024 10:46:46 +0100 Subject: [PATCH 23/43] Move logic for ingest benchmark from GitHub workflow into python testcase (#9762) ## Problem The first version of the ingest benchmark had some parsing and reporting logic in shell script inside GitHub workflow. it is better to move that logic into a python testcase so that we can also run it locally. 
## Summary of changes - Create new python testcase - invoke pgcopydb inside python test case - move the following logic into python testcase - determine backpressure - invoke pgcopydb and report its progress - parse pgcopydb log and extract metrics - insert metrics into perf test database - add additional column to perf test database that can receive endpoint ID used for pgcopydb run to have it available in grafana dashboard when retrieving other metrics for an endpoint ## Example run https://github.com/neondatabase/neon/actions/runs/11860622170/job/33056264386 --- .../actions/run-python-test-set/action.yml | 11 + .github/workflows/benchmarking.yml | 1 + .github/workflows/ingest_benchmark.yml | 277 ++---------------- scripts/ingest_perf_test_result.py | 10 +- test_runner/fixtures/benchmark_fixture.py | 6 + .../test_perf_ingest_using_pgcopydb.py | 267 +++++++++++++++++ 6 files changed, 324 insertions(+), 248 deletions(-) create mode 100644 test_runner/performance/test_perf_ingest_using_pgcopydb.py diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 037b9aeb1e..275f161019 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -48,6 +48,10 @@ inputs: description: 'benchmark durations JSON' required: false default: '{}' + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -222,6 +226,13 @@ runs: # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report - name: Upload test results if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-store diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 68bc555982..0e3c31ec57 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -133,6 +133,7 @@ jobs: --ignore test_runner/performance/test_perf_pgvector_queries.py --ignore test_runner/performance/test_logical_replication.py --ignore test_runner/performance/test_physical_replication.py + --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index d770bb2bb5..1033dc6489 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -1,4 +1,4 @@ -name: Benchmarking +name: benchmarking ingest on: # uncomment to run on push for debugging your PR @@ -74,18 +74,16 @@ jobs: compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'new_empty_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} + 
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }} run: | echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - name: Create Neon Branch for large tenant if: ${{ matrix.target_project == 'large_existing_project' }} @@ -95,266 +93,55 @@ jobs: project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'large_existing_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} + BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }} run: | echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} # Extract the part before the database name - base_connstr="${NEW_PROJECT_CONNSTR%/*}" + base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}" # Extract the query parameters (if any) after the database name - query_params="${NEW_PROJECT_CONNSTR#*\?}" + query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}" # Reconstruct the new connection string - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then new_connstr="${base_connstr}/neondb?${query_params}" else new_connstr="${base_connstr}/neondb" fi ${PSQL} "${new_connstr}" -c "drop database ludicrous;" ${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;" - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}" + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}" else - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous" + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous" fi - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV - - - - name: Create pgcopydb filter file - run: | - cat << EOF > /tmp/pgcopydb_filter.txt - [include-only-table] - public.events - public.emails - public.email_transmissions - public.payments - public.editions - public.edition_modules - public.sp_content - public.email_broadcasts - public.user_collections - public.devices - public.user_accounts - public.lessons - public.lesson_users 
- public.payment_methods - public.orders - public.course_emails - public.modules - public.users - public.module_users - public.courses - public.payment_gateway_keys - public.accounts - public.roles - public.payment_gateways - public.management - public.event_names - EOF + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - - name: Invoke pgcopydb + - name: Invoke pgcopydb + uses: ./.github/actions/run-python-test-set + with: + build_type: remote + test_selection: performance/test_perf_ingest_using_pgcopydb.py + run_in_parallel: false + extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb + pg_version: v16 + save_perf_report: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: - BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH} - export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}" - export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}" - export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7" - ${PG_CONFIG} --bindir - ${PGCOPYDB} --version - ${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \ - --index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \ - --use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log + BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} + TARGET_PROJECT_TYPE: ${{ matrix.target_project }} + # we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant + PLATFORM: "${{ matrix.target_project }}-us-east-2-staging" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - # create dummy pgcopydb log to test parsing - # - name: create dummy log for parser test - # run: | - # cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log - # 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb" - # 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database - # 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID - # 2024-11-04 18:01:05.538 500865 INFO 
copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid. - # 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk - # 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints) - # 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences - # 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions - # 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database - # 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data) - # 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' - # 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database - # 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes - # 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes - # 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found. 
- # 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values - # 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database - # 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database - # 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed - # 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes - - # OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration - # ------+--------+----------------------+-------+---------------+-------------------+---------+---------------------- - # 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m - # 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m - # 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s - # 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s - # 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s - # 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s - # 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s - # 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s - # 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s - # 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s - # 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms - # 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s - # 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s - # 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749 - # 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms - # 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696 - # 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032 - # 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms - # 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms - # 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms - # 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms - # 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms - # 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms - # 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms - # 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms - # 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms - - - # Step Connection Duration Transfer Concurrency - # -------------------------------------------------- ---------- ---------- ---------- ------------ - # Catalog Queries (table ordering, filtering, etc) source 12s 1 - # Dump Schema source 765ms 1 - # Prepare Schema target 466ms 1 - # COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12 - # COPY (cumulative) both 7h46m 1225 GB 4 - # CREATE INDEX (cumulative) target 4h36m 4 - # CONSTRAINTS (cumulative) target 8s493 4 - # VACUUM (cumulative) target 0ms 4 - # Reset Sequences both 60ms 1 - # Large Objects (cumulative) (null) 0ms 0 - # Finalize Schema both 14h01m 4 - # -------------------------------------------------- ---------- ---------- ---------- ------------ - 
# Total Wall Clock Duration both 16h49m 20 - - - # EOF - - - - name: show tables sizes and retrieve current backpressure seconds + - name: show tables sizes after ingest run: | export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+" - BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV - - - name: Parse pgcopydb log and report performance metrics - env: - PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - - # Define the log file path - LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log" - - # Get the current git commit hash - git config --global --add safe.directory /__w/neon/neon - COMMIT_HASH=$(git rev-parse --short HEAD) - - # Define the platform and test suite - PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging" - SUIT="pgcopydb_ingest_bench" - - # Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds - convert_to_seconds() { - local duration=$1 - local total_seconds=0 - - # Check for hours (h) - if [[ "$duration" =~ ([0-9]+)h ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600)) - fi - - # Check for seconds (s) - if [[ "$duration" =~ ([0-9]+)s ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0})) - fi - - # Check for milliseconds (ms) (if applicable) - if [[ "$duration" =~ ([0-9]+)ms ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000)) - duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m - fi - - # Check for minutes (m) - must be checked after ms because m is contained in ms - if [[ "$duration" =~ ([0-9]+)m ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60)) - fi - - echo $total_seconds - } - - # Calculate the backpressure difference in seconds - BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}") - - # Insert the backpressure time difference into the performance database - if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds" - eval $PSQL_CMD - fi - - # Extract and process log lines - while IFS= read -r line; do - METRIC_NAME="" - # Match each desired line and extract the relevant information - if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then - METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)" - elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then - METRIC_NAME="COPY (cumulative)" - elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then - METRIC_NAME="CREATE INDEX (cumulative)" - elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then - METRIC_NAME="CONSTRAINTS (cumulative)" - elif [[ "$line" =~ Finalize\ Schema.* ]]; then - METRIC_NAME="Finalize Schema" - elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then - METRIC_NAME="Total Wall Clock Duration" - fi - - # If a metric was matched, insert it into the performance database - if [ -n "$METRIC_NAME" ]; then - 
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1) - METRIC_VALUE=$(convert_to_seconds "$DURATION") - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds" - eval $PSQL_CMD - fi - done < "$LOG_FILE" + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - name: Delete Neon Project if: ${{ always() && matrix.target_project == 'new_empty_project' }} diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py index 40071c01b0..804f8a3cde 100644 --- a/scripts/ingest_perf_test_result.py +++ b/scripts/ingest_perf_test_result.py @@ -25,7 +25,8 @@ CREATE TABLE IF NOT EXISTS perf_test_results ( metric_value NUMERIC, metric_unit VARCHAR(10), metric_report_type TEXT, - recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW() + recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + labels JSONB with default '{}' ) """ @@ -91,6 +92,7 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) "metric_unit": metric["unit"], "metric_report_type": metric["report"], "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp), + "labels": json.dumps(metric.get("labels")), } args_list.append(values) @@ -105,7 +107,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) metric_value, metric_unit, metric_report_type, - recorded_at_timestamp + recorded_at_timestamp, + labels ) VALUES %s """, args_list, @@ -117,7 +120,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) %(metric_value)s, %(metric_unit)s, %(metric_report_type)s, - %(recorded_at_timestamp)s + %(recorded_at_timestamp)s, + %(labels)s )""", ) return len(args_list) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index d3419bd8b1..8e68775471 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -256,12 +256,17 @@ class NeonBenchmarker: metric_value: float, unit: str, report: MetricReport, + labels: Optional[ + dict[str, str] + ] = None, # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric ): """ Record a benchmark result. """ # just to namespace the value name = f"{self.PROPERTY_PREFIX}_{metric_name}" + if labels is None: + labels = {} self.property_recorder( name, { @@ -269,6 +274,7 @@ class NeonBenchmarker: "value": metric_value, "unit": unit, "report": report, + "labels": labels, }, ) diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py new file mode 100644 index 0000000000..2f4574ba88 --- /dev/null +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -0,0 +1,267 @@ +import os +import re +import subprocess +import sys +import textwrap +from pathlib import Path +from typing import cast +from urllib.parse import urlparse + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.utils import humantime_to_ms + + +def setup_environment(): + """Set up necessary environment variables for pgcopydb execution. 
+ + Expects the following variables to be set in the environment: + - PG_CONFIG: e.g. /tmp/neon/pg_install/v16/bin/pg_config + - PSQL: e.g. /tmp/neon/pg_install/v16/bin/psql + - PG_16_LIB_PATH: e.g. /tmp/neon/pg_install/v16/lib + - PGCOPYDB: e.g. /pgcopydb/bin/pgcopydb + - PGCOPYDB_LIB_PATH: e.g. /pgcopydb/lib + - BENCHMARK_INGEST_SOURCE_CONNSTR + - BENCHMARK_INGEST_TARGET_CONNSTR + - PERF_TEST_RESULT_CONNSTR + - TARGET_PROJECT_TYPE + + """ + # Ensure required environment variables are set + required_env_vars = [ + "PGCOPYDB", + "PGCOPYDB_LIB_PATH", + "PG_CONFIG", + "PSQL", + "PG_16_LIB_PATH", + "BENCHMARK_INGEST_SOURCE_CONNSTR", + "BENCHMARK_INGEST_TARGET_CONNSTR", + "PERF_TEST_RESULT_CONNSTR", + "TARGET_PROJECT_TYPE", + ] + for var in required_env_vars: + if not os.getenv(var): + raise OSError(f"Required environment variable '{var}' is not set.") + + +def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path): + """Builds the pgcopydb command to execute using existing environment variables.""" + pgcopydb_executable = os.getenv("PGCOPYDB") + if not pgcopydb_executable: + raise OSError("PGCOPYDB environment variable is not set.") + + return [ + pgcopydb_executable, + "clone", + "--dir", + str(test_output_dir), + "--skip-vacuum", + "--no-owner", + "--no-acl", + "--skip-db-properties", + "--table-jobs", + "4", + "--index-jobs", + "4", + "--restore-jobs", + "4", + "--split-tables-larger-than", + "10GB", + "--skip-extensions", + "--use-copy-binary", + "--filters", + str(pgcopydb_filter_file), + ] + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def pgcopydb_filter_file(test_output_dir: Path) -> Path: + """Creates the pgcopydb_filter.txt file required by pgcopydb.""" + filter_content = textwrap.dedent("""\ + [include-only-table] + public.events + public.emails + public.email_transmissions + public.payments + public.editions + public.edition_modules + public.sp_content + public.email_broadcasts + public.user_collections + public.devices + public.user_accounts + public.lessons + public.lesson_users + public.payment_methods + public.orders + public.course_emails + public.modules + public.users + public.module_users + public.courses + public.payment_gateway_keys + public.accounts + public.roles + public.payment_gateways + public.management + public.event_names + """) + filter_path = test_output_dir / "pgcopydb_filter.txt" + filter_path.write_text(filter_content) + return filter_path + + +def get_backpressure_time(connstr): + """Executes a query to get the backpressure throttling time in seconds.""" + query = "select backpressure_throttling_time()/1000000;" + psql_path = os.getenv("PSQL") + if psql_path is None: + raise OSError("The PSQL environment variable is not set.") + result = subprocess.run( + [psql_path, connstr, "-t", "-c", query], capture_output=True, text=True, check=True + ) + return float(result.stdout.strip()) + + +def run_command_and_log_output(command, log_file_path: Path): + """ + Runs a command and logs output to both a file and GitHub Actions console. + + Args: + command (list): The command to execute. + log_file_path (Path): Path object for the log file where output is written. 
+ """ + # Define a list of necessary environment variables for pgcopydb + custom_env_vars = { + "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", + "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), + "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), + "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", + } + # Combine the current environment with custom variables + env = os.environ.copy() + env.update(custom_env_vars) + + with log_file_path.open("w") as log_file: + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env + ) + + assert process.stdout is not None, "process.stdout should not be None" + + # Stream output to both log file and console + for line in process.stdout: + print(line, end="") # Stream to GitHub Actions log + sys.stdout.flush() + log_file.write(line) # Write to log file + + process.wait() # Wait for the process to finish + if process.returncode != 0: + raise subprocess.CalledProcessError(process.returncode, command) + + +def parse_log_and_report_metrics( + zenbenchmark: NeonBenchmarker, log_file_path: Path, backpressure_time_diff: float +): + """Parses the pgcopydb log file for performance metrics and reports them to the database.""" + metrics = {"backpressure_time": backpressure_time_diff} + + # Define regex patterns to capture metrics + metric_patterns = { + "COPY_INDEX_CONSTRAINTS_VACUUM": re.compile( + r"COPY, INDEX, CONSTRAINTS, VACUUM \(wall clock\).*" + ), + "COPY_CUMULATIVE": re.compile(r"COPY \(cumulative\).*"), + "CREATE_INDEX_CUMULATIVE": re.compile(r"CREATE INDEX \(cumulative\).*"), + "CONSTRAINTS_CUMULATIVE": re.compile(r"CONSTRAINTS \(cumulative\).*"), + "FINALIZE_SCHEMA": re.compile(r"Finalize Schema.*"), + "TOTAL_DURATION": re.compile(r"Total Wall Clock Duration.*"), + } + + # Parse log file + with log_file_path.open("r") as log_file: + for line in log_file: + for metric_name, pattern in metric_patterns.items(): + if pattern.search(line): + # Extract duration and convert it to seconds + duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line) + if duration_match: + duration_str = duration_match.group(0) + parts = re.findall(r"\d+[a-zA-Z]+", duration_str) + rust_like_humantime = " ".join(parts) + duration_seconds = humantime_to_ms(rust_like_humantime) / 1000.0 + metrics[metric_name] = duration_seconds + + endpoint_id = {"endpoint_id": get_endpoint_id()} + for metric_name, duration_seconds in metrics.items(): + zenbenchmark.record( + metric_name, duration_seconds, "s", MetricReport.LOWER_IS_BETTER, endpoint_id + ) + + +def get_endpoint_id(): + """Extracts and returns the first segment of the hostname from the PostgreSQL URI stored in BENCHMARK_INGEST_TARGET_CONNSTR.""" + connstr = os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR") + if connstr is None: + raise OSError("BENCHMARK_INGEST_TARGET_CONNSTR environment variable is not set.") + + # Parse the URI + parsed_url = urlparse(connstr) + + # Extract the hostname and split to get the first segment + hostname = parsed_url.hostname + if hostname is None: + raise ValueError("Unable to parse hostname from BENCHMARK_INGEST_TARGET_CONNSTR") + + # Split the hostname by dots and take the first segment + endpoint_id = hostname.split(".")[0] + + return endpoint_id + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def log_file_path(test_output_dir): + """Fixture to provide a temporary log 
file path.""" + if not os.getenv("TARGET_PROJECT_TYPE"): + raise OSError("Required environment variable 'TARGET_PROJECT_TYPE' is not set.") + return (test_output_dir / os.getenv("TARGET_PROJECT_TYPE")).with_suffix(".log") + + +@pytest.mark.remote_cluster +def test_ingest_performance_using_pgcopydb( + zenbenchmark: NeonBenchmarker, + log_file_path: Path, + pgcopydb_filter_file: Path, + test_output_dir: Path, +): + """ + Simulate project migration from another PostgreSQL provider to Neon. + + Measure performance for Neon ingest steps + - COPY + - CREATE INDEX + - CREATE CONSTRAINT + - VACUUM ANALYZE + - create foreign keys + + Use pgcopydb to copy data from the source database to the destination database. + """ + # Set up environment and create filter file + setup_environment() + + # Get backpressure time before ingest + backpressure_time_before = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + + # Build and run the pgcopydb command + command = build_pgcopydb_command(pgcopydb_filter_file, test_output_dir) + try: + run_command_and_log_output(command, log_file_path) + except subprocess.CalledProcessError as e: + pytest.fail(f"pgcopydb command failed with error: {e}") + + # Get backpressure time after ingest and calculate the difference + backpressure_time_after = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + backpressure_time_diff = backpressure_time_after - backpressure_time_before + + # Parse log file and report metrics, including backpressure time difference + parse_log_and_report_metrics(zenbenchmark, log_file_path, backpressure_time_diff) From c9acd214ae37272f3a5eac97aef557b73a692039 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 19 Nov 2024 11:56:40 +0200 Subject: [PATCH 24/43] Do not create DSM segment for wal_redo_postgres (#9793) ## Problem See https://github.com/neondatabase/neon/issues/9738 ## Summary of changes Do not create DSM segment for wal_redo Postgres --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon_walredo/walredoproc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index 37abb3fa03..619b7255ae 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -453,7 +453,6 @@ WalRedoMain(int argc, char *argv[]) static void CreateFakeSharedMemoryAndSemaphores(void) { - PGShmemHeader *shim = NULL; PGShmemHeader *hdr; Size size; int numSemas; @@ -486,7 +485,6 @@ CreateFakeSharedMemoryAndSemaphores(void) hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); - shim = hdr; UsedShmemSegAddr = hdr; UsedShmemSegID = (unsigned long) 42; /* not relevant for non-shared memory */ } @@ -523,8 +521,6 @@ CreateFakeSharedMemoryAndSemaphores(void) */ InitShmemIndex(); - dsm_shmem_init(); - /* * Set up xlog, clog, and buffers */ @@ -599,10 +595,6 @@ CreateFakeSharedMemoryAndSemaphores(void) ShmemBackendArrayAllocation(); #endif - /* Initialize dynamic shared memory facilities. */ - if (!IsUnderPostmaster) - dsm_postmaster_startup(shim); - /* * Now give loadable modules a chance to set up their shmem allocations */ From 37b97b3a682ea8496a98e62d3f2b89906a3eaa64 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 19 Nov 2024 13:58:11 +0000 Subject: [PATCH 25/43] chore(local_proxy): reduce some startup logging (#9798) Currently, local_proxy will write an error log if it doesn't find the config file. This is expected for startup, so it's just noise. It is an error if we do receive an explicit SIGHUP though. 
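Roughly, the intended behaviour is sketched below (illustrative Python only, not the Rust implementation; the real change is in `refresh_config_loop` in the diff, and `apply_config` here is just a placeholder):

```python
import json
import logging
from pathlib import Path

log = logging.getLogger("local_proxy.config")


def refresh_config(path: Path, first_check: bool) -> None:
    """Sketch of the desired logging behaviour; see refresh_config_loop in the diff."""
    try:
        spec = json.loads(path.read_text())
    except FileNotFoundError as err:
        if first_check:
            # Expected at startup, before the compute has written a config: keep it quiet.
            log.debug("could not read config file %s: %s", path, err)
        else:
            # After an explicit SIGHUP a missing config file is a real error.
            log.error("could not read config file %s: %s", path, err)
        return
    apply_config(spec)


def apply_config(spec: dict) -> None:
    # Placeholder so the sketch stands alone; the real code validates JWKS settings here.
    log.info("successfully loaded new config")
```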
I've also demoted the build info logs to be debug level. We don't need them in the compute image since we have other ways to determine what code is running. Lastly, I've demoted SIGHUP signal handling from warn to info, since it's not really a warning event. See https://github.com/neondatabase/cloud/issues/10880 for more details --- proxy/src/bin/local_proxy.rs | 37 ++++++++++++++++++++++++++++++------ proxy/src/jemalloc.rs | 2 +- proxy/src/signals.rs | 4 ++-- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index fbdb1dec15..41b0e11e85 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -32,11 +32,12 @@ project_git_version!(GIT_VERSION); project_build_tag!(BUILD_TAG); use clap::Parser; +use thiserror::Error; use tokio::net::TcpListener; use tokio::sync::Notify; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use utils::sentry_init::init_sentry; use utils::{pid_file, project_build_tag, project_git_version}; @@ -124,8 +125,8 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); - info!("Version: {GIT_VERSION}"); - info!("Build_tag: {BUILD_TAG}"); + debug!("Version: {GIT_VERSION}"); + debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { revision: GIT_VERSION, build_tag: BUILD_TAG, @@ -305,26 +306,46 @@ fn build_auth_backend( Ok(Box::leak(Box::new(auth_backend))) } +#[derive(Error, Debug)] +enum RefreshConfigError { + #[error(transparent)] + Read(#[from] std::io::Error), + #[error(transparent)] + Parse(#[from] serde_json::Error), + #[error(transparent)] + Validate(anyhow::Error), +} + async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { + let mut init = true; loop { rx.notified().await; match refresh_config_inner(&path).await { Ok(()) => {} + // don't log for file not found errors if this is the first time we are checking + // for computes that don't use local_proxy, this is not an error. 
+ Err(RefreshConfigError::Read(e)) + if init && e.kind() == std::io::ErrorKind::NotFound => + { + debug!(error=?e, ?path, "could not read config file"); + } Err(e) => { error!(error=?e, ?path, "could not read config file"); } } + + init = false; } } -async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { +async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { let bytes = tokio::fs::read(&path).await?; let data: LocalProxySpec = serde_json::from_slice(&bytes)?; let mut jwks_set = vec![]; - for jwks in data.jwks.into_iter().flatten() { + fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result { let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; ensure!( @@ -367,7 +388,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { } } - jwks_set.push(JwksSettings { + Ok(JwksSettings { id: jwks.id, jwks_url, provider_name: jwks.provider_name, @@ -381,6 +402,10 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { }) } + for jwks in data.jwks.into_iter().flatten() { + jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?); + } + info!("successfully loaded new config"); JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); diff --git a/proxy/src/jemalloc.rs b/proxy/src/jemalloc.rs index 0fae78b60c..9888458ee2 100644 --- a/proxy/src/jemalloc.rs +++ b/proxy/src/jemalloc.rs @@ -38,7 +38,7 @@ where impl MetricRecorder { pub fn new() -> Result { - tracing::info!( + tracing::debug!( config = config::malloc_conf::read()?, version = version::read()?, "starting jemalloc recorder" diff --git a/proxy/src/signals.rs b/proxy/src/signals.rs index 514a83d5eb..0b675683c0 100644 --- a/proxy/src/signals.rs +++ b/proxy/src/signals.rs @@ -2,7 +2,7 @@ use std::convert::Infallible; use anyhow::bail; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tracing::{info, warn}; /// Handle unix signals appropriately. pub async fn handle( @@ -22,7 +22,7 @@ where tokio::select! { // Hangup is commonly used for config reload. _ = hangup.recv() => { - warn!("received SIGHUP"); + info!("received SIGHUP"); refresh_config(); } // Shut down the whole application. From 191f745c814e475b6f55e243ac5606cf2089dd64 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 19 Nov 2024 13:58:26 +0000 Subject: [PATCH 26/43] fix(proxy/auth_broker): ignore -pooler suffix (#9800) Fixes https://github.com/neondatabase/cloud/issues/20400 We cannot mix local_proxy and pgbouncer, so we are filtering out the `-pooler` suffix prior to calling wake_compute. 
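For illustration, the intended endpoint rewrite looks roughly like this (Python sketch; the actual change is the Rust diff below, and the exact `LOCAL_PROXY_SUFFIX` value is assumed from the string literal the old code formatted in):

```python
def normalize_endpoint(endpoint: str) -> str:
    """Sketch of what EndpointId::normalize() is used for here: drop a trailing '-pooler'."""
    return endpoint.removesuffix("-pooler")


def local_proxy_endpoint(endpoint: str) -> str:
    # "-local-proxy" matches the literal the old format!() call used; the Rust code
    # now takes it from the LOCAL_PROXY_SUFFIX constant.
    return normalize_endpoint(endpoint) + "-local-proxy"


# A "-pooler" endpoint and a plain endpoint now map to the same local_proxy endpoint.
assert local_proxy_endpoint("ep-example-123456-pooler") == "ep-example-123456-local-proxy"
assert local_proxy_endpoint("ep-example-123456") == "ep-example-123456-local-proxy"
```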
--- proxy/src/serverless/backend.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 7fc5bd236d..5e9fd151ae 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -33,7 +33,7 @@ use crate::intern::EndpointIdInt; use crate::proxy::connect_compute::ConnectMechanism; use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute}; use crate::rate_limiter::EndpointRateLimiter; -use crate::types::{EndpointId, Host}; +use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX}; pub(crate) struct PoolingBackend { pub(crate) http_conn_pool: Arc>, @@ -215,7 +215,10 @@ impl PoolingBackend { let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials { info: ComputeUserInfo { user: conn_info.user_info.user.clone(), - endpoint: EndpointId::from(format!("{}-local-proxy", conn_info.user_info.endpoint)), + endpoint: EndpointId::from(format!( + "{}{LOCAL_PROXY_SUFFIX}", + conn_info.user_info.endpoint.normalize() + )), options: conn_info.user_info.options.clone(), }, keys: crate::auth::backend::ComputeCredentialKeys::None, From ada84400b7fd7f0e5e1c0f5f9758c9261b0674e9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 19 Nov 2024 17:01:05 +0200 Subject: [PATCH 27/43] PostgreSQL minor version updates (17.2, 16.6, 15.10, 14.15) (#9795) The community decided to make a new off-schedule release due to ABI breakage in last week's release. We're not affected by the ABI breakage because we rebuild all extensions in our docker images, but let's stay up-to-date. There were a few other fixes in the release too. --- vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 16 ++++++++-------- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index c5e0d642ef..e54af35045 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit c5e0d642efb02e4bfedc283b0a7707fe6c79cc89 +Subproject commit e54af3504513b1f44c0e0f68791a0d6d4210e948 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 1feff6b60f..29bf1f04a5 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 1feff6b60f07cb71b665d0f5ead71a4320a71743 +Subproject commit 29bf1f04a5628618b4c7972fed6f87065e3750ce diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index b0b693ea29..b7e9ac3eb9 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit b0b693ea298454e95e6b154780d1fd586a244dfd +Subproject commit b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index aa2e29f2b6..a05dc1378d 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit aa2e29f2b6952140dfe51876bbd11054acae776f +Subproject commit a05dc1378dd822276dc99cb5e888f905d3527597 diff --git a/vendor/revisions.json b/vendor/revisions.json index a1f2bc5dd1..7243ba8716 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ - "17.1", - "aa2e29f2b6952140dfe51876bbd11054acae776f" + "17.2", + "a05dc1378dd822276dc99cb5e888f905d3527597" ], "v16": [ - "16.5", - "b0b693ea298454e95e6b154780d1fd586a244dfd" + "16.6", + "b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34" ], "v15": [ - "15.9", - "1feff6b60f07cb71b665d0f5ead71a4320a71743" + "15.10", + "29bf1f04a5628618b4c7972fed6f87065e3750ce" ], "v14": [ - "14.14", - 
"c5e0d642efb02e4bfedc283b0a7707fe6c79cc89" + "14.15", + "e54af3504513b1f44c0e0f68791a0d6d4210e948" ] } From a8ac895b83cd7339398d153b8ce73db959c21686 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Tue, 19 Nov 2024 18:22:51 +0100 Subject: [PATCH 28/43] re-acquire S3 OIDC token after long running tests for report upload to S3 (#9799) ## Problem If a benchmark or test-case runs longer than the AWS OIDC token lifetime successive upload of test reports to S3 fail - example: https://github.com/neondatabase/neon/actions/runs/11905529176/job/33176168174#step:9:243 ## Summary of changes In actions that require access to S3 and which are invoked after a long running python testcase we re-acquire the OIDC token explicitly. Note that we need to pass down the aws_oicd_role_arn from the workflow to the action because actions have no access to GitHub vars for security reasons. Sample run https://github.com/neondatabase/neon/actions/runs/11912328276/job/33195676867 --- .../actions/allure-report-generate/action.yml | 12 ++++++++++ .../actions/allure-report-store/action.yml | 12 ++++++++++ .github/workflows/benchmarking.yml | 24 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index 16b6e71498..d1d09223db 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -7,6 +7,10 @@ inputs: type: boolean required: false default: false + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' outputs: base-url: @@ -79,6 +83,14 @@ runs: ALLURE_VERSION: 2.27.0 ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777 + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this - name: Acquire lock shell: bash -euxo pipefail {0} diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index df4a6712ac..9c376f420a 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -8,6 +8,10 @@ inputs: unique-key: description: 'string to distinguish different results in the same run' required: true + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -31,6 +35,14 @@ runs: env: REPORT_DIR: ${{ inputs.report-dir }} + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + - name: Upload test results shell: bash -euxo pipefail {0} run: | diff --git a/.github/workflows/benchmarking.yml 
b/.github/workflows/benchmarking.yml index 0e3c31ec57..0289f552f9 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -122,6 +122,7 @@ jobs: run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # Set --sparse-ordering option of pytest-order plugin # to ensure tests are running in order of appears in the file. # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests @@ -150,6 +151,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -211,6 +214,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -227,6 +231,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -238,6 +243,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -446,6 +452,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -460,6 +467,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -474,6 +482,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -490,6 +499,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -600,6 +611,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -614,6 +626,7 @@ jobs: 
save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -623,6 +636,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -724,6 +739,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -736,6 +752,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -838,6 +856,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_tpch pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -848,6 +867,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -936,6 +957,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_user_examples pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -945,6 +967,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} From 15468cd23c8398ad37cc568e2140fd5413c4653d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:08:00 +0000 Subject: [PATCH 29/43] build(deps): bump aiohttp from 3.10.2 to 3.10.11 (#9794) --- poetry.lock | 459 +++++++++++++++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 288 insertions(+), 173 deletions(-) diff --git a/poetry.lock b/poetry.lock index d869761e8e..6171f92391 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,97 +13,112 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.2" +version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95213b3d79c7e387144e9cb7b9d2809092d6ff2c044cb59033aedc612f38fb6d"}, - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1aa005f060aff7124cfadaa2493f00a4e28ed41b232add5869e129a2e395935a"}, - {file = 
"aiohttp-3.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eabe6bf4c199687592f5de4ccd383945f485779c7ffb62a9b9f1f8a3f9756df8"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96e010736fc16d21125c7e2dc5c350cd43c528b85085c04bf73a77be328fe944"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99f81f9c1529fd8e03be4a7bd7df32d14b4f856e90ef6e9cbad3415dbfa9166c"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d611d1a01c25277bcdea06879afbc11472e33ce842322496b211319aa95441bb"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00191d38156e09e8c81ef3d75c0d70d4f209b8381e71622165f22ef7da6f101"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74c091a5ded6cb81785de2d7a8ab703731f26de910dbe0f3934eabef4ae417cc"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:18186a80ec5a701816adbf1d779926e1069392cf18504528d6e52e14b5920525"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5a7ceb2a0d2280f23a02c64cd0afdc922079bb950400c3dd13a1ab2988428aac"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8bd7be6ff6c162a60cb8fce65ee879a684fbb63d5466aba3fa5b9288eb04aefa"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:fae962b62944eaebff4f4fddcf1a69de919e7b967136a318533d82d93c3c6bd1"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a0fde16d284efcacbe15fb0c1013f0967b6c3e379649239d783868230bf1db42"}, - {file = "aiohttp-3.10.2-cp310-cp310-win32.whl", hash = "sha256:f81cd85a0e76ec7b8e2b6636fe02952d35befda4196b8c88f3cec5b4fb512839"}, - {file = "aiohttp-3.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:54ba10eb5a3481c28282eb6afb5f709aedf53cf9c3a31875ffbdc9fc719ffd67"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87fab7f948e407444c2f57088286e00e2ed0003ceaf3d8f8cc0f60544ba61d91"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ec6ad66ed660d46503243cbec7b2b3d8ddfa020f984209b3b8ef7d98ce69c3f2"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4be88807283bd96ae7b8e401abde4ca0bab597ba73b5e9a2d98f36d451e9aac"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01c98041f90927c2cbd72c22a164bb816fa3010a047d264969cf82e1d4bcf8d1"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54e36c67e1a9273ecafab18d6693da0fb5ac48fd48417e4548ac24a918c20998"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7de3ddb6f424af54535424082a1b5d1ae8caf8256ebd445be68c31c662354720"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dd9c7db94b4692b827ce51dcee597d61a0e4f4661162424faf65106775b40e7"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e57e21e1167705f8482ca29cc5d02702208d8bf4aff58f766d94bcd6ead838cd"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a1a50e59b720060c29e2951fd9f13c01e1ea9492e5a527b92cfe04dd64453c16"}, - {file = 
"aiohttp-3.10.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:686c87782481fda5ee6ba572d912a5c26d9f98cc5c243ebd03f95222af3f1b0f"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:dafb4abb257c0ed56dc36f4e928a7341b34b1379bd87e5a15ce5d883c2c90574"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:494a6f77560e02bd7d1ab579fdf8192390567fc96a603f21370f6e63690b7f3d"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fe8503b1b917508cc68bf44dae28823ac05e9f091021e0c41f806ebbb23f92f"}, - {file = "aiohttp-3.10.2-cp311-cp311-win32.whl", hash = "sha256:4ddb43d06ce786221c0dfd3c91b4892c318eaa36b903f7c4278e7e2fa0dd5102"}, - {file = "aiohttp-3.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:ca2f5abcb0a9a47e56bac173c01e9f6c6e7f27534d91451c5f22e6a35a5a2093"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:14eb6b17f6246959fb0b035d4f4ae52caa870c4edfb6170aad14c0de5bfbf478"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:465e445ec348d4e4bd349edd8b22db75f025da9d7b6dc1369c48e7935b85581e"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:341f8ece0276a828d95b70cd265d20e257f5132b46bf77d759d7f4e0443f2906"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01fbb87b5426381cd9418b3ddcf4fc107e296fa2d3446c18ce6c76642f340a3"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c474af073e1a6763e1c5522bbb2d85ff8318197e4c6c919b8d7886e16213345"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d9076810a5621236e29b2204e67a68e1fe317c8727ee4c9abbfbb1083b442c38"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f515d6859e673940e08de3922b9c4a2249653b0ac181169313bd6e4b1978ac"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:655e583afc639bef06f3b2446972c1726007a21003cd0ef57116a123e44601bc"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8da9449a575133828cc99985536552ea2dcd690e848f9d41b48d8853a149a959"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19073d57d0feb1865d12361e2a1f5a49cb764bf81a4024a3b608ab521568093a"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c8e98e1845805f184d91fda6f9ab93d7c7b0dddf1c07e0255924bfdb151a8d05"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:377220a5efde6f9497c5b74649b8c261d3cce8a84cb661be2ed8099a2196400a"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92f7f4a4dc9cdb5980973a74d43cdbb16286dacf8d1896b6c3023b8ba8436f8e"}, - {file = "aiohttp-3.10.2-cp312-cp312-win32.whl", hash = "sha256:9bb2834a6f11d65374ce97d366d6311a9155ef92c4f0cee543b2155d06dc921f"}, - {file = "aiohttp-3.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:518dc3cb37365255708283d1c1c54485bbacccd84f0a0fb87ed8917ba45eda5b"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7f98e70bbbf693086efe4b86d381efad8edac040b8ad02821453083d15ec315f"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9f6f0b252a009e98fe84028a4ec48396a948e7a65b8be06ccfc6ef68cf1f614d"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:9360e3ffc7b23565600e729e8c639c3c50d5520e05fdf94aa2bd859eef12c407"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3988044d1635c7821dd44f0edfbe47e9875427464e59d548aece447f8c22800a"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a9d59da1543a6f1478c3436fd49ec59be3868bca561a33778b4391005e499d"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f49bdb94809ac56e09a310a62f33e5f22973d6fd351aac72a39cd551e98194"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfd2dca3f11c365d6857a07e7d12985afc59798458a2fdb2ffa4a0332a3fd43"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c1508ec97b2cd3e120bfe309a4ff8e852e8a7460f1ef1de00c2c0ed01e33c"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:49904f38667c44c041a0b44c474b3ae36948d16a0398a8f8cd84e2bb3c42a069"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:352f3a4e5f11f3241a49b6a48bc5b935fabc35d1165fa0d87f3ca99c1fcca98b"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:fc61f39b534c5d5903490478a0dd349df397d2284a939aa3cbaa2fb7a19b8397"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:ad2274e707be37420d0b6c3d26a8115295fe9d8e6e530fa6a42487a8ca3ad052"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c836bf3c7512100219fe1123743fd8dd9a2b50dd7cfb0c3bb10d041309acab4b"}, - {file = "aiohttp-3.10.2-cp38-cp38-win32.whl", hash = "sha256:53e8898adda402be03ff164b0878abe2d884e3ea03a4701e6ad55399d84b92dc"}, - {file = "aiohttp-3.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:7cc8f65f5b22304693de05a245b6736b14cb5bc9c8a03da6e2ae9ef15f8b458f"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9dfc906d656e14004c5bc672399c1cccc10db38df2b62a13fb2b6e165a81c316"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:91b10208b222ddf655c3a3d5b727879d7163db12b634492df41a9182a76edaae"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fd16b5e1a7bdd14668cd6bde60a2a29b49147a535c74f50d8177d11b38433a7"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2bfdda4971bd79201f59adbad24ec2728875237e1c83bba5221284dbbf57bda"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69d73f869cf29e8a373127fc378014e2b17bcfbe8d89134bc6fb06a2f67f3cb3"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df59f8486507c421c0620a2c3dce81fbf1d54018dc20ff4fecdb2c106d6e6abc"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df930015db36b460aa9badbf35eccbc383f00d52d4b6f3de2ccb57d064a6ade"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:562b1153ab7f766ee6b8b357ec777a302770ad017cf18505d34f1c088fccc448"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d984db6d855de58e0fde1ef908d48fe9a634cadb3cf715962722b4da1c40619d"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:14dc3fcb0d877911d775d511eb617a486a8c48afca0a887276e63db04d3ee920"}, - {file = 
"aiohttp-3.10.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b52a27a5c97275e254704e1049f4b96a81e67d6205f52fa37a4777d55b0e98ef"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:cd33d9de8cfd006a0d0fe85f49b4183c57e91d18ffb7e9004ce855e81928f704"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1238fc979160bc03a92fff9ad021375ff1c8799c6aacb0d8ea1b357ea40932bb"}, - {file = "aiohttp-3.10.2-cp39-cp39-win32.whl", hash = "sha256:e2f43d238eae4f0b04f58d4c0df4615697d4ca3e9f9b1963d49555a94f0f5a04"}, - {file = "aiohttp-3.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:947847f07a8f81d7b39b2d0202fd73e61962ebe17ac2d8566f260679e467da7b"}, - {file = "aiohttp-3.10.2.tar.gz", hash = "sha256:4d1f694b5d6e459352e5e925a42e05bac66655bfde44d81c59992463d2897014"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"}, + {file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"}, + {file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"}, + {file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"}, + {file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"}, + {file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"}, + {file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"}, + {file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"}, + {file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"}, + {file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"}, + {file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"}, + {file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"}, + {file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"}, + {file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"}, ] [package.dependencies] aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" +yarl = ">=1.12.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -2078,6 +2093,113 @@ files = [ [package.extras] twisted = ["twisted"] +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = 
"sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "psutil" version = "5.9.4" @@ -3307,106 +3429,99 @@ files = [ [[package]] name = "yarl" -version = "1.9.4" +version = "1.17.2" description = "Yet another URL library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, - {file = 
"yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, - {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, - {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, - {file = 
"yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, - {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, - {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, - {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, - {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, - {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, - {file = 
"yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, - {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, - {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, - {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, - {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, - {file 
= "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, - {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, - {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:93771146ef048b34201bfa382c2bf74c524980870bb278e6df515efaf93699ff"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8281db240a1616af2f9c5f71d355057e73a1409c4648c8949901396dc0a3c151"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:170ed4971bf9058582b01a8338605f4d8c849bd88834061e60e83b52d0c76870"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc61b005f6521fcc00ca0d1243559a5850b9dd1e1fe07b891410ee8fe192d0c0"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:871e1b47eec7b6df76b23c642a81db5dd6536cbef26b7e80e7c56c2fd371382e"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a58a2f2ca7aaf22b265388d40232f453f67a6def7355a840b98c2d547bd037f"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:736bb076f7299c5c55dfef3eb9e96071a795cb08052822c2bb349b06f4cb2e0a"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:8fd51299e21da709eabcd5b2dd60e39090804431292daacbee8d3dabe39a6bc0"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:358dc7ddf25e79e1cc8ee16d970c23faee84d532b873519c5036dbb858965795"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:50d866f7b1a3f16f98603e095f24c0eeba25eb508c85a2c5939c8b3870ba2df8"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8b9c4643e7d843a0dca9cd9d610a0876e90a1b2cbc4c5ba7930a0d90baf6903f"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d63123bfd0dce5f91101e77c8a5427c3872501acece8c90df457b486bc1acd47"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:4e76381be3d8ff96a4e6c77815653063e87555981329cf8f85e5be5abf449021"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:734144cd2bd633a1516948e477ff6c835041c0536cef1d5b9a823ae29899665b"}, + {file = "yarl-1.17.2-cp310-cp310-win32.whl", hash = "sha256:26bfb6226e0c157af5da16d2d62258f1ac578d2899130a50433ffee4a5dfa673"}, + {file = "yarl-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:76499469dcc24759399accd85ec27f237d52dec300daaca46a5352fcbebb1071"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:792155279dc093839e43f85ff7b9b6493a8eaa0af1f94f1f9c6e8f4de8c63500"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38bc4ed5cae853409cb193c87c86cd0bc8d3a70fd2268a9807217b9176093ac6"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4a8c83f6fcdc327783bdc737e8e45b2e909b7bd108c4da1892d3bc59c04a6d84"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6d5fed96f0646bfdf698b0a1cebf32b8aae6892d1bec0c5d2d6e2df44e1e2d"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:782ca9c58f5c491c7afa55518542b2b005caedaf4685ec814fadfcee51f02493"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ff6af03cac0d1a4c3c19e5dcc4c05252411bf44ccaa2485e20d0a7c77892ab6e"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a3f47930fbbed0f6377639503848134c4aa25426b08778d641491131351c2c8"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1fa68a3c921365c5745b4bd3af6221ae1f0ea1bf04b69e94eda60e57958907f"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:187df91395c11e9f9dc69b38d12406df85aa5865f1766a47907b1cc9855b6303"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:93d1c8cc5bf5df401015c5e2a3ce75a5254a9839e5039c881365d2a9dcfc6dc2"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:11d86c6145ac5c706c53d484784cf504d7d10fa407cb73b9d20f09ff986059ef"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c42774d1d1508ec48c3ed29e7b110e33f5e74a20957ea16197dbcce8be6b52ba"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8e589379ef0407b10bed16cc26e7392ef8f86961a706ade0a22309a45414d7"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1056cadd5e850a1c026f28e0704ab0a94daaa8f887ece8dfed30f88befb87bb0"}, + {file = "yarl-1.17.2-cp311-cp311-win32.whl", hash = "sha256:be4c7b1c49d9917c6e95258d3d07f43cfba2c69a6929816e77daf322aaba6628"}, + {file = 
"yarl-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:ac8eda86cc75859093e9ce390d423aba968f50cf0e481e6c7d7d63f90bae5c9c"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:dd90238d3a77a0e07d4d6ffdebc0c21a9787c5953a508a2231b5f191455f31e9"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c74f0b0472ac40b04e6d28532f55cac8090e34c3e81f118d12843e6df14d0909"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d486ddcaca8c68455aa01cf53d28d413fb41a35afc9f6594a730c9779545876"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25b7e93f5414b9a983e1a6c1820142c13e1782cc9ed354c25e933aebe97fcf2"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a0baff7827a632204060f48dca9e63fbd6a5a0b8790c1a2adfb25dc2c9c0d50"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:460024cacfc3246cc4d9f47a7fc860e4fcea7d1dc651e1256510d8c3c9c7cde0"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5870d620b23b956f72bafed6a0ba9a62edb5f2ef78a8849b7615bd9433384171"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2941756754a10e799e5b87e2319bbec481ed0957421fba0e7b9fb1c11e40509f"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9611b83810a74a46be88847e0ea616794c406dbcb4e25405e52bff8f4bee2d0a"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:cd7e35818d2328b679a13268d9ea505c85cd773572ebb7a0da7ccbca77b6a52e"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6b981316fcd940f085f646b822c2ff2b8b813cbd61281acad229ea3cbaabeb6b"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:688058e89f512fb7541cb85c2f149c292d3fa22f981d5a5453b40c5da49eb9e8"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56afb44a12b0864d17b597210d63a5b88915d680f6484d8d202ed68ade38673d"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:17931dfbb84ae18b287279c1f92b76a3abcd9a49cd69b92e946035cff06bcd20"}, + {file = "yarl-1.17.2-cp312-cp312-win32.whl", hash = "sha256:ff8d95e06546c3a8c188f68040e9d0360feb67ba8498baf018918f669f7bc39b"}, + {file = "yarl-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:4c840cc11163d3c01a9d8aad227683c48cd3e5be5a785921bcc2a8b4b758c4f3"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3294f787a437cb5d81846de3a6697f0c35ecff37a932d73b1fe62490bef69211"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1e7fedb09c059efee2533119666ca7e1a2610072076926fa028c2ba5dfeb78c"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:da9d3061e61e5ae3f753654813bc1cd1c70e02fb72cf871bd6daf78443e9e2b1"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c012dceadc695ccf69301bfdccd1fc4472ad714fe2dd3c5ab4d2046afddf29"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f11fd61d72d93ac23718d393d2a64469af40be2116b24da0a4ca6922df26807e"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46c465ad06971abcf46dd532f77560181387b4eea59084434bdff97524444032"}, + {file = 
"yarl-1.17.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef6eee1a61638d29cd7c85f7fd3ac7b22b4c0fabc8fd00a712b727a3e73b0685"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4434b739a8a101a837caeaa0137e0e38cb4ea561f39cb8960f3b1e7f4967a3fc"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:752485cbbb50c1e20908450ff4f94217acba9358ebdce0d8106510859d6eb19a"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:17791acaa0c0f89323c57da7b9a79f2174e26d5debbc8c02d84ebd80c2b7bff8"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5c6ea72fe619fee5e6b5d4040a451d45d8175f560b11b3d3e044cd24b2720526"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db5ac3871ed76340210fe028f535392f097fb31b875354bcb69162bba2632ef4"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7a1606ba68e311576bcb1672b2a1543417e7e0aa4c85e9e718ba6466952476c0"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9bc27dd5cfdbe3dc7f381b05e6260ca6da41931a6e582267d5ca540270afeeb2"}, + {file = "yarl-1.17.2-cp313-cp313-win32.whl", hash = "sha256:52492b87d5877ec405542f43cd3da80bdcb2d0c2fbc73236526e5f2c28e6db28"}, + {file = "yarl-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:8e1bf59e035534ba4077f5361d8d5d9194149f9ed4f823d1ee29ef3e8964ace3"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c556fbc6820b6e2cda1ca675c5fa5589cf188f8da6b33e9fc05b002e603e44fa"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f2f44a4247461965fed18b2573f3a9eb5e2c3cad225201ee858726cde610daca"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a3ede8c248f36b60227eb777eac1dbc2f1022dc4d741b177c4379ca8e75571a"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2654caaf5584449d49c94a6b382b3cb4a246c090e72453493ea168b931206a4d"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d41c684f286ce41fa05ab6af70f32d6da1b6f0457459a56cf9e393c1c0b2217"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2270d590997445a0dc29afa92e5534bfea76ba3aea026289e811bf9ed4b65a7f"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18662443c6c3707e2fc7fad184b4dc32dd428710bbe72e1bce7fe1988d4aa654"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:75ac158560dec3ed72f6d604c81090ec44529cfb8169b05ae6fcb3e986b325d9"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1fee66b32e79264f428dc8da18396ad59cc48eef3c9c13844adec890cd339db5"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:585ce7cd97be8f538345de47b279b879e091c8b86d9dbc6d98a96a7ad78876a3"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c019abc2eca67dfa4d8fb72ba924871d764ec3c92b86d5b53b405ad3d6aa56b0"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c6e659b9a24d145e271c2faf3fa6dd1fcb3e5d3f4e17273d9e0350b6ab0fe6e2"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:d17832ba39374134c10e82d137e372b5f7478c4cceeb19d02ae3e3d1daed8721"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:bc3003710e335e3f842ae3fd78efa55f11a863a89a72e9a07da214db3bf7e1f8"}, + {file = "yarl-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f5ffc6b7ace5b22d9e73b2a4c7305740a339fbd55301d52735f73e21d9eb3130"}, + {file = "yarl-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:48e424347a45568413deec6f6ee2d720de2cc0385019bedf44cd93e8638aa0ed"}, + {file = "yarl-1.17.2-py3-none-any.whl", hash = "sha256:dd7abf4f717e33b7487121faf23560b3a50924f80e4bef62b22dab441ded8f3b"}, + {file = "yarl-1.17.2.tar.gz", hash = "sha256:753eaaa0c7195244c84b5cc159dc8204b7fd99f716f11198f999f2332a86b178"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [[package]] name = "zipp" @@ -3484,4 +3599,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "c656496f9fbb7c29b2df3143c1d72c95b5e121cb6340134c0b8d070f54a08508" +content-hash = "8cb9c38d83eec441391c0528ac2fbefde18c734373b2399e07c69382044e8ced" diff --git a/pyproject.toml b/pyproject.toml index 9ea42bf46f..197946fff8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ psutil = "^5.9.4" types-psutil = "^5.9.5.12" types-toml = "^0.10.8.6" pytest-httpserver = "^1.0.8" -aiohttp = "3.10.2" +aiohttp = "3.10.11" pytest-rerunfailures = "^13.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" From 5e3fbef7210a84870cb012837db6830aeab3d38d Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:10:09 -0500 Subject: [PATCH 30/43] fix(pageserver): queue stopped error should be ignored during create timeline (#9767) close https://github.com/neondatabase/neon/issues/9730 The test case tests if anything goes wrong during pageserver restart + *during timeline creation not complete*. Therefore, queue is stopped error is normal in this case, except that it should be categorized as a shutdown error instead of a real error. ## Summary of changes * More comments for the test case. * Queue stopped error will now be forwarded as CreateTimelineError::ShuttingDown. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 6 ++++++ test_runner/regress/test_tenants.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e88dee7c6c..46317e93ee 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2446,6 +2446,12 @@ impl Tenant { .remote_client .wait_completion() .await + .map_err(|e| match e { + WaitCompletionError::NotInitialized( + e, // If the queue is already stopped, it's a shutdown error. + ) if e.is_stopping() => CreateTimelineError::ShuttingDown, + e => CreateTimelineError::Other(e.into()), + }) .context("wait for timeline initial uploads to complete")?; // The creating task is responsible for activating the timeline. diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 5a499ea98b..158c3fddb0 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -369,12 +369,16 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder): - Bad response codes during shutdown (e.g. returning 500 instead of 503) - Issues where a tenant is still starting up while we receive a request for it - Issues with interrupting/resuming tenant/timeline creation in shutdown + - Issues with a timeline is not created successfully because of restart. 
""" env = neon_env_builder.init_configs() env.start() tenant_id: TenantId = env.initial_tenant timeline_id = env.initial_timeline + # At this point, the initial tenant/timeline might not have been created successfully, + # and this is the case we want to test. + # Multiple creation requests which race will generate this error on the pageserver # and storage controller respectively env.pageserver.allowed_errors.append(".*Conflict: Tenant is already being modified.*") From b092126c94fc2af37188ad05e5951ae10c84813a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 19 Nov 2024 20:10:53 +0100 Subject: [PATCH 31/43] scrubber: fix parsing issue with Azure (#9797) Apparently Azure returns timelines ending with `/` which confuses the parsing. So remove all trailing `/`s before attempting to parse. Part of https://github.com/neondatabase/cloud/issues/19963 --- storage_scrubber/src/metadata_stream.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index f896cff2d5..efda7c213d 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -60,7 +60,7 @@ pub async fn stream_tenant_shards<'a>( first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {first_part}")) + .with_context(|| format!("Incorrect tenant entry id str: {first_part}")) }) .collect::>(); @@ -114,9 +114,10 @@ pub async fn stream_tenant_timelines<'a>( prefix.get_path().as_str().strip_prefix(prefix_str) }) .map(|entry_id_str| { - entry_id_str + let first_part = entry_id_str.split('/').next().unwrap(); + first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {entry_id_str}")) + .with_context(|| format!("Incorrect timeline entry id str: {entry_id_str}")) }); for i in new_entry_ids { From b22a84a7bf2ccae30243be81439cc284835a37f1 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:38:41 -0500 Subject: [PATCH 32/43] feat(pageserver): support key range for manual compaction trigger (#9723) part of https://github.com/neondatabase/neon/issues/9114, we want to be able to run partial gc-compaction in tests. In the future, we can also expand this functionality to legacy compaction, so that we can trigger compaction for a specific key range. ## Summary of changes * Support passing compaction key range through pageserver routes. * Refactor input parameters of compact related function to take the new `CompactOptions`. * Add tests for partial compaction. Note that the test may or may not trigger compaction based on GC horizon. We need to improve the test case to ensure things always get below the gc_horizon and the gc-compaction can be triggered. 
--------- Signed-off-by: Alex Chi Z --- libs/utils/src/http/json.rs | 22 ++++++++++ pageserver/src/http/routes.rs | 15 +++++-- pageserver/src/tenant.rs | 42 ++++++++++++------- pageserver/src/tenant/timeline.rs | 36 +++++++++++++++- pageserver/src/tenant/timeline/compaction.rs | 43 +++++++++++++++----- test_runner/fixtures/pageserver/http.py | 2 + test_runner/regress/test_compaction.py | 39 ++++++++++++++++++ 7 files changed, 170 insertions(+), 29 deletions(-) diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs index 6c25440b42..e53231f313 100644 --- a/libs/utils/src/http/json.rs +++ b/libs/utils/src/http/json.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use super::error::ApiError; +/// Parse a json request body and deserialize it to the type `T`. pub async fn json_request Deserialize<'de>>( request: &mut Request, ) -> Result { @@ -27,6 +28,27 @@ pub async fn json_request Deserialize<'de>>( .map_err(ApiError::BadRequest) } +/// Parse a json request body and deserialize it to the type `T`. If the body is empty, return `T::default`. +pub async fn json_request_maybe Deserialize<'de> + Default>( + request: &mut Request, +) -> Result { + let body = hyper::body::aggregate(request.body_mut()) + .await + .context("Failed to read request body") + .map_err(ApiError::BadRequest)?; + + if body.remaining() == 0 { + return Ok(T::default()); + } + + let mut deser = serde_json::de::Deserializer::from_reader(body.reader()); + + serde_path_to_error::deserialize(&mut deser) + // intentionally stringify because the debug version is not helpful in python logs + .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}")) + .map_err(ApiError::BadRequest) +} + pub fn json_response( status: StatusCode, data: T, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ab170679ba..306b0f35ab 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -83,6 +83,8 @@ use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::offload::offload_timeline; use crate::tenant::timeline::offload::OffloadError; use crate::tenant::timeline::CompactFlags; +use crate::tenant::timeline::CompactOptions; +use crate::tenant::timeline::CompactRange; use crate::tenant::timeline::CompactionError; use crate::tenant::timeline::Timeline; use crate::tenant::GetTimelineError; @@ -100,7 +102,7 @@ use utils::{ http::{ endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, error::{ApiError, HttpErrorBody}, - json::{json_request, json_response}, + json::{json_request, json_request_maybe, json_response}, request::parse_request_param, RequestExt, RouterBuilder, }, @@ -1927,13 +1929,15 @@ async fn timeline_gc_handler( // Run compaction immediately on given timeline. 
async fn timeline_compact_handler( - request: Request, + mut request: Request, cancel: CancellationToken, ) -> Result, ApiError> { let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let compact_range = json_request_maybe::>(&mut request).await?; + let state = get_state(&request); let mut flags = EnumSet::empty(); @@ -1957,11 +1961,16 @@ async fn timeline_compact_handler( let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); + let options = CompactOptions { + compact_range, + flags, + }; + async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; timeline - .compact(&cancel, flags, &ctx) + .compact_with_options(&cancel, options, &ctx) .await .map_err(|e| ApiError::InternalServerError(e.into()))?; if wait_until_uploaded { diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 46317e93ee..37bf83c984 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5254,7 +5254,7 @@ mod tests { use storage_layer::PersistentLayerKey; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; - use timeline::DeltaLayerTestDesc; + use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; #[cfg(feature = "testing")] @@ -7728,7 +7728,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -7805,7 +7805,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8237,7 +8237,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8266,7 +8266,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8819,7 +8819,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -8827,14 +8834,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -8847,14 +8854,14 @@ mod tests { guard.cutoffs.space = Lsn(0x38); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result // not increasing the GC horizon and compact again tline - 
.compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -9048,7 +9055,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -9056,14 +9070,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -9248,7 +9262,7 @@ mod tests { let cancel = CancellationToken::new(); branch_tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5547bc2c7a..0eb3de21e9 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -774,6 +774,21 @@ pub(crate) enum CompactFlags { DryRun, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactRange { + #[serde_as(as = "serde_with::DisplayFromStr")] + pub start: Key, + #[serde_as(as = "serde_with::DisplayFromStr")] + pub end: Key, +} + +#[derive(Clone, Default)] +pub(crate) struct CompactOptions { + pub flags: EnumSet, + pub compact_range: Option, +} + impl std::fmt::Debug for Timeline { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "Timeline<{}>", self.timeline_id) @@ -1612,6 +1627,25 @@ impl Timeline { cancel: &CancellationToken, flags: EnumSet, ctx: &RequestContext, + ) -> Result { + self.compact_with_options( + cancel, + CompactOptions { + flags, + compact_range: None, + }, + ctx, + ) + .await + } + + /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending + /// compaction tasks. + pub(crate) async fn compact_with_options( + self: &Arc, + cancel: &CancellationToken, + options: CompactOptions, + ctx: &RequestContext, ) -> Result { // most likely the cancellation token is from background task, but in tests it could be the // request task as well. 
@@ -1649,7 +1683,7 @@ impl Timeline { self.compact_tiered(cancel, ctx).await?; Ok(false) } - CompactionAlgorithm::Legacy => self.compact_legacy(cancel, flags, ctx).await, + CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await, } } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index b30e380de5..ecd68ba55e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, + CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, RecordedDuration, Timeline, }; @@ -273,22 +273,32 @@ impl Timeline { pub(crate) async fn compact_legacy( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> Result { - if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) { - self.compact_with_gc(cancel, flags, ctx) + if options + .flags + .contains(CompactFlags::EnhancedGcBottomMostCompaction) + { + self.compact_with_gc(cancel, options, ctx) .await .map_err(CompactionError::Other)?; return Ok(false); } - if flags.contains(CompactFlags::DryRun) { + if options.flags.contains(CompactFlags::DryRun) { return Err(CompactionError::Other(anyhow!( "dry-run mode is not supported for legacy compaction for now" ))); } + if options.compact_range.is_some() { + // maybe useful in the future? could implement this at some point + return Err(CompactionError::Other(anyhow!( + "compaction range is not supported for legacy compaction for now" + ))); + } + // High level strategy for compaction / image creation: // // 1. First, calculate the desired "partitioning" of the @@ -338,7 +348,7 @@ impl Timeline { .repartition( self.get_last_record_lsn(), self.get_compaction_target_size(), - flags, + options.flags, ctx, ) .await @@ -354,7 +364,7 @@ impl Timeline { let fully_compacted = self .compact_level0( target_file_size, - flags.contains(CompactFlags::ForceL0Compaction), + options.flags.contains(CompactFlags::ForceL0Compaction), ctx, ) .await?; @@ -372,7 +382,10 @@ impl Timeline { .create_image_layers( &partitioning, lsn, - if flags.contains(CompactFlags::ForceImageLayerCreation) { + if options + .flags + .contains(CompactFlags::ForceImageLayerCreation) + { ImageLayerCreationMode::Force } else { ImageLayerCreationMode::Try @@ -1736,11 +1749,19 @@ impl Timeline { pub(crate) async fn compact_with_gc( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { - self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx) - .await + self.partial_compact_with_gc( + options + .compact_range + .map(|range| range.start..range.end) + .unwrap_or_else(|| Key::MIN..Key::MAX), + cancel, + options.flags, + ctx, + ) + .await } /// An experimental compaction building block that combines compaction with garbage collection. 
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d1a9b5921a..01583757fa 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -665,6 +665,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): force_l0_compaction=False, wait_until_uploaded=False, enhanced_gc_bottom_most_compaction=False, + body: Optional[dict[str, Any]] = None, ): self.is_testing_enabled_or_skip() query = {} @@ -683,6 +684,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact", params=query, + json=body, ) log.info(f"Got compact request response code: {res.status_code}") self.verbose_error(res) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index a02d0f6b98..48950a5a50 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -116,6 +116,45 @@ page_cache_size=10 assert vectored_average < 8 +def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 1000 + churn_rounds = 10 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + for i in range(1, churn_rounds + 1): + if i % 10 == 0: + log.info(f"Running churn round {i}/{churn_rounds} ...") + + workload.churn_rows(row_count, env.pageserver.id) + # Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run. + ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True) + assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1 + ps_http.timeline_compact( + tenant_id, + timeline_id, + enhanced_gc_bottom_most_compaction=True, + body={ + "start": "000000000000000000000000000000000000", + "end": "030000000000000000000000000000000000", + }, + ) + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. TINY_STRIPES = 16 LARGE_STRIPES = 32768 From 770ac34ae6137bfb3c7dab9536a2943e209f21d0 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 19 Nov 2024 22:29:57 +0200 Subject: [PATCH 33/43] Register custom xlog reader callbacks for on-demand WAL download in StartupDecodingContext (#9007) ## Problem See https://github.com/neondatabase/neon/issues/8931 On-demand WAL download are not set in all cases where WAL is accessed by logical replication ## Summary of changes Set customer xlog reader handles in StartupDecodingContext Related changes in Postgres modules: https://github.com/neondatabase/postgres/pull/495 https://github.com/neondatabase/postgres/pull/496 https://github.com/neondatabase/postgres/pull/497 https://github.com/neondatabase/postgres/pull/498 ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/neon.c | 4 +-- .../regress/test_ondemand_wal_download.py | 27 +++++++++++++++++++ vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 +++--- 7 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 test_runner/regress/test_ondemand_wal_download.py diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index f207ed61f9..51b9f58bbc 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -421,9 +421,7 @@ _PG_init(void) pg_init_libpagestore(); pg_init_walproposer(); - WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; + Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; InitUnstableExtensionsSupport(); InitLogicalReplicationMonitor(); diff --git a/test_runner/regress/test_ondemand_wal_download.py b/test_runner/regress/test_ondemand_wal_download.py new file mode 100644 index 0000000000..a7eb3e6625 --- /dev/null +++ b/test_runner/regress/test_ondemand_wal_download.py @@ -0,0 +1,27 @@ +from fixtures.neon_fixtures import NeonEnv + + +def test_on_demand_wal_download(neon_simple_env: NeonEnv): + env = neon_simple_env + ep = env.endpoints.create_start( + branch_name="main", + endpoint_id="primary", + config_lines=[ + "max_wal_size=32MB", + "min_wal_size=32MB", + "neon.logical_replication_max_snap_files=10000", + ], + ) + + con = ep.connect() + cur = con.cursor() + cur.execute("CREATE TABLE t(pk bigint primary key, payload text)") + cur.execute("ALTER TABLE t ALTER payload SET STORAGE external") + cur.execute("select pg_create_logical_replication_slot('myslot', 'test_decoding', false, true)") + cur.execute("insert into t values (generate_series(1,100000),repeat('?',10000))") + + ep.stop("fast") + ep.start() + con = ep.connect() + cur = con.cursor() + cur.execute("select pg_replication_slot_advance('myslot', pg_current_wal_insert_lsn())") diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index e54af35045..aeecd27b1f 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit e54af3504513b1f44c0e0f68791a0d6d4210e948 +Subproject commit aeecd27b1f0775b606409d1cbb9c8aa9853a82af diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 29bf1f04a5..544620db4c 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 29bf1f04a5628618b4c7972fed6f87065e3750ce +Subproject commit 544620db4ca6945be4f1f686a7fbd2cdfb0bf96f diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index b7e9ac3eb9..3cc152ae2d 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34 +Subproject commit 3cc152ae2d17b19679c7102486bdb94677705c02 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index a05dc1378d..e5d795a1a0 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit a05dc1378dd822276dc99cb5e888f905d3527597 +Subproject commit e5d795a1a0c25da907176d37c905badab70e00c0 diff --git a/vendor/revisions.json b/vendor/revisions.json index 7243ba8716..a13ef29e45 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17.2", - "a05dc1378dd822276dc99cb5e888f905d3527597" + 
"e5d795a1a0c25da907176d37c905badab70e00c0" ], "v16": [ "16.6", - "b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34" + "3cc152ae2d17b19679c7102486bdb94677705c02" ], "v15": [ "15.10", - "29bf1f04a5628618b4c7972fed6f87065e3750ce" + "544620db4ca6945be4f1f686a7fbd2cdfb0bf96f" ], "v14": [ "14.15", - "e54af3504513b1f44c0e0f68791a0d6d4210e948" + "aeecd27b1f0775b606409d1cbb9c8aa9853a82af" ] } From 725e0a1ac9d1b409c57d2e7d87ac16d9c3d9f91b Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 19 Nov 2024 23:03:15 +0000 Subject: [PATCH 34/43] CI(release): create reusable workflow for releases (#9806) ## Problem We have a bunch of duplicated code for automated releases. There will be even more, once we have `release-compute` branch (https://github.com/neondatabase/neon/pull/9637). Another issue with the current `release` workflow is that it creates a PR from the main as is. If we create 2 different releases from the same commit, GitHub could mix up results from different PRs. ## Summary of changes - Create a reusable workflow for releases - Create an empty commit to differentiate releases --- .github/workflows/_create-release-pr.yml | 79 ++++++++++++++++++++++ .github/workflows/release.yml | 84 ++++-------------------- 2 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 .github/workflows/_create-release-pr.yml diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml new file mode 100644 index 0000000000..cc6994397f --- /dev/null +++ b/.github/workflows/_create-release-pr.yml @@ -0,0 +1,79 @@ +name: Create Release PR + +on: + workflow_call: + inputs: + component-name: + description: 'Component name' + required: true + type: string + release-branch: + description: 'Release branch' + required: true + type: string + secrets: + ci-access-token: + description: 'CI access token' + required: true + +defaults: + run: + shell: bash -euo pipefail {0} + +jobs: + create-storage-release-branch: + runs-on: ubuntu-22.04 + + permissions: + contents: write # for `git push` + + steps: + - uses: actions/checkout@v4 + with: + ref: main + + - name: Set variables + id: vars + env: + COMPONENT_NAME: ${{ inputs.component-name }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + run: | + today=$(date +'%Y-%m-%d') + echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT} + echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT} + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create RC branch + env: + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + git checkout -b "${RC_BRANCH}" + + # create an empty commit to distinguish workflow runs + # from other possible releases from the same commit + git commit --allow-empty -m "${TITLE}" + + git push origin "${RC_BRANCH}" + + - name: Create a PR into ${{ inputs.release-branch }} + env: + GH_TOKEN: ${{ secrets.ci-access-token }} + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + cat << EOF > body.md + ## ${TITLE} + + **Please merge this Pull Request using 'Create a merge commit' button** + EOF + + gh pr create --title "${TITLE}" \ + --body-file "body.md" \ + --head "${RC_BRANCH}" \ + --base "${RELEASE_BRANCH}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56ef6f4bbb..11f010b6d4 100644 --- 
a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,82 +26,26 @@ defaults: jobs: create-storage-release-branch: if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Storage & Compute release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Storage & Compute' + release-branch: 'release' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} create-proxy-release-branch: if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Proxy release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release-proxy" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Proxy' + release-branch: 'release-proxy' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} From 2281a02c49fd396ef9b06fafa35028c33eea8b3d Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 00:30:24 +0000 Subject: [PATCH 35/43] CODEOWNERS: add developer-productivity team (#9810) Notify @neondatabase/developer-productivity team about changes in CI (i.e. 
in `.github/` directory) --- CODEOWNERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index f8ed4be816..21b0e7c51f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,6 +1,5 @@ +/.github/ @neondatabase/developer-productivity /compute_tools/ @neondatabase/control-plane @neondatabase/compute -/storage_controller @neondatabase/storage -/storage_scrubber @neondatabase/storage /libs/pageserver_api/ @neondatabase/storage /libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage /libs/remote_storage/ @neondatabase/storage @@ -11,4 +10,6 @@ /pgxn/neon/ @neondatabase/compute @neondatabase/storage /proxy/ @neondatabase/proxy /safekeeper/ @neondatabase/storage +/storage_controller @neondatabase/storage +/storage_scrubber @neondatabase/storage /vendor/ @neondatabase/compute From ea1858e3b66fa058ce8ddfb6f37b364154dd20a6 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 20 Nov 2024 02:14:58 +0100 Subject: [PATCH 36/43] compute_ctl: Streamline and Pipeline startup SQL (#9717) Before, compute_ctl didn't have a good registry for what command would run when, depending exclusively on sync code to apply changes. When users have many databases/roles to manage, this step can take a substantial amount of time, breaking assumptions about low (re)start times in other systems. This commit reduces the time compute_ctl takes to restart when changes must be applied, by making all commands more or less blind writes, and applying these commands in an asynchronous context, only waiting for completion once we know the commands have all been sent. Additionally, this reduces time spent by batching per-database operations where previously we would create a new SQL connection for every user-database operation we planned to execute. 
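Roughly, the new flow fans per-database operations out as async tasks whose parallelism is capped by a semaphore sized from `max_connections` (or from `superuser_reserved_connections` when the endpoint is already running). The sketch below shows only that general pattern, with made-up names and a fixed limit; it is not the compute_ctl implementation.

```rust
// Illustrative sketch of the bounded-concurrency pattern; not compute_ctl code.
use std::sync::Arc;
use tokio::sync::Semaphore;

// Stand-in for the per-database phases (role reference cleanup, schema perms, ...).
async fn apply_per_database_phases(db: &str) {
    println!("applying per-database operations to {db}");
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // In the real code this limit is derived from max_connections (or from
    // superuser_reserved_connections when the endpoint is already running);
    // a fixed number keeps the sketch self-contained.
    let max_concurrent_connections = 10;
    let permits = Arc::new(Semaphore::new(max_concurrent_connections));

    let handles: Vec<_> = (0..50)
        .map(|i| {
            let permits = permits.clone();
            tokio::spawn(async move {
                // Hold a permit for as long as this database's connection is in use.
                let _permit = permits.acquire_owned().await.expect("semaphore closed");
                apply_per_database_phases(&format!("db{i}")).await;
            })
        })
        .collect();

    for handle in handles {
        handle.await?;
    }
    Ok(())
}
```

Bounding the fan-out this way keeps cold starts fast while leaving connection slots free for user sessions when the endpoint is already running.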
--- compute_tools/src/catalog.rs | 44 +- compute_tools/src/checker.rs | 28 - compute_tools/src/compute.rs | 397 ++++++++-- compute_tools/src/lib.rs | 1 + compute_tools/src/pg_helpers.rs | 39 +- compute_tools/src/spec.rs | 634 +--------------- compute_tools/src/spec_apply.rs | 680 ++++++++++++++++++ .../src/sql/add_availabilitycheck_tables.sql | 18 + .../src/sql/anon_ext_fn_reassign.sql | 12 + compute_tools/src/sql/default_grants.sql | 30 + .../src/sql/set_public_schema_owner.sql | 23 + .../src/sql/unset_template_for_drop_dbs.sql | 12 + 12 files changed, 1146 insertions(+), 772 deletions(-) create mode 100644 compute_tools/src/spec_apply.rs create mode 100644 compute_tools/src/sql/add_availabilitycheck_tables.sql create mode 100644 compute_tools/src/sql/anon_ext_fn_reassign.sql create mode 100644 compute_tools/src/sql/default_grants.sql create mode 100644 compute_tools/src/sql/set_public_schema_owner.sql create mode 100644 compute_tools/src/sql/unset_template_for_drop_dbs.sql diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 4fefa831e0..2f6f82dd39 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -1,38 +1,40 @@ -use compute_api::{ - responses::CatalogObjects, - spec::{Database, Role}, -}; +use compute_api::responses::CatalogObjects; use futures::Stream; -use postgres::{Client, NoTls}; +use postgres::NoTls; use std::{path::Path, process::Stdio, result::Result, sync::Arc}; use tokio::{ io::{AsyncBufReadExt, BufReader}, process::Command, - task, + spawn, }; +use tokio_postgres::connect; use tokio_stream::{self as stream, StreamExt}; use tokio_util::codec::{BytesCodec, FramedRead}; use tracing::warn; -use crate::{ - compute::ComputeNode, - pg_helpers::{get_existing_dbs, get_existing_roles}, -}; +use crate::compute::ComputeNode; +use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async}; pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result { let connstr = compute.connstr.clone(); - task::spawn_blocking(move || { - let mut client = Client::connect(connstr.as_str(), NoTls)?; - let roles: Vec; - { - let mut xact = client.transaction()?; - roles = get_existing_roles(&mut xact)?; - } - let databases: Vec = get_existing_dbs(&mut client)?.values().cloned().collect(); - Ok(CatalogObjects { roles, databases }) - }) - .await? + let (client, connection): (tokio_postgres::Client, _) = + connect(connstr.as_str(), NoTls).await?; + + spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + let roles = get_existing_roles_async(&client).await?; + + let databases = get_existing_dbs_async(&client) + .await? + .into_values() + .collect(); + + Ok(CatalogObjects { roles, databases }) } #[derive(Debug, thiserror::Error)] diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index d76eaad0a0..cec2b1bed8 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,37 +1,9 @@ use anyhow::{anyhow, Ok, Result}; -use postgres::Client; use tokio_postgres::NoTls; use tracing::{error, instrument, warn}; use crate::compute::ComputeNode; -/// Create a special service table for availability checks -/// only if it does not exist already. 
-pub fn create_availability_check_data(client: &mut Client) -> Result<()> { - let query = " - DO $$ - BEGIN - IF NOT EXISTS( - SELECT 1 - FROM pg_catalog.pg_tables - WHERE tablename = 'health_check' - ) - THEN - CREATE TABLE health_check ( - id serial primary key, - updated_at timestamptz default now() - ); - INSERT INTO health_check VALUES (1, now()) - ON CONFLICT (id) DO UPDATE - SET updated_at = now(); - END IF; - END - $$;"; - client.execute(query, &[])?; - - Ok(()) -} - /// Update timestamp in a row in a special service table to check /// that we can actually write some data in this particular timeline. #[instrument(skip_all)] diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 0a8cb14058..4f67425ba8 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,20 +1,21 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::env; use std::fs; +use std::iter::once; use std::os::unix::fs::{symlink, PermissionsExt}; use std::path::Path; use std::process::{Command, Stdio}; use std::str::FromStr; use std::sync::atomic::AtomicU32; use std::sync::atomic::Ordering; -use std::sync::{Condvar, Mutex, RwLock}; +use std::sync::{Arc, Condvar, Mutex, RwLock}; use std::thread; use std::time::Duration; use std::time::Instant; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::PgIdent; +use compute_api::spec::{PgIdent, Role}; use futures::future::join_all; use futures::stream::FuturesUnordered; use futures::StreamExt; @@ -31,15 +32,23 @@ use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion}; use utils::measured_stream::MeasuredReader; use nix::sys::signal::{kill, Signal}; - use remote_storage::{DownloadError, RemotePath}; +use tokio::spawn; +use url::Url; -use crate::checker::create_availability_check_data; use crate::installed_extensions::get_installed_extensions_sync; use crate::local_proxy; -use crate::logger::inlinify; use crate::pg_helpers::*; use crate::spec::*; +use crate::spec_apply::ApplySpecPhase::{ + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser, + DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions, + RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, +}; +use crate::spec_apply::PerDatabasePhase::{ + ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension, +}; +use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; use crate::{config, extension_server}; @@ -224,10 +233,7 @@ fn maybe_cgexec(cmd: &str) -> Command { } } -/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser -/// that we give to customers -#[instrument(skip_all)] -fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> { +pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { let roles = spec .cluster .roles @@ -296,11 +302,8 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> $$;"#, roles_decl, database_decl, ); - info!("Neon superuser created: {}", inlinify(&query)); - client - .simple_query(&query) - .map_err(|e| anyhow::anyhow!(e).context(query))?; - Ok(()) + + query } impl ComputeNode { @@ -813,21 +816,14 @@ impl ComputeNode { Ok(()) } - /// Do initial configuration of the already started Postgres. 
- #[instrument(skip_all)] - pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { - // If connection fails, - // it may be the old node with `zenith_admin` superuser. - // - // In this case we need to connect with old `zenith_admin` name - // and create new user. We cannot simply rename connected user, - // but we can create a new one and grant it all privileges. - let mut connstr = self.connstr.clone(); + async fn get_maintenance_client(url: &Url) -> Result { + let mut connstr = url.clone(); + connstr .query_pairs_mut() .append_pair("application_name", "apply_config"); - let mut client = match Client::connect(connstr.as_str(), NoTls) { + let (client, conn) = match tokio_postgres::connect(connstr.as_str(), NoTls).await { Err(e) => match e.code() { Some(&SqlState::INVALID_PASSWORD) | Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => { @@ -845,8 +841,8 @@ impl ComputeNode { let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls) .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?; - // Disable forwarding so that users don't get a cloud_admin role + // Disable forwarding so that users don't get a cloud_admin role let mut func = || { client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?; @@ -858,49 +854,309 @@ impl ComputeNode { drop(client); // reconnect with connstring with expected name - Client::connect(connstr.as_str(), NoTls)? + tokio_postgres::connect(connstr.as_str(), NoTls).await? } _ => return Err(e.into()), }, - Ok(client) => client, + Ok((client, conn)) => (client, conn), }; - // Disable DDL forwarding because control plane already knows about these roles/databases. + spawn(async move { + if let Err(e) = conn.await { + error!("maintenance client connection error: {}", e); + } + }); + + // Disable DDL forwarding because control plane already knows about the roles/databases + // we're about to modify. client .simple_query("SET neon.forward_ddl = false") + .await .context("apply_config SET neon.forward_ddl = false")?; - // Proceed with post-startup configuration. Note, that order of operations is important. - let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; - create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?; - cleanup_instance(&mut client).context("apply_config cleanup_instance")?; - handle_roles(spec, &mut client).context("apply_config handle_roles")?; - handle_databases(spec, &mut client).context("apply_config handle_databases")?; - handle_role_deletions(spec, connstr.as_str(), &mut client) - .context("apply_config handle_role_deletions")?; - handle_grants( - spec, - &mut client, - connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - ) - .context("apply_config handle_grants")?; - handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; - handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; - create_availability_check_data(&mut client) - .context("apply_config create_availability_check_data")?; + Ok(client) + } - // 'Close' connection - drop(client); + /// Apply the spec to the running PostgreSQL instance. + /// The caller can decide to run with multiple clients in parallel, or + /// single mode. Either way, the commands executed will be the same, and + /// only commands run in different databases are parallelized. 
+ #[instrument(skip_all)] + pub fn apply_spec_sql( + &self, + spec: Arc, + url: Arc, + concurrency: usize, + ) -> Result<()> { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; - if let Some(ref local_proxy) = spec.local_proxy_config { + info!("Applying config with max {} concurrency", concurrency); + debug!("Config: {:?}", spec); + + rt.block_on(async { + // Proceed with post-startup configuration. Note, that order of operations is important. + let client = Self::get_maintenance_client(&url).await?; + let spec = spec.clone(); + + let databases = get_existing_dbs_async(&client).await?; + let roles = get_existing_roles_async(&client) + .await? + .into_iter() + .map(|role| (role.name.clone(), role)) + .collect::>(); + + let jwks_roles = Arc::new( + spec.as_ref() + .local_proxy_config + .iter() + .flat_map(|it| &it.jwks) + .flatten() + .flat_map(|setting| &setting.role_names) + .cloned() + .collect::>(), + ); + + let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext { + roles, + dbs: databases, + })); + + for phase in [ + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + let db_processes = spec + .cluster + .databases + .iter() + .map(|db| DB::new(db.clone())) + // include + .chain(once(DB::SystemDB)) + .map(|db| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut url = url.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + let db = db.clone(); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + url.set_path(db.name.as_str()); + } + } + + let url = Arc::new(url); + let fut = Self::apply_spec_sql_db( + spec.clone(), + url, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + ); + + Ok(spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + handle.await??; + } + + for phase in vec![ + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + Ok::<(), anyhow::Error>(()) + })?; + + Ok(()) + } + + /// Apply SQL migrations of the RunInEachDatabase phase. + /// + /// May opt to not connect to databases that don't have any scheduled + /// operations. The function is concurrency-controlled with the provided + /// semaphore. The caller has to make sure the semaphore isn't exhausted. + async fn apply_spec_sql_db( + spec: Arc, + url: Arc, + ctx: Arc>, + jwks_roles: Arc>, + concurrency_token: Arc, + db: DB, + ) -> Result<()> { + let _permit = concurrency_token.acquire().await?; + + let mut client_conn = None; + + for subphase in [ + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, + ] { + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + RunInEachDatabase { + db: db.clone(), + subphase, + }, + // Only connect if apply_operation actually wants a connection. 
+ // It's quite possible this database doesn't need any queries, + // so by not connecting we save time and effort connecting to + // that database. + || async { + if client_conn.is_none() { + let db_client = Self::get_maintenance_client(&url).await?; + client_conn.replace(db_client); + } + let client = client_conn.as_ref().unwrap(); + Ok(client) + }, + ) + .await?; + } + + drop(client_conn); + + Ok::<(), anyhow::Error>(()) + } + + /// Do initial configuration of the already started Postgres. + #[instrument(skip_all)] + pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { + // If connection fails, + // it may be the old node with `zenith_admin` superuser. + // + // In this case we need to connect with old `zenith_admin` name + // and create new user. We cannot simply rename connected user, + // but we can create a new one and grant it all privileges. + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + + let url = Arc::new(url); + let spec = Arc::new( + compute_state + .pspec + .as_ref() + .expect("spec must be set") + .spec + .clone(), + ); + + // Choose how many concurrent connections to use for applying the spec changes. + // If the cluster is not currently Running we don't have to deal with user connections, + // and can thus use all `max_connections` connection slots. However, that's generally not + // very efficient, so we generally still limit it to a smaller number. + let max_concurrent_connections = if compute_state.status != ComputeStatus::Running { + // If the settings contain 'max_connections', use that as template + if let Some(config) = spec.cluster.settings.find("max_connections") { + config.parse::().ok() + } else { + // Otherwise, try to find the setting in the postgresql_conf string + spec.cluster + .postgresql_conf + .iter() + .flat_map(|conf| conf.split("\n")) + .filter_map(|line| { + if !line.contains("max_connections") { + return None; + } + + let (key, value) = line.split_once("=")?; + let key = key + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + let value = value + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + if key != "max_connections" { + return None; + } + + value.parse::().ok() + }) + .next() + } + // If max_connections is present, use at most 1/3rd of that. + // When max_connections is lower than 30, try to use at least 10 connections, but + // never more than max_connections. + .map(|limit| match limit { + 0..10 => limit, + 10..30 => 10, + 30.. => limit / 3, + }) + // If we didn't find max_connections, default to 10 concurrent connections. + .unwrap_or(10) + } else { + // state == Running + // Because the cluster is already in the Running state, we should assume users are + // already connected to the cluster, and high concurrency could negatively + // impact user connectivity. Therefore, we can limit concurrency to the number of + // reserved superuser connections, which users wouldn't be able to use anyway. + spec.cluster + .settings + .find("superuser_reserved_connections") + .iter() + .filter_map(|val| val.parse::().ok()) + .map(|val| if val > 1 { val - 1 } else { 1 }) + .last() + .unwrap_or(3) + }; + + // Merge-apply spec & changes to PostgreSQL state. 
+ self.apply_spec_sql(spec.clone(), url.clone(), max_concurrent_connections)?; + + if let Some(ref local_proxy) = &spec.clone().local_proxy_config { info!("configuring local_proxy"); local_proxy::configure(local_proxy).context("apply_config local_proxy")?; } // Run migrations separately to not hold up cold starts thread::spawn(move || { - let mut connstr = connstr.clone(); + let mut connstr = url.as_ref().clone(); connstr .query_pairs_mut() .append_pair("application_name", "migrations"); @@ -908,7 +1164,8 @@ impl ComputeNode { let mut client = Client::connect(connstr.as_str(), NoTls)?; handle_migrations(&mut client).context("apply_config handle_migrations") }); - Ok(()) + + Ok::<(), anyhow::Error>(()) } // Wrapped this around `pg_ctl reload`, but right now we don't use @@ -971,32 +1228,16 @@ impl ComputeNode { config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { self.pg_reload_conf()?; - let mut client = Client::connect(self.connstr.as_str(), NoTls)?; - - // Proceed with post-startup configuration. Note, that order of operations is important. - // Disable DDL forwarding because control plane already knows about these roles/databases. if spec.mode == ComputeMode::Primary { - client.simple_query("SET neon.forward_ddl = false")?; - cleanup_instance(&mut client)?; - handle_roles(&spec, &mut client)?; - handle_databases(&spec, &mut client)?; - handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; - handle_grants( - &spec, - &mut client, - self.connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - )?; - handle_extensions(&spec, &mut client)?; - handle_extension_neon(&mut client)?; - // We can skip handle_migrations here because a new migration can only appear - // if we have a new version of the compute_ctl binary, which can only happen - // if compute got restarted, in which case we'll end up inside of apply_config - // instead of reconfigure. - } + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + let url = Arc::new(url); - // 'Close' connection - drop(client); + let spec = Arc::new(spec.clone()); + + self.apply_spec_sql(spec, url, 1)?; + } Ok(()) })?; diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index d27ae58fa2..ee4cf2dfa5 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -23,5 +23,6 @@ pub mod monitor; pub mod params; pub mod pg_helpers; pub mod spec; +mod spec_apply; pub mod swap; pub mod sync_sk; diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index b2dc265864..4a1e5ee0e8 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -10,9 +10,9 @@ use std::thread::JoinHandle; use std::time::{Duration, Instant}; use anyhow::{bail, Result}; +use futures::StreamExt; use ini::Ini; use notify::{RecursiveMode, Watcher}; -use postgres::{Client, Transaction}; use tokio::io::AsyncBufReadExt; use tokio::time::timeout; use tokio_postgres::NoTls; @@ -197,27 +197,34 @@ impl Escaping for PgIdent { } /// Build a list of existing Postgres roles -pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result> { - let postgres_roles = xact - .query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])? - .iter() +pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result> { + let postgres_roles = client + .query_raw::( + "SELECT rolname, rolpassword FROM pg_catalog.pg_authid", + &[], + ) + .await? 
+ .filter_map(|row| async { row.ok() }) .map(|row| Role { name: row.get("rolname"), encrypted_password: row.get("rolpassword"), options: None, }) - .collect(); + .collect() + .await; Ok(postgres_roles) } /// Build a list of existing Postgres databases -pub fn get_existing_dbs(client: &mut Client) -> Result> { +pub async fn get_existing_dbs_async( + client: &tokio_postgres::Client, +) -> Result> { // `pg_database.datconnlimit = -2` means that the database is in the // invalid state. See: // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - let postgres_dbs: Vec = client - .query( + let rowstream = client + .query_raw::( "SELECT datname AS name, datdba::regrole::text AS owner, @@ -226,8 +233,11 @@ pub fn get_existing_dbs(client: &mut Client) -> Result FROM pg_catalog.pg_database;", &[], - )? - .iter() + ) + .await?; + + let dbs_map = rowstream + .filter_map(|r| async { r.ok() }) .map(|row| Database { name: row.get("name"), owner: row.get("owner"), @@ -235,12 +245,9 @@ pub fn get_existing_dbs(client: &mut Client) -> Result invalid: row.get("invalid"), options: None, }) - .collect(); - - let dbs_map = postgres_dbs - .iter() .map(|db| (db.name.clone(), db.clone())) - .collect::>(); + .collect::>() + .await; Ok(dbs_map) } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 73f3d1006a..c7d2deb090 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,22 +1,17 @@ -use std::collections::HashSet; +use anyhow::{anyhow, bail, Result}; +use postgres::Client; +use reqwest::StatusCode; use std::fs::File; use std::path::Path; -use std::str::FromStr; - -use anyhow::{anyhow, bail, Context, Result}; -use postgres::config::Config; -use postgres::{Client, NoTls}; -use reqwest::StatusCode; -use tracing::{error, info, info_span, instrument, span_enabled, warn, Level}; +use tracing::{error, info, instrument, warn}; use crate::config; -use crate::logger::inlinify; use crate::migration::MigrationRunner; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse}; -use compute_api::spec::{ComputeSpec, PgIdent, Role}; +use compute_api::spec::ComputeSpec; // Do control plane request and return response if any. In case of error it // returns a bool flag indicating whether it makes sense to retry the request @@ -151,625 +146,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { Ok(()) } -/// Compute could be unexpectedly shut down, for example, during the -/// database dropping. This leaves the database in the invalid state, -/// which prevents new db creation with the same name. This function -/// will clean it up before proceeding with catalog updates. All -/// possible future cleanup operations may go here too. -#[instrument(skip_all)] -pub fn cleanup_instance(client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - for (_, db) in existing_dbs { - if db.invalid { - // After recent commit in Postgres, interrupted DROP DATABASE - // leaves the database in the invalid state. According to the - // commit message, the only option for user is to drop it again. - // See: - // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - // - // Postgres Neon extension is done the way, that db is de-registered - // in the control plane metadata only after it is dropped. So there is - // a chance that it still thinks that db should exist. 
This means - // that it will be re-created by `handle_databases()`. Yet, it's fine - // as user can just repeat drop (in vanilla Postgres they would need - // to do the same, btw). - let query = format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()); - info!("dropping invalid database {}", db.name); - client.execute(query.as_str(), &[])?; - } - } - - Ok(()) -} - -/// Given a cluster spec json and open transaction it handles roles creation, -/// deletion and update. -#[instrument(skip_all)] -pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - let mut jwks_roles = HashSet::new(); - if let Some(local_proxy) = &spec.local_proxy_config { - for jwks_setting in local_proxy.jwks.iter().flatten() { - for role_name in &jwks_setting.role_names { - jwks_roles.insert(role_name.clone()); - } - } - } - - // Print a list of existing Postgres roles (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for r in &existing_roles { - vec.push(format!( - "{}:{}", - r.name, - if r.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - } - )); - } - - info!("postgres roles (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing role renames"); - for op in ops { - match op.action.as_ref() { - "delete_role" => { - // no-op now, roles will be deleted at the end of configuration - } - // Renaming role drops its password, since role name is - // used as a salt there. It is important that this role - // is recorded with a new `name` in the `roles` list. - // Follow up roles update will set the new password. - "rename_role" => { - let new_name = op.new_name.as_ref().unwrap(); - - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - if existing_roles.iter().any(|r| r.name == op.name) { - let query: String = format!( - "ALTER ROLE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming role '{}' to '{}'", op.name, new_name); - xact.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres roles info to handle possible roles renaming - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - info!( - "handling cluster spec roles (total {})", - spec.cluster.roles.len() - ); - for role in &spec.cluster.roles { - let name = &role.name; - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - let pg_role = existing_roles.iter().find(|r| r.name == *name); - - enum RoleAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_role { - if (r.encrypted_password.is_none() && role.encrypted_password.is_some()) - || (r.encrypted_password.is_some() && role.encrypted_password.is_none()) - { - RoleAction::Update - } else if let Some(pg_pwd) = &r.encrypted_password { - // Check whether password changed or not (trim 'md5' prefix first if any) - // - // This is a backward compatibility hack, which comes from the times when we were using - // md5 for everyone and hashes were stored in the console db without md5 prefix. So when - // role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix, - // but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix. 
- // Here is the only place so far where we compare hashes, so it seems to be the best candidate - // to place this compatibility layer. - let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") { - stripped - } else { - pg_pwd - }; - if pg_pwd != *role.encrypted_password.as_ref().unwrap() { - RoleAction::Update - } else { - RoleAction::None - } - } else { - RoleAction::None - } - } else { - RoleAction::Create - }; - - match action { - RoleAction::None => {} - RoleAction::Update => { - // This can be run on /every/ role! Not just ones created through the console. - // This means that if you add some funny ALTER here that adds a permission, - // this will get run even on user-created roles! This will result in different - // behavior before and after a spec gets reapplied. The below ALTER as it stands - // now only grants LOGIN and changes the password. Please do not allow this branch - // to do anything silly. - let mut query: String = format!("ALTER ROLE {} ", name.pg_quote()); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - RoleAction::Create => { - // This branch only runs when roles are created through the console, so it is - // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited - // from neon_superuser. - let mut query: String = format!( - "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", - name.pg_quote() - ); - if jwks_roles.contains(name.as_str()) { - query = format!("CREATE ROLE {}", name.pg_quote()); - } - info!("running role create query: '{}'", &query); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - } - - if span_enabled!(Level::INFO) { - let pwd = if role.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - }; - let action_str = match action { - RoleAction::None => "", - RoleAction::Create => " -> create", - RoleAction::Update => " -> update", - }; - info!(" - {}:{}{}", name, pwd, action_str); - } - } - - xact.commit()?; - - Ok(()) -} - -/// Reassign all dependent objects and delete requested roles. -#[instrument(skip_all)] -pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { - if let Some(ops) = &spec.delta_operations { - // First, reassign all dependent objects to db owners. - info!("reassigning dependent objects of to-be-deleted roles"); - - // Fetch existing roles. We could've exported and used `existing_roles` from - // `handle_roles()`, but we only make this list there before creating new roles. - // Which is probably fine as we never create to-be-deleted roles, but that'd - // just look a bit untidy. Anyway, the entire `pg_roles` should be in shared - // buffers already, so this shouldn't be a big deal. - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - xact.commit()?; - - for op in ops { - // Check that role is still present in Postgres, as this could be a - // restart with the same spec after role deletion. - if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) { - reassign_owned_objects(spec, connstr, &op.name)?; - } - } - - // Second, proceed with role deletions. 
- info!("processing role deletions"); - let mut xact = client.transaction()?; - for op in ops { - // We do not check either role exists or not, - // Postgres will take care of it for us - if op.action == "delete_role" { - let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote()); - - warn!("deleting role '{}'", &op.name); - xact.execute(query.as_str(), &[])?; - } - } - xact.commit()?; - } - - Ok(()) -} - -fn reassign_owned_objects_in_one_db( - conf: Config, - role_name: &PgIdent, - db_owner: &PgIdent, -) -> Result<()> { - let mut client = conf.connect(NoTls)?; - - // This will reassign all dependent objects to the db owner - let reassign_query = format!( - "REASSIGN OWNED BY {} TO {}", - role_name.pg_quote(), - db_owner.pg_quote() - ); - info!( - "reassigning objects owned by '{}' in db '{}' to '{}'", - role_name, - conf.get_dbname().unwrap_or(""), - db_owner - ); - client.simple_query(&reassign_query)?; - - // This now will only drop privileges of the role - let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote()); - client.simple_query(&drop_query)?; - Ok(()) -} - -// Reassign all owned objects in all databases to the owner of the database. -fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> { - for db in &spec.cluster.databases { - if db.owner != *role_name { - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?; - } - } - - // Also handle case when there are no databases in the spec. - // In this case we need to reassign objects in the default database. - let conf = Config::from_str(connstr)?; - let db_owner = PgIdent::from_str("cloud_admin")?; - reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?; - - Ok(()) -} - -/// It follows mostly the same logic as `handle_roles()` excepting that we -/// does not use an explicit transactions block, since major database operations -/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level -/// atomicity should be enough here due to the order of operations and various checks, -/// which together provide us idempotency. -#[instrument(skip_all)] -pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - // Print a list of existing Postgres databases (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for (dbname, db) in &existing_dbs { - vec.push(format!("{}:{}", dbname, db.owner)); - } - info!("postgres databases (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing delta operations on databases"); - for op in ops { - match op.action.as_ref() { - // We do not check either DB exists or not, - // Postgres will take care of it for us - "delete_db" => { - // In Postgres we can't drop a database if it is a template. - // So we need to unset the template flag first, but it could - // be a retry, so we could've already dropped the database. - // Check that database exists first to make it idempotent. - let unset_template_query: String = format!( - " - DO $$ - BEGIN - IF EXISTS( - SELECT 1 - FROM pg_catalog.pg_database - WHERE datname = {} - ) - THEN - ALTER DATABASE {} is_template false; - END IF; - END - $$;", - escape_literal(&op.name), - &op.name.pg_quote() - ); - // Use FORCE to drop database even if there are active connections. 
- // We run this from `cloud_admin`, so it should have enough privileges. - // NB: there could be other db states, which prevent us from dropping - // the database. For example, if db is used by any active subscription - // or replication slot. - // TODO: deal with it once we allow logical replication. Proper fix should - // involve returning an error code to the control plane, so it could - // figure out that this is a non-retryable error, return it to the user - // and fail operation permanently. - let drop_db_query: String = format!( - "DROP DATABASE IF EXISTS {} WITH (FORCE)", - &op.name.pg_quote() - ); - - warn!("deleting database '{}'", &op.name); - client.execute(unset_template_query.as_str(), &[])?; - client.execute(drop_db_query.as_str(), &[])?; - } - "rename_db" => { - let new_name = op.new_name.as_ref().unwrap(); - - if existing_dbs.contains_key(&op.name) { - let query: String = format!( - "ALTER DATABASE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming database '{}' to '{}'", op.name, new_name); - client.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres databases info to handle possible renames - let existing_dbs = get_existing_dbs(client)?; - - info!( - "handling cluster spec databases (total {})", - spec.cluster.databases.len() - ); - for db in &spec.cluster.databases { - let name = &db.name; - let pg_db = existing_dbs.get(name); - - enum DatabaseAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_db { - // XXX: db owner name is returned as quoted string from Postgres, - // when quoting is needed. - let new_owner = if r.owner.starts_with('"') { - db.owner.pg_quote() - } else { - db.owner.clone() - }; - - if new_owner != r.owner { - // Update the owner - DatabaseAction::Update - } else { - DatabaseAction::None - } - } else { - DatabaseAction::Create - }; - - match action { - DatabaseAction::None => {} - DatabaseAction::Update => { - let query: String = format!( - "ALTER DATABASE {} OWNER TO {}", - name.pg_quote(), - db.owner.pg_quote() - ); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - } - DatabaseAction::Create => { - let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote()); - query.push_str(&db.to_pg_options()); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - let grant_query: String = format!( - "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", - name.pg_quote() - ); - client.execute(grant_query.as_str(), &[])?; - } - }; - - if span_enabled!(Level::INFO) { - let action_str = match action { - DatabaseAction::None => "", - DatabaseAction::Create => " -> create", - DatabaseAction::Update => " -> update", - }; - info!(" - {}:{}{}", db.name, db.owner, action_str); - } - } - - Ok(()) -} - -/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants -/// to allow users creating trusted extensions and re-creating `public` schema, for example. -#[instrument(skip_all)] -pub fn handle_grants( - spec: &ComputeSpec, - client: &mut Client, - connstr: &str, - enable_anon_extension: bool, -) -> Result<()> { - info!("modifying database permissions"); - let existing_dbs = get_existing_dbs(client)?; - - // Do some per-database access adjustments. We'd better do this at db creation time, - // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants - // atomically. 
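    // Illustrative aside (hypothetical database name `app_db`): Postgres refuses to run
    // CREATE DATABASE inside an explicit transaction block, e.g.
    //
    //     let mut xact = client.transaction()?;
    //     xact.execute("CREATE DATABASE app_db", &[])?;
    //     // => ERROR: CREATE DATABASE cannot run inside a transaction block
    //
    // which is why database creation and the follow-up grants cannot be made atomic.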
- for db in &spec.cluster.databases { - match existing_dbs.get(&db.name) { - Some(pg_db) => { - if pg_db.restrict_conn || pg_db.invalid { - info!( - "skipping grants for db {} (invalid: {}, connections not allowed: {})", - db.name, pg_db.invalid, pg_db.restrict_conn - ); - continue; - } - } - None => { - bail!( - "database {} doesn't exist in Postgres after handle_databases()", - db.name - ); - } - } - - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - - let mut db_client = conf.connect(NoTls)?; - - // This will only change ownership on the schema itself, not the objects - // inside it. Without it owner of the `public` schema will be `cloud_admin` - // and database owner cannot do anything with it. SQL procedure ensures - // that it won't error out if schema `public` doesn't exist. - let alter_query = format!( - "DO $$\n\ - DECLARE\n\ - schema_owner TEXT;\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - SELECT nspowner::regrole::text\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - INTO schema_owner;\n\ - \n\ - IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\ - THEN\n\ - ALTER SCHEMA public OWNER TO {};\n\ - END IF;\n\ - END IF;\n\ - END\n\ - $$;", - db.owner.pg_quote() - ); - db_client.simple_query(&alter_query)?; - - // Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user. - // This is needed because since postgres 15 this privilege is removed by default. - // TODO: web_access isn't created for almost 1 year. It could be that we have - // active users of 1 year old projects, but hopefully not, so check it and - // remove this code if possible. The worst thing that could happen is that - // user won't be able to use public schema in NEW databases created in the - // very OLD project. - // - // Also, alter default permissions so that relations created by extensions can be - // used by neon_superuser without permission issues. 
- let grant_query = "DO $$\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - ) AND\n\ - current_setting('server_version_num')::int/10000 >= 15\n\ - THEN\n\ - IF EXISTS(\n\ - SELECT rolname\n\ - FROM pg_catalog.pg_roles\n\ - WHERE rolname = 'web_access'\n\ - )\n\ - THEN\n\ - GRANT CREATE ON SCHEMA public TO web_access;\n\ - END IF;\n\ - END IF;\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\ - END IF;\n\ - END\n\ - $$;" - .to_string(); - - info!( - "grant query for db {} : {}", - &db.name, - inlinify(&grant_query) - ); - db_client.simple_query(&grant_query)?; - - // it is important to run this after all grants - if enable_anon_extension { - handle_extension_anon(spec, &db.owner, &mut db_client, false) - .context("handle_grants handle_extension_anon")?; - } - } - - Ok(()) -} - -/// Create required system extensions -#[instrument(skip_all)] -pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { - if libs.contains("pg_stat_statements") { - // Create extension only if this compute really needs it - let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements"; - info!("creating system extensions with query: {}", query); - client.simple_query(query)?; - } - } - - Ok(()) -} - -/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database -#[instrument(skip_all)] -pub fn handle_extension_neon(client: &mut Client) -> Result<()> { - info!("handle extension neon"); - - let mut query = "CREATE SCHEMA IF NOT EXISTS neon"; - client.simple_query(query)?; - - query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"; - info!("create neon extension with query: {}", query); - client.simple_query(query)?; - - query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'"; - client.simple_query(query)?; - - query = "ALTER EXTENSION neon SET SCHEMA neon"; - info!("alter neon extension schema with query: {}", query); - client.simple_query(query)?; - - // this will be a no-op if extension is already up to date, - // which may happen in two cases: - // - extension was just installed - // - extension was already installed and is up to date - let query = "ALTER EXTENSION neon UPDATE"; - info!("update neon extension version with query: {}", query); - if let Err(e) = client.simple_query(query) { - error!( - "failed to upgrade neon extension during `handle_extension_neon`: {}", - e - ); - } - - Ok(()) -} - #[instrument(skip_all)] pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { info!("handle neon extension upgrade"); diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs new file mode 100644 index 0000000000..7308d5d36e --- /dev/null +++ b/compute_tools/src/spec_apply.rs @@ -0,0 +1,680 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Formatter}; +use std::future::Future; +use std::iter::empty; +use std::iter::once; +use std::sync::Arc; + +use crate::compute::construct_superuser_query; +use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt}; +use anyhow::{bail, Result}; +use compute_api::spec::{ComputeFeature, ComputeSpec, Database, 
PgIdent, Role}; +use futures::future::join_all; +use tokio::sync::RwLock; +use tokio_postgres::Client; +use tracing::{debug, info_span, Instrument}; + +#[derive(Clone)] +pub enum DB { + SystemDB, + UserDB(Database), +} + +impl DB { + pub fn new(db: Database) -> DB { + Self::UserDB(db) + } + + pub fn is_owned_by(&self, role: &PgIdent) -> bool { + match self { + DB::SystemDB => false, + DB::UserDB(db) => &db.owner == role, + } + } +} + +impl Debug for DB { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + DB::SystemDB => f.debug_tuple("SystemDB").finish(), + DB::UserDB(db) => f.debug_tuple("UserDB").field(&db.name).finish(), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub enum PerDatabasePhase { + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, +} + +#[derive(Clone, Debug)] +pub enum ApplySpecPhase { + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + RunInEachDatabase { db: DB, subphase: PerDatabasePhase }, + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, +} + +pub struct Operation { + pub query: String, + pub comment: Option, +} + +pub struct MutableApplyContext { + pub roles: HashMap, + pub dbs: HashMap, +} + +/// Appply the operations that belong to the given spec apply phase. +/// +/// Commands within a single phase are executed in order of Iterator yield. +/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database +/// indicated by its `db` field, and can share a single client for all changes +/// to that database. +/// +/// Notes: +/// - Commands are pipelined, and thus may cause incomplete apply if one +/// command of many fails. +/// - Failing commands will fail the phase's apply step once the return value +/// is processed. +/// - No timeouts have (yet) been implemented. +/// - The caller is responsible for limiting and/or applying concurrency. +pub async fn apply_operations<'a, Fut, F>( + spec: Arc, + ctx: Arc>, + jwks_roles: Arc>, + apply_spec_phase: ApplySpecPhase, + client: F, +) -> Result<()> +where + F: FnOnce() -> Fut, + Fut: Future>, +{ + debug!("Starting phase {:?}", &apply_spec_phase); + let span = info_span!("db_apply_changes", phase=?apply_spec_phase); + let span2 = span.clone(); + async move { + debug!("Processing phase {:?}", &apply_spec_phase); + let ctx = ctx; + + let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase) + .await? + .peekable(); + + // Return (and by doing so, skip requesting the PostgreSQL client) if + // we don't have any operations scheduled. + if ops.peek().is_none() { + return Ok(()); + } + + let client = client().await?; + + debug!("Applying phase {:?}", &apply_spec_phase); + + let active_queries = ops + .map(|op| { + let Operation { comment, query } = op; + let inspan = match comment { + None => span.clone(), + Some(comment) => info_span!("phase {}: {}", comment), + }; + + async { + let query = query; + let res = client.simple_query(&query).await; + debug!( + "{} {}", + if res.is_ok() { + "successfully executed" + } else { + "failed to execute" + }, + query + ); + res + } + .instrument(inspan) + }) + .collect::>(); + + drop(ctx); + + for it in join_all(active_queries).await { + drop(it?); + } + + debug!("Completed phase {:?}", &apply_spec_phase); + + Ok(()) + } + .instrument(span2) + .await +} + +/// Create a stream of operations to be executed for that phase of applying +/// changes. 
+/// +/// In the future we may generate a single stream of changes and then +/// sort/merge/batch execution, but for now this is a nice way to improve +/// batching behaviour of the commands. +async fn get_operations<'a>( + spec: &'a ComputeSpec, + ctx: &'a RwLock, + jwks_roles: &'a HashSet, + apply_spec_phase: &'a ApplySpecPhase, +) -> Result + 'a + Send>> { + match apply_spec_phase { + ApplySpecPhase::CreateSuperUser => { + let query = construct_superuser_query(spec); + + Ok(Box::new(once(Operation { + query, + comment: None, + }))) + } + ApplySpecPhase::DropInvalidDatabases => { + let mut ctx = ctx.write().await; + let databases = &mut ctx.dbs; + + let keys: Vec<_> = databases + .iter() + .filter(|(_, db)| db.invalid) + .map(|(dbname, _)| dbname.clone()) + .collect(); + + // After recent commit in Postgres, interrupted DROP DATABASE + // leaves the database in the invalid state. According to the + // commit message, the only option for user is to drop it again. + // See: + // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 + // + // Postgres Neon extension is done the way, that db is de-registered + // in the control plane metadata only after it is dropped. So there is + // a chance that it still thinks that the db should exist. This means + // that it will be re-created by the `CreateDatabases` phase. This + // is fine, as user can just drop the table again (in vanilla + // Postgres they would need to do the same). + let operations = keys + .into_iter() + .filter_map(move |dbname| ctx.dbs.remove(&dbname)) + .map(|db| Operation { + query: format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()), + comment: Some(format!("Dropping invalid database {}", db.name)), + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameRoles => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "rename_role") + .filter_map(move |op| { + let roles = &mut ctx.roles; + + if roles.contains_key(op.name.as_str()) { + None + } else { + let new_name = op.new_name.as_ref().unwrap(); + let mut role = roles.remove(op.name.as_str()).unwrap(); + + role.name = new_name.clone(); + role.encrypted_password = None; + roles.insert(role.name.clone(), role); + + Some(Operation { + query: format!( + "ALTER ROLE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote() + ), + comment: Some(format!("renaming role '{}' to '{}'", op.name, new_name)), + }) + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterRoles => { + let mut ctx = ctx.write().await; + + let operations = spec.cluster.roles + .iter() + .filter_map(move |role| { + let roles = &mut ctx.roles; + let db_role = roles.get(&role.name); + + match db_role { + Some(db_role) => { + if db_role.encrypted_password != role.encrypted_password { + // This can be run on /every/ role! Not just ones created through the console. + // This means that if you add some funny ALTER here that adds a permission, + // this will get run even on user-created roles! This will result in different + // behavior before and after a spec gets reapplied. The below ALTER as it stands + // now only grants LOGIN and changes the password. Please do not allow this branch + // to do anything silly. 
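                                // Illustration with a hypothetical role name (the exact option
                                // string comes from Role::to_pg_options(), which as noted above
                                // only grants LOGIN and sets the password), e.g.:
                                //     ALTER ROLE app_user LOGIN PASSWORD 'SCRAM-SHA-256$4096:...'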
+ Some(Operation { + query: format!( + "ALTER ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ), + comment: None, + }) + } else { + None + } + } + None => { + let query = if !jwks_roles.contains(role.name.as_str()) { + format!( + "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + } else { + format!( + "CREATE ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + }; + Some(Operation { + query, + comment: Some(format!("creating role {}", role.name)), + }) + } + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameAndDeleteDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter_map(move |op| { + let databases = &mut ctx.dbs; + match op.action.as_str() { + // We do not check whether the DB exists or not, + // Postgres will take care of it for us + "delete_db" => { + // In Postgres we can't drop a database if it is a template. + // So we need to unset the template flag first, but it could + // be a retry, so we could've already dropped the database. + // Check that database exists first to make it idempotent. + let unset_template_query: String = format!( + include_str!("sql/unset_template_for_drop_dbs.sql"), + datname_str = escape_literal(&op.name), + datname = &op.name.pg_quote() + ); + + // Use FORCE to drop database even if there are active connections. + // We run this from `cloud_admin`, so it should have enough privileges. + // NB: there could be other db states, which prevent us from dropping + // the database. For example, if db is used by any active subscription + // or replication slot. + // TODO: deal with it once we allow logical replication. Proper fix should + // involve returning an error code to the control plane, so it could + // figure out that this is a non-retryable error, return it to the user + // and fail operation permanently. 
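                            // Illustration with a hypothetical database name `app_db` (the real
                            // statements are built just below and from sql/unset_template_for_drop_dbs.sql):
                            //     ALTER DATABASE app_db is_template false;
                            //     DROP DATABASE IF EXISTS app_db WITH (FORCE);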
+ let drop_db_query: String = format!( + "DROP DATABASE IF EXISTS {} WITH (FORCE)", + &op.name.pg_quote() + ); + + databases.remove(&op.name); + + Some(vec![ + Operation { + query: unset_template_query, + comment: Some(format!( + "optionally clearing template flags for DB {}", + op.name, + )), + }, + Operation { + query: drop_db_query, + comment: Some(format!("deleting database {}", op.name,)), + }, + ]) + } + "rename_db" => { + if let Some(mut db) = databases.remove(&op.name) { + // update state of known databases + let new_name = op.new_name.as_ref().unwrap(); + db.name = new_name.clone(); + databases.insert(db.name.clone(), db); + + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote(), + ), + comment: Some(format!( + "renaming database '{}' to '{}'", + op.name, new_name + )), + }]) + } else { + None + } + } + _ => None, + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .cluster + .databases + .iter() + .filter_map(move |db| { + let databases = &mut ctx.dbs; + if let Some(edb) = databases.get_mut(&db.name) { + let change_owner = if edb.owner.starts_with('"') { + db.owner.pg_quote() != edb.owner + } else { + db.owner != edb.owner + }; + + edb.owner = db.owner.clone(); + + if change_owner { + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} OWNER TO {}", + db.name.pg_quote(), + db.owner.pg_quote() + ), + comment: Some(format!( + "changing database owner of database {} to {}", + db.name, db.owner + )), + }]) + } else { + None + } + } else { + databases.insert(db.name.clone(), db.clone()); + + Some(vec![ + Operation { + query: format!( + "CREATE DATABASE {} {}", + db.name.pg_quote(), + db.to_pg_options(), + ), + comment: None, + }, + Operation { + query: format!( + "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", + db.name.pg_quote() + ), + comment: None, + }, + ]) + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RunInEachDatabase { db, subphase } => { + match subphase { + PerDatabasePhase::DeleteDBRoleReferences => { + let ctx = ctx.read().await; + + let operations = + spec.delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .filter_map(move |op| { + if db.is_owned_by(&op.name) { + return None; + } + if !ctx.roles.contains_key(&op.name) { + return None; + } + let quoted = op.name.pg_quote(); + let new_owner = match &db { + DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(), + DB::UserDB(db) => db.owner.pg_quote(), + }; + + Some(vec![ + // This will reassign all dependent objects to the db owner + Operation { + query: format!( + "REASSIGN OWNED BY {} TO {}", + quoted, new_owner, + ), + comment: None, + }, + // This now will only drop privileges of the role + Operation { + query: format!("DROP OWNED BY {}", quoted), + comment: None, + }, + ]) + }) + .flatten(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::ChangeSchemaPerms => { + let ctx = ctx.read().await; + let databases = &ctx.dbs; + + let db = match &db { + // ignore schema permissions on the system database + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + + if databases.get(&db.name).is_none() { + bail!("database {} doesn't exist in PostgreSQL", db.name); + } + + let edb = databases.get(&db.name).unwrap(); + + if edb.restrict_conn || edb.invalid { + return Ok(Box::new(empty())); + } + + let operations = vec![ + Operation { + query: 
format!( + include_str!("sql/set_public_schema_owner.sql"), + db_owner = db.owner.pg_quote() + ), + comment: None, + }, + Operation { + query: String::from(include_str!("sql/default_grants.sql")), + comment: None, + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::HandleAnonExtension => { + // Only install Anon into user databases + let db = match &db { + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + // Never install Anon when it's not enabled as feature + if !spec.features.contains(&ComputeFeature::AnonExtension) { + return Ok(Box::new(empty())); + } + + // Only install Anon when it's added in preload libraries + let opt_libs = spec.cluster.settings.find("shared_preload_libraries"); + + let libs = match opt_libs { + Some(libs) => libs, + None => return Ok(Box::new(empty())), + }; + + if !libs.contains("anon") { + return Ok(Box::new(empty())); + } + + let db_owner = db.owner.pg_quote(); + + let operations = vec![ + // Create anon extension if this compute needs it + // Users cannot create it themselves, because superuser is required. + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"), + comment: Some(String::from("creating anon extension")), + }, + // Initialize anon extension + // This also requires superuser privileges, so users cannot do it themselves. + Operation { + query: String::from("SELECT anon.init()"), + comment: Some(String::from("initializing anon extension data")), + }, + Operation { + query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner), + comment: Some(String::from( + "granting anon extension schema permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", + db_owner + ), + comment: Some(String::from( + "granting anon extension schema functions permissions", + )), + }, + // We need this, because some functions are defined as SECURITY DEFINER. + // In Postgres SECURITY DEFINER functions are executed with the privileges + // of the owner. + // In anon extension this it is needed to access some GUCs, which are only accessible to + // superuser. But we've patched postgres to allow db_owner to access them as well. + // So we need to change owner of these functions to db_owner. + Operation { + query: format!( + include_str!("sql/anon_ext_fn_reassign.sql"), + db_owner = db_owner, + ), + comment: Some(String::from( + "change anon extension functions owner to database_owner", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension tables permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension sequences permissions", + )), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + } + } + // Interestingly, we only install p_s_s in the main database, even when + // it's preloaded. 
+ ApplySpecPhase::HandleOtherExtensions => { + if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { + if libs.contains("pg_stat_statements") { + return Ok(Box::new(once(Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"), + comment: Some(String::from("create system extensions")), + }))); + } + } + Ok(Box::new(empty())) + } + ApplySpecPhase::HandleNeonExtension => { + let operations = vec![ + Operation { + query: String::from("CREATE SCHEMA IF NOT EXISTS neon"), + comment: Some(String::from("init: add schema for extension")), + }, + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"), + comment: Some(String::from( + "init: install the extension if not already installed", + )), + }, + Operation { + query: String::from( + "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'", + ), + comment: Some(String::from("compat/fix: make neon relocatable")), + }, + Operation { + query: String::from("ALTER EXTENSION neon SET SCHEMA neon"), + comment: Some(String::from("compat/fix: alter neon extension schema")), + }, + Operation { + query: String::from("ALTER EXTENSION neon UPDATE"), + comment: Some(String::from("compat/update: update neon extension version")), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation { + query: String::from(include_str!("sql/add_availabilitycheck_tables.sql")), + comment: None, + }))), + ApplySpecPhase::DropRoles => { + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .map(|op| Operation { + query: format!("DROP ROLE IF EXISTS {}", op.name.pg_quote()), + comment: None, + }); + + Ok(Box::new(operations)) + } + } +} diff --git a/compute_tools/src/sql/add_availabilitycheck_tables.sql b/compute_tools/src/sql/add_availabilitycheck_tables.sql new file mode 100644 index 0000000000..7c60690c78 --- /dev/null +++ b/compute_tools/src/sql/add_availabilitycheck_tables.sql @@ -0,0 +1,18 @@ +DO $$ +BEGIN + IF NOT EXISTS( + SELECT 1 + FROM pg_catalog.pg_tables + WHERE tablename = 'health_check' + ) + THEN + CREATE TABLE health_check ( + id serial primary key, + updated_at timestamptz default now() + ); + INSERT INTO health_check VALUES (1, now()) + ON CONFLICT (id) DO UPDATE + SET updated_at = now(); + END IF; +END +$$ \ No newline at end of file diff --git a/compute_tools/src/sql/anon_ext_fn_reassign.sql b/compute_tools/src/sql/anon_ext_fn_reassign.sql new file mode 100644 index 0000000000..3d7b15c590 --- /dev/null +++ b/compute_tools/src/sql/anon_ext_fn_reassign.sql @@ -0,0 +1,12 @@ +DO $$ +DECLARE + query varchar; +BEGIN + FOR query IN SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};' + FROM pg_proc p + JOIN pg_namespace nsp ON p.pronamespace = nsp.oid + WHERE nsp.nspname = 'anon' LOOP + EXECUTE query; + END LOOP; +END +$$; diff --git a/compute_tools/src/sql/default_grants.sql b/compute_tools/src/sql/default_grants.sql new file mode 100644 index 0000000000..58ebb0690b --- /dev/null +++ b/compute_tools/src/sql/default_grants.sql @@ -0,0 +1,30 @@ +DO +$$ + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) AND + current_setting('server_version_num')::int / 10000 >= 15 + THEN + IF EXISTS( + SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname = 'web_access' + ) + THEN + GRANT CREATE ON SCHEMA public TO web_access; + 
END IF; + END IF; + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION; + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/set_public_schema_owner.sql b/compute_tools/src/sql/set_public_schema_owner.sql new file mode 100644 index 0000000000..fd061a713e --- /dev/null +++ b/compute_tools/src/sql/set_public_schema_owner.sql @@ -0,0 +1,23 @@ +DO +$$ + DECLARE + schema_owner TEXT; + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + SELECT nspowner::regrole::text + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + INTO schema_owner; + + IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin' + THEN + ALTER SCHEMA public OWNER TO {db_owner}; + END IF; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/unset_template_for_drop_dbs.sql b/compute_tools/src/sql/unset_template_for_drop_dbs.sql new file mode 100644 index 0000000000..6c4343a589 --- /dev/null +++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql @@ -0,0 +1,12 @@ +DO $$ + BEGIN + IF EXISTS( + SELECT 1 + FROM pg_catalog.pg_database + WHERE datname = {datname_str} + ) + THEN + ALTER DATABASE {datname} is_template false; + END IF; + END +$$; \ No newline at end of file From 0a499a317614a049bab4e1166984557789460793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 20 Nov 2024 06:44:23 +0100 Subject: [PATCH 37/43] Don't preload offloaded timelines (#9646) In timeline preloading, we also do a preload for offloaded timelines. This includes the download of `index-part.json`. Ultimately, such a download is wasteful, therefore avoid it. Same goes for the remote client, we just discard it immediately thereafter. Part of #8088 --------- Co-authored-by: Christian Schwarz --- pageserver/src/tenant.rs | 71 +++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 37bf83c984..8e9e3890ba 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -249,7 +249,8 @@ struct TimelinePreload { pub(crate) struct TenantPreload { tenant_manifest: TenantManifest, - timelines: HashMap, + /// Map from timeline ID to a possible timeline preload. It is None iff the timeline is offloaded according to the manifest. + timelines: HashMap>, } /// When we spawn a tenant, there is a special mode for tenant creation that @@ -1397,7 +1398,7 @@ impl Tenant { // Get list of remote timelines // download index files for every tenant timeline info!("listing remote timelines"); - let (remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( + let (mut remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( remote_storage, self.tenant_shard_id, cancel.clone(), @@ -1431,11 +1432,27 @@ impl Tenant { warn!("Unexpected non timeline key {k}"); } + // Avoid downloading IndexPart of offloaded timelines. 
+ let mut offloaded_with_prefix = HashSet::new(); + for offloaded in tenant_manifest.offloaded_timelines.iter() { + if remote_timeline_ids.remove(&offloaded.timeline_id) { + offloaded_with_prefix.insert(offloaded.timeline_id); + } else { + // We'll take care later of timelines in the manifest without a prefix + } + } + + let timelines = self + .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) + .await?; + Ok(TenantPreload { tenant_manifest, - timelines: self - .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) - .await?, + timelines: timelines + .into_iter() + .map(|(id, tl)| (id, Some(tl))) + .chain(offloaded_with_prefix.into_iter().map(|id| (id, None))) + .collect(), }) } @@ -1466,6 +1483,19 @@ impl Tenant { offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline))); offloaded_timeline_ids.insert(timeline_id); } + // Complete deletions for offloaded timeline id's from manifest. + // The manifest will be uploaded later in this function. + offloaded_timelines_list + .retain(|(offloaded_id, offloaded)| { + // Existence of a timeline is finally determined by the existence of an index-part.json in remote storage. + // If there is dangling references in another location, they need to be cleaned up. + let delete = !preload.timelines.contains_key(offloaded_id); + if delete { + tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); + offloaded.defuse_for_tenant_drop(); + } + !delete + }); let mut timelines_to_resume_deletions = vec![]; @@ -1473,10 +1503,9 @@ impl Tenant { let mut timeline_ancestors = HashMap::new(); let mut existent_timelines = HashSet::new(); for (timeline_id, preload) in preload.timelines { - if offloaded_timeline_ids.remove(&timeline_id) { - // The timeline is offloaded, skip loading it. - continue; - } + let Some(preload) = preload else { continue }; + // This is an invariant of the `preload` function's API + assert!(!offloaded_timeline_ids.contains(&timeline_id)); let index_part = match preload.index_part { Ok(i) => { debug!("remote index part exists for timeline {timeline_id}"); @@ -1586,31 +1615,13 @@ impl Tenant { .context("resume_deletion") .map_err(LoadLocalTimelineError::ResumeDeletion)?; } - // Complete deletions for offloaded timeline id's. - offloaded_timelines_list - .retain(|(offloaded_id, offloaded)| { - // At this point, offloaded_timeline_ids has the list of all offloaded timelines - // without a prefix in S3, so they are inexistent. - // In the end, existence of a timeline is finally determined by the existence of an index-part.json in remote storage. - // If there is a dangling reference in another location, they need to be cleaned up. 
- let delete = offloaded_timeline_ids.contains(offloaded_id); - if delete { - tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); - offloaded.defuse_for_tenant_drop(); - } - !delete - }); - if !offloaded_timelines_list.is_empty() { - tracing::info!( - "Tenant has {} offloaded timelines", - offloaded_timelines_list.len() - ); - } + let needs_manifest_upload = + offloaded_timelines_list.len() != preload.tenant_manifest.offloaded_timelines.len(); { let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap(); offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter()); } - if !offloaded_timeline_ids.is_empty() { + if needs_manifest_upload { self.store_tenant_manifest().await?; } From 3ae0b2149e1a80975e098a4156b424e636cc550f Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Wed, 20 Nov 2024 10:14:28 +0000 Subject: [PATCH 38/43] chore(proxy): demote a ton of logs for successful connection attempts (#9803) See https://github.com/neondatabase/cloud/issues/14378 In collaboration with @cloneable and @awarus, we sifted through logs and simply demoted some logs to debug. This is not at all finished and there are more logs to review, but we ran out of time in the session we organised. In any slightly more nuanced cases, we didn't touch the log, instead leaving a TODO comment. --- proxy/src/auth/backend/classic.rs | 8 +++++--- proxy/src/auth/backend/hacks.rs | 2 +- proxy/src/auth/backend/mod.rs | 7 ++++--- proxy/src/auth/credentials.rs | 10 +++++----- proxy/src/auth/flow.rs | 2 ++ proxy/src/bin/local_proxy.rs | 1 + proxy/src/bin/proxy.rs | 1 + proxy/src/cancellation.rs | 10 +++++----- proxy/src/compute.rs | 5 +++-- proxy/src/console_redirect_proxy.rs | 2 +- proxy/src/context/mod.rs | 9 +++++++-- proxy/src/control_plane/client/mod.rs | 4 ++-- proxy/src/control_plane/client/neon.rs | 13 +++++++++---- proxy/src/stream.rs | 1 + 14 files changed, 47 insertions(+), 28 deletions(-) diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 6d26c99832..87a02133c8 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -1,5 +1,5 @@ use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; @@ -21,11 +21,11 @@ pub(super) async fn authenticate( let scram_keys = match secret { #[cfg(any(test, feature = "testing"))] AuthSecret::Md5(_) => { - info!("auth endpoint chooses MD5"); + debug!("auth endpoint chooses MD5"); return Err(auth::AuthError::bad_auth_method("MD5")); } AuthSecret::Scram(secret) => { - info!("auth endpoint chooses SCRAM"); + debug!("auth endpoint chooses SCRAM"); let scram = auth::Scram(&secret, ctx); let auth_outcome = tokio::time::timeout( @@ -50,6 +50,8 @@ pub(super) async fn authenticate( let client_key = match auth_outcome { sasl::Outcome::Success(key) => key, sasl::Outcome::Failure(reason) => { + // TODO: warnings? + // TODO: should we get rid of this because double logging? 
info!("auth backend failed with an error: {reason}"); return Err(auth::AuthError::password_failed(&*creds.user)); } diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 1411d908a5..e651df1d34 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -73,7 +73,7 @@ pub(crate) async fn password_hack_no_authentication( .get_password() .await?; - info!(project = &*payload.endpoint, "received missing parameter"); + debug!(project = &*payload.endpoint, "received missing parameter"); // Report tentative success; compute node will check the password anyway. Ok(( diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 242fe99de2..83c72e7be0 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -14,7 +14,7 @@ use ipnet::{Ipv4Net, Ipv6Net}; use local::LocalBackend; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_postgres::config::AuthKeys; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; @@ -286,7 +286,7 @@ async fn auth_quirks( Ok(info) => (info, None), }; - info!("fetching user's authentication info"); + debug!("fetching user's authentication info"); let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; // check allowed list @@ -404,7 +404,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { ) -> auth::Result> { let res = match self { Self::ControlPlane(api, user_info) => { - info!( + debug!( user = &*user_info.user, project = user_info.endpoint(), "performing authentication using the console" @@ -427,6 +427,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { } }; + // TODO: replace with some metric info!("user successfully authenticated"); Ok(res) } diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index ddecae6af5..dab9007400 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use itertools::Itertools; use pq_proto::StartupMessageParams; use thiserror::Error; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; use crate::context::RequestMonitoring; @@ -147,22 +147,22 @@ impl ComputeUserInfoMaybeEndpoint { } let metrics = Metrics::get(); - info!(%user, "credentials"); + debug!(%user, "credentials"); if sni.is_some() { - info!("Connection with sni"); + debug!("Connection with sni"); metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni); } else if endpoint.is_some() { metrics .proxy .accepted_connections_by_sni .inc(SniKind::NoSni); - info!("Connection without sni"); + debug!("Connection without sni"); } else { metrics .proxy .accepted_connections_by_sni .inc(SniKind::PasswordHack); - info!("Connection with password hack"); + debug!("Connection with password hack"); } let options = NeonOptions::parse_params(params); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 6294549ff6..1740b59b14 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -178,6 +178,8 @@ impl AuthFlow<'_, S, Scram<'_>> { SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus), _ => {} } + + // TODO: make this a metric instead info!("client chooses {}", sasl.method); let outcome = sasl::SaslStream::new(self.stream, sasl.message) diff --git a/proxy/src/bin/local_proxy.rs 
b/proxy/src/bin/local_proxy.rs index 41b0e11e85..c4ec1300f2 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -125,6 +125,7 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); + // TODO: refactor these to use labels debug!("Version: {GIT_VERSION}"); debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index fda5b25961..232721338d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -288,6 +288,7 @@ async fn main() -> anyhow::Result<()> { let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + // TODO: refactor these to use labels info!("Version: {GIT_VERSION}"); info!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index db0970adcb..3ad2d55b53 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -7,7 +7,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; use tokio_postgres::{CancelToken, NoTls}; -use tracing::info; +use tracing::{debug, info}; use uuid::Uuid; use crate::error::ReportableError; @@ -73,7 +73,7 @@ impl CancellationHandler

{ break key; }; - info!("registered new query cancellation key {key}"); + debug!("registered new query cancellation key {key}"); Session { key, cancellation_handler: self, @@ -165,7 +165,7 @@ impl CancelClosure { pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; self.cancel_token.cancel_query_raw(socket, NoTls).await?; - info!("query was cancelled"); + debug!("query was cancelled"); Ok(()) } } @@ -182,7 +182,7 @@ impl

Session<P>

{ /// Store the cancel token for the given session. /// This enables query cancellation in `crate::proxy::prepare_client_connection`. pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData { - info!("enabling query cancellation for this session"); + debug!("enabling query cancellation for this session"); self.cancellation_handler .map .insert(self.key, Some(cancel_closure)); @@ -194,7 +194,7 @@ impl

Session<P>

{ impl

Drop for Session<P>

{ fn drop(&mut self) { self.cancellation_handler.map.remove(&self.key); - info!("dropped query cancellation key {}", &self.key); + debug!("dropped query cancellation key {}", &self.key); } } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index ca4a348ed8..b8876b44eb 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -14,7 +14,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::tls::MakeTlsConnect; use tokio_postgres_rustls::MakeRustlsConnect; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; @@ -213,7 +213,7 @@ impl ConnCfg { }; let connect_once = |host, port| { - info!("trying to connect to compute node at {host}:{port}"); + debug!("trying to connect to compute node at {host}:{port}"); connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. @@ -328,6 +328,7 @@ impl ConnCfg { tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); let stream = connection.stream.into_inner(); + // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), "connected to compute node at {host} ({socket_addr}) sslmode={:?}", diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index cc456f3667..8e71f552a5 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -146,7 +146,7 @@ pub(crate) async fn handle_client( stream: S, conn_gauge: NumClientConnectionsGuard<'static>, ) -> Result>, ClientRequestError> { - info!( + debug!( protocol = %ctx.protocol(), "handling interactive connection from client" ); diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 6cf99c0c97..d057ee0bfd 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -8,7 +8,7 @@ use pq_proto::StartupMessageParams; use smol_str::SmolStr; use tokio::sync::mpsc; use tracing::field::display; -use tracing::{debug, info, info_span, Span}; +use tracing::{debug, info_span, Span}; use try_lock::TryLock; use uuid::Uuid; @@ -122,6 +122,7 @@ impl RequestMonitoring { protocol: Protocol, region: &'static str, ) -> Self { + // TODO: be careful with long lived spans let span = info_span!( "connect_request", %protocol, @@ -384,6 +385,10 @@ impl RequestMonitoringInner { } else { ConnectOutcome::Failed }; + + // TODO: get rid of entirely/refactor + // check for false positives + // AND false negatives if let Some(rejected) = self.rejected { let ep = self .endpoint_id @@ -391,7 +396,7 @@ impl RequestMonitoringInner { .map(|x| x.as_str()) .unwrap_or_default(); // This makes sense only if cache is disabled - info!( + debug!( ?outcome, ?rejected, ?ep, diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index e388d8a538..50903e2f1e 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -8,7 +8,7 @@ use std::time::Duration; use dashmap::DashMap; use tokio::time::Instant; -use tracing::info; +use tracing::{debug, info}; use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::auth::backend::ComputeUserInfo; @@ -214,7 +214,7 @@ impl ApiLocks { self.metrics .semaphore_acquire_seconds .observe(now.elapsed().as_secs_f64()); - info!("acquired permit {:?}", now.elapsed().as_secs_f64()); + debug!("acquired 
permit {:?}", now.elapsed().as_secs_f64()); Ok(WakeComputePermit { permit: permit? }) } diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 26ff4e1402..8f4ae13f33 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -73,6 +73,8 @@ impl NeonControlPlaneClient { .endpoints_cache .is_valid(ctx, &user_info.endpoint.normalize()) { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. info!("endpoint is not valid, skipping the request"); return Ok(AuthInfo::default()); } @@ -92,7 +94,7 @@ impl NeonControlPlaneClient { ]) .build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -104,10 +106,12 @@ impl NeonControlPlaneClient { // TODO(anna): retry Err(e) => { return if e.get_reason().is_not_found() { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. Ok(AuthInfo::default()) } else { Err(e.into()) - } + }; } }; @@ -163,7 +167,7 @@ impl NeonControlPlaneClient { .build() .map_err(GetEndpointJwksError::RequestBuild)?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self @@ -220,7 +224,7 @@ impl NeonControlPlaneClient { let request = request_builder.build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -249,6 +253,7 @@ impl NeonControlPlaneClient { Ok(node) } .map_err(crate::error::log_error) + // TODO: redo this span stuff .instrument(info_span!("http", id = request_id)) .await } diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 89df48c5d3..11f426819d 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -133,6 +133,7 @@ impl PqStream { msg: &'static str, error_kind: ErrorKind, ) -> Result { + // TODO: only log this for actually interesting errors tracing::info!( kind = error_kind.to_metric_label(), msg, From 33dce25af8ea722b3bf53467616fb5156dd41249 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 20 Nov 2024 11:07:45 +0000 Subject: [PATCH 39/43] safekeeper: block deletion on protocol handler shutdown (#9364) ## Problem Two recently observed log errors indicate safekeeper tasks for a timeline running after that timeline's deletion has started. - https://github.com/neondatabase/neon/issues/8972 - https://github.com/neondatabase/neon/issues/8974 These code paths do not have a mechanism that coordinates task shutdown with the overall shutdown of the timeline. ## Summary of changes - Add a `Gate` to `Timeline` - Take the gate as part of resident timeline guard: any code that holds a guard over a timeline staying resident should also hold a guard over the timeline's total lifetime. - Take the gate from the wal removal task - Respect Timeline::cancel in WAL send/recv code, so that we do not block shutdown indefinitely. - Add a test that deletes timelines with open pageserver+compute connections, to check these get torn down as expected. 
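For illustration, a minimal sketch of the intended pattern for background tasks (the task body, `do_work`, and the exact `Gate` method names are assumptions for the example, not code lifted from this patch):

    async fn timeline_background_task(tli: Arc<Timeline>) -> anyhow::Result<()> {
        // Hold the gate open while running; deletion waits for this guard to drop.
        // Assumed API: `enter()` fails once the gate has been closed.
        let _guard = tli.gate.enter()?;
        loop {
            tokio::select! {
                // Respect the cancellation token so we never block the gate from closing.
                _ = tli.cancel.cancelled() => return Ok(()),
                res = do_work(&tli) => res?, // `do_work` is a placeholder for real I/O
            }
        }
    }

Deletion then roughly becomes: fire the cancellation token, wait for the gate to close, and only afterwards remove the timeline's state.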
There is some risk to introducing gates: if there is code holding a gate which does not properly respect a cancellation token, it can cause shutdown hangs. The risk of this for safekeepers is lower in practice than it is for other services, because in a healthy timeline deletion, the compute is shutdown first, then the timeline is deleted on the pageserver, and finally it is deleted on the safekeepers -- that makes it much less likely that some protocol handler will still be running. Closes: #8972 Closes: #8974 --- libs/postgres_backend/src/lib.rs | 7 +- safekeeper/src/receive_wal.rs | 29 +++++- safekeeper/src/send_wal.rs | 37 ++++--- safekeeper/src/timeline.rs | 120 +++++++++-------------- safekeeper/src/timeline_guard.rs | 11 ++- safekeeper/src/timeline_manager.rs | 55 +++++++++-- safekeeper/src/timelines_global_map.rs | 4 +- safekeeper/src/wal_backup.rs | 42 +++++--- test_runner/regress/test_wal_acceptor.py | 83 ++++++++++++++++ 9 files changed, 270 insertions(+), 118 deletions(-) diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 9075a019b4..8c024375c1 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -834,7 +834,7 @@ impl PostgresBackend { use CopyStreamHandlerEnd::*; let expected_end = match &end { - ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true, + ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true, CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error)) if is_expected_io_error(io_error) => { @@ -874,6 +874,9 @@ impl PostgresBackend { // message from server' when it receives ErrorResponse (anything but // CopyData/CopyDone) back. CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)), + + // When cancelled, send no response: we must not risk blocking on sending that response + Cancelled => None, _ => None, }; if let Some((err, errcode)) = err_to_send_and_errcode { @@ -1051,6 +1054,8 @@ pub enum CopyStreamHandlerEnd { /// The connection was lost #[error("connection error: {0}")] Disconnected(#[from] ConnectionError), + #[error("Shutdown")] + Cancelled, /// Some other error #[error(transparent)] Other(#[from] anyhow::Error), diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 2edcc4ef6f..bfa1764abf 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -239,6 +239,10 @@ impl SafekeeperPostgresHandler { pgb: &mut PostgresBackend, tli: &mut Option, ) -> Result<(), CopyStreamHandlerEnd> { + // The `tli` parameter is only used for passing _out_ a timeline, one should + // not have been passed in. + assert!(tli.is_none()); + // Notify the libpq client that it's allowed to send `CopyData` messages pgb.write_message(&BeMessage::CopyBothResponse).await?; @@ -256,6 +260,7 @@ impl SafekeeperPostgresHandler { // sends, so this avoids deadlocks. let mut pgb_reader = pgb.split().context("START_WAL_PUSH split")?; let peer_addr = *pgb.get_peer_addr(); + let mut network_reader = NetworkReader { ttid: self.ttid, conn_id: self.conn_id, @@ -275,10 +280,14 @@ impl SafekeeperPostgresHandler { .subscribe(); *tli = Some(timeline.wal_residence_guard().await?); + let timeline_cancel = timeline.cancel.clone(); tokio::select! 
{ // todo: add read|write .context to these errors r = network_reader.run(msg_tx, msg_rx, reply_tx, timeline, next_msg) => r, r = network_write(pgb, reply_rx, pageserver_feedback_rx) => r, + _ = timeline_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } } } else { res.map(|_| ()) @@ -303,7 +312,7 @@ impl SafekeeperPostgresHandler { // Otherwise, WalAcceptor thread must have errored. match wal_acceptor_res { - Ok(Ok(_)) => Ok(()), // can't happen currently; would be if we add graceful termination + Ok(Ok(_)) => Ok(()), // Clean shutdown Ok(Err(e)) => Err(CopyStreamHandlerEnd::Other(e.context("WAL acceptor"))), Err(_) => Err(CopyStreamHandlerEnd::Other(anyhow!( "WalAcceptor task panicked", @@ -356,6 +365,7 @@ impl<'a, IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'a, IO> { Ok((tli, next_msg)) } + /// This function is cancellation-safe (only does network I/O and channel read/writes). async fn run( self, msg_tx: Sender, @@ -397,6 +407,7 @@ async fn read_network_loop( loop { let started = Instant::now(); let size = next_msg.size(); + match msg_tx.send_timeout(next_msg, SLOW_THRESHOLD).await { Ok(()) => {} // Slow send, log a message and keep trying. Log context has timeline ID. @@ -428,6 +439,8 @@ async fn read_network_loop( /// Read replies from WalAcceptor and pass them back to socket. Returns Ok(()) /// if reply_rx closed; it must mean WalAcceptor terminated, joining it should /// tell the error. +/// +/// This function is cancellation-safe (only does network I/O and channel read/writes). async fn network_write( pgb_writer: &mut PostgresBackend, mut reply_rx: Receiver, @@ -461,7 +474,7 @@ async fn network_write( Some(AcceptorProposerMessage::AppendResponse(append_response)) } _ => None, - } + }, }; let Some(msg) = msg else { @@ -527,6 +540,10 @@ impl WalAcceptor { /// The main loop. Returns Ok(()) if either msg_rx or reply_tx got closed; /// it must mean that network thread terminated. + /// + /// This function is *not* cancellation safe, it does local disk I/O: it should always + /// be allowed to run to completion. It respects Timeline::cancel and shuts down cleanly + /// when that gets triggered. async fn run(&mut self) -> anyhow::Result<()> { let walreceiver_guard = self.tli.get_walreceivers().register(self.conn_id); @@ -541,7 +558,7 @@ impl WalAcceptor { // Tracks whether we have unflushed appends. let mut dirty = false; - loop { + while !self.tli.is_cancelled() { let reply = tokio::select! { // Process inbound message. msg = self.msg_rx.recv() => { @@ -599,6 +616,10 @@ impl WalAcceptor { WAL_RECEIVER_QUEUE_DEPTH.observe(self.msg_rx.len() as f64); None // no reply } + + _ = self.tli.cancel.cancelled() => { + break; + } }; // Send reply, if any. @@ -610,7 +631,7 @@ impl WalAcceptor { } // Flush WAL on disconnect, see https://github.com/neondatabase/neon/issues/9259. - if dirty { + if dirty && !self.tli.cancel.is_cancelled() { self.tli .process_msg(&ProposerAcceptorMessage::FlushWAL) .await?; diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 6d94ff98b1..aa65ec851b 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -456,6 +456,8 @@ impl SafekeeperPostgresHandler { // not synchronized with sends, so this avoids deadlocks. 
let reader = pgb.split().context("START_REPLICATION split")?; + let tli_cancel = tli.cancel.clone(); + let mut sender = WalSender { pgb, // should succeed since we're already holding another guard @@ -479,6 +481,9 @@ impl SafekeeperPostgresHandler { // todo: add read|write .context to these errors r = sender.run() => r, r = reply_reader.run() => r, + _ = tli_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } }; let ws_state = ws_guard @@ -557,6 +562,7 @@ impl WalSender<'_, IO> { /// Send WAL until /// - an error occurs /// - receiver is caughtup and there is no computes (if streaming up to commit_lsn) + /// - timeline's cancellation token fires /// /// Err(CopyStreamHandlerEnd) is always returned; Result is used only for ? /// convenience. @@ -601,15 +607,14 @@ impl WalSender<'_, IO> { }; let send_buf = &send_buf[..send_size]; - // and send it - self.pgb - .write_message(&BeMessage::XLogData(XLogDataBody { - wal_start: self.start_pos.0, - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - data: send_buf, - })) - .await?; + // and send it, while respecting Timeline::cancel + let msg = BeMessage::XLogData(XLogDataBody { + wal_start: self.start_pos.0, + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + data: send_buf, + }); + self.pgb.write_message(&msg).await?; if let Some(appname) = &self.appname { if appname == "replica" { @@ -674,13 +679,13 @@ impl WalSender<'_, IO> { } } - self.pgb - .write_message(&BeMessage::KeepAlive(WalSndKeepAlive { - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - request_reply: true, - })) - .await?; + let msg = BeMessage::KeepAlive(WalSndKeepAlive { + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + request_reply: true, + }); + + self.pgb.write_message(&msg).await?; } } diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 85add6bfea..ef928f7633 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use tokio::fs::{self}; use tokio_util::sync::CancellationToken; use utils::id::TenantId; +use utils::sync::gate::Gate; use std::cmp::max; use std::ops::{Deref, DerefMut}; @@ -467,6 +468,10 @@ pub struct Timeline { timeline_dir: Utf8PathBuf, manager_ctl: ManagerCtl, + /// Hold this gate from code that depends on the Timeline's non-shut-down state. While holding + /// this gate, you must respect [`Timeline::cancel`] + pub(crate) gate: Gate, + /// Delete/cancel will trigger this, background tasks should drop out as soon as it fires pub(crate) cancel: CancellationToken, @@ -508,6 +513,7 @@ impl Timeline { mutex: RwLock::new(shared_state), walsenders: WalSenders::new(walreceivers.clone()), walreceivers, + gate: Default::default(), cancel: CancellationToken::default(), manager_ctl: ManagerCtl::new(), broker_active: AtomicBool::new(false), @@ -533,56 +539,6 @@ impl Timeline { )) } - /// Initialize fresh timeline on disk and start background tasks. If init - /// fails, timeline is cancelled and cannot be used anymore. - /// - /// Init is transactional, so if it fails, created files will be deleted, - /// and state on disk should remain unchanged. - pub async fn init_new( - self: &Arc, - shared_state: &mut WriteGuardSharedState<'_>, - conf: &SafeKeeperConf, - broker_active_set: Arc, - partial_backup_rate_limiter: RateLimiter, - ) -> Result<()> { - match fs::metadata(&self.timeline_dir).await { - Ok(_) => { - // Timeline directory exists on disk, we should leave state unchanged - // and return error. 
- bail!(TimelineError::Invalid(self.ttid)); - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} - Err(e) => { - return Err(e.into()); - } - } - - // Create timeline directory. - fs::create_dir_all(&self.timeline_dir).await?; - - // Write timeline to disk and start background tasks. - if let Err(e) = shared_state.sk.state_mut().flush().await { - // Bootstrap failed, cancel timeline and remove timeline directory. - self.cancel(shared_state); - - if let Err(fs_err) = fs::remove_dir_all(&self.timeline_dir).await { - warn!( - "failed to remove timeline {} directory after bootstrap failure: {}", - self.ttid, fs_err - ); - } - - return Err(e); - } - self.bootstrap( - shared_state, - conf, - broker_active_set, - partial_backup_rate_limiter, - ); - Ok(()) - } - /// Bootstrap new or existing timeline starting background tasks. pub fn bootstrap( self: &Arc, @@ -593,33 +549,61 @@ impl Timeline { ) { let (tx, rx) = self.manager_ctl.bootstrap_manager(); + let Ok(gate_guard) = self.gate.enter() else { + // Init raced with shutdown + return; + }; + // Start manager task which will monitor timeline state and update // background tasks. - tokio::spawn(timeline_manager::main_task( - ManagerTimeline { tli: self.clone() }, - conf.clone(), - broker_active_set, - tx, - rx, - partial_backup_rate_limiter, - )); + tokio::spawn({ + let this = self.clone(); + let conf = conf.clone(); + async move { + let _gate_guard = gate_guard; + timeline_manager::main_task( + ManagerTimeline { tli: this }, + conf, + broker_active_set, + tx, + rx, + partial_backup_rate_limiter, + ) + .await + } + }); + } + + /// Background timeline activities (which hold Timeline::gate) will no + /// longer run once this function completes. + pub async fn shutdown(&self) { + info!("timeline {} shutting down", self.ttid); + self.cancel.cancel(); + + // Wait for any concurrent tasks to stop using this timeline, to avoid e.g. attempts + // to read deleted files. + self.gate.close().await; } /// Delete timeline from disk completely, by removing timeline directory. - /// Background timeline activities will stop eventually. /// /// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but /// deletion API endpoint is retriable. + /// + /// Timeline must be in shut-down state (i.e. call [`Self::shutdown`] first) pub async fn delete( &self, shared_state: &mut WriteGuardSharedState<'_>, only_local: bool, ) -> Result { - self.cancel(shared_state); + // Assert that [`Self::shutdown`] was already called + assert!(self.cancel.is_cancelled()); + assert!(self.gate.close_complete()); + + // Close associated FDs. Nobody will be able to touch timeline data once + // it is cancelled, so WAL storage won't be opened again. + shared_state.sk.close_wal_store(); - // TODO: It's better to wait for s3 offloader termination before - // removing data from s3. Though since s3 doesn't have transactions it - // still wouldn't guarantee absense of data after removal. let conf = GlobalTimelines::get_global_config(); if !only_local && conf.is_wal_backup_enabled() { // Note: we concurrently delete remote storage data from multiple @@ -631,16 +615,6 @@ impl Timeline { Ok(dir_existed) } - /// Cancel timeline to prevent further usage. Background tasks will stop - /// eventually after receiving cancellation signal. - fn cancel(&self, shared_state: &mut WriteGuardSharedState<'_>) { - info!("timeline {} is cancelled", self.ttid); - self.cancel.cancel(); - // Close associated FDs. 
Nobody will be able to touch timeline data once - // it is cancelled, so WAL storage won't be opened again. - shared_state.sk.close_wal_store(); - } - /// Returns if timeline is cancelled. pub fn is_cancelled(&self) -> bool { self.cancel.is_cancelled() diff --git a/safekeeper/src/timeline_guard.rs b/safekeeper/src/timeline_guard.rs index 1ddac573d2..9102a40df8 100644 --- a/safekeeper/src/timeline_guard.rs +++ b/safekeeper/src/timeline_guard.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use tracing::debug; +use utils::sync::gate::GateGuard; use crate::timeline_manager::ManagerCtlMessage; @@ -16,6 +17,12 @@ pub struct GuardId(u64); pub struct ResidenceGuard { manager_tx: tokio::sync::mpsc::UnboundedSender, guard_id: GuardId, + + /// [`ResidenceGuard`] represents a guarantee that a timeline's data remains resident, + /// which by extension also means the timeline is not shut down (since after shut down + /// our data may be deleted). Therefore everyone holding a residence guard must also + /// hold a guard on [`crate::timeline::Timeline::gate`] + _gate_guard: GateGuard, } impl Drop for ResidenceGuard { @@ -52,7 +59,8 @@ impl AccessService { self.guards.is_empty() } - pub(crate) fn create_guard(&mut self) -> ResidenceGuard { + /// `timeline_gate_guard` is a guarantee that the timeline is not shut down + pub(crate) fn create_guard(&mut self, timeline_gate_guard: GateGuard) -> ResidenceGuard { let guard_id = self.next_guard_id; self.next_guard_id += 1; self.guards.insert(guard_id); @@ -63,6 +71,7 @@ impl AccessService { ResidenceGuard { manager_tx: self.manager_tx.clone(), guard_id, + _gate_guard: timeline_gate_guard, } } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index e9fed21bf5..c02fb904cf 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -266,8 +266,10 @@ pub async fn main_task( // Start recovery task which always runs on the timeline. if !mgr.is_offloaded && mgr.conf.peer_recovery_enabled { - let tli = mgr.wal_resident_timeline(); - mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + // Recovery task is only spawned if we can get a residence guard (i.e. timeline is not already shutting down) + if let Ok(tli) = mgr.wal_resident_timeline() { + mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + } } // If timeline is evicted, reflect that in the metric. @@ -375,6 +377,13 @@ pub async fn main_task( // shutdown background tasks if mgr.conf.is_wal_backup_enabled() { + if let Some(backup_task) = mgr.backup_task.take() { + // If we fell through here, then the timeline is shutting down. This is important + // because otherwise joining on the wal_backup handle might hang. + assert!(mgr.tli.cancel.is_cancelled()); + + backup_task.join().await; + } wal_backup::update_task(&mut mgr, false, &last_state).await; } @@ -442,10 +451,18 @@ impl Manager { /// Get a WalResidentTimeline. /// Manager code must use this function instead of one from `Timeline` /// directly, because it will deadlock. - pub(crate) fn wal_resident_timeline(&mut self) -> WalResidentTimeline { + /// + /// This function is fallible because the guard may not be created if the timeline is + /// shutting down. 
+ pub(crate) fn wal_resident_timeline(&mut self) -> anyhow::Result { assert!(!self.is_offloaded); - let guard = self.access_service.create_guard(); - WalResidentTimeline::new(self.tli.clone(), guard) + let guard = self.access_service.create_guard( + self.tli + .gate + .enter() + .map_err(|_| anyhow::anyhow!("Timeline shutting down"))?, + ); + Ok(WalResidentTimeline::new(self.tli.clone(), guard)) } /// Get a snapshot of the timeline state. @@ -559,6 +576,11 @@ impl Manager { if removal_horizon_segno > self.last_removed_segno { // we need to remove WAL + let Ok(timeline_gate_guard) = self.tli.gate.enter() else { + tracing::info!("Timeline shutdown, not spawning WAL removal task"); + return; + }; + let remover = match self.tli.read_shared_state().await.sk { StateSK::Loaded(ref sk) => { crate::wal_storage::Storage::remove_up_to(&sk.wal_store, removal_horizon_segno) @@ -573,6 +595,8 @@ impl Manager { self.wal_removal_task = Some(tokio::spawn( async move { + let _timeline_gate_guard = timeline_gate_guard; + remover.await?; Ok(removal_horizon_segno) } @@ -619,10 +643,15 @@ impl Manager { return; } + let Ok(resident) = self.wal_resident_timeline() else { + // Shutting down + return; + }; + // Get WalResidentTimeline and start partial backup task. let cancel = CancellationToken::new(); let handle = tokio::spawn(wal_backup_partial::main_task( - self.wal_resident_timeline(), + resident, self.conf.clone(), self.global_rate_limiter.clone(), cancel.clone(), @@ -664,7 +693,7 @@ impl Manager { self.partial_backup_task = None; } - let tli = self.wal_resident_timeline(); + let tli = self.wal_resident_timeline()?; let mut partial_backup = PartialBackup::new(tli, self.conf.clone()).await; // Reset might fail e.g. when cfile is already reset but s3 removal // failed, so set manager state to None beforehand. In any case caller @@ -688,7 +717,12 @@ impl Manager { let guard = if self.is_offloaded { Err(anyhow::anyhow!("timeline is offloaded, can't get a guard")) } else { - Ok(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Ok(self.access_service.create_guard(gate_guard)), + Err(_) => Err(anyhow::anyhow!( + "timeline is shutting down, can't get a guard" + )), + } }; if tx.send(guard).is_err() { @@ -699,7 +733,10 @@ impl Manager { let result = if self.is_offloaded { None } else { - Some(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Some(self.access_service.create_guard(gate_guard)), + Err(_) => None, + } }; if tx.send(result).is_err() { diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 33d94da034..067945fd5f 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -457,10 +457,12 @@ impl GlobalTimelines { Ok(timeline) => { let was_active = timeline.broker_active.load(Ordering::Relaxed); + info!("deleting timeline {}, only_local={}", ttid, only_local); + timeline.shutdown().await; + // Take a lock and finish the deletion holding this mutex. 
let mut shared_state = timeline.write_shared_state().await; - info!("deleting timeline {}, only_local={}", ttid, only_local); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; Ok(TimelineDeleteForceResult { diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 6c87e5a926..34b5dbeaa1 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -25,7 +25,6 @@ use tokio::fs::File; use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::{watch, OnceCell}; -use tokio::time::sleep; use tracing::*; use utils::{id::TenantTimelineId, lsn::Lsn}; @@ -46,6 +45,14 @@ pub struct WalBackupTaskHandle { handle: JoinHandle<()>, } +impl WalBackupTaskHandle { + pub(crate) async fn join(self) { + if let Err(e) = self.handle.await { + error!("WAL backup task panicked: {}", e); + } + } +} + /// Do we have anything to upload to S3, i.e. should safekeepers run backup activity? pub(crate) fn is_wal_backup_required( wal_seg_size: usize, @@ -74,11 +81,12 @@ pub(crate) async fn update_task(mgr: &mut Manager, need_backup: bool, state: &St let (shutdown_tx, shutdown_rx) = mpsc::channel(1); - let async_task = backup_task_main( - mgr.wal_resident_timeline(), - mgr.conf.backup_parallel_jobs, - shutdown_rx, - ); + let Ok(resident) = mgr.wal_resident_timeline() else { + info!("Timeline shut down"); + return; + }; + + let async_task = backup_task_main(resident, mgr.conf.backup_parallel_jobs, shutdown_rx); let handle = if mgr.conf.current_thread_runtime { tokio::spawn(async_task) @@ -108,9 +116,7 @@ async fn shut_down_task(entry: &mut Option) { // Tell the task to shutdown. Error means task exited earlier, that's ok. let _ = wb_handle.shutdown_tx.send(()).await; // Await the task itself. TODO: restart panicked tasks earlier. - if let Err(e) = wb_handle.handle.await { - warn!("WAL backup task panicked: {}", e); - } + wb_handle.join().await; } } @@ -214,6 +220,7 @@ async fn backup_task_main( let _guard = WAL_BACKUP_TASKS.guard(); info!("started"); + let cancel = tli.tli.cancel.clone(); let mut wb = WalBackupTask { wal_seg_size: tli.get_wal_seg_size().await, commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(), @@ -230,25 +237,34 @@ async fn backup_task_main( _ = wb.run() => {} _ = shutdown_rx.recv() => { canceled = true; + }, + _ = cancel.cancelled() => { + canceled = true; } } info!("task {}", if canceled { "canceled" } else { "terminated" }); } impl WalBackupTask { + /// This function must be called from a select! that also respects self.timeline's + /// cancellation token. This is done in [`backup_task_main`]. + /// + /// The future returned by this function is safe to drop at any time because it + /// does not write to local disk. async fn run(&mut self) { let mut backup_lsn = Lsn(0); let mut retry_attempt = 0u32; // offload loop - loop { + while !self.timeline.cancel.is_cancelled() { if retry_attempt == 0 { // wait for new WAL to arrive if let Err(e) = self.commit_lsn_watch_rx.changed().await { - // should never happen, as we hold Arc to timeline. 
+ // should never happen, as we hold Arc to timeline and transmitter's lifetime + // is within Timeline's error!("commit_lsn watch shut down: {:?}", e); return; - } + }; } else { // or just sleep if we errored previously let mut retry_delay = UPLOAD_FAILURE_RETRY_MAX_MS; @@ -256,7 +272,7 @@ impl WalBackupTask { { retry_delay = min(retry_delay, backoff_delay); } - sleep(Duration::from_millis(retry_delay)).await; + tokio::time::sleep(Duration::from_millis(retry_delay)).await; } let commit_lsn = *self.commit_lsn_watch_rx.borrow(); diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 0676b3dd9a..6eaaa3c37f 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1784,6 +1784,89 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): cur.execute("INSERT INTO t (key) VALUES (123)") +def test_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder): + """ + Test deleting timelines on a safekeeper while they're under load. + + This should not happen under normal operation, but it can happen if + there is some rogue compute/pageserver that is writing/reading to a + safekeeper that we're migrating a timeline away from, or if the timeline + is being deleted while such a rogue client is running. + """ + neon_env_builder.auth_enabled = True + env = neon_env_builder.init_start() + + # Create two endpoints that will generate load + timeline_id_a = env.create_branch("deleteme_a") + timeline_id_b = env.create_branch("deleteme_b") + + endpoint_a = env.endpoints.create("deleteme_a") + endpoint_a.start() + endpoint_b = env.endpoints.create("deleteme_b") + endpoint_b.start() + + # Get tenant and timeline IDs + tenant_id = env.initial_tenant + + # Start generating load on both timelines + def generate_load(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") + while True: + try: + cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'data'") + except: # noqa + # Ignore errors since timeline may be deleted + break + + t_a = threading.Thread(target=generate_load, args=(endpoint_a,)) + t_b = threading.Thread(target=generate_load, args=(endpoint_b,)) + try: + t_a.start() + t_b.start() + + # Let the load run for a bit + log.info("Warming up...") + time.sleep(2) + + # Safekeeper errors will propagate to the pageserver: it is correct that these are + # logged at error severity because they indicate the pageserver is trying to read + # a timeline that it shouldn't. 
+ env.pageserver.allowed_errors.extend( + [ + ".*Timeline.*was cancelled.*", + ".*Timeline.*was not found.*", + ] + ) + + # Try deleting timelines while under load + sk = env.safekeepers[0] + sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + + # Delete first timeline + log.info(f"Deleting {timeline_id_a}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_a, only_local=True)["dir_existed"] + + # Delete second timeline + log.info(f"Deleting {timeline_id_b}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_b, only_local=True)["dir_existed"] + + # Verify timelines are gone from disk + sk_data_dir = sk.data_dir + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_a)).exists() + # assert not (sk_data_dir / str(tenant_id) / str(timeline_id_b)).exists() + + finally: + log.info("Stopping endpoints...") + # Stop endpoints with immediate mode because we deleted the timeline out from under the compute, which may cause it to hang + endpoint_a.stop(mode="immediate") + endpoint_b.stop(mode="immediate") + log.info("Joining threads...") + t_a.join() + t_b.join() + + # Basic pull_timeline test. # When live_sk_change is False, compute is restarted to change set of # safekeepers; otherwise it is live reload. From 94e4a0e2a0d43e066bd006a68eb147333ab0d074 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Wed, 20 Nov 2024 13:04:14 +0100 Subject: [PATCH 40/43] update macos version for runner (#9817) Closes: https://github.com/neondatabase/neon/issues/9816 Run MacOs builds on `macos-15`. As `pkg-config` is bundled in runner image, don't install it with `brew` --- .github/workflows/neon_extra_builds.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index e827539c80..092831adb9 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -38,7 +38,7 @@ jobs: contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || github.ref_name == 'main' timeout-minutes: 90 - runs-on: macos-14 + runs-on: macos-15 env: # Use release build only, to have less debug info around @@ -52,7 +52,7 @@ jobs: submodules: true - name: Install macOS postgres dependencies - run: brew install flex bison openssl protobuf icu4c pkg-config + run: brew install flex bison openssl protobuf icu4c - name: Set pg 14 revision for caching id: pg_v14_rev From 46beecacce50bf1d113dbb6f31fe2283a598adf7 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 12:23:41 +0000 Subject: [PATCH 41/43] CI(benchmarking): route test failures to on-call-qa-staging-stream (#9813) ## Problem We want to keep `#on-call-staging-stream` channel close to the prod one and redirect notifications from failing benchmarks to another channel for investigation. 
## Summary of changes - Send notifications regarding failures in `benchmarking` job to `#on-call-staging-stream` - Send notifications regarding failures in `periodic_pagebench` job to `#on-call-staging-stream` --- .github/workflows/benchmarking.yml | 12 ++++++------ .github/workflows/periodic_pagebench.yml | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 0289f552f9..acea859b4d 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -158,7 +158,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -506,7 +506,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -643,7 +643,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -759,7 +759,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -874,7 +874,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -974,7 +974,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 615937b5a1..1cce348ae2 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -72,7 +72,7 @@ jobs: echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV fi - - name: Start Bench with run_id + - name: Start Bench with run_id run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \ @@ -116,7 +116,7 @@ jobs: -H 'accept: application/gzip' \ -H "Authorization: Bearer $API_KEY" \ --output 
"test_log_${GITHUB_RUN_ID}.gz" - + - name: Unzip Test Log and Print it into this job's log if: always() && steps.poll_step.outputs.too_many_runs != 'true' run: | @@ -134,13 +134,13 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - name: Cleanup Test Resources - if: always() + if: always() run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \ From 899933e159b56d8cfe92995befff8b37d6eb55b8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 12:48:21 +0000 Subject: [PATCH 42/43] scan_log_for_errors: check that regex is correct (#9815) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem I've noticed that we have 2 flaky tests which failed with error: ``` re.error: missing ), unterminated subpattern at position 21 ``` - `test_timeline_archival_chaos` — has been already fixed - `test_sharded_tad_interleaved_after_partial_success` — I didn't manage to find the incorrect regex [Internal link](https://neonprod.grafana.net/goto/yfmVHV7NR?orgId=1) ## Summary of changes - Wrap `re.match` in `try..except` block and print incorrect regex --- test_runner/fixtures/pageserver/allowed_errors.py | 10 ++++++++-- test_runner/fixtures/utils.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index d05704c8e0..5059039678 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -25,8 +25,14 @@ def scan_pageserver_log_for_errors( # It's an ERROR or WARN. Is it in the allow-list? for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + print(f"Invalid regex: '{a}'", file=sys.stderr) + raise else: errors.append((lineno, line)) return errors diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 96a651f0db..bb45385ea6 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -495,8 +495,14 @@ def scan_log_for_errors(input: Iterable[str], allowed_errors: list[str]) -> list # It's an ERROR or WARN. Is it in the allow-list? for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + log.error(f"Invalid regex: '{a}'") + raise else: errors.append((lineno, line)) return errors From bf7d859a8bdb26a6ac4ce1c17fec948d7bcecdcb Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Wed, 20 Nov 2024 13:50:36 +0100 Subject: [PATCH 43/43] proxy: Rename RequestMonitoring to RequestContext (#9805) ## Problem It is called context/ctx everywhere and the Monitoring suffix needlessly confuses with proper monitoring code. 
## Summary of changes * Rename RequestMonitoring to RequestContext * Rename RequestMonitoringInner to RequestContextInner --- proxy/src/auth/backend/classic.rs | 4 +-- proxy/src/auth/backend/console_redirect.rs | 8 +++--- proxy/src/auth/backend/hacks.rs | 6 ++-- proxy/src/auth/backend/jwt.rs | 26 +++++++++--------- proxy/src/auth/backend/local.rs | 4 +-- proxy/src/auth/backend/mod.rs | 32 +++++++++++----------- proxy/src/auth/credentials.rs | 30 ++++++++++---------- proxy/src/auth/flow.rs | 4 +-- proxy/src/bin/pg_sni_router.rs | 8 +++--- proxy/src/cache/endpoints.rs | 4 +-- proxy/src/compute.rs | 4 +-- proxy/src/console_redirect_proxy.rs | 6 ++-- proxy/src/context/mod.rs | 22 +++++++-------- proxy/src/context/parquet.rs | 6 ++-- proxy/src/control_plane/client/mock.rs | 10 +++---- proxy/src/control_plane/client/mod.rs | 12 ++++---- proxy/src/control_plane/client/neon.rs | 16 +++++------ proxy/src/control_plane/mod.rs | 12 ++++---- proxy/src/proxy/connect_compute.rs | 10 +++---- proxy/src/proxy/handshake.rs | 4 +-- proxy/src/proxy/mod.rs | 6 ++-- proxy/src/proxy/tests/mitm.rs | 2 +- proxy/src/proxy/tests/mod.rs | 27 +++++++++--------- proxy/src/proxy/wake_compute.rs | 4 +-- proxy/src/serverless/backend.rs | 16 +++++------ proxy/src/serverless/conn_pool.rs | 4 +-- proxy/src/serverless/conn_pool_lib.rs | 4 +-- proxy/src/serverless/http_conn_pool.rs | 6 ++-- proxy/src/serverless/local_conn_pool.rs | 6 ++-- proxy/src/serverless/mod.rs | 6 ++-- proxy/src/serverless/sql_over_http.rs | 12 ++++---- proxy/src/serverless/websocket.rs | 4 +-- 32 files changed, 162 insertions(+), 163 deletions(-) diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 87a02133c8..491b272ac4 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -5,13 +5,13 @@ use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::stream::{PqStream, Stream}; use crate::{compute, sasl}; pub(super) async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, creds: ComputeUserInfo, client: &mut PqStream>, config: &'static AuthenticationConfig, diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index e25dc3d45e..5772471486 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -8,7 +8,7 @@ use tracing::{info, info_span}; use super::ComputeCredentialKeys; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; use crate::error::{ReportableError, UserFacingError}; use crate::proxy::connect_compute::ComputeConnectBackend; @@ -71,7 +71,7 @@ impl ConsoleRedirectBackend { pub(crate) async fn authenticate( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, client: &mut PqStream, ) -> auth::Result { @@ -87,7 +87,7 @@ pub struct ConsoleRedirectNodeInfo(pub(super) NodeInfo); impl ComputeConnectBackend for ConsoleRedirectNodeInfo { async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, ) -> Result { Ok(Cached::new_uncached(self.0.clone())) } @@ -98,7 +98,7 @@ impl ComputeConnectBackend for 
ConsoleRedirectNodeInfo { } async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, link_uri: &reqwest::Url, client: &mut PqStream, diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index e651df1d34..3316543022 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -4,7 +4,7 @@ use tracing::{debug, info}; use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint}; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -15,7 +15,7 @@ use crate::stream::{self, Stream}; /// These properties are benefical for serverless JS workers, so we /// use this mechanism for websocket connections. pub(crate) async fn authenticate_cleartext( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfo, client: &mut stream::PqStream>, secret: AuthSecret, @@ -57,7 +57,7 @@ pub(crate) async fn authenticate_cleartext( /// Similar to [`authenticate_cleartext`], but there's a specific password format, /// and passwords are not yet validated (we don't know how to validate them!) pub(crate) async fn password_hack_no_authentication( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfoNoEndpoint, client: &mut stream::PqStream>, ) -> auth::Result<(ComputeUserInfo, Vec)> { diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index bfc674139b..f721d81aa2 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -17,7 +17,7 @@ use thiserror::Error; use tokio::time::Instant; use crate::auth::backend::ComputeCredentialKeys; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::GetEndpointJwksError; use crate::http::read_body_with_limit; use crate::intern::RoleNameInt; @@ -39,7 +39,7 @@ const JWKS_FETCH_RETRIES: u32 = 3; pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static { fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> impl Future, FetchAuthRulesError>> + Send; } @@ -144,7 +144,7 @@ impl JwkCacheEntryLock { async fn renew_jwks( &self, _permit: JwkRenewalPermit<'_>, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, auth_rules: &F, @@ -261,7 +261,7 @@ impl JwkCacheEntryLock { async fn get_or_update_jwk_cache( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, fetch: &F, @@ -314,7 +314,7 @@ impl JwkCacheEntryLock { async fn check_jwt( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, jwt: &str, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, @@ -409,7 +409,7 @@ impl JwkCacheEntryLock { impl JwkCache { pub(crate) async fn check_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, role_name: &RoleName, fetch: &F, @@ -941,7 +941,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL impl FetchAuthRules for Fetch { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { Ok(self.0.clone()) @@ -1039,7 +1039,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL 
for token in &tokens { jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), role, &fetch, @@ -1097,7 +1097,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, @@ -1136,7 +1136,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &role, &fetch, &bad_jwt) .await @@ -1175,7 +1175,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL // this role_name is not accepted let bad_role_name = RoleName::from("cloud_admin"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt) .await @@ -1268,7 +1268,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); for test in table { let jwt = new_custom_ec_jwt("1".into(), &key, test.body); @@ -1336,7 +1336,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index f9cb085daf..32e0f53615 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -7,7 +7,7 @@ use super::jwt::{AuthRule, FetchAuthRules}; use crate::auth::backend::jwt::FetchAuthRulesError; use crate::compute::ConnCfg; use crate::compute_ctl::ComputeCtlApi; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}; use crate::control_plane::NodeInfo; use crate::http; @@ -56,7 +56,7 @@ pub static JWKS_ROLE_MAP: ArcSwapOption = ArcSwapOption::c impl FetchAuthRules for StaticAuthRules { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { let mappings = JWKS_ROLE_MAP.load(); diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 83c72e7be0..57ecd5e499 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -20,7 +20,7 @@ use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ControlPlaneClient; use crate::control_plane::errors::GetAuthInfoError; use crate::control_plane::{ @@ -210,7 +210,7 @@ impl RateBucketInfo { impl AuthenticationConfig { pub(crate) fn check_rate_limit( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, endpoint: &EndpointId, is_cleartext: bool, @@ -265,7 +265,7 @@ impl AuthenticationConfig { /// /// All authentication flows will emit an AuthenticationOk message if successful. 
async fn auth_quirks( - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &impl control_plane::ControlPlaneApi, user_info: ComputeUserInfoMaybeEndpoint, client: &mut stream::PqStream>, @@ -343,7 +343,7 @@ async fn auth_quirks( } async fn authenticate_with_secret( - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, info: ComputeUserInfo, client: &mut stream::PqStream>, @@ -396,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)] pub(crate) async fn authenticate( self, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, @@ -436,7 +436,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await, @@ -446,7 +446,7 @@ impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { match self { Self::ControlPlane(api, user_info) => { @@ -461,7 +461,7 @@ impl Backend<'_, ComputeUserInfo> { impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await, @@ -497,7 +497,7 @@ mod tests { use crate::auth::backend::MaskedIp; use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern}; use crate::config::AuthenticationConfig; - use crate::context::RequestMonitoring; + use crate::context::RequestContext; use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret}; use crate::proxy::NeonOptions; use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo}; @@ -513,7 +513,7 @@ mod tests { impl control_plane::ControlPlaneApi for Auth { async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) @@ -521,7 +521,7 @@ mod tests { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result< (CachedAllowedIps, Option), @@ -535,7 +535,7 @@ mod tests { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: crate::types::EndpointId, ) -> Result, control_plane::errors::GetEndpointJwksError> { @@ -544,7 +544,7 @@ mod tests { async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { unimplemented!() @@ -623,7 +623,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -700,7 +700,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: 
AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -752,7 +752,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index dab9007400..f6bce9f2d8 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -10,7 +10,7 @@ use thiserror::Error; use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ReportableError, UserFacingError}; use crate::metrics::{Metrics, SniKind}; use crate::proxy::NeonOptions; @@ -86,7 +86,7 @@ pub(crate) fn endpoint_sni( impl ComputeUserInfoMaybeEndpoint { pub(crate) fn parse( - ctx: &RequestMonitoring, + ctx: &RequestContext, params: &StartupMessageParams, sni: Option<&str>, common_names: Option<&HashSet>, @@ -260,7 +260,7 @@ mod tests { fn parse_bare_minimum() -> anyhow::Result<()> { // According to postgresql, only `user` should be required. let options = StartupMessageParams::new([("user", "john_doe")]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -275,7 +275,7 @@ mod tests { ("database", "world"), // should be ignored ("foo", "bar"), // should be ignored ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -290,7 +290,7 @@ mod tests { let sni = Some("foo.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -307,7 +307,7 @@ mod tests { ("options", "-ckey=1 project=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -322,7 +322,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -340,7 +340,7 @@ mod tests { ), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert!(user_info.endpoint_id.is_none()); @@ -355,7 +355,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); 
assert!(user_info.endpoint_id.is_none()); @@ -370,7 +370,7 @@ mod tests { let sni = Some("baz.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -385,14 +385,14 @@ mod tests { let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.a.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.b.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); @@ -408,7 +408,7 @@ mod tests { let sni = Some("second.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -427,7 +427,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["example.com".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -447,7 +447,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("project")); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 1740b59b14..9c6ce151cb 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -11,7 +11,7 @@ use tracing::info; use super::backend::ComputeCredentialKeys; use super::{AuthError, PasswordHackPayload}; use crate::config::TlsServerEndPoint; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -32,7 +32,7 @@ pub(crate) struct Begin; /// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`]. 
pub(crate) struct Scram<'a>( pub(crate) &'a scram::ServerSecret, - pub(crate) &'a RequestMonitoring, + pub(crate) &'a RequestContext, ); impl AuthMethod for Scram<'_> { diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index ef5b5e8509..623a0fd3b2 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -11,7 +11,7 @@ use futures::future::Either; use futures::TryFutureExt; use itertools::Itertools; use proxy::config::TlsServerEndPoint; -use proxy::context::RequestMonitoring; +use proxy::context::RequestContext; use proxy::metrics::{Metrics, ThreadPoolMetrics}; use proxy::protocol2::ConnectionInfo; use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; @@ -177,7 +177,7 @@ async fn task_main( .context("failed to set socket option")?; info!(%peer_addr, "serving"); - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, ConnectionInfo { addr: peer_addr, @@ -208,7 +208,7 @@ async fn task_main( const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; async fn ssl_handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, raw_stream: S, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, @@ -259,7 +259,7 @@ async fn ssl_handshake( } async fn handle_client( - ctx: RequestMonitoring, + ctx: RequestContext, dest_suffix: Arc, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 07769e053c..20db1fbb14 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken; use tracing::info; use crate::config::EndpointCacheConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; use crate::rate_limiter::GlobalRateLimiter; @@ -75,7 +75,7 @@ impl EndpointsCache { } } - pub(crate) fn is_valid(&self, ctx: &RequestMonitoring, endpoint: &EndpointId) -> bool { + pub(crate) fn is_valid(&self, ctx: &RequestContext, endpoint: &EndpointId) -> bool { if !self.ready.load(Ordering::Acquire) { // the endpoint cache is not yet fully initialised. return true; diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index b8876b44eb..e7fbe9ab47 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -18,7 +18,7 @@ use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::messages::MetricsAuxInfo; @@ -286,7 +286,7 @@ impl ConnCfg { /// Connect to a corresponding compute node. 
pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, allow_self_signed_compute: bool, aux: MetricsAuxInfo, timeout: Duration, diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 8e71f552a5..c88b2936db 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -8,7 +8,7 @@ use tracing::{debug, error, info, Instrument}; use crate::auth::backend::ConsoleRedirectBackend; use crate::cancellation::{CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -82,7 +82,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, peer_addr, crate::metrics::Protocol::Tcp, @@ -141,7 +141,7 @@ pub async fn task_main( pub(crate) async fn handle_client( config: &'static ProxyConfig, backend: &'static ConsoleRedirectBackend, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, conn_gauge: NumClientConnectionsGuard<'static>, diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index d057ee0bfd..6d2d2d51ce 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -32,15 +32,15 @@ pub(crate) static LOG_CHAN_DISCONNECT: OnceCell, + TryLock, ); -struct RequestMonitoringInner { +struct RequestContextInner { pub(crate) conn_info: ConnectionInfo, pub(crate) session_id: Uuid, pub(crate) protocol: Protocol, @@ -81,10 +81,10 @@ pub(crate) enum AuthMethod { Cleartext, } -impl Clone for RequestMonitoring { +impl Clone for RequestContext { fn clone(&self) -> Self { let inner = self.0.try_lock().expect("should not deadlock"); - let new = RequestMonitoringInner { + let new = RequestContextInner { conn_info: inner.conn_info.clone(), session_id: inner.session_id, protocol: inner.protocol, @@ -115,7 +115,7 @@ impl Clone for RequestMonitoring { } } -impl RequestMonitoring { +impl RequestContext { pub fn new( session_id: Uuid, conn_info: ConnectionInfo, @@ -132,7 +132,7 @@ impl RequestMonitoring { role = tracing::field::Empty, ); - let inner = RequestMonitoringInner { + let inner = RequestContextInner { conn_info, session_id, protocol, @@ -168,7 +168,7 @@ impl RequestMonitoring { let ip = IpAddr::from([127, 0, 0, 1]); let addr = SocketAddr::new(ip, 5432); let conn_info = ConnectionInfo { addr, extra: None }; - RequestMonitoring::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") + RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") } pub(crate) fn console_application_name(&self) -> String { @@ -325,7 +325,7 @@ impl RequestMonitoring { } pub(crate) struct LatencyTimerPause<'a> { - ctx: &'a RequestMonitoring, + ctx: &'a RequestContext, start: tokio::time::Instant, waiting_for: Waiting, } @@ -341,7 +341,7 @@ impl Drop for LatencyTimerPause<'_> { } } -impl RequestMonitoringInner { +impl RequestContextInner { fn set_cold_start_info(&mut self, info: ColdStartInfo) { self.cold_start_info = info; self.latency_timer.cold_start_info(info); @@ -430,7 +430,7 @@ impl RequestMonitoringInner { } } -impl Drop for RequestMonitoringInner { +impl Drop for RequestContextInner { fn drop(&mut self) { if self.sender.is_some() { self.log_connect(); diff --git a/proxy/src/context/parquet.rs 
b/proxy/src/context/parquet.rs index 4112de646f..9bf3a275bb 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -20,7 +20,7 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, info, Span}; use utils::backoff; -use super::{RequestMonitoringInner, LOG_CHAN}; +use super::{RequestContextInner, LOG_CHAN}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; @@ -117,8 +117,8 @@ impl serde::Serialize for Options<'_> { } } -impl From<&RequestMonitoringInner> for RequestData { - fn from(value: &RequestMonitoringInner) -> Self { +impl From<&RequestContextInner> for RequestData { + fn from(value: &RequestContextInner) -> Self { Self { session_id: value.session_id, peer_addr: value.conn_info.addr.ip().to_string(), diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index fd333d2aac..500acad50f 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -13,7 +13,7 @@ use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret}; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -206,7 +206,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached( @@ -216,7 +216,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { Ok(( @@ -229,7 +229,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(endpoint).await @@ -238,7 +238,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &ComputeUserInfo, ) -> Result { self.do_wake_compute().map_ok(Cached::new_uncached).await diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index 50903e2f1e..f8f74372f0 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -15,7 +15,7 @@ use crate::auth::backend::ComputeUserInfo; use crate::cache::endpoints::EndpointsCache; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{ errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, }; @@ -41,7 +41,7 @@ pub enum ControlPlaneClient { impl ControlPlaneApi for ControlPlaneClient { async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -57,7 +57,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, 
user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError> { match self { @@ -71,7 +71,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError> { match self { @@ -85,7 +85,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -271,7 +271,7 @@ impl WakeComputePermit { impl FetchAuthRules for ControlPlaneClient { async fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { self.get_endpoint_jwks(ctx, endpoint) diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 8f4ae13f33..53f9234926 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -14,7 +14,7 @@ use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeComput use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::caches::ApiCaches; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -65,7 +65,7 @@ impl NeonControlPlaneClient { async fn do_get_auth_info( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { if !self @@ -141,7 +141,7 @@ impl NeonControlPlaneClient { async fn do_get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { if !self @@ -200,7 +200,7 @@ impl NeonControlPlaneClient { async fn do_wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let request_id = ctx.session_id().to_string(); @@ -263,7 +263,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let normalized_ep = &user_info.endpoint.normalize(); @@ -297,7 +297,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { let normalized_ep = &user_info.endpoint.normalize(); @@ -339,7 +339,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(ctx, endpoint).await @@ -348,7 +348,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let key = user_info.endpoint_cache_key(); diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 70607ac0d0..41972e4e44 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -17,7 +17,7 @@ use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use 
crate::auth::IpPattern; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::cache::{Cached, TimedLru}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo}; use crate::intern::ProjectIdInt; use crate::types::{EndpointCacheKey, EndpointId}; @@ -75,7 +75,7 @@ pub(crate) struct NodeInfo { impl NodeInfo { pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, timeout: Duration, ) -> Result { self.config @@ -116,26 +116,26 @@ pub(crate) trait ControlPlaneApi { /// We still have to mock the scram to avoid leaking information that user doesn't exist. async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError>; async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError>; /// Wake up the compute node and return the corresponding connection info. async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 659b7afa68..b30aec09c1 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -7,7 +7,7 @@ use super::retry::ShouldRetryWakeCompute; use crate::auth::backend::ComputeCredentialKeys; use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT}; use crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::locks::ApiLocks; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; @@ -47,7 +47,7 @@ pub(crate) trait ConnectMechanism { type Error: From; async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result; @@ -59,7 +59,7 @@ pub(crate) trait ConnectMechanism { pub(crate) trait ComputeConnectBackend { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result; fn get_keys(&self) -> &ComputeCredentialKeys; @@ -82,7 +82,7 @@ impl ConnectMechanism for TcpMechanism<'_> { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result { @@ -99,7 +99,7 @@ impl ConnectMechanism for TcpMechanism<'_> { /// Try to connect to the compute node, retrying if necessary. 
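`connect_to_compute` (whose signature follows) is generic over the `ConnectMechanism` trait above, so the rename has to touch every implementor: `TcpMechanism` here, `TokioMechanism` and `HyperMechanism` in the serverless backend, and the test-only `TestConnectMechanism`. A self-contained sketch of that shape, with illustrative names and a deliberately simplified, synchronous retry loop (the real function is async, instrumented, and also coordinates `wake_compute` via the `ComputeConnectBackend` trait):

```rust
use std::cell::Cell;
use std::time::Duration;

/// Stand-in for `ConnectMechanism`: the strategy the retry driver is generic over.
trait Mechanism {
    type Connection;
    type Error;

    fn connect_once(&self, timeout: Duration) -> Result<Self::Connection, Self::Error>;
}

/// Simplified stand-in for the retry driver; assumes `max_attempts >= 1`.
fn connect_with_retries<M: Mechanism>(
    mechanism: &M,
    max_attempts: u32,
    timeout: Duration,
) -> Result<M::Connection, M::Error> {
    let mut attempt = 0;
    loop {
        match mechanism.connect_once(timeout) {
            Ok(conn) => return Ok(conn),
            Err(err) if attempt + 1 >= max_attempts => return Err(err),
            Err(_) => attempt += 1,
        }
    }
}

/// Tiny scripted mechanism in the spirit of `TestConnectMechanism`: fail twice, then succeed.
struct FlakyMechanism {
    failures_left: Cell<u32>,
}

impl Mechanism for FlakyMechanism {
    type Connection = ();
    type Error = &'static str;

    fn connect_once(&self, _timeout: Duration) -> Result<(), &'static str> {
        if self.failures_left.get() == 0 {
            Ok(())
        } else {
            self.failures_left.set(self.failures_left.get() - 1);
            Err("transient connection failure")
        }
    }
}

fn main() {
    let mechanism = FlakyMechanism { failures_left: Cell::new(2) };
    assert!(connect_with_retries(&mechanism, 3, Duration::from_secs(1)).is_ok());
}
```

The `ctx` parameter omitted from this sketch is exactly the `&RequestContext` handle being renamed throughout the patch; each `connect_once` implementation receives it so connection attempts can be tied back to the originating client request.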
#[tracing::instrument(skip_all)] pub(crate) async fn connect_to_compute( - ctx: &RequestMonitoring, + ctx: &RequestContext, mechanism: &M, user_info: &B, allow_self_signed_compute: bool, diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs index a67f1b8112..3ada3a9995 100644 --- a/proxy/src/proxy/handshake.rs +++ b/proxy/src/proxy/handshake.rs @@ -9,7 +9,7 @@ use tracing::{info, warn}; use crate::auth::endpoint_sni; use crate::config::{TlsConfig, PG_ALPN_PROTOCOL}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::Metrics; use crate::proxy::ERR_INSECURE_CONNECTION; @@ -66,7 +66,7 @@ pub(crate) enum HandshakeData { /// we also take an extra care of propagating only the select handshake errors to client. #[tracing::instrument(skip_all)] pub(crate) async fn handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, stream: S, mut tls: Option<&TlsConfig>, record_handshake_error: bool, diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 17721c23d5..4be4006d15 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -25,7 +25,7 @@ use self::connect_compute::{connect_to_compute, TcpMechanism}; use self::passthrough::ProxyPassthrough; use crate::cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -117,7 +117,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Tcp, @@ -247,7 +247,7 @@ impl ReportableError for ClientRequestError { pub(crate) async fn handle_client( config: &'static ProxyConfig, auth_backend: &'static auth::Backend<'static, ()>, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, mode: ClientMode, diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs index df9f79a7e3..fe211adfeb 100644 --- a/proxy/src/proxy/tests/mitm.rs +++ b/proxy/src/proxy/tests/mitm.rs @@ -36,7 +36,7 @@ async fn proxy_mitm( // begin handshake with end_server let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await; let (end_client, startup) = match handshake( - &RequestMonitoring::test(), + &RequestContext::test(), client1, Some(&server_config1), false, diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index be821925b5..3de8ca8736 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -162,7 +162,7 @@ impl TestAuth for Scram { stream: &mut PqStream>, ) -> anyhow::Result<()> { let outcome = auth::AuthFlow::new(stream) - .begin(auth::Scram(&self.0, &RequestMonitoring::test())) + .begin(auth::Scram(&self.0, &RequestContext::test())) .await? .authenticate() .await?; @@ -182,11 +182,10 @@ async fn dummy_proxy( auth: impl TestAuth + Send, ) -> anyhow::Result<()> { let (client, _) = read_proxy_protocol(client).await?; - let mut stream = - match handshake(&RequestMonitoring::test(), client, tls.as_ref(), false).await? 
{ - HandshakeData::Startup(stream, _) => stream, - HandshakeData::Cancel(_) => bail!("cancellation not supported"), - }; + let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? { + HandshakeData::Startup(stream, _) => stream, + HandshakeData::Cancel(_) => bail!("cancellation not supported"), + }; auth.authenticate(&mut stream).await?; @@ -466,7 +465,7 @@ impl ConnectMechanism for TestConnectMechanism { async fn connect_once( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _node_info: &control_plane::CachedNodeInfo, _timeout: std::time::Duration, ) -> Result { @@ -581,7 +580,7 @@ fn helper_create_connect_info( async fn connect_to_compute_success() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -599,7 +598,7 @@ async fn connect_to_compute_success() { async fn connect_to_compute_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -618,7 +617,7 @@ async fn connect_to_compute_retry() { async fn connect_to_compute_non_retry_1() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -637,7 +636,7 @@ async fn connect_to_compute_non_retry_1() { async fn connect_to_compute_non_retry_2() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -657,7 +656,7 @@ async fn connect_to_compute_non_retry_3() { let _ = env_logger::try_init(); tokio::time::pause(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Retry, Retry, Retry, Retry, Retry]); let user_info = helper_create_connect_info(&mechanism); @@ -689,7 +688,7 @@ async fn connect_to_compute_non_retry_3() { async fn wake_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -708,7 +707,7 @@ async fn wake_retry() { async fn wake_non_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { diff --git a/proxy/src/proxy/wake_compute.rs b/proxy/src/proxy/wake_compute.rs index f9f46bb66c..d09e0b1f41 100644 --- a/proxy/src/proxy/wake_compute.rs +++ b/proxy/src/proxy/wake_compute.rs @@ -2,7 +2,7 @@ use tracing::{error, info, warn}; use super::connect_compute::ComputeConnectBackend; use 
crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::CachedNodeInfo; use crate::error::ReportableError; @@ -13,7 +13,7 @@ use crate::proxy::retry::{retry_after, should_retry}; pub(crate) async fn wake_compute( num_retries: &mut u32, - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &B, config: RetryConfig, ) -> Result { diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 5e9fd151ae..d9dcf6fbb7 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -23,7 +23,7 @@ use crate::compute_ctl::{ ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest, }; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError}; use crate::control_plane::locks::ApiLocks; @@ -48,7 +48,7 @@ pub(crate) struct PoolingBackend { impl PoolingBackend { pub(crate) async fn authenticate_with_password( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, password: &[u8], ) -> Result { @@ -110,7 +110,7 @@ impl PoolingBackend { pub(crate) async fn authenticate_with_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, jwt: String, ) -> Result { @@ -161,7 +161,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, keys: ComputeCredentials, force_new: bool, @@ -201,7 +201,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_proxy( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { info!("pool: looking for an existing connection"); @@ -249,7 +249,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_postgres( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { if let Some(client) = self.local_pool.get(ctx, &conn_info)? 
{ @@ -490,7 +490,7 @@ impl ConnectMechanism for TokioMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { @@ -540,7 +540,7 @@ impl ConnectMechanism for HyperMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 1845603bf7..07ba1ae9af 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -21,7 +21,7 @@ use { use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, GlobalConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::Metrics; @@ -53,7 +53,7 @@ impl fmt::Display for ConnInfo { pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 61c39c32c9..fe3c422c3b 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -15,7 +15,7 @@ use super::conn_pool::ClientDataRemote; use super::http_conn_pool::ClientDataHttp; use super::local_conn_pool::ClientDataLocal; use crate::auth::backend::ComputeUserInfo; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::{DbName, EndpointCacheKey, RoleName}; @@ -380,7 +380,7 @@ impl GlobalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let mut client: Option> = None; diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index a1d4473b01..bc86c4b1cd 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -12,7 +12,7 @@ use tracing::{debug, error, info, info_span, Instrument}; use super::backend::HttpConnError; use super::conn_pool_lib::{ClientInnerExt, ConnInfo}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::EndpointCacheKey; @@ -212,7 +212,7 @@ impl GlobalConnPool { #[expect(unused_results)] pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let result: Result>, HttpConnError>; @@ -280,7 +280,7 @@ impl GlobalConnPool { pub(crate) fn poll_http2_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, client: Send, connection: Connect, diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 99d4329f88..cadcbd7530 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -36,7 +36,7 @@ use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, DbUserConn, EndpointConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use 
crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::Metrics; @@ -88,7 +88,7 @@ impl LocalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let client = self @@ -159,7 +159,7 @@ impl LocalConnPool { #[allow(clippy::too_many_arguments)] pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index cf758855fa..59247f03bf 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -45,7 +45,7 @@ use utils::http::error::ApiError; use crate::cancellation::CancellationHandlerMain; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::metrics::Metrics; use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; use crate::proxy::run_until_cancelled; @@ -423,7 +423,7 @@ async fn request_handler( if config.http_config.accept_websockets && framed_websockets::upgrade::is_upgrade_request(&request) { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Ws, @@ -458,7 +458,7 @@ async fn request_handler( // Return the response so the spawned future can continue. Ok(response.map(|b| b.map_err(|x| match x {}).boxed())) } else if request.uri().path() == "/sql" && *request.method() == Method::POST { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Http, diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index f0975617d4..36d8595902 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -34,7 +34,7 @@ use super::json::{json_to_pg_text, pg_text_row_to_json, JsonConversionError}; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::{endpoint_sni, ComputeUserInfoParseError}; use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::metrics::{HttpDirection, Metrics}; use crate::proxy::{run_until_cancelled, NeonOptions}; @@ -133,7 +133,7 @@ impl UserFacingError for ConnInfoError { fn get_conn_info( config: &'static AuthenticationConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, headers: &HeaderMap, tls: Option<&TlsConfig>, ) -> Result { @@ -240,7 +240,7 @@ fn get_conn_info( pub(crate) async fn handle( config: &'static ProxyConfig, - ctx: RequestMonitoring, + ctx: RequestContext, request: Request, backend: Arc, cancel: CancellationToken, @@ -516,7 +516,7 @@ fn map_isolation_level_to_headers(level: IsolationLevel) -> Option async fn handle_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, backend: Arc, ) -> Result>, SqlOverHttpError> { @@ -562,7 +562,7 @@ async fn handle_inner( async fn handle_db_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, auth: AuthData, @@ -733,7 +733,7 @@ pub(crate) fn uuid_to_header_value(id: Uuid) -> HeaderValue { } async fn 
handle_auth_broker_inner( - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, jwt: String, diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index ba36116c2c..4088fea835 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -14,7 +14,7 @@ use tracing::warn; use crate::cancellation::CancellationHandlerMain; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{io_error, ReportableError}; use crate::metrics::Metrics; use crate::proxy::{handle_client, ClientMode, ErrorSource}; @@ -126,7 +126,7 @@ impl AsyncBufRead for WebSocketRw { pub(crate) async fn serve_websocket( config: &'static ProxyConfig, auth_backend: &'static crate::auth::Backend<'static, ()>, - ctx: RequestMonitoring, + ctx: RequestContext, websocket: OnUpgrade, cancellation_handler: Arc, endpoint_rate_limiter: Arc,