From 7a598b9842f47707d9d93dda621094494221c5a6 Mon Sep 17 00:00:00 2001
From: Ivan Efremov
Date: Fri, 3 Jan 2025 12:04:58 +0200
Subject: [PATCH 01/44] [proxy/docs]imprv: Add local testing section to proxy README (#10230)

Add commands to run proxy locally with the mocked control plane
---
 proxy/README.md | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/proxy/README.md b/proxy/README.md
index 8d850737be..4b98342d72 100644
--- a/proxy/README.md
+++ b/proxy/README.md
@@ -102,23 +102,39 @@ User can pass several optional headers that will affect resulting json.
 2. `Neon-Array-Mode: true`. Return postgres rows as arrays instead of objects. That is more compact representation and also helps in some edge cases where it is hard to use rows represented as objects (e.g. when several fields have the same name).
 
+## Test proxy locally
 
-## Using SNI-based routing on localhost
-
-Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy:
+Proxy determines the project name from the subdomain: a request to `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use `*.localtest.me`, which resolves to `127.0.0.1`.
+Let's create a self-signed certificate by running:
 ```sh
 openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
 ```
 
-start proxy
-
+Then we need to build the proxy with the 'testing' feature and run it, e.g.:
 ```sh
-./target/debug/proxy -c server.crt -k server.key
+RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key
 ```
 
-and connect to it
+We will also need a postgres instance. Assuming that docker is set up, we can start one as follows:
+```sh
+docker run \
+  --detach \
+  --name proxy-postgres \
+  --env POSTGRES_PASSWORD=proxy-postgres \
+  --publish 5432:5432 \
+  postgres:17-bookworm
+```
+
+The next step is setting up the auth table and schema, as well as creating the role (without the JWT table):
+```sh
+docker exec -it proxy-postgres psql -U postgres -c "CREATE SCHEMA IF NOT EXISTS neon_control_plane"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
+```
+
+Now you can start a new session from a client:
 ```sh
-PGSSLROOTCERT=./server.crt psql 'postgres://my-cluster-42.localtest.me:1234?sslmode=verify-full'
-```
+PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full"
+```
\ No newline at end of file

From 2d4f267983858c197de795074046a2b1376a8616 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 3 Jan 2025 10:20:18 +0000
Subject: [PATCH 02/44] cargo: update diesel, pq-sys (#10256)

## Problem

Versions of `diesel` and `pq-sys` were somewhat stale.
I was checking on libpq->openssl versions while investigating a segfault via https://github.com/neondatabase/cloud/issues/21010. I don't think these rust bindings are likely to be the source of issues, but we might as well freshen them as a precaution. ## Summary of changes - Update diesel to 2.2.6 - Update pq-sys to 0.6.3 --- Cargo.lock | 8 ++++---- storage_controller/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 420def152d..9e0e343996 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1733,9 +1733,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" [[package]] name = "diesel" -version = "2.2.3" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71" +checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12" dependencies = [ "bitflags 2.4.1", "byteorder", @@ -4494,9 +4494,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pq-sys" -version = "0.4.8" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd" +checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793" dependencies = [ "vcpkg", ] diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 2f5d266567..5f3319512d 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -43,13 +43,13 @@ scopeguard.workspace = true strum.workspace = true strum_macros.workspace = true -diesel = { version = "2.1.4", features = [ +diesel = { version = "2.2.6", features = [ "serde_json", "postgres", "r2d2", "chrono", ] } -diesel_migrations = { version = "2.1.0" } +diesel_migrations = { version = "2.2.0" } r2d2 = { version = "0.8.10" } utils = { path = "../libs/utils/" } From ba9722a2fd913d30b639ec506c42761cdca44440 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 10:55:07 +0000 Subject: [PATCH 03/44] tests: add upload wait in test_scrubber_physical_gc_ancestors (#10260) ## Problem We see periodic failures in `test_scrubber_physical_gc_ancestors`, where the logs show that the pageserver is creating image layers that should cause child shards to no longer reference their parents' layers, but then the scrubber runs and doesn't find any unreferenced layers.[ https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/78ea06dea6ba8dd3 From inspecting the code & test, it seems like this could be as simple as the test failing to wait for uploads before running the scrubber. It had a 2 second delay built in to satisfy the scrubbers time threshold checks, which on a lightly loaded machine would also have been easily enough for uploads to complete, but our test machines are more heavily loaded all the time. ## Summary of changes - Wait for uploads to complete after generating images layers in test_scrubber_physical_gc_ancestors, so that the scrubber should reliably see the post-compaction metadata. 
--- test_runner/regress/test_storage_scrubber.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 198e4f0460..220c428531 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -266,7 +266,9 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ for shard in shards: ps = env.get_tenant_pageserver(shard) assert ps is not None - ps.http_client().timeline_compact(shard, timeline_id, force_image_layer_creation=True) + ps.http_client().timeline_compact( + shard, timeline_id, force_image_layer_creation=True, wait_until_uploaded=True + ) ps.http_client().timeline_gc(shard, timeline_id, 0) # We will use a min_age_secs=1 threshold for deletion, let it pass From c08759f367063fde2558f979b83f6f9209741c7a Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 10:55:15 +0000 Subject: [PATCH 04/44] storcon: verbose logs in rare case of shards not attached yet (#10262) ## Problem When we do a timeline CRUD operation, we check that the shards we need to mutate are currently attached to a pageserver, by reading `generation` and `generation_pageserver` from the database. If any don't appear to be attached, we respond with a a 503 and "One or more shards in tenant is not yet attached". This is happening more often than expected, and it's not obvious with current logging what's going on: specifically which shard has a problem, and exactly what we're seeing in these persistent generation columns. (Aside: it's possible that we broke something with the change in #10011 which clears generation_pageserver when we detach a shard, although if so the mechanism isn't trivial: what should happen is that if we stamp on generation_pageserver if a reconciler is running, then it shouldn't matter because we're about to ## Summary of changes - When we are in Attached mode but find that generation_pageserver/generation are unset, output details while looping over shards. --- storage_controller/src/service.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index c0c5bc371a..222cb9fdd4 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -3572,6 +3572,11 @@ impl Service { .iter() .any(|i| i.generation.is_none() || i.generation_pageserver.is_none()) { + let shard_generations = generations + .into_iter() + .map(|i| (i.tenant_shard_id, (i.generation, i.generation_pageserver))) + .collect::>(); + // One or more shards has not been attached to a pageserver. Check if this is because it's configured // to be detached (409: caller should give up), or because it's meant to be attached but isn't yet (503: caller should retry) let locked = self.inner.read().unwrap(); @@ -3582,6 +3587,28 @@ impl Service { PlacementPolicy::Attached(_) => { // This shard is meant to be attached: the caller is not wrong to try and // use this function, but we can't service the request right now. + let Some(generation) = shard_generations.get(shard_id) else { + // This can only happen if there is a split brain controller modifying the database. This should + // never happen when testing, and if it happens in production we can only log the issue. + debug_assert!(false); + tracing::error!("Shard {shard_id} not found in generation state! 
Is another rogue controller running?"); + continue; + }; + let (generation, generation_pageserver) = generation; + if let Some(generation) = generation { + if generation_pageserver.is_none() { + // This is legitimate only in a very narrow window where the shard was only just configured into + // Attached mode after being created in Secondary or Detached mode, and it has had its generation + // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver). + tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?"); + } + } else { + // This should never happen: a shard with no generation is only permitted when it was created in some state + // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory) + debug_assert!(false); + tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!"); + continue; + } } PlacementPolicy::Secondary | PlacementPolicy::Detached => { return Err(ApiError::Conflict(format!( From 1303cd5d05062c95660ec00f8846b4258fc62b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 3 Jan 2025 13:36:01 +0100 Subject: [PATCH 05/44] Fix defusing race between Tenant::shutdown and offload_timeline (#10150) There is a race condition between `Tenant::shutdown`'s `defuse_for_drop` loop and `offload_timeline`, where timeline offloading can insert into a tenant that is in the process of shutting down, in fact so far progressed that the `defuse_for_drop` has already been called. This prevents warn log lines of the form: ``` offloaded timeline was dropped without having cleaned it up at the ancestor ``` The solution piggybacks on the `offloaded_timelines` lock: both the defuse loop and the offloaded timeline insertion need to acquire the lock, and we know that the defuse loop only runs after the tenant has set its `TenantState` to `Stopping`. So if we hold the `offloaded_timelines` lock, and know that the `TenantState` is not `Stopping`, then we know that the defuse loop has not ran yet, and holding the lock ensures that it doesn't start running while we are inserting the offloaded timeline. Fixes #10070 --- pageserver/src/tenant/timeline/offload.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 3bfbfb5061..15628a9645 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -1,5 +1,7 @@ use std::sync::Arc; +use pageserver_api::models::TenantState; + use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard}; use super::Timeline; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; @@ -70,6 +72,15 @@ pub(crate) async fn offload_timeline( { let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap(); + if matches!( + tenant.current_state(), + TenantState::Stopping { .. } | TenantState::Broken { .. } + ) { + // Cancel the operation if the tenant is shutting down. 
Do this while the + // timelines_offloaded lock is held to prevent a race with Tenant::shutdown + // for defusing the lock + return Err(OffloadError::Cancelled); + } offloaded_timelines.insert( timeline.timeline_id, Arc::new( From e9d30edc7f7f5fab66f7bdffcca9e8c5bb9b8d27 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 13:13:22 +0000 Subject: [PATCH 06/44] pageserver: fix a 500 during timeline creation + shutdown (#10259) ## Problem The test_create_churn_during_restart test fails if timeline creation calls return 500 errors (because the API shouldn't do it), and it's sometimes failing, for example: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/3ce2e7045465012e ## Summary of changes - Avoid handling UploadQueueShutDownOrStopped case as an Other (i.e. 500) --- pageserver/src/tenant.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 2e4c47c6e4..90017b25f2 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2604,9 +2604,15 @@ impl Tenant { WaitCompletionError::NotInitialized( e, // If the queue is already stopped, it's a shutdown error. ) if e.is_stopping() => CreateTimelineError::ShuttingDown, - e => CreateTimelineError::Other(e.into()), - }) - .context("wait for timeline initial uploads to complete")?; + WaitCompletionError::NotInitialized(_) => { + // This is a bug: we should never try to wait for uploads before initializing the timeline + debug_assert!(false); + CreateTimelineError::Other(anyhow::anyhow!("timeline not initialized")) + } + WaitCompletionError::UploadQueueShutDownOrStopped => { + CreateTimelineError::ShuttingDown + } + })?; // The creating task is responsible for activating the timeline. // We do this after `wait_completion()` so that we don't spin up tasks that start From b33299dc37d9269fe55bd3256b7a4a72c129b81c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 16:21:31 +0100 Subject: [PATCH 07/44] pageserver,safekeeper: disable heap profiling (#10268) ## Problem Since enabling continuous profiling in staging, we've seen frequent seg faults. This is suspected to be because jemalloc and pprof-rs take a stack trace at the same time, and the handlers aren't signal safe. jemalloc does this probabilistically on every allocation, regardless of whether someone is taking a heap profile, which means that any CPU profile has a chance to cause a seg fault. Touches #10225. ## Summary of changes For now, just disable heap profiles -- CPU profiles are more important, and we need to be able to take them without risking a crash. --- pageserver/src/bin/pageserver.rs | 10 ++++++---- safekeeper/src/bin/safekeeper.rs | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 567a69da3b..b92ff4ebf9 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -53,10 +53,12 @@ project_build_tag!(BUILD_TAG); #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] -pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +// TODO: disabled because concurrent CPU profiles cause seg faults. 
See: +// https://github.com/neondatabase/neon/issues/10225. +//#[allow(non_upper_case_globals)] +//#[export_name = "malloc_conf"] +//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "pageserver.pid"; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 13f6e34575..e0ba38d638 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -51,10 +51,12 @@ use utils::{ #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] -pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +// TODO: disabled because concurrent CPU profiles cause seg faults. See: +// https://github.com/neondatabase/neon/issues/10225. +//#[allow(non_upper_case_globals)] +//#[export_name = "malloc_conf"] +//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "safekeeper.pid"; const ID_FILE_NAME: &str = "safekeeper.id"; From 1393cc668bce904cf5300f8829addce86437e755 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 16:38:51 +0100 Subject: [PATCH 08/44] Revert "pageserver: revert flush backpressure (#8550) (#10135)" (#10270) This reverts commit f3ecd5d76ad8b858b2bfaaabba5018046aca46ac. It is [suspected](https://neondb.slack.com/archives/C033RQ5SPDH/p1735907405716759) to have caused significant read amplification in the [ingest benchmark](https://neonprod.grafana.net/d/de3mupf4g68e8e/perf-test3a-ingest-benchmark?orgId=1&from=now-30d&to=now&timezone=utc&var-new_project_endpoint_id=ep-solitary-sun-w22bmut6&var-large_tenant_endpoint_id=ep-holy-bread-w203krzs) (specifically during index creation). We will revisit an intermediate improvement here to unblock [upload parallelism](https://github.com/neondatabase/neon/issues/10096) before properly addressing [compaction backpressure](https://github.com/neondatabase/neon/issues/8390). 
--- pageserver/src/metrics.rs | 25 ++++++++++- pageserver/src/tenant/timeline.rs | 38 +++++++++++++---- test_runner/fixtures/metrics.py | 1 + test_runner/regress/test_branching.py | 13 ++++-- test_runner/regress/test_remote_storage.py | 48 ++++++++++++++++++++++ 5 files changed, 112 insertions(+), 13 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index bdbabf3f75..b4e20cb8b9 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -3,7 +3,7 @@ use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, + Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; @@ -445,6 +445,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static FLUSH_WAIT_UPLOAD_TIME: Lazy = Lazy::new(|| { + register_gauge_vec!( + "pageserver_flush_wait_upload_seconds", + "Time spent waiting for preceding uploads during layer flush", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + static LAST_RECORD_LSN: Lazy = Lazy::new(|| { register_int_gauge_vec!( "pageserver_last_record_lsn", @@ -2577,6 +2586,7 @@ pub(crate) struct TimelineMetrics { shard_id: String, timeline_id: String, pub flush_time_histo: StorageTimeMetrics, + pub flush_wait_upload_time_gauge: Gauge, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, @@ -2622,6 +2632,9 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); + let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); let compact_time_histo = StorageTimeMetrics::new( StorageTimeOperation::Compact, &tenant_id, @@ -2767,6 +2780,7 @@ impl TimelineMetrics { shard_id, timeline_id, flush_time_histo, + flush_wait_upload_time_gauge, compact_time_histo, create_images_time_histo, logical_size_histo, @@ -2816,6 +2830,14 @@ impl TimelineMetrics { self.resident_physical_size_gauge.get() } + pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) { + self.flush_wait_upload_time_gauge.add(duration); + crate::metrics::FLUSH_WAIT_UPLOAD_TIME + .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id]) + .unwrap() + .add(duration); + } + pub(crate) fn shutdown(&self) { let was_shutdown = self .shutdown @@ -2833,6 +2855,7 @@ impl TimelineMetrics { let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e71cb4db80..b36c2f487f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -144,15 +144,19 @@ use 
self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; -use super::config::TenantConf; -use super::remote_timeline_client::index::IndexPart; -use super::remote_timeline_client::RemoteTimelineClient; -use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline}; -use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; -use super::upload_queue::NotInitialized; -use super::GcError; use super::{ - debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded, + config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, + MaybeOffloaded, +}; +use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; +use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; +use super::{ + remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, + storage_layer::ReadableLayer, +}; +use super::{ + secondary::heatmap::{HeatMapLayer, HeatMapTimeline}, + GcError, }; #[cfg(test)] @@ -3836,6 +3840,24 @@ impl Timeline { // release lock on 'layers' }; + // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files. + // This makes us refuse ingest until the new layers have been persisted to the remote + let start = Instant::now(); + self.remote_client + .wait_completion() + .await + .map_err(|e| match e { + WaitCompletionError::UploadQueueShutDownOrStopped + | WaitCompletionError::NotInitialized( + NotInitialized::ShuttingDown | NotInitialized::Stopped, + ) => FlushLayerError::Cancelled, + WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => { + FlushLayerError::Other(anyhow!(e).into()) + } + })?; + let duration = start.elapsed().as_secs_f64(); + self.metrics.flush_wait_upload_time_gauge_add(duration); + // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`, // a compaction can delete the file and then it won't be available for uploads any more. // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index eb3d06b949..c5295360c3 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -170,6 +170,7 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] 
= ( "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", "pageserver_valid_lsn_lease_count", + "pageserver_flush_wait_upload_seconds", counter("pageserver_tenant_throttling_count_accounted_start"), counter("pageserver_tenant_throttling_count_accounted_finish"), counter("pageserver_tenant_throttling_wait_usecs_sum"), diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index a4056404f0..34e4e994cb 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -19,7 +19,6 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException -from requests.exceptions import RetryError # Test branch creation @@ -177,8 +176,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline) - with pytest.raises(RuntimeError, match="is not active, state: Loading"): - env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant) + with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"): + env.endpoints.create_start( + initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 + ) + ps_http.configure_failpoints(("before-upload-index-pausable", "off")) finally: env.pageserver.stop(immediate=True) @@ -219,7 +221,10 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises(RetryError, match="too many 503 error responses"): + with pytest.raises( + PageserverApiException, + match="Cannot branch off the timeline that's not present in pageserver", + ): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 52b6b254aa..76a42ef4a2 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -784,6 +784,54 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() +def test_paused_upload_stalls_checkpoint( + neon_env_builder: NeonEnvBuilder, +): + """ + This test checks that checkpoints block on uploads to remote storage. + """ + neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) + + env = neon_env_builder.init_start( + initial_tenant_conf={ + # Set a small compaction threshold + "compaction_threshold": "3", + # Disable GC + "gc_period": "0s", + # disable PITR + "pitr_interval": "0s", + } + ) + + env.pageserver.allowed_errors.append( + f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" + ) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + client = env.pageserver.http_client() + layers_at_creation = client.layer_map_info(tenant_id, timeline_id) + deltas_at_creation = len(layers_at_creation.delta_layers()) + assert ( + deltas_at_creation == 1 + ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" + + # Make new layer uploads get stuck. + # Note that timeline creation waits for the initial layers to reach remote storage. + # So at this point, the `layers_at_creation` are in remote storage. 
+ client.configure_failpoints(("before-upload-layer-pausable", "pause")) + + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + # Build two tables with some data inside + endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)") + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + + with pytest.raises(ReadTimeout): + client.timeline_checkpoint(tenant_id, timeline_id, timeout=5) + client.configure_failpoints(("before-upload-layer-pausable", "off")) + + def wait_upload_queue_empty( client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId ): From a77e87a48a628abcac77afeb1f64e5a491275f1c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 17:03:19 +0100 Subject: [PATCH 09/44] pageserver: assert that uploads don't modify indexed layers (#10228) ## Problem It's not legal to modify layers that are referenced by the current layer index. Assert this in the upload queue, as preparation for upload queue reordering. Touches #10096. ## Summary of changes Add a debug assertion that the upload queue does not modify layers referenced by the current index. I could be convinced that this should be a plain assertion, but will be conservative for now. --- .../src/tenant/remote_timeline_client.rs | 39 +++++++++++++++++++ .../tenant/remote_timeline_client/index.rs | 21 +++++++--- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index fee11bc742..b27ac3e933 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1943,6 +1943,30 @@ impl RemoteTimelineClient { return; } + // Assert that we don't modify a layer that's referenced by the current index. + if cfg!(debug_assertions) { + let modified = match &task.op { + UploadOp::UploadLayer(layer, layer_metadata, _) => { + vec![(layer.layer_desc().layer_name(), layer_metadata)] + } + UploadOp::Delete(delete) => { + delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect() + } + // These don't modify layers. + UploadOp::UploadMetadata { .. } => Vec::new(), + UploadOp::Barrier(_) => Vec::new(), + UploadOp::Shutdown => Vec::new(), + }; + if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() { + for (ref name, metadata) in modified { + debug_assert!( + !queue.clean.0.references(name, metadata), + "layer {name} modified while referenced by index", + ); + } + } + } + let upload_result: anyhow::Result<()> = match &task.op { UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => { if let Some(OpType::FlushDeletion) = mode { @@ -2509,6 +2533,21 @@ pub fn remote_layer_path( RemotePath::from_string(&path).expect("Failed to construct path") } +/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially +/// remote_layer_path(a) == remote_layer_path(b) without the string allocations. +/// +/// TODO: there should be a variant of LayerName for the physical path that contains information +/// about the shard and generation, such that this could be replaced by a simple comparison. +pub fn is_same_remote_layer_path( + aname: &LayerName, + ameta: &LayerFileMetadata, + bname: &LayerName, + bmeta: &LayerFileMetadata, +) -> bool { + // NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug. 
+ aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation +} + pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath { RemotePath::from_string(&format!( "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}" diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 506990fb2f..51f093cb87 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -8,14 +8,14 @@ use std::collections::HashMap; use chrono::NaiveDateTime; use pageserver_api::models::AuxFilePolicy; use serde::{Deserialize, Serialize}; -use utils::id::TimelineId; +use super::is_same_remote_layer_path; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::import_pgdata; use crate::tenant::Generation; use pageserver_api::shard::ShardIndex; - +use utils::id::TimelineId; use utils::lsn::Lsn; /// In-memory representation of an `index_part.json` file @@ -45,10 +45,8 @@ pub struct IndexPart { #[serde(skip_serializing_if = "Option::is_none")] pub import_pgdata: Option, - /// Per layer file name metadata, which can be present for a present or missing layer file. - /// - /// Older versions of `IndexPart` will not have this property or have only a part of metadata - /// that latest version stores. + /// Layer filenames and metadata. For an index persisted in remote storage, all layers must + /// exist in remote storage. pub layer_metadata: HashMap, /// Because of the trouble of eyeballing the legacy "metadata" field, we copied the @@ -143,6 +141,17 @@ impl IndexPart { pub(crate) fn example() -> Self { Self::empty(TimelineMetadata::example()) } + + /// Returns true if the index contains a reference to the given layer (i.e. file path). + /// + /// TODO: there should be a variant of LayerName for the physical remote path that contains + /// information about the shard and generation, to avoid passing in metadata. + pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool { + let Some(index_metadata) = self.layer_metadata.get(name) else { + return false; + }; + is_same_remote_layer_path(name, metadata, name, index_metadata) + } } /// Metadata gathered for each of the layer files. From 4b2f56862dc5738407893df451348e78696abd65 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 16:16:04 +0000 Subject: [PATCH 10/44] docker: include vanilla debian postgres client (#10269) ## Problem We are chasing down segfaults in the storage controller https://github.com/neondatabase/cloud/issues/21010 This is for use by the storage controller, which links dynamically with `libpq`. We currently use the neon-built libpq, but this may be unsafe for use from multi-threaded programs like the controller, as it uses a statically linked openssl Precursor to https://github.com/neondatabase/neon/pull/10258 ## Summary of changes - Include `postgresql-15` in container builds. The reason for using version 15 is simply because that is what's available in Debian 12 without adding any extra repositories, and we don't have any special need for latest version in our libpq usage. 
--- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index e888efbae2..df9bcb3002 100644 --- a/Dockerfile +++ b/Dockerfile @@ -69,6 +69,8 @@ RUN set -e \ libreadline-dev \ libseccomp-dev \ ca-certificates \ + # System postgres for use with client libraries (e.g. in storage controller) + postgresql-15 \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ && useradd -d /data neon \ && chown -R neon:neon /data From b368e62cfc374bd48ca656b476c5c081c4018546 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 4 Jan 2025 15:40:50 +0000 Subject: [PATCH 11/44] build(deps): bump jinja2 from 3.1.4 to 3.1.5 in the pip group (#10236) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 20 +++++++++++++++----- pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 59ae5cf1ca..072bf9a5e9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1322,13 +1322,13 @@ files = [ [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.5" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, + {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, + {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, ] [package.dependencies] @@ -3309,6 +3309,16 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, + {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, + {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -3524,4 +3534,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9" +content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1" diff --git a/pyproject.toml b/pyproject.toml index 01d15ee6bb..ba4ab0b1f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ requests = "^2.32.3" pytest-xdist = "^3.3.1" asyncpg = "^0.29.0" aiopg = "^1.4.0" -Jinja2 = "^3.1.4" +Jinja2 = "^3.1.5" types-requests = "^2.31.0.0" types-psycopg2 = "^2.9.21.20241019" boto3 = "^1.34.11" From 406cca643b9529979522b33abf3a0457681fc987 Mon Sep 17 00:00:00 2001 From: Busra Kugler Date: Mon, 6 Jan 2025 11:44:23 +0100 Subject: [PATCH 12/44] Update neon_fixtures.py - remove logs (#10219) We need to remove this line to prevent aws keys exposing in the public s3 buckets --- test_runner/fixtures/neon_fixtures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index a0c642163d..8fd9eec8ce 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4606,7 +4606,8 @@ class StorageScrubber: ] args = base_args + args - log.info(f"Invoking scrubber command {args} with env: {env}") + log.info(f"Invoking scrubber command {args}") + (output_path, stdout, status_code) = subprocess_capture( self.log_dir, args, From fda52a0005ea4ca8e4e1d6a16de75724a7e619fe Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 6 Jan 2025 13:05:35 +0000 Subject: [PATCH 13/44] feat(proxy): dont trigger error alerts for unknown topics (#10266) ## Problem Before the holidays, and just before our code freeze, a change to cplane was made that started publishing the topics from #10197. This triggered our alerts and put us in a sticky situation as it was not an error, and we didn't want to silence the alert for the entire holidays, and we didn't want to release proxy 2 days in a row if it was not essential. We fixed it eventually by rewriting the alert based on logs, but this is not a good solution. ## Summary of changes Introduces an intermediate parsing step to check the topic name first, to allow us to ignore parsing errors for any topics we do not know about. 
--- proxy/src/redis/notifications.rs | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 671305a300..4383d6be2c 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -30,8 +30,14 @@ async fn try_connect(client: &ConnectionWithCredentialsProvider) -> anyhow::Resu Ok(conn) } +#[derive(Debug, Deserialize)] +struct NotificationHeader<'a> { + topic: &'a str, +} + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] #[serde(tag = "topic", content = "data")] +// Message to contributors: Make sure to align these topic names with the list below. pub(crate) enum Notification { #[serde( rename = "/allowed_ips_updated", @@ -69,6 +75,22 @@ pub(crate) enum Notification { #[serde(rename = "/cancel_session")] Cancel(CancelSession), } + +/// Returns true if the topic name given is a known topic that we can deserialize and action on. +/// Returns false otherwise. +fn known_topic(s: &str) -> bool { + // Message to contributors: Make sure to align these topic names with the enum above. + matches!( + s, + "/allowed_ips_updated" + | "/block_public_or_vpc_access_updated" + | "/allowed_vpc_endpoints_updated_for_org" + | "/allowed_vpc_endpoints_updated_for_projects" + | "/password_updated" + | "/cancel_session" + ) +} + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct AllowedIpsUpdate { project_id: ProjectIdInt, @@ -96,6 +118,7 @@ pub(crate) struct PasswordUpdate { project_id: ProjectIdInt, role_name: RoleNameInt, } + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct CancelSession { pub(crate) region_id: Option, @@ -141,18 +164,23 @@ impl MessageHandler { region_id, } } + pub(crate) async fn increment_active_listeners(&self) { self.cache.increment_active_listeners().await; } + pub(crate) async fn decrement_active_listeners(&self) { self.cache.decrement_active_listeners().await; } + #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))] async fn handle_message(&self, msg: redis::Msg) -> anyhow::Result<()> { let payload: String = msg.get_payload()?; tracing::debug!(?payload, "received a message payload"); - let msg: Notification = match serde_json::from_str(&payload) { + // For better error handling, we first parse the payload to extract the topic. + // If there's a topic we don't support, we can handle that error more gracefully. + let header: NotificationHeader = match serde_json::from_str(&payload) { Ok(msg) => msg, Err(e) => { Metrics::get().proxy.redis_errors_total.inc(RedisErrors { @@ -162,6 +190,24 @@ impl MessageHandler { return Ok(()); } }; + + if !known_topic(header.topic) { + // don't update the metric for redis errors if it's just a topic we don't know about. 
+ tracing::warn!(topic = header.topic, "unknown topic"); + return Ok(()); + } + + let msg: Notification = match serde_json::from_str(&payload) { + Ok(msg) => msg, + Err(e) => { + Metrics::get().proxy.redis_errors_total.inc(RedisErrors { + channel: msg.get_channel_name(), + }); + tracing::error!(topic = header.topic, "broken message: {e}"); + return Ok(()); + } + }; + tracing::debug!(?msg, "received a message"); match msg { Notification::Cancel(cancel_session) => { From 95f1920231465e2b898b71a9959acec9ddd63896 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 6 Jan 2025 18:27:08 +0100 Subject: [PATCH 14/44] cargo: build with frame pointers (#10226) ## Problem Frame pointers are typically disabled by default (depending on CPU architecture), to improve performance. This frees up a CPU register, and avoids a couple of instructions per function call. However, it makes stack unwinding much more inefficient, since it has to use DWARF debug information instead, and gives worse results with e.g. `perf` and eBPF profiles. The `backtrace` implementation of `libunwind` is also suspected to cause seg faults. The performance benefit of frame pointer omission doesn't appear to matter that much on modern 64-bit CPU architectures (which have plenty of registers and optimized instruction execution), and benchmarks did not show measurable overhead. The Rust standard library and jemalloc already enable frame pointers by default. For more information, see https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html. Resolves #10224. Touches #10225. ## Summary of changes Enable frame pointers in all builds, and use frame pointers for pprof-rs stack sampling. --- .cargo/config.toml | 8 ++++++++ Cargo.toml | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 5e452974ad..20a2a929b9 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,6 +3,14 @@ # by the RUSTDOCFLAGS env var in CI. rustdocflags = ["-Arustdoc::private_intra_doc_links"] +# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more +# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is +# likely a net win, and allows higher profiling resolution. See also: +# +# * +# * +rustflags = ["-Cforce-frame-pointers=yes"] + [alias] build_testing = ["build", "--features", "testing"] neon = ["run", "--bin", "neon_local"] diff --git a/Cargo.toml b/Cargo.toml index 885f02ba81..197808d5ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] } parquet_derive = "53" pbkdf2 = { version = "0.12.1", features = ["simple", "std"] } pin-project-lite = "0.2" -pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] } +pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] } procfs = "0.16" prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prost = "0.13" @@ -266,6 +266,8 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br [profile.release] # This is useful for profiling and, to some extent, debug. # Besides, debug info should not affect the performance. +# +# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml. 
debug = true # disable debug symbols for all packages except this one to decrease binaries size From 4a6556e269018844a8c3413bd7414331cd968fce Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:29:18 -0500 Subject: [PATCH 15/44] fix(pageserver): ensure GC computes time cutoff using the same start time (#10193) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem close https://github.com/neondatabase/neon/issues/10192 ## Summary of changes * `find_gc_time_cutoff` takes `now` parameter so that all branches compute the cutoff based on the same start time, avoiding races. * gc-compaction uses a single `get_gc_compaction_watermark` function to get the safe LSN to compact. --------- Signed-off-by: Alex Chi Z Co-authored-by: Arpad Müller --- pageserver/src/tenant.rs | 6 +++++- pageserver/src/tenant/timeline.rs | 5 +++-- pageserver/src/tenant/timeline/compaction.rs | 22 ++++++++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 90017b25f2..e3dab2fc1d 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4488,13 +4488,17 @@ impl Tenant { let mut gc_cutoffs: HashMap = HashMap::with_capacity(timelines.len()); + // Ensures all timelines use the same start time when computing the time cutoff. + let now_ts_for_pitr_calc = SystemTime::now(); for timeline in timelines.iter() { let cutoff = timeline .get_last_record_lsn() .checked_sub(horizon) .unwrap_or(Lsn(0)); - let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?; + let cutoffs = timeline + .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx) + .await?; let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs); assert!(old.is_none()); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b36c2f487f..c1b71262e0 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4859,6 +4859,7 @@ impl Timeline { async fn find_gc_time_cutoff( &self, + now: SystemTime, pitr: Duration, cancel: &CancellationToken, ctx: &RequestContext, @@ -4866,7 +4867,6 @@ impl Timeline { debug_assert_current_span_has_tenant_and_timeline_id(); if self.shard_identity.is_shard_zero() { // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself - let now = SystemTime::now(); let time_range = if pitr == Duration::ZERO { humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid") } else { @@ -4952,6 +4952,7 @@ impl Timeline { #[instrument(skip_all, fields(timeline_id=%self.timeline_id))] pub(super) async fn find_gc_cutoffs( &self, + now: SystemTime, space_cutoff: Lsn, pitr: Duration, cancel: &CancellationToken, @@ -4979,7 +4980,7 @@ impl Timeline { // - if PITR interval is set, then this is our cutoff. // - if PITR interval is not set, then we do a lookup // based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases. 
- let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?; + let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?; Ok(match (pitr, time_cutoff) { (Duration::ZERO, Some(time_cutoff)) => { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 94c65631b2..55cde8603e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1799,6 +1799,24 @@ impl Timeline { Ok(()) } + /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for + /// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon). + /// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them + /// here. + pub(crate) fn get_gc_compaction_watermark(self: &Arc) -> Lsn { + let gc_cutoff_lsn = { + let gc_info = self.gc_info.read().unwrap(); + gc_info.min_cutoff() + }; + + // TODO: standby horizon should use leases so we don't really need to consider it here. + // let watermark = watermark.min(self.standby_horizon.load()); + + // TODO: ensure the child branches will not use anything below the watermark, or consider + // them when computing the watermark. + gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn()) + } + /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job. /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts @@ -1811,7 +1829,7 @@ impl Timeline { let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { job.compact_lsn_range.end } else { - *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff + self.get_gc_compaction_watermark() }; // Split compaction job to about 4GB each @@ -2006,7 +2024,7 @@ impl Timeline { // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of // cleaning everything that theoritically it could. In the future, it should use `self.gc_info` // to get the truth data. - let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn(); + let real_gc_cutoff = self.get_gc_compaction_watermark(); // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use // the real cutoff. From b342a02b1c591642e2d52be606ccc42857af112d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 6 Jan 2025 21:17:43 +0100 Subject: [PATCH 16/44] Dockerfile: build with `force-frame-pointers=yes` (#10286) See https://github.com/neondatabase/neon/pull/10226#issuecomment-2573725182. --- .cargo/config.toml | 6 ++++-- Dockerfile | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 20a2a929b9..c71d491303 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -4,11 +4,13 @@ rustdocflags = ["-Arustdoc::private_intra_doc_links"] # Enable frame pointers. This may have a minor performance overhead, but makes it easier and more -# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is -# likely a net win, and allows higher profiling resolution. 
See also: +# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that +# we've seen with libunwind-based profiling. See also: # # * # * +# +# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well. rustflags = ["-Cforce-frame-pointers=yes"] [alias] diff --git a/Dockerfile b/Dockerfile index df9bcb3002..2c157b3b2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,7 +45,7 @@ COPY --chown=nonroot . . ARG ADDITIONAL_RUSTFLAGS RUN set -e \ - && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \ + && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \ --bin pg_sni_router \ --bin pageserver \ --bin pagectl \ From ad7f14d526a4e7e5195ca5d8651672aae0c96d93 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 6 Jan 2025 20:25:31 +0000 Subject: [PATCH 17/44] test_runner: update packages for Python 3.13 (#10285) ## Problem It's impossible to run regression tests with Python 3.13 as some dependencies don't support it (some of them are outdated, and `jsonnet` doesn't support it at all yet) ## Summary of changes - Update dependencies for Python 3.13 - Install `jsonnet` only on Python < 3.13 and skip relevant tests on Python 3.13 Closes #10237 --- poetry.lock | 771 +++++++++++--------- pyproject.toml | 11 +- test_runner/regress/test_compute_metrics.py | 9 +- 3 files changed, 459 insertions(+), 332 deletions(-) diff --git a/poetry.lock b/poetry.lock index 072bf9a5e9..5f15223dca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -239,60 +239,66 @@ files = [ [[package]] name = "asyncpg" -version = "0.29.0" +version = "0.30.0" description = "An asyncio PostgreSQL driver" optional = false python-versions = ">=3.8.0" files = [ - {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"}, - {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"}, - {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"}, - {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"}, - {file = 
"asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"}, - {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"}, - {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"}, - {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"}, - {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"}, - {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"}, - {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"}, - {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"}, - {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"}, - {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"}, - {file = "asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"}, + {file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"}, + {file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"}, + {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"}, + {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"}, + {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"}, + {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"}, + {file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"}, + {file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"}, + {file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"}, + {file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"}, + {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"}, + {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"}, + {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"}, + {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"}, + {file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"}, + {file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"}, + {file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", 
hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"}, + {file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"}, + {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"}, + {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"}, + {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"}, + {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"}, + {file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"}, + {file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"}, + {file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"}, + {file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"}, + {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"}, + {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"}, + {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"}, + {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"}, + {file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"}, + {file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"}, + {file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"}, + {file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"}, + {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"}, + {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"}, + {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"}, + {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"}, + {file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"}, + {file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"}, + {file = 
"asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"}, + {file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"}, + {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"}, + {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"}, + {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"}, + {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"}, + {file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"}, + {file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"}, + {file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"}, ] -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""} - [package.extras] -docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] +docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"] +gssauth = ["gssapi", "sspilib"] +test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi", "k5test", "mypy (>=1.8.0,<1.9.0)", "sspilib", "uvloop (>=0.15.3)"] [[package]] name = "attrs" @@ -766,75 +772,78 @@ files = [ [[package]] name = "cffi" -version = "1.15.1" +version = "1.17.1" description = "Foreign Function Interface for Python calling C code." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = 
"cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] [package.dependencies] @@ -1114,72 +1123,103 @@ Flask = ">=0.9" [[package]] name = "frozenlist" -version = "1.4.0" +version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" files = [ - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, - {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, - {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, - {file = 
"frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, - {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, - {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, - {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, - {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, - 
{file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, - {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, - {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, - {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"}, 
+ {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"}, + {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"}, + {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"}, + {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"}, + {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"}, + {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"}, + {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"}, + {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"}, + {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"}, + {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"}, + {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"}, + {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"}, + {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"}, + {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"}, + {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, ] [[package]] @@ -2295,109 +2335,131 @@ files = [ [[package]] name = "pydantic" -version = "2.7.1" +version = "2.10.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, - {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, + {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"}, + {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"}, ] [package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.18.2" -typing-extensions = ">=4.6.1" +annotated-types = ">=0.6.0" +pydantic-core = "2.27.2" +typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.18.2" +version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" 
optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, - {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, - {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, - {file = 
"pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, - {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, - {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, - {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, - {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, - {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, - {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, - {file = 
"pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, - {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, - {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, - {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, - {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, - {file = 
"pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, - {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7"}, + {file = 
"pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win32.whl", hash = "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win32.whl", hash = "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc"}, + {file = 
"pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35"}, + {file = "pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39"}, ] [package.dependencies] @@ -2638,6 +2700,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2024.1" @@ -2992,17 +3068,18 @@ mpmath = ">=0.19" [[package]] name = "testcontainers" -version = "4.8.1" +version = "4.9.0" description = "Python library for throwaway instances of anything that can run in a Docker container" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "testcontainers-4.8.1-py3-none-any.whl", hash = "sha256:d8ae43e8fe34060fcd5c3f494e0b7652b7774beabe94568a2283d0881e94d489"}, - {file = "testcontainers-4.8.1.tar.gz", hash = "sha256:5ded4820b7227ad526857eb3caaafcabce1bbac05d22ad194849b136ffae3cb0"}, + {file = "testcontainers-4.9.0-py3-none-any.whl", hash = "sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32"}, + {file = "testcontainers-4.9.0.tar.gz", hash = "sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e"}, ] [package.dependencies] docker = "*" +python-dotenv = "*" typing-extensions = "*" urllib3 = "*" wrapt = "*" @@ -3160,13 +3237,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.6.1" -description = "Backported and Experimental Type Hints for Python 3.7+" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.6.1-py3-none-any.whl", hash = "sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f"}, - {file = "typing_extensions-4.6.1.tar.gz", hash = "sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -3309,16 +3386,6 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, - {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, - {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -3475,54 +3542,108 @@ propcache = ">=0.2.0" [[package]] name = "zstandard" -version = "0.21.0" +version = "0.23.0" description = "Zstandard bindings for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, - {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8257752b97134477fb4e413529edaa04fc0457361d304c1319573de00ba796b1"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c053b7c4cbf71cc26808ed67ae955836232f7638444d709bfc302d3e499364fa"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2769730c13638e08b7a983b32cb67775650024632cd0476bf1ba0e6360f5ac7d"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:7d3bc4de588b987f3934ca79140e226785d7b5e47e31756761e48644a45a6766"}, - {file = "zstandard-0.21.0-cp310-cp310-win32.whl", hash = "sha256:67829fdb82e7393ca68e543894cd0581a79243cc4ec74a836c305c70a5943f07"}, - {file = "zstandard-0.21.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6048a287f8d2d6e8bc67f6b42a766c61923641dd4022b7fd3f7439e17ba5a4d"}, - {file = "zstandard-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7f2afab2c727b6a3d466faee6974a7dad0d9991241c498e7317e5ccf53dbc766"}, - {file = "zstandard-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff0852da2abe86326b20abae912d0367878dd0854b8931897d44cfeb18985472"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d12fa383e315b62630bd407477d750ec96a0f438447d0e6e496ab67b8b451d39"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1b9703fe2e6b6811886c44052647df7c37478af1b4a1a9078585806f42e5b15"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df28aa5c241f59a7ab524f8ad8bb75d9a23f7ed9d501b0fed6d40ec3064784e8"}, - {file = "zstandard-0.21.0-cp311-cp311-win32.whl", hash = "sha256:0aad6090ac164a9d237d096c8af241b8dcd015524ac6dbec1330092dba151657"}, - {file = "zstandard-0.21.0-cp311-cp311-win_amd64.whl", hash = "sha256:48b6233b5c4cacb7afb0ee6b4f91820afbb6c0e3ae0fa10abbc20000acdf4f11"}, - {file = "zstandard-0.21.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7d560ce14fd209db6adacce8908244503a009c6c39eee0c10f138996cd66d3e"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e6e131a4df2eb6f64961cea6f979cdff22d6e0d5516feb0d09492c8fd36f3bc"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e0c62a67ff425927898cf43da2cf6b852289ebcc2054514ea9bf121bec10a5"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1545fb9cb93e043351d0cb2ee73fa0ab32e61298968667bb924aac166278c3fc"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe6c821eb6870f81d73bf10e5deed80edcac1e63fbc40610e61f340723fd5f7c"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ddb086ea3b915e50f6604be93f4f64f168d3fc3cef3585bb9a375d5834392d4f"}, - {file = "zstandard-0.21.0-cp37-cp37m-win32.whl", hash = "sha256:57ac078ad7333c9db7a74804684099c4c77f98971c151cee18d17a12649bc25c"}, - {file = "zstandard-0.21.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1243b01fb7926a5a0417120c57d4c28b25a0200284af0525fddba812d575f605"}, - {file = "zstandard-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea68b1ba4f9678ac3d3e370d96442a6332d431e5050223626bdce748692226ea"}, - {file = "zstandard-0.21.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8070c1cdb4587a8aa038638acda3bd97c43c59e1e31705f2766d5576b329e97c"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af612c96599b17e4930fe58bffd6514e6c25509d120f4eae6031b7595912f85"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff891e37b167bc477f35562cda1248acc115dbafbea4f3af54ec70821090965"}, - {file = 
"zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9fec02ce2b38e8b2e86079ff0b912445495e8ab0b137f9c0505f88ad0d61296"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdbe350691dec3078b187b8304e6a9c4d9db3eb2d50ab5b1d748533e746d099"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b69cccd06a4a0a1d9fb3ec9a97600055cf03030ed7048d4bcb88c574f7895773"}, - {file = "zstandard-0.21.0-cp38-cp38-win32.whl", hash = "sha256:9980489f066a391c5572bc7dc471e903fb134e0b0001ea9b1d3eff85af0a6f1b"}, - {file = "zstandard-0.21.0-cp38-cp38-win_amd64.whl", hash = "sha256:0e1e94a9d9e35dc04bf90055e914077c80b1e0c15454cc5419e82529d3e70728"}, - {file = "zstandard-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2d61675b2a73edcef5e327e38eb62bdfc89009960f0e3991eae5cc3d54718de"}, - {file = "zstandard-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25fbfef672ad798afab12e8fd204d122fca3bc8e2dcb0a2ba73bf0a0ac0f5f07"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62957069a7c2626ae80023998757e27bd28d933b165c487ab6f83ad3337f773d"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e10ed461e4807471075d4b7a2af51f5234c8f1e2a0c1d37d5ca49aaaad49e8"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cff89a036c639a6a9299bf19e16bfb9ac7def9a7634c52c257166db09d950e7"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52b2b5e3e7670bd25835e0e0730a236f2b0df87672d99d3bf4bf87248aa659fb"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b1367da0dde8ae5040ef0413fb57b5baeac39d8931c70536d5f013b11d3fc3a5"}, - {file = "zstandard-0.21.0-cp39-cp39-win32.whl", hash = "sha256:db62cbe7a965e68ad2217a056107cc43d41764c66c895be05cf9c8b19578ce9c"}, - {file = "zstandard-0.21.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a"}, - {file = "zstandard-0.21.0.tar.gz", hash = "sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e"}, + {file = 
"zstandard-0.23.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c"}, + {file = "zstandard-0.23.0-cp310-cp310-win32.whl", hash = "sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813"}, + {file = "zstandard-0.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473"}, + {file = "zstandard-0.23.0-cp311-cp311-win32.whl", hash = "sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160"}, + {file = "zstandard-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35"}, + {file = "zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d"}, + {file = "zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2"}, + {file = 
"zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33"}, + {file = "zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd"}, + {file = "zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512"}, + {file = 
"zstandard-0.23.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e"}, + {file = "zstandard-0.23.0-cp38-cp38-win32.whl", hash = "sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9"}, + {file = "zstandard-0.23.0-cp38-cp38-win_amd64.whl", hash = "sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5"}, + {file = "zstandard-0.23.0-cp39-cp39-win32.whl", hash = "sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274"}, + {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, + {file = "zstandard-0.23.0.tar.gz", hash = 
"sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] [package.dependencies] @@ -3534,4 +3655,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1" +content-hash = "e6904aca09abc6c805604b21a5702a97e0056406f9ec7469b091d35ee10a6b16" diff --git a/pyproject.toml b/pyproject.toml index ba4ab0b1f7..735d12d756 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,11 +7,11 @@ package-mode = false python = "^3.11" pytest = "^7.4.4" psycopg2-binary = "^2.9.10" -typing-extensions = "^4.6.1" +typing-extensions = "^4.12.2" PyJWT = {version = "^2.1.0", extras = ["crypto"]} requests = "^2.32.3" pytest-xdist = "^3.3.1" -asyncpg = "^0.29.0" +asyncpg = "^0.30.0" aiopg = "^1.4.0" Jinja2 = "^3.1.5" types-requests = "^2.31.0.0" @@ -36,7 +36,7 @@ aiohttp = "3.10.11" pytest-rerunfailures = "^15.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" -zstandard = "^0.21.0" +zstandard = "^0.23.0" httpx = {extras = ["http2"], version = "^0.26.0"} pytest-repeat = "^0.9.3" websockets = "^12.0" @@ -47,8 +47,9 @@ h2 = "^4.1.0" types-jwcrypto = "^1.5.0.20240925" pyyaml = "^6.0.2" types-pyyaml = "^6.0.12.20240917" -testcontainers = "^4.8.1" -jsonnet = "^0.20.0" +testcontainers = "^4.9.0" +# Jsonnet doesn't support Python 3.13 yet +jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" } [tool.poetry.group.dev.dependencies] mypy = "==1.13.0" diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index 787790103f..71963355b7 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -3,12 +3,11 @@ from __future__ import annotations import enum import os import shutil +import sys from enum import StrEnum from pathlib import Path from typing import TYPE_CHECKING, cast -# Docs are available at https://jsonnet.org/ref/bindings.html#python_api -import _jsonnet import pytest import requests import yaml @@ -87,6 +86,10 @@ def jsonnet_evaluate_file( ext_vars: str | dict[str, str] | None = None, tla_vars: str | dict[str, str] | None = None, ) -> str: + # Jsonnet doesn't support Python 3.13 yet + # Docs are available at https://jsonnet.org/ref/bindings.html#python_api + import _jsonnet + return cast( "str", _jsonnet.evaluate_file( @@ -121,6 +124,7 @@ class SqlExporterProcess(StrEnum): AUTOSCALING = "autoscaling" +@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "collector_name", ["neon_collector", "neon_collector_autoscaling"], @@ -352,6 +356,7 @@ else: self.__proc.wait() +@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "exporter", [SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING], From 02f81b6469c88187ddda548c2dbe32c8c5a9a41d Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 6 Jan 2025 20:28:33 +0000 Subject: [PATCH 18/44] Fix clippy warning on macOS (#10282) ## Problem On macOS: ``` error: unused variable: `disable_lfc_resizing` --> compute_tools/src/bin/compute_ctl.rs:431:9 | 431 | disable_lfc_resizing, | ^^^^^^^^^^^^^^^^^^^^ help: try ignoring the field: `disable_lfc_resizing: _` | = note: `-D unused-variables` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unused_variables)]` ``` ## Summary of changes - Initialise `disable_lfc_resizing` only on Linux (because it's used on 
Linux only in further bloc) --- compute_tools/src/bin/compute_ctl.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 26ae25ec20..6ede5fdceb 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -428,6 +428,7 @@ fn start_postgres( let &ComputeSpec { swap_size_bytes, disk_quota_bytes, + #[cfg(target_os = "linux")] disable_lfc_resizing, .. } = &state.pspec.as_ref().unwrap().spec; From 30863c010421affd737977dbfe21ea08f18a43cb Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 7 Jan 2025 10:07:38 +0100 Subject: [PATCH 19/44] libpagestore: timeout = max(0, difference), not min(0, difference) (#10274) Using `min(0, ...)` causes us to fail to wait in most situations, so a lack of data would be a hot wait loop, which is bad. ## Problem We noticed high CPU usage in some situations --- pgxn/neon/libpagestore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 88d0a5292b..fa2a570ea8 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -680,7 +680,7 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer) * but in the cases that take exceptionally long, it's useful to log the * exact timestamps. */ -#define LOG_INTERVAL_US UINT64CONST(10 * 1000000) +#define LOG_INTERVAL_MS INT64CONST(10 * 1000) INSTR_TIME_SET_CURRENT(now); start_ts = last_log_ts = now; @@ -694,7 +694,7 @@ retry: WaitEvent event; long timeout; - timeout = Min(0, LOG_INTERVAL_US - INSTR_TIME_GET_MICROSEC(since_last_log)); + timeout = Max(0, LOG_INTERVAL_MS - INSTR_TIME_GET_MILLISEC(since_last_log)); /* Sleep until there's something to do */ (void) WaitEventSetWait(shard->wes_read, timeout, &event, 1, @@ -723,7 +723,7 @@ retry: INSTR_TIME_SET_CURRENT(now); since_last_log = now; INSTR_TIME_SUBTRACT(since_last_log, last_log_ts); - if (INSTR_TIME_GET_MICROSEC(since_last_log) >= LOG_INTERVAL_US) + if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS) { since_start = now; INSTR_TIME_SUBTRACT(since_start, start_ts); From ea84ec357fa4caa5a48ec65a0aab9e37d1a9fda4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Tue, 7 Jan 2025 11:36:05 +0100 Subject: [PATCH 20/44] Split promote-images into promote-images-dev and promote-images-prod (#10267) ## Problem `trigger-e2e-tests` waits half an hour before starting to run. Nearly half of that time can be saved by promoting images before tests on them are complete, so the e2e tests can run in parallel. On `main` and `release{,-proxy,-compute}`, `promote-images` updates `latest` and pushes things to prod ecr, so we want to run `promote-images` only after `test-images` is done, but on other branches, there is no harm in promoting images that aren't tested yet. ## Summary of changes To promote images into dev container registries sooner, `promote-images` is split into `promote-images-dev` and `promote-images-prod`. The former pushes to dev container registries, the latter to prod ones. The latter also waits for `test-images`, while the former doesn't. This allows to run `trigger-e2e-tests` sooner. 
--- .github/workflows/actionlint.yml | 2 +- .github/workflows/build_and_test.yml | 35 +++++++++++++++++++------ .github/workflows/trigger-e2e-tests.yml | 8 +++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml index 85cfe7446e..0e53830040 100644 --- a/.github/workflows/actionlint.yml +++ b/.github/workflows/actionlint.yml @@ -33,7 +33,7 @@ jobs: # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086 SHELLCHECK_OPTS: --exclude=SC2046,SC2086 with: - fail_on_error: true + fail_level: error filter_mode: nofilter level: error diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 12b1ac98ac..5c2b397c82 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -538,7 +538,7 @@ jobs: trigger-e2e-tests: if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} - needs: [ check-permissions, promote-images, tag ] + needs: [ check-permissions, promote-images-dev, tag ] uses: ./.github/workflows/trigger-e2e-tests.yml secrets: inherit @@ -930,8 +930,8 @@ jobs: docker compose -f ./docker-compose/docker-compose.yml logs || 0 docker compose -f ./docker-compose/docker-compose.yml down - promote-images: - needs: [ check-permissions, tag, test-images, vm-compute-node-image ] + promote-images-dev: + needs: [ check-permissions, tag, vm-compute-node-image ] runs-on: ubuntu-22.04 permissions: @@ -965,6 +965,25 @@ jobs: neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} done + promote-images-prod: + needs: [ check-permissions, tag, test-images, vm-compute-node-image ] + runs-on: ubuntu-22.04 + if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' + + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read + + env: + VERSIONS: v14 v15 v16 v17 + + steps: + - uses: docker/login-action@v3 + with: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + - name: Add latest tag to images if: github.ref_name == 'main' run: | @@ -1010,7 +1029,7 @@ jobs: push-to-acr-dev: if: github.ref_name == 'main' - needs: [ tag, promote-images ] + needs: [ tag, promote-images-dev ] uses: ./.github/workflows/_push-to-acr.yml with: client_id: ${{ vars.AZURE_DEV_CLIENT_ID }} @@ -1022,7 +1041,7 @@ jobs: push-to-acr-prod: if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ tag, promote-images ] + needs: [ tag, promote-images-prod ] uses: ./.github/workflows/_push-to-acr.yml with: client_id: ${{ vars.AZURE_PROD_CLIENT_ID }} @@ -1112,7 +1131,7 @@ jobs: exit 1 deploy: - needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] + needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` if: (github.ref_name == 'main' || 
github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() permissions: @@ -1333,7 +1352,7 @@ jobs: done pin-build-tools-image: - needs: [ build-build-tools-image, promote-images, build-and-test-locally ] + needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ] if: github.ref_name == 'main' uses: ./.github/workflows/pin-build-tools-image.yml with: @@ -1356,7 +1375,7 @@ jobs: - build-and-test-locally - check-codestyle-python - check-codestyle-rust - - promote-images + - promote-images-dev - test-images - trigger-custom-extensions-build-and-wait runs-on: ubuntu-22.04 diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index 70c2e8549f..31696248b0 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -68,7 +68,7 @@ jobs: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} TAG: ${{ needs.tag.outputs.build-tag }} steps: - - name: Wait for `promote-images` job to finish + - name: Wait for `promote-images-dev` job to finish # It's important to have a timeout here, the script in the step can run infinitely timeout-minutes: 60 run: | @@ -79,17 +79,17 @@ jobs: # For PRs we use the run id as the tag BUILD_AND_TEST_RUN_ID=${TAG} while true; do - conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion') + conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion') case "$conclusion" in success) break ;; failure | cancelled | skipped) - echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..." + echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..." exit 1 ;; *) - echo "The 'promote-images' hasn't succeed yet. Waiting..." + echo "The 'promote-images-dev' hasn't succeed yet. Waiting..." sleep 60 ;; esac From be38123e62b029dcd9f9cc0beb765ad2d3333906 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 7 Jan 2025 11:41:52 +0100 Subject: [PATCH 21/44] Fix accounting of dropped prefetched GetPage requests (#10276) Apparently, we failed to do this bookkeeping in quite a few places... ## Problem Fixes https://github.com/neondatabase/cloud/issues/22364 ## Summary of changes Add accounting of dropped requests. Note that this includes prefetches dropped due to things like "PS connection dropped unexpectedly" or "prefetch queue is already full", but *not* (yet?) "dropped due to backend shutdown". 
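The bookkeeping rule this commit message describes is easy to miss: every code path that retires a prefetch slot early must bump the discard counters, not only the main receive path, while drops caused by backend shutdown are deliberately left uncounted for now. Below is a minimal Rust sketch of that rule; the real change is in C (`pgxn/neon/pagestore_smgr.c`, shown in the diff that follows), and the types and names here are illustrative stand-ins rather than the actual counters.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

/// Toy stand-ins for the expired/discarded counters touched in the C diff below.
#[derive(Default)]
struct PrefetchCounters {
    expired: AtomicU64,
    discards: AtomicU64,
}

/// Why a prefetch slot was retired without its result being consumed.
enum DropReason {
    ConnectionLost,  // pageserver connection dropped unexpectedly
    QueueFull,       // prefetch queue is already full
    BackendShutdown, // intentionally not counted (yet), as the message notes
}

impl PrefetchCounters {
    /// Every early-exit path that discards a prefetch calls this,
    /// not just the main receive path.
    fn record_drop(&self, reason: DropReason) {
        match reason {
            DropReason::ConnectionLost | DropReason::QueueFull => {
                self.expired.fetch_add(1, Ordering::Relaxed);
                self.discards.fetch_add(1, Ordering::Relaxed);
            }
            DropReason::BackendShutdown => {}
        }
    }
}

fn main() {
    let counters = PrefetchCounters::default();
    counters.record_drop(DropReason::ConnectionLost);
    counters.record_drop(DropReason::QueueFull);
    counters.record_drop(DropReason::BackendShutdown);
    assert_eq!(counters.expired.load(Ordering::Relaxed), 2);
    assert_eq!(counters.discards.load(Ordering::Relaxed), 2);
}
```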
--- pgxn/neon/pagestore_smgr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 385905d9ce..b733807026 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -716,6 +716,8 @@ prefetch_on_ps_disconnect(void) MyPState->ring_receive += 1; prefetch_set_unused(ring_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; } /* @@ -935,7 +937,8 @@ Retry: prefetch_set_unused(ring_index); entry = NULL; slot = NULL; - MyNeonCounters->getpage_prefetch_discards_total++; + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; } } @@ -1026,10 +1029,14 @@ Retry: if (!prefetch_wait_for(cleanup_index)) goto Retry; prefetch_set_unused(cleanup_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; break; case PRFS_RECEIVED: case PRFS_TAG_REMAINS: prefetch_set_unused(cleanup_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; break; default: pg_unreachable(); From 4aa9786c6bffe8094cfc82947504d2044e284d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Tue, 7 Jan 2025 14:45:18 +0100 Subject: [PATCH 22/44] Fix promote-images-prod after splitting it out (#10292) ## Problem `promote-images` was split into `promote-images-dev` and `promote-images-prod` in https://github.com/neondatabase/neon/pull/10267. `dev` credentials were loaded in `promote-images-dev` and `prod` credentials were loaded in `promote-images-prod`, but `promote-images-prod` needs `dev` credentials as well to access the `dev` images to replicate them from `dev` to `prod`. ## Summary of changes Load `dev` credentials in `promote-images-prod` as well. --- .github/workflows/build_and_test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5c2b397c82..01f5c3ede9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -979,6 +979,16 @@ jobs: VERSIONS: v14 v15 v16 v17 steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 + - uses: docker/login-action@v3 with: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} From 0a117fb1f1a9c2a062c8a60d53f2eb00637392e8 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Tue, 7 Jan 2025 16:24:54 +0100 Subject: [PATCH 23/44] proxy: Parse Notification twice only for unknown topic (#10296) ## Problem We currently parse Notification twice even in the happy path. ## Summary of changes Use `#[serde(other)]` to catch unknown topics and defer the second parsing. --- proxy/src/redis/notifications.rs | 55 ++++++++++++-------------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 4383d6be2c..bf9d61ded3 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -37,7 +37,6 @@ struct NotificationHeader<'a> { #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] #[serde(tag = "topic", content = "data")] -// Message to contributors: Make sure to align these topic names with the list below. 
pub(crate) enum Notification { #[serde( rename = "/allowed_ips_updated", @@ -74,21 +73,9 @@ pub(crate) enum Notification { PasswordUpdate { password_update: PasswordUpdate }, #[serde(rename = "/cancel_session")] Cancel(CancelSession), -} -/// Returns true if the topic name given is a known topic that we can deserialize and action on. -/// Returns false otherwise. -fn known_topic(s: &str) -> bool { - // Message to contributors: Make sure to align these topic names with the enum above. - matches!( - s, - "/allowed_ips_updated" - | "/block_public_or_vpc_access_updated" - | "/allowed_vpc_endpoints_updated_for_org" - | "/allowed_vpc_endpoints_updated_for_projects" - | "/password_updated" - | "/cancel_session" - ) + #[serde(other, skip_serializing)] + UnknownTopic, } #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] @@ -178,32 +165,29 @@ impl MessageHandler { let payload: String = msg.get_payload()?; tracing::debug!(?payload, "received a message payload"); - // For better error handling, we first parse the payload to extract the topic. - // If there's a topic we don't support, we can handle that error more gracefully. - let header: NotificationHeader = match serde_json::from_str(&payload) { - Ok(msg) => msg, - Err(e) => { - Metrics::get().proxy.redis_errors_total.inc(RedisErrors { - channel: msg.get_channel_name(), - }); - tracing::error!("broken message: {e}"); + let msg: Notification = match serde_json::from_str(&payload) { + Ok(Notification::UnknownTopic) => { + match serde_json::from_str::(&payload) { + // don't update the metric for redis errors if it's just a topic we don't know about. + Ok(header) => tracing::warn!(topic = header.topic, "unknown topic"), + Err(e) => { + Metrics::get().proxy.redis_errors_total.inc(RedisErrors { + channel: msg.get_channel_name(), + }); + tracing::error!("broken message: {e}"); + } + }; return Ok(()); } - }; - - if !known_topic(header.topic) { - // don't update the metric for redis errors if it's just a topic we don't know about. - tracing::warn!(topic = header.topic, "unknown topic"); - return Ok(()); - } - - let msg: Notification = match serde_json::from_str(&payload) { Ok(msg) => msg, Err(e) => { Metrics::get().proxy.redis_errors_total.inc(RedisErrors { channel: msg.get_channel_name(), }); - tracing::error!(topic = header.topic, "broken message: {e}"); + match serde_json::from_str::(&payload) { + Ok(header) => tracing::error!(topic = header.topic, "broken message: {e}"), + Err(_) => tracing::error!("broken message: {e}"), + }; return Ok(()); } }; @@ -278,6 +262,8 @@ impl MessageHandler { invalidate_cache(cache, msg); }); } + + Notification::UnknownTopic => unreachable!(), } Ok(()) @@ -304,6 +290,7 @@ fn invalidate_cache(cache: Arc, msg: Notification) { Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => { // https://github.com/neondatabase/neon/pull/10073 } + Notification::UnknownTopic => unreachable!(), } } From 43a5e575d611c546c9bf5d538a57d8984560589d Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Tue, 7 Jan 2025 21:00:56 +0100 Subject: [PATCH 24/44] ci: use reusable workflow for MacOs build (#9889) Closes: https://github.com/neondatabase/cloud/issues/17784 ## Problem Currently, we run the whole CI pipeline for any changes. It's slow and expensive. 
## Suggestion Starting with MacOs builds: - check what files were changed - rebuild only needed parts - reuse results from previous builds when available - run builds in parallel when possible --------- Co-authored-by: Alexander Bayandin --- .github/file-filters.yaml | 12 ++ .github/workflows/build-macos.yml | 241 ++++++++++++++++++++++++ .github/workflows/neon_extra_builds.yml | 139 ++++---------- 3 files changed, 290 insertions(+), 102 deletions(-) create mode 100644 .github/file-filters.yaml create mode 100644 .github/workflows/build-macos.yml diff --git a/.github/file-filters.yaml b/.github/file-filters.yaml new file mode 100644 index 0000000000..886cd3919a --- /dev/null +++ b/.github/file-filters.yaml @@ -0,0 +1,12 @@ +rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock'] + +v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**'] +v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**'] +v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**'] +v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**'] + +rebuild_neon_extra: + - .github/workflows/neon_extra_builds.yml + +rebuild_macos: + - .github/workflows/build-macos.yml diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml new file mode 100644 index 0000000000..01d82a1ed2 --- /dev/null +++ b/.github/workflows/build-macos.yml @@ -0,0 +1,241 @@ +name: Check neon with MacOS builds + +on: + workflow_call: + inputs: + pg_versions: + description: "Array of the pg versions to build for, for example: ['v14', 'v17']" + type: string + default: '[]' + required: false + rebuild_rust_code: + description: "Rebuild Rust code" + type: boolean + default: false + required: false + rebuild_everything: + description: "If true, rebuild for all versions" + type: boolean + default: false + required: false + +env: + RUST_BACKTRACE: 1 + COPT: '-Werror' + +# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow +# We should care about that as Github has limitations: +# - You can connect up to four levels of workflows +# - You can call a maximum of 20 unique reusable workflows from a single workflow file. 
+# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations +jobs: + build-pgxn: + if: | + (inputs.pg_versions != '[]' || inputs.rebuild_everything) && ( + contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') || + contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || + github.ref_name == 'main' + ) + timeout-minutes: 30 + runs-on: macos-15 + strategy: + matrix: + postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }} + env: + # Use release build only, to have less debug info around + # Hence keeping target/ (and general cache size) smaller + BUILD_TYPE: release + steps: + - name: Checkout main repo + uses: actions/checkout@v4 + + - name: Set pg ${{ matrix.postgres-version }} for caching + id: pg_rev + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}" + + - name: Cache postgres ${{ matrix.postgres-version }} build + id: cache_pg + uses: actions/cache@v4 + with: + path: pg_install/${{ matrix.postgres-version }} + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + + - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }} + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + git submodule init vendor/postgres-${{ matrix.postgres-version }} + git submodule update --depth 1 --recursive + + - name: Install build dependencies + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + brew install flex bison openssl protobuf icu4c + + - name: Set extra env for macOS + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV + echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV + + - name: Build Postgres ${{ matrix.postgres-version }} + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu) + + - name: Build Neon Pg Ext ${{ matrix.postgres-version }} + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu) + + - name: Get postgres headers ${{ matrix.postgres-version }} + if: steps.cache_pg.outputs.cache-hit != 'true' + run: | + make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu) + + build-walproposer-lib: + if: | + (inputs.pg_versions != '[]' || inputs.rebuild_everything) && ( + contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') || + contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || + github.ref_name == 'main' + ) + timeout-minutes: 30 + runs-on: macos-15 + needs: [build-pgxn] + env: + # Use release build only, to have less debug info around + # Hence keeping target/ (and general cache size) smaller + BUILD_TYPE: release + steps: + - name: Checkout main repo + uses: actions/checkout@v4 + + - name: Set pg v17 for caching + id: pg_rev + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}" + + - name: Cache postgres v17 build + id: cache_pg + uses: actions/cache@v4 + with: + path: pg_install/v17 + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + + - name: Cache walproposer-lib + id: cache_walproposer_lib + uses: actions/cache@v4 + with: + path: 
pg_install/build/walproposer-lib + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + + - name: Checkout submodule vendor/postgres-v17 + if: steps.cache_walproposer_lib.outputs.cache-hit != 'true' + run: | + git submodule init vendor/postgres-v17 + git submodule update --depth 1 --recursive + + - name: Install build dependencies + if: steps.cache_walproposer_lib.outputs.cache-hit != 'true' + run: | + brew install flex bison openssl protobuf icu4c + + - name: Set extra env for macOS + if: steps.cache_walproposer_lib.outputs.cache-hit != 'true' + run: | + echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV + echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV + + - name: Build walproposer-lib (only for v17) + if: steps.cache_walproposer_lib.outputs.cache-hit != 'true' + run: + make walproposer-lib -j$(sysctl -n hw.ncpu) + + cargo-build: + if: | + (inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && ( + contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') || + contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || + github.ref_name == 'main' + ) + timeout-minutes: 30 + runs-on: macos-15 + needs: [build-pgxn, build-walproposer-lib] + env: + # Use release build only, to have less debug info around + # Hence keeping target/ (and general cache size) smaller + BUILD_TYPE: release + steps: + - name: Checkout main repo + uses: actions/checkout@v4 + with: + submodules: true + + - name: Set pg v14 for caching + id: pg_rev_v14 + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}" + - name: Set pg v15 for caching + id: pg_rev_v15 + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}" + - name: Set pg v16 for caching + id: pg_rev_v16 + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}" + - name: Set pg v17 for caching + id: pg_rev_v17 + run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}" + + - name: Cache postgres v14 build + id: cache_pg + uses: actions/cache@v4 + with: + path: pg_install/v14 + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + - name: Cache postgres v15 build + id: cache_pg_v15 + uses: actions/cache@v4 + with: + path: pg_install/v15 + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + - name: Cache postgres v16 build + id: cache_pg_v16 + uses: actions/cache@v4 + with: + path: pg_install/v16 + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + - name: Cache postgres v17 build + id: cache_pg_v17 + uses: actions/cache@v4 + with: + path: pg_install/v17 + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + + - name: Cache cargo deps (only for v17) + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + !~/.cargo/registry/src + ~/.cargo/git + target + key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust + + - name: Cache walproposer-lib + id: cache_walproposer_lib + uses: actions/cache@v4 + with: + path: 
pg_install/build/walproposer-lib + key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + + - name: Install build dependencies + run: | + brew install flex bison openssl protobuf icu4c + + - name: Set extra env for macOS + run: | + echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV + echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV + + - name: Run cargo build (only for v17) + run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu) + + - name: Check that no warnings are produced (only for v17) + run: ./run_clippy.sh diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index 1f85c2e102..5b5910badf 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -31,19 +31,15 @@ jobs: uses: ./.github/workflows/build-build-tools-image.yml secrets: inherit - check-macos-build: - needs: [ check-permissions ] - if: | - contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') || - contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || - github.ref_name == 'main' - timeout-minutes: 90 - runs-on: macos-15 - - env: - # Use release build only, to have less debug info around - # Hence keeping target/ (and general cache size) smaller - BUILD_TYPE: release + files-changed: + name: Detect what files changed + runs-on: ubuntu-22.04 + timeout-minutes: 3 + outputs: + v17: ${{ steps.files_changed.outputs.v17 }} + postgres_changes: ${{ steps.postgres_changes.outputs.changes }} + rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }} + rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }} steps: - name: Checkout @@ -51,106 +47,45 @@ jobs: with: submodules: true - - name: Install macOS postgres dependencies - run: brew install flex bison openssl protobuf icu4c - - - name: Set pg 14 revision for caching - id: pg_v14_rev - run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT - - - name: Set pg 15 revision for caching - id: pg_v15_rev - run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT - - - name: Set pg 16 revision for caching - id: pg_v16_rev - run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT - - - name: Set pg 17 revision for caching - id: pg_v17_rev - run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT - - - name: Cache postgres v14 build - id: cache_pg_14 - uses: actions/cache@v4 + - name: Check for Postgres changes + uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3 + id: files_changed with: - path: pg_install/v14 - key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} + token: ${{ github.token }} + filters: .github/file-filters.yaml + base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }} + ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }} - - name: Cache postgres v15 build - id: cache_pg_15 - uses: actions/cache@v4 - with: - path: pg_install/v15 - key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} - - - name: Cache postgres v16 build - id: cache_pg_16 - uses: 
actions/cache@v4 - with: - path: pg_install/v16 - key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} - - - name: Cache postgres v17 build - id: cache_pg_17 - uses: actions/cache@v4 - with: - path: pg_install/v17 - key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }} - - - name: Set extra env for macOS + - name: Filter out only v-string for build matrix + id: postgres_changes run: | - echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV - echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV + v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.chnages }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c) + echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}" - - name: Cache cargo deps - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - !~/.cargo/registry/src - ~/.cargo/git - target - key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust - - - name: Build postgres v14 - if: steps.cache_pg_14.outputs.cache-hit != 'true' - run: make postgres-v14 -j$(sysctl -n hw.ncpu) - - - name: Build postgres v15 - if: steps.cache_pg_15.outputs.cache-hit != 'true' - run: make postgres-v15 -j$(sysctl -n hw.ncpu) - - - name: Build postgres v16 - if: steps.cache_pg_16.outputs.cache-hit != 'true' - run: make postgres-v16 -j$(sysctl -n hw.ncpu) - - - name: Build postgres v17 - if: steps.cache_pg_17.outputs.cache-hit != 'true' - run: make postgres-v17 -j$(sysctl -n hw.ncpu) - - - name: Build neon extensions - run: make neon-pg-ext -j$(sysctl -n hw.ncpu) - - - name: Build walproposer-lib - run: make walproposer-lib -j$(sysctl -n hw.ncpu) - - - name: Run cargo build - run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release - - - name: Check that no warnings are produced - run: ./run_clippy.sh + check-macos-build: + needs: [ check-permissions, files-changed ] + if: | + contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') || + contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || + github.ref_name == 'main' + uses: ./.github/workflows/build-macos.yml + with: + pg_versions: ${{ needs.files-changed.outputs.postgres_changes }} + rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }} + rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }} gather-rust-build-stats: - needs: [ check-permissions, build-build-tools-image ] + needs: [ check-permissions, build-build-tools-image, files-changed ] permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write contents: write if: | - contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') || - contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || - github.ref_name == 'main' + (needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && ( + contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') || + contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || + github.ref_name == 'main' + ) runs-on: [ self-hosted, large ] container: image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm From 237dae71a1bd00d1611fb2298b2e3cb13883155b Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 7 Jan 2025 23:49:00 +0100 Subject: 
[PATCH 25/44] Revert "pageserver,safekeeper: disable heap profiling (#10268)" (#10303) This reverts commit b33299dc37d9269fe55bd3256b7a4a72c129b81c. Heap profiles weren't the culprit after all. Touches #10225. --- pageserver/src/bin/pageserver.rs | 10 ++++------ safekeeper/src/bin/safekeeper.rs | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index b92ff4ebf9..567a69da3b 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -53,12 +53,10 @@ project_build_tag!(BUILD_TAG); #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -// TODO: disabled because concurrent CPU profiles cause seg faults. See: -// https://github.com/neondatabase/neon/issues/10225. -//#[allow(non_upper_case_globals)] -//#[export_name = "malloc_conf"] -//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "pageserver.pid"; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index e0ba38d638..13f6e34575 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -51,12 +51,10 @@ use utils::{ #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -// TODO: disabled because concurrent CPU profiles cause seg faults. See: -// https://github.com/neondatabase/neon/issues/10225. -//#[allow(non_upper_case_globals)] -//#[export_name = "malloc_conf"] -//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "safekeeper.pid"; const ID_FILE_NAME: &str = "safekeeper.id"; From 5c76e2a983295f3123631b0178309a942f584596 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 7 Jan 2025 18:24:17 -0500 Subject: [PATCH 26/44] fix(storage-scrubber): ignore errors if index_part is not consistent (#10304) ## Problem Consider the pageserver is doing the following sequence of operations: * upload X files * update index_part to add X and remove Y * delete Y files When storage scrubber obtains the initial timeline snapshot before "update index_part" (that is the old version that contains Y but not X), and then obtains the index_part file after it gets updated, it will report all Y files are missing. ## Summary of changes Do not report layer file missing if index_part listed and downloaded are not the same (i.e. 
different last_modified times) Signed-off-by: Alex Chi Z --- storage_scrubber/src/checks.rs | 24 +++++++++++++------ storage_scrubber/src/lib.rs | 7 +++--- .../src/pageserver_physical_gc.rs | 6 ++++- .../src/scan_pageserver_metadata.rs | 10 ++++++-- storage_scrubber/src/tenant_snapshot.rs | 2 ++ 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index 32c86052ef..b42709868b 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,4 +1,5 @@ use std::collections::{HashMap, HashSet}; +use std::time::SystemTime; use itertools::Itertools; use pageserver::tenant::checks::check_valid_layermap; @@ -88,9 +89,14 @@ pub(crate) async fn branch_cleanup_and_check_errors( match s3_data.blob_data { BlobDataParseResult::Parsed { index_part, - index_part_generation: _index_part_generation, - s3_layers: _s3_layers, + index_part_generation: _, + s3_layers: _, + index_part_last_modified_time, + index_part_snapshot_time, } => { + // Ignore missing file error if index_part downloaded is different from the one when listing the layer files. + let ignore_error = index_part_snapshot_time < index_part_last_modified_time + && !cfg!(debug_assertions); if !IndexPart::KNOWN_VERSIONS.contains(&index_part.version()) { result .errors @@ -171,7 +177,7 @@ pub(crate) async fn branch_cleanup_and_check_errors( is_l0, ); - if is_l0 { + if is_l0 || ignore_error { result.warnings.push(msg); } else { result.errors.push(msg); @@ -308,6 +314,8 @@ pub(crate) enum BlobDataParseResult { Parsed { index_part: Box, index_part_generation: Generation, + index_part_last_modified_time: SystemTime, + index_part_snapshot_time: SystemTime, s3_layers: HashSet<(LayerName, Generation)>, }, /// The remains of an uncleanly deleted Timeline or aborted timeline creation(e.g. an initdb archive only, or some layer without an index) @@ -484,9 +492,9 @@ async fn list_timeline_blobs_impl( } if let Some(index_part_object_key) = index_part_object.as_ref() { - let index_part_bytes = + let (index_part_bytes, index_part_last_modified_time) = match download_object_with_retries(remote_client, &index_part_object_key.key).await { - Ok(index_part_bytes) => index_part_bytes, + Ok(data) => data, Err(e) => { // It is possible that the branch gets deleted in-between we list the objects // and we download the index part file. @@ -500,7 +508,7 @@ async fn list_timeline_blobs_impl( )); } }; - + let index_part_snapshot_time = index_part_object_key.last_modified; match serde_json::from_slice(&index_part_bytes) { Ok(index_part) => { return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { @@ -508,6 +516,8 @@ async fn list_timeline_blobs_impl( index_part: Box::new(index_part), index_part_generation, s3_layers, + index_part_last_modified_time, + index_part_snapshot_time, }, unused_index_keys: index_part_keys, unknown_keys, @@ -625,7 +635,7 @@ pub(crate) async fn list_tenant_manifests( let manifest_bytes = match download_object_with_retries(remote_client, &latest_listing_object.key).await { - Ok(bytes) => bytes, + Ok((bytes, _)) => bytes, Err(e) => { // It is possible that the tenant gets deleted in-between we list the objects // and we download the manifest file. 
diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index be526daaf0..224235098c 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -13,7 +13,7 @@ pub mod tenant_snapshot; use std::env; use std::fmt::Display; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, SystemTime}; use anyhow::Context; use aws_config::retry::{RetryConfigBuilder, RetryMode}; @@ -509,10 +509,11 @@ async fn list_objects_with_retries( panic!("MAX_RETRIES is not allowed to be 0"); } +/// Returns content, last modified time async fn download_object_with_retries( remote_client: &GenericRemoteStorage, key: &RemotePath, -) -> anyhow::Result> { +) -> anyhow::Result<(Vec, SystemTime)> { let cancel = CancellationToken::new(); for trial in 0..MAX_RETRIES { let mut buf = Vec::new(); @@ -535,7 +536,7 @@ async fn download_object_with_retries( { Ok(bytes_read) => { tracing::debug!("Downloaded {bytes_read} bytes for object {key}"); - return Ok(buf); + return Ok((buf, download.last_modified)); } Err(e) => { error!("Failed to stream object body for key {key}: {e}"); diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index d19b8a5f91..a997373375 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -450,6 +450,8 @@ async fn gc_ancestor( index_part: _, index_part_generation: _, s3_layers, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => s3_layers, BlobDataParseResult::Relic => { // Post-deletion tenant location: don't try and GC it. @@ -586,7 +588,9 @@ async fn gc_timeline( BlobDataParseResult::Parsed { index_part, index_part_generation, - s3_layers: _s3_layers, + s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => (index_part, *index_part_generation, data.unused_index_keys), BlobDataParseResult::Relic => { // Post-deletion tenant location: don't try and GC it. 
diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index c8de6e46b3..a31fb5b242 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -47,6 +47,8 @@ impl MetadataSummary { index_part, index_part_generation: _, s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } = &data.blob_data { *self @@ -195,7 +197,9 @@ pub async fn scan_pageserver_metadata( if let BlobDataParseResult::Parsed { index_part, index_part_generation, - s3_layers: _s3_layers, + s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } = &data.blob_data { if index_part.deleted_at.is_some() { @@ -318,9 +322,11 @@ pub async fn scan_pageserver_metadata( match &data.blob_data { BlobDataParseResult::Parsed { - index_part: _index_part, + index_part: _, index_part_generation: _index_part_generation, s3_layers, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => { tenant_objects.push(ttid, s3_layers.clone()); } diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index 39e0b5c9b4..60e79fb859 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -268,6 +268,8 @@ impl SnapshotDownloader { index_part, index_part_generation, s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => { self.download_timeline( ttid, From dc284247a5b0d4fd442868c9dc555dd0ab50c0c3 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 8 Jan 2025 10:26:53 +0000 Subject: [PATCH 27/44] storage_controller: fix node flap detach race (#10298) ## Problem The observed state removal may race with the inline updates of the observed state done from `Service::node_activate_reconcile`. This was intended to work as follows: 1. Detaches while the node is unavailable remove the entry from the observed state. 2. `Service::node_activate_reconcile` diffs the locations returned by the pageserver with the observed state and detaches in-line when required. ## Summary of changes This PR removes step (1) and lets background reconciliations deal with the mismatch between the intent and observed state. A follow up will attempt to remove `Service::node_activate_reconcile` altogether. 
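To make the fix concrete, here is a deliberately simplified toy model of the intent/observed convergence the summary relies on; it is not the storage controller's real code, and every type and function name below is hypothetical. A detach issued while the node is offline only updates the intent; the observed entry stays in place until a background reconcile can actually reach the node and confirm the detach.

```rust
use std::collections::HashMap;

type NodeId = u64;

#[derive(Clone, Copy, PartialEq, Debug)]
enum Intent {
    Attached(NodeId),
    Detached,
}

struct Shard {
    intent: Intent,
    // Which location each node is believed to hold; an entry is only
    // removed once a detach has actually been confirmed on that node.
    observed: HashMap<NodeId, &'static str>,
}

fn node_available(node: NodeId, offline: &[NodeId]) -> bool {
    !offline.contains(&node)
}

/// Detach request while a node may be offline: do NOT erase the observed
/// entry optimistically; just record the new intent.
fn request_detach(shard: &mut Shard) {
    shard.intent = Intent::Detached;
}

/// Background reconciliation: compares intent with observed state and
/// detaches from any node that still holds the shard, once reachable.
fn reconcile(shard: &mut Shard, offline: &[NodeId]) {
    if shard.intent == Intent::Detached {
        shard.observed.retain(|node, _| {
            if node_available(*node, offline) {
                println!("detaching shard from node {node}");
                false // detach succeeded; drop the observed entry
            } else {
                true // node unreachable; keep the entry and retry later
            }
        });
    }
}

fn main() {
    let mut shard = Shard {
        intent: Intent::Attached(1),
        observed: HashMap::from([(1, "AttachedSingle")]),
    };

    // Node 1 goes offline and the shard is asked to detach.
    let offline = vec![1];
    request_detach(&mut shard);
    reconcile(&mut shard, &offline); // nothing happens yet
    assert!(!shard.observed.is_empty());

    // Node 1 comes back; the next background pass completes the detach.
    reconcile(&mut shard, &[]);
    assert!(shard.observed.is_empty());
}
```

The design point the sketch illustrates is that nothing edits the observed state optimistically on the failure path, so a node flapping back online cannot race with a stale in-memory view; the worst case is simply one extra reconcile pass.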
Closes https://github.com/neondatabase/neon/issues/10253 --- storage_controller/src/reconciler.rs | 16 ++- storage_controller/src/service.rs | 4 + test_runner/fixtures/neon_fixtures.py | 7 +- .../regress/test_storage_controller.py | 122 +++++++++++++++++- 4 files changed, 139 insertions(+), 10 deletions(-) diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 475f91eff4..e0a854fff7 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -14,7 +14,6 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; use utils::backoff::exponential_backoff; -use utils::failpoint_support; use utils::generation::Generation; use utils::id::{NodeId, TimelineId}; use utils::lsn::Lsn; @@ -212,11 +211,12 @@ impl Reconciler { lazy: bool, ) -> Result<(), ReconcileError> { if !node.is_available() && config.mode == LocationConfigMode::Detached { - // Attempts to detach from offline nodes may be imitated without doing I/O: a node which is offline - // will get fully reconciled wrt the shard's intent state when it is reactivated, irrespective of - // what we put into `observed`, in [`crate::service::Service::node_activate_reconcile`] - tracing::info!("Node {node} is unavailable during detach: proceeding anyway, it will be detached on next activation"); - self.observed.locations.remove(&node.get_id()); + // [`crate::service::Service::node_activate_reconcile`] will update the observed state + // when the node comes back online. At that point, the intent and observed states will + // be mismatched and a background reconciliation will detach. + tracing::info!( + "Node {node} is unavailable during detach: proceeding anyway, it will be detached via background reconciliation" + ); return Ok(()); } @@ -749,6 +749,8 @@ impl Reconciler { }; if increment_generation { + pausable_failpoint!("reconciler-pre-increment-generation"); + let generation = self .persistence .increment_generation(self.tenant_shard_id, node.get_id()) @@ -824,7 +826,7 @@ impl Reconciler { .handle_detach(self.tenant_shard_id, self.shard.stripe_size); } - failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue"); + pausable_failpoint!("reconciler-epilogue"); Ok(()) } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 222cb9fdd4..359fcb3288 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -83,6 +83,7 @@ use utils::{ generation::Generation, http::error::ApiError, id::{NodeId, TenantId, TimelineId}, + pausable_failpoint, sync::gate::Gate, }; @@ -1024,6 +1025,8 @@ impl Service { ) .await; + pausable_failpoint!("heartbeat-pre-node-state-configure"); + // This is the code path for geniune availability transitions (i.e node // goes unavailable and/or comes back online). let res = self @@ -2492,6 +2495,7 @@ impl Service { // Persist updates // Ordering: write to the database before applying changes in-memory, so that // we will not appear time-travel backwards on a restart. 
+ let mut schedule_context = ScheduleContext::default(); for ShardUpdate { tenant_shard_id, diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 8fd9eec8ce..00fdda2998 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2521,6 +2521,7 @@ class NeonPageserver(PgProtocol, LogUtils): self, extra_env_vars: dict[str, str] | None = None, timeout_in_seconds: int | None = None, + await_active: bool = True, ) -> Self: """ Start the page server. @@ -2547,8 +2548,10 @@ class NeonPageserver(PgProtocol, LogUtils): ) self.running = True - if self.env.storage_controller.running and self.env.storage_controller.node_registered( - self.id + if ( + await_active + and self.env.storage_controller.running + and self.env.storage_controller.node_registered(self.id) ): self.env.storage_controller.poll_node_status( self.id, PageserverAvailability.ACTIVE, None, max_attempts=200, backoff=0.1 diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 7062c35e05..973d0cdf82 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -17,6 +17,7 @@ from fixtures.compute_reconfigure import ComputeReconfigure from fixtures.log_helper import log from fixtures.neon_fixtures import ( DEFAULT_AZ_ID, + LogCursor, NeonEnv, NeonEnvBuilder, NeonPageserver, @@ -2406,7 +2407,14 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder): env.storage_controller.tenant_create(tid) env.storage_controller.reconcile_until_idle() - env.storage_controller.configure_failpoints(("sleep-on-reconcile-epilogue", "return(10000)")) + env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause")) + + def unpause_failpoint(): + time.sleep(2) + env.storage_controller.configure_failpoints(("reconciler-epilogue", "off")) + + thread = threading.Thread(target=unpause_failpoint) + thread.start() # Make a change to the tenant config to trigger a slow reconcile virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) @@ -2421,6 +2429,8 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder): observed_state = env.storage_controller.step_down() log.info(f"Storage controller stepped down with {observed_state=}") + thread.join() + # Validate that we waited for the slow reconcile to complete # and updated the observed state in the storcon before stepping down. node_id = str(env.pageserver.id) @@ -3294,3 +3304,113 @@ def test_storage_controller_detached_stopped( # Confirm the detach happened assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == [] + + +@run_only_on_default_postgres("Postgres version makes no difference here") +def test_storage_controller_node_flap_detach_race( + neon_env_builder: NeonEnvBuilder, +): + """ + Reproducer for https://github.com/neondatabase/neon/issues/10253. + + When a node's availability flaps, the reconciliations spawned by the node + going offline may race with the reconciliation done when then node comes + back online. 
+ """ + neon_env_builder.num_pageservers = 4 + + env = neon_env_builder.init_configs() + env.start() + + tenant_id = TenantId.generate() + env.storage_controller.tenant_create( + tenant_id, + shard_count=2, + ) + env.storage_controller.reconcile_until_idle() + + stopped_nodes = [s["node_id"] for s in env.storage_controller.locate(tenant_id)] + + def has_hit_failpoint(failpoint: str, offset: LogCursor | None = None) -> LogCursor: + res = env.storage_controller.log_contains(f"at failpoint {failpoint}", offset=offset) + assert res + return res[1] + + # Stop the nodes which host attached shards. + # This will trigger reconciliations which pause before incrmenenting the generation, + # and, more importantly, updating the `generation_pageserver` of the shards. + env.storage_controller.configure_failpoints(("reconciler-pre-increment-generation", "pause")) + for node_id in stopped_nodes: + env.get_pageserver(node_id).stop(immediate=True) + + def failure_handled() -> LogCursor: + stop_offset = None + + for node_id in stopped_nodes: + res = env.storage_controller.log_contains(f"node {node_id} going offline") + assert res + stop_offset = res[1] + + assert stop_offset + return stop_offset + + offset = wait_until(failure_handled) + + # Now restart the nodes and make them pause before marking themselves as available + # or running the activation reconciliation. + env.storage_controller.configure_failpoints(("heartbeat-pre-node-state-configure", "pause")) + + for node_id in stopped_nodes: + env.get_pageserver(node_id).start(await_active=False) + + offset = wait_until( + lambda: has_hit_failpoint("heartbeat-pre-node-state-configure", offset=offset) + ) + + # The nodes have restarted and are waiting to perform activaction reconciliation. + # Unpause the initial reconciliation triggered by the nodes going offline. + # It will attempt to detach from the old location, but notice that the old location + # is not yet available, and then stop before processing the results of the reconciliation. + env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause")) + env.storage_controller.configure_failpoints(("reconciler-pre-increment-generation", "off")) + + offset = wait_until(lambda: has_hit_failpoint("reconciler-epilogue", offset=offset)) + + # Let the nodes perform activation reconciliation while still holding up processing the result + # from the initial reconcile triggered by going offline. + env.storage_controller.configure_failpoints(("heartbeat-pre-node-state-configure", "off")) + + def activate_reconciliation_done(): + for node_id in stopped_nodes: + assert env.storage_controller.log_contains( + f"Node {node_id} transition to active", offset=offset + ) + + wait_until(activate_reconciliation_done) + + # Finally, allow the initial reconcile to finish up. 
+ env.storage_controller.configure_failpoints(("reconciler-epilogue", "off")) + + # Give things a chance to settle and validate that no stale locations exist + env.storage_controller.reconcile_until_idle() + + def validate_locations(): + shard_locations = defaultdict(list) + for ps in env.pageservers: + locations = ps.http_client().tenant_list_locations()["tenant_shards"] + for loc in locations: + shard_locations[loc[0]].append( + {"generation": loc[1]["generation"], "mode": loc[1]["mode"], "node": ps.id} + ) + + log.info(f"Shard locations: {shard_locations}") + + attached_locations = { + k: list(filter(lambda loc: loc["mode"] == "AttachedSingle", v)) + for k, v in shard_locations.items() + } + + for shard, locs in attached_locations.items(): + assert len(locs) == 1, f"{shard} has {len(locs)} attached locations" + + wait_until(validate_locations, timeout=10) From 68d8acfd058b7b2d0deb041d14252f17d50ad05f Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 8 Jan 2025 18:12:09 +0000 Subject: [PATCH 28/44] storage controller: don't hold detached tenants in memory (#10264) ## Problem Typical deployments of neon have some tenants that stay in use continuously, and a background churning population of tenants that are created and then fall idle, and are configured to Detached state. Currently, this churn of short lived tenants results in an ever-increasing memory footprint. Closes: https://github.com/neondatabase/neon/issues/9712 ## Summary of changes - At startup, filter to only load shards that don't have Detached policy - In process_result, check if a tenant's shards are all Detached and observed=={}, and if so drop them from memory - In tenant_location_conf and other tenant mutators, load the tenants' shards on-demand if they are not present --- storage_controller/src/id_lock_map.rs | 8 + storage_controller/src/persistence.rs | 34 ++- storage_controller/src/service.rs | 195 +++++++++++++++--- storage_controller/src/tenant_shard.rs | 4 + .../regress/test_storage_controller.py | 105 +++++++++- 5 files changed, 318 insertions(+), 28 deletions(-) diff --git a/storage_controller/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs index fcd3eb57e2..2d8b674f86 100644 --- a/storage_controller/src/id_lock_map.rs +++ b/storage_controller/src/id_lock_map.rs @@ -112,6 +112,14 @@ where } } + pub(crate) fn try_exclusive(&self, key: T, operation: I) -> Option> { + let mut locked = self.entities.lock().unwrap(); + let entry = locked.entry(key).or_default().clone(); + let mut guard = TracingExclusiveGuard::new(entry.try_write_owned().ok()?); + *guard.guard = Some(operation); + Some(guard) + } + /// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do /// periodic housekeeping to avoid the map growing indefinitely pub(crate) fn housekeeping(&self) { diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index cc377e606e..c5eb106f24 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -97,6 +97,7 @@ pub(crate) enum DatabaseOperation { TenantGenerations, ShardGenerations, ListTenantShards, + LoadTenant, InsertTenantShards, UpdateTenantShard, DeleteTenant, @@ -330,11 +331,40 @@ impl Persistence { /// At startup, load the high level state for shards, such as their config + policy. This will /// be enriched at runtime with state discovered on pageservers. - pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult> { + /// + /// We exclude shards configured to be detached. 
During startup, if we see any attached locations + /// for such shards, they will automatically be detached as 'orphans'. + pub(crate) async fn load_active_tenant_shards( + &self, + ) -> DatabaseResult> { + use crate::schema::tenant_shards::dsl::*; self.with_measured_conn( DatabaseOperation::ListTenantShards, move |conn| -> DatabaseResult<_> { - Ok(crate::schema::tenant_shards::table.load::(conn)?) + let query = tenant_shards.filter( + placement_policy.ne(serde_json::to_string(&PlacementPolicy::Detached).unwrap()), + ); + let result = query.load::(conn)?; + + Ok(result) + }, + ) + .await + } + + /// When restoring a previously detached tenant into memory, load it from the database + pub(crate) async fn load_tenant( + &self, + filter_tenant_id: TenantId, + ) -> DatabaseResult> { + use crate::schema::tenant_shards::dsl::*; + self.with_measured_conn( + DatabaseOperation::LoadTenant, + move |conn| -> DatabaseResult<_> { + let query = tenant_shards.filter(tenant_id.eq(filter_tenant_id.to_string())); + let result = query.load::(conn)?; + + Ok(result) }, ) .await diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 359fcb3288..fd4ee7fd10 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -155,6 +155,7 @@ enum TenantOperations { TimelineArchivalConfig, TimelineDetachAncestor, TimelineGcBlockUnblock, + DropDetached, } #[derive(Clone, strum_macros::Display)] @@ -416,8 +417,8 @@ pub struct Service { /// Queue of tenants who are waiting for concurrency limits to permit them to reconcile /// Send into this queue to promptly attempt to reconcile this shard next time units are available. /// - /// Note that this state logically lives inside ServiceInner, but carrying Sender here makes the code simpler - /// by avoiding needing a &mut ref to something inside the ServiceInner. This could be optimized to + /// Note that this state logically lives inside ServiceState, but carrying Sender here makes the code simpler + /// by avoiding needing a &mut ref to something inside the ServiceState. This could be optimized to /// use a VecDeque instead of a channel to reduce synchronization overhead, at the cost of some code complexity. delayed_reconcile_tx: tokio::sync::mpsc::Sender, @@ -1165,6 +1166,20 @@ impl Service { } } + // If we just finished detaching all shards for a tenant, it might be time to drop it from memory. + if tenant.policy == PlacementPolicy::Detached { + // We may only drop a tenant from memory while holding the exclusive lock on the tenant ID: this protects us + // from concurrent execution wrt a request handler that might expect the tenant to remain in memory for the + // duration of the request. + let guard = self.tenant_op_locks.try_exclusive( + tenant.tenant_shard_id.tenant_id, + TenantOperations::DropDetached, + ); + if let Some(guard) = guard { + self.maybe_drop_tenant(tenant.tenant_shard_id.tenant_id, &mut locked, &guard); + } + } + // Maybe some other work can proceed now that this job finished. 
if self.reconciler_concurrency.available_permits() > 0 { while let Ok(tenant_shard_id) = locked.delayed_reconcile_rx.try_recv() { @@ -1294,7 +1309,7 @@ impl Service { .set(nodes.len() as i64); tracing::info!("Loading shards from database..."); - let mut tenant_shard_persistence = persistence.list_tenant_shards().await?; + let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?; tracing::info!( "Loaded {} shards from database.", tenant_shard_persistence.len() @@ -1546,8 +1561,14 @@ impl Service { // the pageserver API (not via this service), we will auto-create any missing tenant // shards with default state. let insert = { - let locked = self.inner.write().unwrap(); - !locked.tenants.contains_key(&attach_req.tenant_shard_id) + match self + .maybe_load_tenant(attach_req.tenant_shard_id.tenant_id, &_tenant_lock) + .await + { + Ok(_) => false, + Err(ApiError::NotFound(_)) => true, + Err(e) => return Err(e.into()), + } }; if insert { @@ -2439,6 +2460,99 @@ impl Service { } } + /// For APIs that might act on tenants with [`PlacementPolicy::Detached`], first check if + /// the tenant is present in memory. If not, load it from the database. If it is found + /// in neither location, return a NotFound error. + /// + /// Caller must demonstrate they hold a lock guard, as otherwise two callers might try and load + /// it at the same time, or we might race with [`Self::maybe_drop_tenant`] + async fn maybe_load_tenant( + &self, + tenant_id: TenantId, + _guard: &TracingExclusiveGuard, + ) -> Result<(), ApiError> { + let present_in_memory = { + let locked = self.inner.read().unwrap(); + locked + .tenants + .range(TenantShardId::tenant_range(tenant_id)) + .next() + .is_some() + }; + + if present_in_memory { + return Ok(()); + } + + let tenant_shards = self.persistence.load_tenant(tenant_id).await?; + if tenant_shards.is_empty() { + return Err(ApiError::NotFound( + anyhow::anyhow!("Tenant {} not found", tenant_id).into(), + )); + } + + // TODO: choose a fresh AZ to use for this tenant when un-detaching: there definitely isn't a running + // compute, so no benefit to making AZ sticky across detaches. + + let mut locked = self.inner.write().unwrap(); + tracing::info!( + "Loaded {} shards for tenant {}", + tenant_shards.len(), + tenant_id + ); + + locked.tenants.extend(tenant_shards.into_iter().map(|p| { + let intent = IntentState::new(); + let shard = + TenantShard::from_persistent(p, intent).expect("Corrupt shard row in database"); + + // Sanity check: when loading on-demand, we should always be loaded something Detached + debug_assert!(shard.policy == PlacementPolicy::Detached); + if shard.policy != PlacementPolicy::Detached { + tracing::error!( + "Tenant shard {} loaded on-demand, but has non-Detached policy {:?}", + shard.tenant_shard_id, + shard.policy + ); + } + + (shard.tenant_shard_id, shard) + })); + + Ok(()) + } + + /// If all shards for a tenant are detached, and in a fully quiescent state (no observed locations on pageservers), + /// and have no reconciler running, then we can drop the tenant from memory. It will be reloaded on-demand + /// if we are asked to attach it again (see [`Self::maybe_load_tenant`]). + /// + /// Caller must demonstrate they hold a lock guard, as otherwise it is unsafe to drop a tenant from + /// memory while some other function might assume it continues to exist while not holding the lock on Self::inner. 
+ fn maybe_drop_tenant( + &self, + tenant_id: TenantId, + locked: &mut std::sync::RwLockWriteGuard, + _guard: &TracingExclusiveGuard, + ) { + let mut tenant_shards = locked.tenants.range(TenantShardId::tenant_range(tenant_id)); + if tenant_shards.all(|(_id, shard)| { + shard.policy == PlacementPolicy::Detached + && shard.reconciler.is_none() + && shard.observed.is_empty() + }) { + let keys = locked + .tenants + .range(TenantShardId::tenant_range(tenant_id)) + .map(|(id, _)| id) + .copied() + .collect::>(); + for key in keys { + tracing::info!("Dropping detached tenant shard {} from memory", key); + locked.tenants.remove(&key); + } + } + } + /// This API is used by the cloud control plane to migrate unsharded tenants that it created /// directly with pageservers into this service. /// @@ -2465,14 +2579,26 @@ impl Service { ) .await; - if !tenant_shard_id.is_unsharded() { + let tenant_id = if !tenant_shard_id.is_unsharded() { return Err(ApiError::BadRequest(anyhow::anyhow!( "This API is for importing single-sharded or unsharded tenants" ))); - } + } else { + tenant_shard_id.tenant_id + }; + + // In case we are waking up a Detached tenant + match self.maybe_load_tenant(tenant_id, &_tenant_lock).await { + Ok(()) | Err(ApiError::NotFound(_)) => { + // This is a creation or an update + } + Err(e) => { + return Err(e); + } + }; // First check if this is a creation or an update - let create_or_update = self.tenant_location_config_prepare(tenant_shard_id.tenant_id, req); + let create_or_update = self.tenant_location_config_prepare(tenant_id, req); let mut result = TenantLocationConfigResponse { shards: Vec::new(), @@ -2600,6 +2726,8 @@ impl Service { let tenant_id = req.tenant_id; let patch = req.config; + self.maybe_load_tenant(tenant_id, &_tenant_lock).await?; + let base = { let locked = self.inner.read().unwrap(); let shards = locked @@ -2644,19 +2772,7 @@ impl Service { ) .await; - let tenant_exists = { - let locked = self.inner.read().unwrap(); - let mut r = locked - .tenants - .range(TenantShardId::tenant_range(req.tenant_id)); - r.next().is_some() - }; - - if !tenant_exists { - return Err(ApiError::NotFound( - anyhow::anyhow!("Tenant {} not found", req.tenant_id).into(), - )); - } + self.maybe_load_tenant(req.tenant_id, &_tenant_lock).await?; self.set_tenant_config_and_reconcile(req.tenant_id, req.config) .await @@ -2949,6 +3065,8 @@ impl Service { let _tenant_lock = trace_exclusive_lock(&self.tenant_op_locks, tenant_id, TenantOperations::Delete).await; + self.maybe_load_tenant(tenant_id, &_tenant_lock).await?; + // Detach all shards. This also deletes local pageserver shard data. 
let (detach_waiters, node) = { let mut detach_waiters = Vec::new(); @@ -3068,6 +3186,8 @@ impl Service { ) .await; + self.maybe_load_tenant(tenant_id, &_tenant_lock).await?; + failpoint_support::sleep_millis_async!("tenant-update-policy-exclusive-lock"); let TenantPolicyRequest { @@ -5150,11 +5270,13 @@ impl Service { ))); } - let mut shards = self.persistence.list_tenant_shards().await?; - shards.sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count)); + let mut persistent_shards = self.persistence.load_active_tenant_shards().await?; + persistent_shards + .sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count)); + expect_shards.sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count)); - if shards != expect_shards { + if persistent_shards != expect_shards { tracing::error!("Consistency check failed on shards."); tracing::error!( "Shards in memory: {}", @@ -5163,7 +5285,7 @@ impl Service { ); tracing::error!( "Shards in database: {}", - serde_json::to_string(&shards) + serde_json::to_string(&persistent_shards) .map_err(|e| ApiError::InternalServerError(e.into()))? ); return Err(ApiError::InternalServerError(anyhow::anyhow!( @@ -6119,6 +6241,10 @@ impl Service { let mut pending_reconciles = 0; let mut az_violations = 0; + // If we find any tenants to drop from memory, stash them to offload after + // we're done traversing the map of tenants. + let mut drop_detached_tenants = Vec::new(); + let mut reconciles_spawned = 0; for shard in tenants.values_mut() { // Accumulate scheduling statistics @@ -6152,6 +6278,25 @@ impl Service { // Shard wanted to reconcile but for some reason couldn't. pending_reconciles += 1; } + + // If this tenant is detached, try dropping it from memory. This is usually done + // proactively in [`Self::process_results`], but we do it here to handle the edge + // case where a reconcile completes while someone else is holding an op lock for the tenant. 
+ if shard.tenant_shard_id.shard_number == ShardNumber(0) + && shard.policy == PlacementPolicy::Detached + { + if let Some(guard) = self.tenant_op_locks.try_exclusive( + shard.tenant_shard_id.tenant_id, + TenantOperations::DropDetached, + ) { + drop_detached_tenants.push((shard.tenant_shard_id.tenant_id, guard)); + } + } + } + + // Process any deferred tenant drops + for (tenant_id, guard) in drop_detached_tenants { + self.maybe_drop_tenant(tenant_id, &mut locked, &guard); } metrics::METRICS_REGISTRY diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index cba579e8a7..c17989a316 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -465,6 +465,10 @@ impl ObservedState { locations: HashMap::new(), } } + + pub(crate) fn is_empty(&self) -> bool { + self.locations.is_empty() + } } impl TenantShard { diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 973d0cdf82..207f55a214 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3299,13 +3299,116 @@ def test_storage_controller_detached_stopped( "generation": None, }, ) - + env.storage_controller.reconcile_until_idle() env.storage_controller.consistency_check() # Confirm the detach happened assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == [] +@run_only_on_default_postgres("Postgres version makes no difference here") +def test_storage_controller_detach_lifecycle( + neon_env_builder: NeonEnvBuilder, +): + """ + Test that detached tenants are handled properly through their lifecycle: getting dropped + from memory when detached, then getting loaded back on-demand. + """ + + remote_storage_kind = s3_storage() + neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind) + + neon_env_builder.num_pageservers = 1 + + env = neon_env_builder.init_configs() + env.start() + virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) + + tenant_id = TenantId.generate() + timeline_id = TimelineId.generate() + env.storage_controller.tenant_create( + tenant_id, + shard_count=1, + ) + virtual_ps_http.timeline_create(PgVersion.NOT_SET, tenant_id, timeline_id) + + remote_prefix = "/".join( + ( + "tenants", + str(tenant_id), + ) + ) + # We will later check data is gone after deletion, so as a control check that it is present to begin with + assert_prefix_not_empty( + neon_env_builder.pageserver_remote_storage, + prefix=remote_prefix, + ) + + assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1 + assert len(env.storage_controller.tenant_list()) == 1 + + # Detach the tenant + virtual_ps_http.tenant_location_conf( + tenant_id, + { + "mode": "Detached", + "secondary_conf": None, + "tenant_conf": {}, + "generation": None, + }, + ) + # Ensure reconciles are done (the one we do inline in location_conf is advisory and if it takes too long that API just succeeds anyway) + env.storage_controller.reconcile_until_idle() + env.storage_controller.consistency_check() + + # Confirm the detach happened on pageserver + assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == [] + # Confirm the tenant is not in memory on the controller + assert env.storage_controller.tenant_list() == [] + + # The detached tenant does not get loaded into memory across a controller restart + env.storage_controller.stop() + env.storage_controller.start() + assert 
env.storage_controller.tenant_list() == [] + env.storage_controller.consistency_check() + + # The detached tenant can be re-attached + virtual_ps_http.tenant_location_conf( + tenant_id, + { + "mode": "AttachedSingle", + "secondary_conf": None, + "tenant_conf": {}, + "generation": None, + }, + ) + assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1 + assert len(env.storage_controller.tenant_list()) == 1 + env.storage_controller.consistency_check() + + # Detach it again before doing deletion + virtual_ps_http.tenant_location_conf( + tenant_id, + { + "mode": "Detached", + "secondary_conf": None, + "tenant_conf": {}, + "generation": None, + }, + ) + env.storage_controller.reconcile_until_idle() + env.storage_controller.consistency_check() + + # A detached tenant can be deleted + virtual_ps_http.tenant_delete(tenant_id) + + # Such deletions really work (empty remote storage) + assert_prefix_empty( + neon_env_builder.pageserver_remote_storage, + prefix=remote_prefix, + ) + + @run_only_on_default_postgres("Postgres version makes no difference here") def test_storage_controller_node_flap_detach_race( neon_env_builder: NeonEnvBuilder, From 0ad0db6ff8b5b491244d251fa09c8093f725a1d3 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Wed, 8 Jan 2025 18:55:04 +0000 Subject: [PATCH 29/44] compute: dropdb DROP SUBSCRIPTION fix (#10066) ## Problem Project gets stuck if database with subscriptions was deleted via API / UI. https://github.com/neondatabase/cloud/issues/18646 ## Summary of changes Before dropping the database, drop all the subscriptions in it. Do not drop slot on publisher, because we have no guarantee that the slot still exists or that the publisher is reachable. Add `DropSubscriptionsForDeletedDatabases` phase to run these operations in all databases, we're about to delete. Ignore the error if the database does not exist. 
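
In essence, the per-database cleanup added here boils down to the following. This is a minimal sketch using the blocking `postgres` crate rather than the actual compute_ctl code; the connection string handling, error reporting and identifier quoting are simplified assumptions.

```rust
use postgres::{Client, NoTls};

// Sketch: detach and drop every subscription owned by the database that is about
// to be deleted, so that a later `DROP DATABASE ... WITH (FORCE)` cannot fail on it.
fn drop_subscriptions_in_doomed_db(conninfo: &str) -> Result<(), postgres::Error> {
    // `conninfo` is assumed to point at the doomed database itself.
    let mut client = Client::connect(conninfo, NoTls)?;
    let rows = client.query(
        "SELECT subname::text FROM pg_subscription \
         WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = current_database())",
        &[],
    )?;
    for row in rows {
        let sub: String = row.get(0);
        // Disable first and detach the slot, so dropping the subscription does not
        // require the publisher (or the slot) to still be reachable.
        client.batch_execute(&format!("ALTER SUBSCRIPTION \"{sub}\" DISABLE"))?;
        client.batch_execute(&format!("ALTER SUBSCRIPTION \"{sub}\" SET (slot_name = NONE)"))?;
        client.batch_execute(&format!("DROP SUBSCRIPTION \"{sub}\""))?;
    }
    Ok(())
}
```

In the actual change this logic lives in a SQL `DO` block (`drop_subscription_for_drop_dbs.sql`) executed by the new per-database phase before the drop.
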
--- compute_tools/src/compute.rs | 95 ++++++++++++-- compute_tools/src/spec_apply.rs | 32 ++++- .../sql/drop_subscription_for_drop_dbs.sql | 11 ++ test_runner/fixtures/neon_fixtures.py | 27 +++- test_runner/regress/test_compute_catalog.py | 116 +++++++++++++++++- 5 files changed, 262 insertions(+), 19 deletions(-) create mode 100644 compute_tools/src/sql/drop_subscription_for_drop_dbs.sql diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 78f6033429..1ac97a378b 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -15,7 +15,7 @@ use std::time::Instant; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::{PgIdent, Role}; +use compute_api::spec::{Database, PgIdent, Role}; use futures::future::join_all; use futures::stream::FuturesUnordered; use futures::StreamExt; @@ -45,8 +45,10 @@ use crate::spec_apply::ApplySpecPhase::{ DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, }; +use crate::spec_apply::PerDatabasePhase; use crate::spec_apply::PerDatabasePhase::{ - ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension, + ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases, + HandleAnonExtension, }; use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; @@ -834,7 +836,7 @@ impl ComputeNode { conf } - async fn get_maintenance_client( + pub async fn get_maintenance_client( conf: &tokio_postgres::Config, ) -> Result { let mut conf = conf.clone(); @@ -943,6 +945,78 @@ impl ComputeNode { dbs: databases, })); + // Apply special pre drop database phase. + // NOTE: we use the code of RunInEachDatabase phase for parallelism + // and connection management, but we don't really run it in *each* database, + // only in databases, we're about to drop. + info!("Applying PerDatabase (pre-dropdb) phase"); + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + // Run the phase for each database that we're about to drop. + let db_processes = spec + .delta_operations + .iter() + .flatten() + .filter_map(move |op| { + if op.action.as_str() == "delete_db" { + Some(op.name.clone()) + } else { + None + } + }) + .map(|dbname| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut conf = conf.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + // We only need dbname field for this phase, so set other fields to dummy values + let db = DB::UserDB(Database { + name: dbname.clone(), + owner: "cloud_admin".to_string(), + options: None, + restrict_conn: false, + invalid: false, + }); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + conf.dbname(db.name.as_str()); + } + } + + let conf = Arc::new(conf); + let fut = Self::apply_spec_sql_db( + spec.clone(), + conf, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + [DropSubscriptionsForDeletedDatabases].to_vec(), + ); + + Ok(spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + if let Err(e) = handle.await? { + // Handle the error case where the database does not exist + // We do not check whether the DB exists or not in the deletion phase, + // so we shouldn't be strict about it in pre-deletion cleanup as well. 
+ if e.to_string().contains("does not exist") { + warn!("Error dropping subscription: {}", e); + } else { + return Err(e); + } + }; + } + for phase in [ CreateSuperUser, DropInvalidDatabases, @@ -962,7 +1036,7 @@ impl ComputeNode { .await?; } - info!("Applying RunInEachDatabase phase"); + info!("Applying RunInEachDatabase2 phase"); let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); let db_processes = spec @@ -997,6 +1071,12 @@ impl ComputeNode { jwks_roles.clone(), concurrency_token.clone(), db, + [ + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, + ] + .to_vec(), ); Ok(spawn(fut)) @@ -1043,16 +1123,13 @@ impl ComputeNode { jwks_roles: Arc>, concurrency_token: Arc, db: DB, + subphases: Vec, ) -> Result<()> { let _permit = concurrency_token.acquire().await?; let mut client_conn = None; - for subphase in [ - DeleteDBRoleReferences, - ChangeSchemaPerms, - HandleAnonExtension, - ] { + for subphase in subphases { apply_operations( spec.clone(), ctx.clone(), diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index 7308d5d36e..695a722d6d 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -47,6 +47,7 @@ pub enum PerDatabasePhase { DeleteDBRoleReferences, ChangeSchemaPerms, HandleAnonExtension, + DropSubscriptionsForDeletedDatabases, } #[derive(Clone, Debug)] @@ -326,13 +327,12 @@ async fn get_operations<'a>( // Use FORCE to drop database even if there are active connections. // We run this from `cloud_admin`, so it should have enough privileges. + // // NB: there could be other db states, which prevent us from dropping // the database. For example, if db is used by any active subscription // or replication slot. - // TODO: deal with it once we allow logical replication. Proper fix should - // involve returning an error code to the control plane, so it could - // figure out that this is a non-retryable error, return it to the user - // and fail operation permanently. + // Such cases are handled in the DropSubscriptionsForDeletedDatabases + // phase. We do all the cleanup before actually dropping the database. 
let drop_db_query: String = format!( "DROP DATABASE IF EXISTS {} WITH (FORCE)", &op.name.pg_quote() @@ -444,6 +444,30 @@ async fn get_operations<'a>( } ApplySpecPhase::RunInEachDatabase { db, subphase } => { match subphase { + PerDatabasePhase::DropSubscriptionsForDeletedDatabases => { + match &db { + DB::UserDB(db) => { + let drop_subscription_query: String = format!( + include_str!("sql/drop_subscription_for_drop_dbs.sql"), + datname_str = escape_literal(&db.name), + ); + + let operations = vec![Operation { + query: drop_subscription_query, + comment: Some(format!( + "optionally dropping subscriptions for DB {}", + db.name, + )), + }] + .into_iter(); + + Ok(Box::new(operations)) + } + // skip this cleanup for the system databases + // because users can't drop them + DB::SystemDB => Ok(Box::new(empty())), + } + } PerDatabasePhase::DeleteDBRoleReferences => { let ctx = ctx.read().await; diff --git a/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql b/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql new file mode 100644 index 0000000000..dfb925e48e --- /dev/null +++ b/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql @@ -0,0 +1,11 @@ +DO $$ +DECLARE + subname TEXT; +BEGIN + FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP + EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname); + EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname); + EXECUTE format('DROP SUBSCRIPTION %I;', subname); + END LOOP; +END; +$$; diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 00fdda2998..e22e452a52 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4933,13 +4933,30 @@ def check_restored_datadir_content( assert (mismatch, error) == ([], []) -def logical_replication_sync(subscriber: PgProtocol, publisher: PgProtocol) -> Lsn: +def logical_replication_sync( + subscriber: PgProtocol, + publisher: PgProtocol, + sub_dbname: str | None = None, + pub_dbname: str | None = None, +) -> Lsn: """Wait logical replication subscriber to sync with publisher.""" - publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + if pub_dbname is not None: + publisher_lsn = Lsn( + publisher.safe_psql("SELECT pg_current_wal_flush_lsn()", dbname=pub_dbname)[0][0] + ) + else: + publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + while True: - res = subscriber.safe_psql("select latest_end_lsn from pg_catalog.pg_stat_subscription")[0][ - 0 - ] + if sub_dbname is not None: + res = subscriber.safe_psql( + "select latest_end_lsn from pg_catalog.pg_stat_subscription", dbname=sub_dbname + )[0][0] + else: + res = subscriber.safe_psql( + "select latest_end_lsn from pg_catalog.pg_stat_subscription" + )[0][0] + if res: log.info(f"subscriber_lsn={res}") subscriber_lsn = Lsn(res) diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index b3719a45ed..e411aad97d 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -1,7 +1,9 @@ from __future__ import annotations +import logging + import requests -from fixtures.neon_fixtures import NeonEnv +from fixtures.neon_fixtures import NeonEnv, logical_replication_sync TEST_DB_NAMES = [ { @@ -136,3 +138,115 @@ def test_compute_create_databases(neon_simple_env: NeonEnv): assert curr_db is not None assert len(curr_db) 
== 1 assert curr_db[0] == db["name"] + + +def test_dropdb_with_subscription(neon_simple_env: NeonEnv): + """ + Test that compute_ctl can drop a database that has a logical replication subscription. + """ + env = neon_simple_env + + # Create and start endpoint so that neon_local put all the generated + # stuff into the spec.json file. + endpoint = env.endpoints.create_start("main") + + TEST_DB_NAMES = [ + { + "name": "neondb", + "owner": "cloud_admin", + }, + { + "name": "subscriber_db", + "owner": "cloud_admin", + }, + { + "name": "publisher_db", + "owner": "cloud_admin", + }, + ] + + # Update the spec.json file to create the databases + # and reconfigure the endpoint to apply the changes. + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "databases": TEST_DB_NAMES, + }, + } + ) + endpoint.reconfigure() + + # connect to the publisher_db and create a publication + with endpoint.cursor(dbname="publisher_db") as cursor: + cursor.execute("CREATE PUBLICATION mypub FOR ALL TABLES") + cursor.execute("select pg_catalog.pg_create_logical_replication_slot('mysub', 'pgoutput');") + cursor.execute("CREATE TABLE t(a int)") + cursor.execute("INSERT INTO t VALUES (1)") + + # connect to the subscriber_db and create a subscription + # Note that we need to create subscription with + connstr = endpoint.connstr(dbname="publisher_db").replace("'", "''") + with endpoint.cursor(dbname="subscriber_db") as cursor: + cursor.execute("CREATE TABLE t(a int)") + cursor.execute( + f"CREATE SUBSCRIPTION mysub CONNECTION '{connstr}' PUBLICATION mypub WITH (create_slot = false) " + ) + + # wait for the subscription to be active + logical_replication_sync( + endpoint, endpoint, sub_dbname="subscriber_db", pub_dbname="publisher_db" + ) + + # Check that replication is working + with endpoint.cursor(dbname="subscriber_db") as cursor: + cursor.execute("SELECT * FROM t") + rows = cursor.fetchall() + assert len(rows) == 1 + assert rows[0][0] == 1 + + # drop the subscriber_db from the list + TEST_DB_NAMES_NEW = [ + { + "name": "neondb", + "owner": "cloud_admin", + }, + { + "name": "publisher_db", + "owner": "cloud_admin", + }, + ] + # Update the spec.json file to drop the database + # and reconfigure the endpoint to apply the changes. 
+ endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "databases": TEST_DB_NAMES_NEW, + }, + "delta_operations": [ + {"action": "delete_db", "name": "subscriber_db"}, + # also test the case when we try to delete a non-existent database + # shouldn't happen in normal operation, + # but can occur when failed operations are retried + {"action": "delete_db", "name": "nonexistent_db"}, + ], + } + ) + + logging.info("Reconfiguring the endpoint to drop the subscriber_db") + endpoint.reconfigure() + + # Check that the subscriber_db is dropped + with endpoint.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database WHERE datname = %s", ("subscriber_db",)) + catalog_db = cursor.fetchone() + assert catalog_db is None + + # Check that we can still connect to the publisher_db + with endpoint.cursor(dbname="publisher_db") as cursor: + cursor.execute("SELECT * FROM current_database()") + curr_db = cursor.fetchone() + assert curr_db is not None + assert len(curr_db) == 1 + assert curr_db[0] == "publisher_db" From fcfff724547cda260e884d2d680f199fdbc9471c Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Wed, 8 Jan 2025 21:34:53 +0200 Subject: [PATCH 30/44] impr(proxy): Decouple ip_allowlist from the CancelClosure (#10199) This PR removes the direct dependency of the IP allowlist from CancelClosure, allowing for more scalable and flexible IP restrictions and enabling the future use of Redis-based CancelMap storage. Changes: - Introduce a new BackendAuth async trait that retrieves the IP allowlist through existing authentication methods; - Improve cancellation error handling by instrument() async cancel_sesion() rather than dropping it. - Set and store IP allowlist for SCRAM Proxy to consistently perform IP allowance check Relates to #9660 --- proxy/src/auth/backend/console_redirect.rs | 32 ++++- proxy/src/auth/backend/mod.rs | 47 ++++++-- proxy/src/bin/proxy.rs | 54 ++++++++- proxy/src/cancellation.rs | 111 +++++++++++++++++- proxy/src/compute.rs | 28 ++++- proxy/src/console_redirect_proxy.rs | 28 ++--- .../control_plane/client/cplane_proxy_v1.rs | 36 ++++-- proxy/src/proxy/mod.rs | 31 +++-- 8 files changed, 307 insertions(+), 60 deletions(-) diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index c3de77b352..dbfda588cc 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -1,16 +1,18 @@ use async_trait::async_trait; use postgres_client::config::SslMode; use pq_proto::BeMessage as Be; +use std::fmt; use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, info_span}; -use super::ComputeCredentialKeys; +use super::{ComputeCredentialKeys, ControlPlaneApi}; +use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo}; use crate::auth::IpPattern; use crate::cache::Cached; use crate::config::AuthenticationConfig; use crate::context::RequestContext; -use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; +use crate::control_plane::{self, client::cplane_proxy_v1, CachedNodeInfo, NodeInfo}; use crate::error::{ReportableError, UserFacingError}; use crate::proxy::connect_compute::ComputeConnectBackend; use crate::stream::PqStream; @@ -31,6 +33,13 @@ pub(crate) enum ConsoleRedirectError { #[derive(Debug)] pub struct ConsoleRedirectBackend { console_uri: reqwest::Url, + api: cplane_proxy_v1::NeonControlPlaneClient, +} + +impl fmt::Debug for cplane_proxy_v1::NeonControlPlaneClient { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 
+ write!(f, "NeonControlPlaneClient") + } } impl UserFacingError for ConsoleRedirectError { @@ -71,9 +80,24 @@ pub(crate) fn new_psql_session_id() -> String { hex::encode(rand::random::<[u8; 8]>()) } +#[async_trait] +impl BackendIpAllowlist for ConsoleRedirectBackend { + async fn get_allowed_ips( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> auth::Result> { + self.api + .get_allowed_ips_and_secret(ctx, user_info) + .await + .map(|(ips, _)| ips.as_ref().clone()) + .map_err(|e| e.into()) + } +} + impl ConsoleRedirectBackend { - pub fn new(console_uri: reqwest::Url) -> Self { - Self { console_uri } + pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self { + Self { console_uri, api } } pub(crate) async fn authenticate( diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 0c9a7f7825..de48be2952 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -16,7 +16,9 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; -use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; +use crate::auth::{ + self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern, +}; use crate::cache::Cached; use crate::config::AuthenticationConfig; use crate::context::RequestContext; @@ -131,7 +133,7 @@ pub(crate) struct ComputeUserInfoNoEndpoint { pub(crate) options: NeonOptions, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub(crate) struct ComputeUserInfo { pub(crate) endpoint: EndpointId, pub(crate) user: RoleName, @@ -244,6 +246,15 @@ impl AuthenticationConfig { } } +#[async_trait::async_trait] +pub(crate) trait BackendIpAllowlist { + async fn get_allowed_ips( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> auth::Result>; +} + /// True to its name, this function encapsulates our current auth trade-offs. /// Here, we choose the appropriate auth flow based on circumstances. /// @@ -256,7 +267,7 @@ async fn auth_quirks( allow_cleartext: bool, config: &'static AuthenticationConfig, endpoint_rate_limiter: Arc, -) -> auth::Result { +) -> auth::Result<(ComputeCredentials, Option>)> { // If there's no project so far, that entails that client doesn't // support SNI or other means of passing the endpoint (project) name. // We now expect to see a very specific payload in the place of password. @@ -315,7 +326,7 @@ async fn auth_quirks( ) .await { - Ok(keys) => Ok(keys), + Ok(keys) => Ok((keys, Some(allowed_ips.as_ref().clone()))), Err(e) => { if e.is_password_failed() { // The password could have been changed, so we invalidate the cache. 
@@ -385,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { allow_cleartext: bool, config: &'static AuthenticationConfig, endpoint_rate_limiter: Arc, - ) -> auth::Result> { + ) -> auth::Result<(Backend<'a, ComputeCredentials>, Option>)> { let res = match self { Self::ControlPlane(api, user_info) => { debug!( @@ -394,7 +405,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { "performing authentication using the console" ); - let credentials = auth_quirks( + let (credentials, ip_allowlist) = auth_quirks( ctx, &*api, user_info, @@ -404,7 +415,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { endpoint_rate_limiter, ) .await?; - Backend::ControlPlane(api, credentials) + Ok((Backend::ControlPlane(api, credentials), ip_allowlist)) } Self::Local(_) => { return Err(auth::AuthError::bad_auth_method("invalid for local proxy")) @@ -413,7 +424,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { // TODO: replace with some metric info!("user successfully authenticated"); - Ok(res) + res } } @@ -441,6 +452,24 @@ impl Backend<'_, ComputeUserInfo> { } } +#[async_trait::async_trait] +impl BackendIpAllowlist for Backend<'_, ()> { + async fn get_allowed_ips( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> auth::Result> { + let auth_data = match self { + Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await, + Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)), + }; + + auth_data + .map(|(ips, _)| ips.as_ref().clone()) + .map_err(|e| e.into()) + } +} + #[async_trait::async_trait] impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { async fn wake_compute( @@ -786,7 +815,7 @@ mod tests { .await .unwrap(); - assert_eq!(creds.info.endpoint, "my-endpoint"); + assert_eq!(creds.0.info.endpoint, "my-endpoint"); handle.await.unwrap(); } diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 3b122d771c..70b50436bf 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -744,9 +744,59 @@ fn build_auth_backend( } AuthBackendType::ConsoleRedirect => { - let url = args.uri.parse()?; - let backend = ConsoleRedirectBackend::new(url); + let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; + let project_info_cache_config: ProjectInfoCacheOptions = + args.project_info_cache.parse()?; + let endpoint_cache_config: config::EndpointCacheConfig = + args.endpoint_cache_config.parse()?; + info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); + info!( + "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" + ); + info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); + let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( + wake_compute_cache_config, + project_info_cache_config, + endpoint_cache_config, + ))); + + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.wake_compute_lock.parse()?; + info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); + let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( + "wake_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().wake_compute_lock, + )?)); + + let url = args.uri.clone().parse()?; + let ep_url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; + let endpoint = http::Endpoint::new(ep_url, http::new_client()); + let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); + RateBucketInfo::validate(&mut wake_compute_rps_limit)?; + let 
wake_compute_endpoint_rate_limiter = + Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); + + // Since we use only get_allowed_ips_and_secret() wake_compute_endpoint_rate_limiter + // and locks are not used in ConsoleRedirectBackend, + // but they are required by the NeonControlPlaneClient + let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( + endpoint, + args.control_plane_token.clone(), + caches, + locks, + wake_compute_endpoint_rate_limiter, + ); + + let backend = ConsoleRedirectBackend::new(url, api); let config = Box::leak(Box::new(backend)); Ok(Either::Right(config)) diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index df618cf242..a96c43f2ce 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -12,8 +12,10 @@ use tokio::sync::Mutex; use tracing::{debug, info}; use uuid::Uuid; -use crate::auth::{check_peer_addr_is_in_list, IpPattern}; +use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo}; +use crate::auth::{check_peer_addr_is_in_list, AuthError, IpPattern}; use crate::config::ComputeConfig; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::ext::LockExt; use crate::metrics::{CancellationRequest, CancellationSource, Metrics}; @@ -56,6 +58,9 @@ pub(crate) enum CancelError { #[error("IP is not allowed")] IpNotAllowed, + + #[error("Authentication backend error")] + AuthError(#[from] AuthError), } impl ReportableError for CancelError { @@ -68,6 +73,7 @@ impl ReportableError for CancelError { CancelError::Postgres(_) => crate::error::ErrorKind::Compute, CancelError::RateLimit => crate::error::ErrorKind::RateLimit, CancelError::IpNotAllowed => crate::error::ErrorKind::User, + CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane, } } } @@ -102,10 +108,7 @@ impl CancellationHandler
{ } } - /// Try to cancel a running query for the corresponding connection. - /// If the cancellation key is not found, it will be published to Redis. - /// check_allowed - if true, check if the IP is allowed to cancel the query - /// return Result primarily for tests + /// Cancelling only in notification, will be removed pub(crate) async fn cancel_session( &self, key: CancelKeyData, @@ -134,7 +137,8 @@ impl CancellationHandler
{ } // NB: we should immediately release the lock after cloning the token. - let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else { + let cancel_state = self.map.get(&key).and_then(|x| x.clone()); + let Some(cancel_closure) = cancel_state else { tracing::warn!("query cancellation key not found: {key}"); Metrics::get() .proxy @@ -185,6 +189,96 @@ impl CancellationHandler
{ cancel_closure.try_cancel_query(self.compute_config).await } + /// Try to cancel a running query for the corresponding connection. + /// If the cancellation key is not found, it will be published to Redis. + /// check_allowed - if true, check if the IP is allowed to cancel the query. + /// Will fetch IP allowlist internally. + /// + /// return Result primarily for tests + pub(crate) async fn cancel_session_auth( + &self, + key: CancelKeyData, + ctx: RequestContext, + check_allowed: bool, + auth_backend: &T, + ) -> Result<(), CancelError> { + // TODO: check for unspecified address is only for backward compatibility, should be removed + if !ctx.peer_addr().is_unspecified() { + let subnet_key = match ctx.peer_addr() { + IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here + IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()), + }; + if !self.limiter.lock_propagate_poison().check(subnet_key, 1) { + // log only the subnet part of the IP address to know which subnet is rate limited + tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}"); + Metrics::get() + .proxy + .cancellation_requests_total + .inc(CancellationRequest { + source: self.from, + kind: crate::metrics::CancellationOutcome::RateLimitExceeded, + }); + return Err(CancelError::RateLimit); + } + } + + // NB: we should immediately release the lock after cloning the token. + let cancel_state = self.map.get(&key).and_then(|x| x.clone()); + let Some(cancel_closure) = cancel_state else { + tracing::warn!("query cancellation key not found: {key}"); + Metrics::get() + .proxy + .cancellation_requests_total + .inc(CancellationRequest { + source: self.from, + kind: crate::metrics::CancellationOutcome::NotFound, + }); + + if ctx.session_id() == Uuid::nil() { + // was already published, do not publish it again + return Ok(()); + } + + match self + .client + .try_publish(key, ctx.session_id(), ctx.peer_addr()) + .await + { + Ok(()) => {} // do nothing + Err(e) => { + // log it here since cancel_session could be spawned in a task + tracing::error!("failed to publish cancellation key: {key}, error: {e}"); + return Err(CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + ))); + } + } + return Ok(()); + }; + + let ip_allowlist = auth_backend + .get_allowed_ips(&ctx, &cancel_closure.user_info) + .await + .map_err(CancelError::AuthError)?; + + if check_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) { + // log it here since cancel_session could be spawned in a task + tracing::warn!("IP is not allowed to cancel the query: {key}"); + return Err(CancelError::IpNotAllowed); + } + + Metrics::get() + .proxy + .cancellation_requests_total + .inc(CancellationRequest { + source: self.from, + kind: crate::metrics::CancellationOutcome::Found, + }); + info!("cancelling query per user's request using key {key}"); + cancel_closure.try_cancel_query(self.compute_config).await + } + #[cfg(test)] fn contains(&self, session: &Session
) -> bool { self.map.contains_key(&session.key) @@ -248,6 +342,7 @@ pub struct CancelClosure { cancel_token: CancelToken, ip_allowlist: Vec, hostname: String, // for pg_sni router + user_info: ComputeUserInfo, } impl CancelClosure { @@ -256,12 +351,14 @@ impl CancelClosure { cancel_token: CancelToken, ip_allowlist: Vec, hostname: String, + user_info: ComputeUserInfo, ) -> Self { Self { socket_addr, cancel_token, ip_allowlist, hostname, + user_info, } } /// Cancels the query running on user's compute node. @@ -288,6 +385,8 @@ impl CancelClosure { debug!("query was cancelled"); Ok(()) } + + /// Obsolete (will be removed after moving CancelMap to Redis), only for notifications pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec) { self.ip_allowlist = ip_allowlist; } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 89de6692ad..788bd63fee 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -13,6 +13,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tracing::{debug, error, info, warn}; +use crate::auth::backend::ComputeUserInfo; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; use crate::config::ComputeConfig; @@ -23,8 +24,10 @@ use crate::control_plane::messages::MetricsAuxInfo; use crate::error::{ReportableError, UserFacingError}; use crate::metrics::{Metrics, NumDbConnectionsGuard}; use crate::proxy::neon_option; +use crate::proxy::NeonOptions; use crate::tls::postgres_rustls::MakeRustlsConnect; use crate::types::Host; +use crate::types::{EndpointId, RoleName}; pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node"; @@ -284,6 +287,28 @@ impl ConnCfg { self.0.get_ssl_mode() ); + let compute_info = match parameters.get("user") { + Some(user) => { + match parameters.get("database") { + Some(database) => { + ComputeUserInfo { + user: RoleName::from(user), + options: NeonOptions::default(), // just a shim, we don't need options + endpoint: EndpointId::from(database), + } + } + None => { + warn!("compute node didn't return database name"); + ComputeUserInfo::default() + } + } + } + None => { + warn!("compute node didn't return user name"); + ComputeUserInfo::default() + } + }; + // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. // Yet another reason to rework the connection establishing code. 
let cancel_closure = CancelClosure::new( @@ -294,8 +319,9 @@ impl ConnCfg { process_id, secret_key, }, - vec![], + vec![], // TODO: deprecated, will be removed host.to_string(), + compute_info, ); let connection = PostgresConnection { diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 25a549039c..846f55f9e1 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -159,6 +159,7 @@ pub(crate) async fn handle_client( let request_gauge = metrics.connection_requests.guard(proto); let tls = config.tls_config.as_ref(); + let record_handshake_error = !ctx.has_private_peer_addr(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let do_handshake = handshake(ctx, stream, tls, record_handshake_error); @@ -171,23 +172,20 @@ pub(crate) async fn handle_client( // spawn a task to cancel the session, but don't wait for it cancellations.spawn({ let cancellation_handler_clone = Arc::clone(&cancellation_handler); - let session_id = ctx.session_id(); - let peer_ip = ctx.peer_addr(); - let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id); + let ctx = ctx.clone(); + let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id()); cancel_span.follows_from(tracing::Span::current()); async move { - drop( - cancellation_handler_clone - .cancel_session( - cancel_key_data, - session_id, - peer_ip, - config.authentication_config.ip_allowlist_check_enabled, - ) - .instrument(cancel_span) - .await, - ); - } + cancellation_handler_clone + .cancel_session_auth( + cancel_key_data, + ctx, + config.authentication_config.ip_allowlist_check_enabled, + backend, + ) + .await + .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok(); + }.instrument(cancel_span) }); return Ok(None); diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index 00038a6ac6..ece03156d1 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -29,7 +29,7 @@ use crate::rate_limiter::WakeComputeRateLimiter; use crate::types::{EndpointCacheKey, EndpointId}; use crate::{compute, http, scram}; -const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id"); +pub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id"); #[derive(Clone)] pub struct NeonControlPlaneClient { @@ -78,15 +78,30 @@ impl NeonControlPlaneClient { info!("endpoint is not valid, skipping the request"); return Ok(AuthInfo::default()); } - let request_id = ctx.session_id().to_string(); - let application_name = ctx.console_application_name(); + self.do_get_auth_req(user_info, &ctx.session_id(), Some(ctx)) + .await + } + + async fn do_get_auth_req( + &self, + user_info: &ComputeUserInfo, + session_id: &uuid::Uuid, + ctx: Option<&RequestContext>, + ) -> Result { + let request_id: String = session_id.to_string(); + let application_name = if let Some(ctx) = ctx { + ctx.console_application_name() + } else { + "auth_cancellation".to_string() + }; + async { let request = self .endpoint .get_path("get_endpoint_access_control") .header(X_REQUEST_ID, &request_id) .header(AUTHORIZATION, format!("Bearer {}", &self.jwt)) - .query(&[("session_id", ctx.session_id())]) + .query(&[("session_id", session_id)]) .query(&[ ("application_name", application_name.as_str()), ("endpointish", user_info.endpoint.as_str()), @@ -96,9 +111,16 
@@ impl NeonControlPlaneClient { debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); - let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); - let response = self.endpoint.execute(request).await?; - drop(pause); + let response = match ctx { + Some(ctx) => { + let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); + let rsp = self.endpoint.execute(request).await; + drop(pause); + rsp? + } + None => self.endpoint.execute(request).await?, + }; + info!(duration = ?start.elapsed(), "received http response"); let body = match parse_body::(response).await { Ok(body) => body, diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 3926c56fec..1f7dba2f9a 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -273,23 +273,20 @@ pub(crate) async fn handle_client( // spawn a task to cancel the session, but don't wait for it cancellations.spawn({ let cancellation_handler_clone = Arc::clone(&cancellation_handler); - let session_id = ctx.session_id(); - let peer_ip = ctx.peer_addr(); - let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id); + let ctx = ctx.clone(); + let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id()); cancel_span.follows_from(tracing::Span::current()); async move { - drop( - cancellation_handler_clone - .cancel_session( - cancel_key_data, - session_id, - peer_ip, - config.authentication_config.ip_allowlist_check_enabled, - ) - .instrument(cancel_span) - .await, - ); - } + cancellation_handler_clone + .cancel_session_auth( + cancel_key_data, + ctx, + config.authentication_config.ip_allowlist_check_enabled, + auth_backend, + ) + .await + .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok(); + }.instrument(cancel_span) }); return Ok(None); @@ -315,7 +312,7 @@ pub(crate) async fn handle_client( }; let user = user_info.get_user().to_owned(); - let user_info = match user_info + let (user_info, ip_allowlist) = match user_info .authenticate( ctx, &mut stream, @@ -356,6 +353,8 @@ pub(crate) async fn handle_client( .or_else(|e| stream.throw_error(e)) .await?; + node.cancel_closure + .set_ip_allowlist(ip_allowlist.unwrap_or_default()); let session = cancellation_handler.get_session(); prepare_client_connection(&node, &session, &mut stream).await?; From 5b2751397dee2234bfc97d2241f05ab47b95f5ee Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 9 Jan 2025 01:05:07 -0600 Subject: [PATCH 31/44] Refactor MigrationRunner::run_migrations() to call a helper (#10232) This will make it easier to add per-db migrations, such as that for CVE-2024-4317. 
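
At its core, the refactor moves to a helper that runs each migration and the matching bump of `neon_migration.migration_id` in a single transaction. A simplified sketch of that pattern (the `compute-migration` fail point, `-- SKIP` handling and error contexts from the diff below are omitted):

```rust
use anyhow::Result;
use postgres::Client;

// Sketch: apply one migration atomically with the id bump, so a failure rolls
// back both and the runner can retry from the same migration id.
fn apply_one(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> {
    let mut txn = client.transaction()?;
    txn.simple_query(migration)?;
    txn.execute(
        "UPDATE neon_migration.migration_id SET id = $1",
        &[&migration_id],
    )?;
    txn.commit()?;
    Ok(())
}
```

Committing the id update together with the migration itself is what makes a mid-series failure safe to retry.
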
Link: https://www.postgresql.org/support/security/CVE-2024-4317/ Signed-off-by: Tristan Partin Signed-off-by: Tristan Partin --- compute_tools/src/migration.rs | 94 ++++++++++++++++------------------ 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs index 1f3de65806..45c33172f7 100644 --- a/compute_tools/src/migration.rs +++ b/compute_tools/src/migration.rs @@ -1,6 +1,6 @@ use anyhow::{Context, Result}; use fail::fail_point; -use postgres::Client; +use postgres::{Client, Transaction}; use tracing::info; /// Runs a series of migrations on a target database @@ -20,11 +20,9 @@ impl<'m> MigrationRunner<'m> { /// Get the current value neon_migration.migration_id fn get_migration_id(&mut self) -> Result { - let query = "SELECT id FROM neon_migration.migration_id"; let row = self .client - .query_one(query, &[]) - .context("run_migrations get migration_id")?; + .query_one("SELECT id FROM neon_migration.migration_id", &[])?; Ok(row.get::<&str, i64>("id")) } @@ -34,7 +32,7 @@ impl<'m> MigrationRunner<'m> { /// This function has a fail point called compute-migration, which can be /// used if you would like to fail the application of a series of migrations /// at some point. - fn update_migration_id(&mut self, migration_id: i64) -> Result<()> { + fn update_migration_id(txn: &mut Transaction, migration_id: i64) -> Result<()> { // We use this fail point in order to check that failing in the // middle of applying a series of migrations fails in an expected // manner @@ -55,12 +53,11 @@ impl<'m> MigrationRunner<'m> { } } - self.client - .query( - "UPDATE neon_migration.migration_id SET id = $1", - &[&migration_id], - ) - .context("run_migrations update id")?; + txn.query( + "UPDATE neon_migration.migration_id SET id = $1", + &[&migration_id], + ) + .with_context(|| format!("update neon_migration.migration_id to {migration_id}"))?; Ok(()) } @@ -81,53 +78,50 @@ impl<'m> MigrationRunner<'m> { Ok(()) } - /// Run the configrured set of migrations + /// Run an individual migration + fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> { + if migration.starts_with("-- SKIP") { + info!("Skipping migration id={}", migration_id); + + // Even though we are skipping the migration, updating the + // migration ID should help keep logic easy to understand when + // trying to understand the state of a cluster. + Self::update_migration_id(txn, migration_id)?; + } else { + info!("Running migration id={}:\n{}\n", migration_id, migration); + + txn.simple_query(migration) + .with_context(|| format!("apply migration {migration_id}"))?; + + Self::update_migration_id(txn, migration_id)?; + } + + Ok(()) + } + + /// Run the configured set of migrations pub fn run_migrations(mut self) -> Result<()> { - self.prepare_database()?; + self.prepare_database() + .context("prepare database to handle migrations")?; let mut current_migration = self.get_migration_id()? as usize; while current_migration < self.migrations.len() { - macro_rules! 
migration_id { - ($cm:expr) => { - ($cm + 1) as i64 - }; - } + // The index lags the migration ID by 1, so the current migration + // ID is also the next index + let migration_id = (current_migration + 1) as i64; - let migration = self.migrations[current_migration]; + let mut txn = self + .client + .transaction() + .with_context(|| format!("begin transaction for migration {migration_id}"))?; - if migration.starts_with("-- SKIP") { - info!("Skipping migration id={}", migration_id!(current_migration)); + Self::run_migration(&mut txn, migration_id, self.migrations[current_migration]) + .with_context(|| format!("running migration {migration_id}"))?; - // Even though we are skipping the migration, updating the - // migration ID should help keep logic easy to understand when - // trying to understand the state of a cluster. - self.update_migration_id(migration_id!(current_migration))?; - } else { - info!( - "Running migration id={}:\n{}\n", - migration_id!(current_migration), - migration - ); + txn.commit() + .with_context(|| format!("commit transaction for migration {migration_id}"))?; - self.client - .simple_query("BEGIN") - .context("begin migration")?; - - self.client.simple_query(migration).with_context(|| { - format!( - "run_migrations migration id={}", - migration_id!(current_migration) - ) - })?; - - self.update_migration_id(migration_id!(current_migration))?; - - self.client - .simple_query("COMMIT") - .context("commit migration")?; - - info!("Finished migration id={}", migration_id!(current_migration)); - } + info!("Finished migration id={}", migration_id); current_migration += 1; } From 5baa4e7f0a4821802dccd98e936b361cbc7a544b Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 9 Jan 2025 11:47:55 +0000 Subject: [PATCH 32/44] docker: don't set LD_LIBRARY_PATH (#10321) ## Problem This was causing storage controller to still use neon-built libpq instead of vanilla libpq. Since https://github.com/neondatabase/neon/pull/10269 we have a vanilla postgres in the system path -- anything that wants a postgres library will use that. ## Summary of changes - Remove LD_LIBRARY_PATH assignment in Dockerfile --- Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2c157b3b2a..d3659f917a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -103,11 +103,6 @@ RUN mkdir -p /data/.neon/ && \ > /data/.neon/pageserver.toml && \ chown -R neon:neon /data/.neon -# When running a binary that links with libpq, default to using our most recent postgres version. Binaries -# that want a particular postgres version will select it explicitly: this is just a default. -ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib - - VOLUME ["/data"] USER neon EXPOSE 6400 From 030ab1c0e8dc9f4e20b118ee77361a4b34918652 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Thu, 9 Jan 2025 15:26:17 +0300 Subject: [PATCH 33/44] TLA+ spec for safekeeper membership change (#9966) ## Problem We want to define the algorithm for safekeeper membership change. ## Summary of changes Add spec for it, several models and logs of checking them. 
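
For intuition, the key rule the spec encodes is the joint-configuration quorum: while a change from `members` to `newMembers` is in flight, a proposer must gather votes forming a quorum in both sets. A rough Rust illustration of that predicate, assuming the usual strict-majority reading of `FormsQuorum`; the node-id type and function names are stand-ins, not safekeeper code:

```rust
use std::collections::HashSet;

// Strict majority of `members` present among `votes`.
fn forms_quorum(votes: &HashSet<u32>, members: &HashSet<u32>) -> bool {
    2 * votes.intersection(members).count() > members.len()
}

// In a joint configuration a candidate needs a quorum in the old member set
// and, if a new member set is present, in the new one as well.
fn joint_quorum(
    votes: &HashSet<u32>,
    members: &HashSet<u32>,
    new_members: Option<&HashSet<u32>>,
) -> bool {
    forms_quorum(votes, members) && new_members.map_or(true, |nm| forms_quorum(votes, nm))
}
```

In the spec this corresponds to `BecomeLeader` requiring `FormsQuorum` over `members` and, when `newMembers` is not `NULL`, over `newMembers` as well.
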
ref https://github.com/neondatabase/neon/issues/8699 --- .../spec/MCProposerAcceptorReconfig.tla | 41 ++ safekeeper/spec/MCProposerAcceptorStatic.tla | 3 + safekeeper/spec/ProposerAcceptorReconfig.tla | 350 ++++++++++++++++++ safekeeper/spec/ProposerAcceptorStatic.tla | 234 +++++++----- safekeeper/spec/modelcheck.sh | 3 + ...roposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg | 21 ++ ...roposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg | 19 + ...roposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg | 20 + ...roposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg | 19 + safekeeper/spec/remove_interm_progress.awk | 25 ++ safekeeper/spec/remove_interm_progress.sh | 3 + ...2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log | 65 ++++ ...2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log | 64 ++++ 13 files changed, 784 insertions(+), 83 deletions(-) create mode 100644 safekeeper/spec/MCProposerAcceptorReconfig.tla create mode 100644 safekeeper/spec/ProposerAcceptorReconfig.tla create mode 100644 safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg create mode 100644 safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg create mode 100644 safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg create mode 100644 safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg create mode 100644 safekeeper/spec/remove_interm_progress.awk create mode 100755 safekeeper/spec/remove_interm_progress.sh create mode 100644 safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log create mode 100644 safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log diff --git a/safekeeper/spec/MCProposerAcceptorReconfig.tla b/safekeeper/spec/MCProposerAcceptorReconfig.tla new file mode 100644 index 0000000000..a4b25e383a --- /dev/null +++ b/safekeeper/spec/MCProposerAcceptorReconfig.tla @@ -0,0 +1,41 @@ +---- MODULE MCProposerAcceptorReconfig ---- +EXTENDS TLC, ProposerAcceptorReconfig + +\* Augments the spec with model checking constraints. + +\* It slightly duplicates MCProposerAcceptorStatic, but we can't EXTENDS it +\* because it EXTENDS ProposerAcceptorStatic in turn. The duplication isn't big +\* anyway. + +\* For model checking. +CONSTANTS + max_entries, \* model constraint: max log entries acceptor/proposer can hold + max_term, \* model constraint: max allowed term + max_generation \* mode constraint: max config generation + +ASSUME max_entries \in Nat /\ max_term \in Nat /\ max_generation \in Nat + +\* Model space constraint. +StateConstraint == /\ \A p \in proposers: + /\ prop_state[p].term <= max_term + /\ Len(prop_state[p].wal) <= max_entries + /\ conf_store.generation <= max_generation + +\* Sets of proposers and acceptors and symmetric because we don't take any +\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN +\* ...) +ProposerAcceptorSymmetry == Permutations(proposers) \union Permutations(acceptors) + +\* enforce order of the vars in the error trace with ALIAS +\* Note that ALIAS is supported only since version 1.8.0 which is pre-release +\* as of writing this. 
+Alias == [ + prop_state |-> prop_state, + prop_conf |-> prop_conf, + acc_state |-> acc_state, + acc_conf |-> acc_conf, + committed |-> committed, + conf_store |-> conf_store + ] + +==== diff --git a/safekeeper/spec/MCProposerAcceptorStatic.tla b/safekeeper/spec/MCProposerAcceptorStatic.tla index be3d99c697..b4eca1965a 100644 --- a/safekeeper/spec/MCProposerAcceptorStatic.tla +++ b/safekeeper/spec/MCProposerAcceptorStatic.tla @@ -3,6 +3,9 @@ EXTENDS TLC, ProposerAcceptorStatic \* Augments the spec with model checking constraints. +\* Note that MCProposerAcceptorReconfig duplicates it and might need to +\* be updated as well. + \* For model checking. CONSTANTS max_entries, \* model constraint: max log entries acceptor/proposer can hold diff --git a/safekeeper/spec/ProposerAcceptorReconfig.tla b/safekeeper/spec/ProposerAcceptorReconfig.tla new file mode 100644 index 0000000000..78de231a39 --- /dev/null +++ b/safekeeper/spec/ProposerAcceptorReconfig.tla @@ -0,0 +1,350 @@ +---- MODULE ProposerAcceptorReconfig ---- + +(* + Spec for https://github.com/neondatabase/neon/blob/538e2312a617c65d489d391892c70b2e4d7407b5/docs/rfcs/035-safekeeper-dynamic-membership-change.md + + Simplifications: + - The ones inherited from ProposerAcceptorStatic. + - We don't model transient state of the configuration change driver process + (storage controller in the implementation). Its actions StartChange and FinishChange + are taken based on the persistent state of safekeepers and conf store. The + justification for that is the following: once new configuration n is + created (e.g with StartChange or FinishChange), any old configuration + change driver working on older conf < n will never be able to commit + it to the conf store because it is protected by CAS. The + propagation of these older confs is still possible though, and + spec allows to do it through acceptors. + Plus the model is already pretty huge. + - Previous point also means that the FinishChange action is + based only on the current state of safekeepers, not from + the past. That's ok because while individual + acceptor may go down, + quorum one never does. So the FinishChange + condition which collects max of the quorum may get + only more strict over time. + + The invariants expectedly break if any of FinishChange + required conditions are removed. +*) + +EXTENDS Integers, Sequences, FiniteSets, TLC + +VARIABLES + \* state which is the same in the static spec + prop_state, + acc_state, + committed, + elected_history, + \* reconfiguration only state + prop_conf, \* prop_conf[p] is current configuration of proposer p + acc_conf, \* acc_conf[a] is current configuration of acceptor a + conf_store \* configuration in the configuration store. + +CONSTANT + acceptors, + proposers + +CONSTANT NULL + +\* Import ProposerAcceptorStatic under PAS. +\* +\* Note that all vars and consts are named the same and thus substituted +\* implicitly. +PAS == INSTANCE ProposerAcceptorStatic + +\******************************************************************************** +\* Helpers +\******************************************************************************** + +\******************************************************************************** +\* Type assertion +\******************************************************************************** + +\* Is c a valid config? +IsConfig(c) == + /\ DOMAIN c = {"generation", "members", "newMembers"} + \* Unique id of the configuration. 
+ /\ c.generation \in Nat
+ /\ c.members \in SUBSET acceptors
+ \* newMembers is NULL when it is not a joint conf.
+ /\ \/ c.newMembers = NULL
+ \/ c.newMembers \in SUBSET acceptors
+
+TypeOk ==
+ /\ PAS!TypeOk
+ /\ \A p \in proposers: IsConfig(prop_conf[p])
+ /\ \A a \in acceptors: IsConfig(acc_conf[a])
+ /\ IsConfig(conf_store)
+
+\********************************************************************************
+\* Initial
+\********************************************************************************
+
+Init ==
+ /\ PAS!Init
+ /\ \E init_members \in SUBSET acceptors:
+ LET init_conf == [generation |-> 1, members |-> init_members, newMembers |-> NULL] IN
+ \* refer to RestartProposer for why it is not NULL
+ /\ prop_conf = [p \in proposers |-> init_conf]
+ /\ acc_conf = [a \in acceptors |-> init_conf]
+ /\ conf_store = init_conf
+ \* We could start with anything, but to reduce state space start with
+ \* the most reasonable total acceptors - 1 conf size, which e.g.
+ \* makes basic {a1} -> {a2} change in {a1, a2} acceptors and {a1, a2,
+ \* a3} -> {a2, a3, a4} in {a1, a2, a3, a4} acceptors models even in
+ \* the smallest models with single change.
+ /\ Cardinality(init_members) = Cardinality(acceptors) - 1
+
+\********************************************************************************
+\* Actions
+\********************************************************************************
+
+\* Proposer p loses all state, restarting. In the static spec we bump restarted
+\* proposer term to max of some quorum + 1 which is a minimal term which can win
+\* election. With reconfigurations it's harder to calculate such a term, so keep
+\* it simple and take a random acceptor's term + 1.
+\*
+\* Also make the proposer adopt the configuration of another random acceptor. In the
+\* impl proposer starts with NULL configuration until handshake with first
+\* acceptor. Removing this NULL special case makes the spec a bit simpler.
+RestartProposer(p) ==
+ /\ \E a \in acceptors: PAS!RestartProposerWithTerm(p, acc_state[a].term + 1)
+ /\ \E a \in acceptors: prop_conf' = [prop_conf EXCEPT ![p] = acc_conf[a]]
+ /\ UNCHANGED <>
+
+\* Acceptor a immediately votes for proposer p.
+Vote(p, a) ==
+ \* Configuration must be the same.
+ /\ prop_conf[p].generation = acc_conf[a].generation
+ \* And a is expected to be a member of it. This is likely redundant as long as
+ \* becoming leader checks membership (though vote also contributes to max
+ \* calculation).
+ /\ \/ a \in prop_conf[p].members
+ \/ (prop_conf[p].newMembers /= NULL) /\ (a \in prop_conf[p].newMembers)
+ /\ PAS!Vote(p, a)
+ /\ UNCHANGED <>
+
+\* Proposer p gets elected.
+BecomeLeader(p) ==
+ /\ prop_state[p].state = "campaign"
+ \* Votes must form a quorum in both sets (if newMembers exists).
+ /\ PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].members)
+ /\ \/ prop_conf[p].newMembers = NULL
+ \* TLA+ disjunction evaluation doesn't short-circuit for a good reason:
+ \* https://groups.google.com/g/tlaplus/c/U6tOJ4dsjVM/m/UdOznPCVBwAJ
+ \* so repeat the null check.
+ \/ (prop_conf[p].newMembers /= NULL) /\ (PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].newMembers))
+ \* DoBecomeLeader will copy WAL of the highest voter to proposer's WAL, so
+ \* ensure its conf is still the same. In the impl WAL fetching also has to
+ \* check the configuration.
+ /\ prop_conf[p].generation = acc_conf[PAS!MaxVoteAcc(p)].generation + /\ \A a \in DOMAIN prop_state[p].votes: prop_conf[p].generation = acc_conf[a].generation + /\ PAS!DoBecomeLeader(p) + /\ UNCHANGED <> + +UpdateTerm(p, a) == + /\ PAS!UpdateTerm(p, a) + /\ UNCHANGED <> + +TruncateWal(p, a) == + /\ prop_state[p].state = "leader" + \* Configuration must be the same. + /\ prop_conf[p].generation = acc_conf[a].generation + /\ PAS!TruncateWal(p, a) + /\ UNCHANGED <> + +NewEntry(p) == + /\ PAS!NewEntry(p) + /\ UNCHANGED <> + +AppendEntry(p, a) == + /\ prop_state[p].state = "leader" + \* Configuration must be the same. + /\ prop_conf[p].generation = acc_conf[a].generation + \* And a is member of it. Ignoring this likely wouldn't hurt, but not useful + \* either. + /\ \/ a \in prop_conf[p].members + \/ (prop_conf[p].newMembers /= NULL) /\ (a \in prop_conf[p].newMembers) + /\ PAS!AppendEntry(p, a) + /\ UNCHANGED <> + +\* see PAS!CommitEntries for comments. +CommitEntries(p) == + /\ prop_state[p].state = "leader" + /\ \E q1 \in PAS!AllMinQuorums(prop_conf[p].members): + LET q1_commit_lsn == PAS!QuorumCommitLsn(p, q1) IN + \* Configuration must be the same. + /\ \A a \in q1: prop_conf[p].generation = acc_conf[a].generation + /\ q1_commit_lsn /= NULL + \* We must collect acks from both quorums, if newMembers is present. + /\ IF prop_conf[p].newMembers = NULL THEN + PAS!DoCommitEntries(p, q1_commit_lsn) + ELSE + \E q2 \in PAS!AllMinQuorums(prop_conf[p].newMembers): + LET q2_commit_lsn == PAS!QuorumCommitLsn(p, q2) IN + \* Configuration must be the same. + /\ \A a \in q1: prop_conf[p].generation = acc_conf[a].generation + /\ q2_commit_lsn /= NULL + /\ PAS!DoCommitEntries(p, PAS!Min(q1_commit_lsn, q2_commit_lsn)) + /\ UNCHANGED <> + +\* Proposer p adopts higher conf c from conf store or from some acceptor. +ProposerSwitchConf(p) == + /\ \E c \in ({conf_store} \union {acc_conf[a]: a \in acceptors}): + \* p's conf is lower than c. + /\ (c.generation > prop_conf[p].generation) + \* We allow to bump conf without restart only when wp is already elected. + \* If it isn't, the votes it has already collected are from the previous + \* configuration and can't be used. + \* + \* So if proposer is in 'campaign' in the impl we would restart preserving + \* conf and increasing term. In the spec this transition is already covered + \* by more a generic RestartProposer, so we don't specify it here. + /\ prop_state[p].state = "leader" + /\ prop_conf' = [prop_conf EXCEPT ![p] = c] + /\ UNCHANGED <> + +\* Do CAS on the conf store, starting change into the new_members conf. +StartChange(new_members) == + \* Possible only if we don't already have the change in progress. + /\ conf_store.newMembers = NULL + \* Not necessary, but reduces space a bit. + /\ new_members /= conf_store.members + /\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> new_members] + /\ UNCHANGED <> + +\* Acceptor's last_log_term. +AccLastLogTerm(acc) == + PAS!LastLogTerm(PAS!AcceptorTermHistory(acc)) + +\* Do CAS on the conf store, transferring joint conf into the newMembers only. +FinishChange == + \* have joint conf + /\ conf_store.newMembers /= NULL + \* The conditions for finishing the change are: + /\ \E qo \in PAS!AllMinQuorums(conf_store.members): + \* 1) Old majority must be aware of the joint conf. + \* Note: generally the driver can't know current acceptor + \* generation, it can only know that it once had been the + \* expected one, but it might have advanced since then. 
+ \* But as explained at the top of the file if acceptor gen + \* advanced, FinishChange will never be able to complete + \* due to CAS anyway. We use strict equality here because + \* that's what makes sense conceptually (old driver should + \* abandon its attempt if it observes that conf has advanced). + /\ \A a \in qo: conf_store.generation = acc_conf[a].generation + \* 2) New member set must have log synced, i.e. some its majority needs + \* to have at least as high as max of some + \* old majority. + \* 3) Term must be synced, i.e. some majority of the new set must + \* have term >= than max term of some old majority. + \* This ensures that two leaders are never elected with the same + \* term even after config change (which would be bad unless we treat + \* generation as a part of term which we don't). + \* 4) A majority of the new set must be aware of the joint conf. + \* This allows to safely destoy acceptor state if it is not a + \* member of its current conf (which is useful for cleanup after + \* migration as well as for aborts). + /\ LET sync_pos == PAS!MaxTermLsn({[term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)]: a \in qo}) + sync_term == PAS!Maximum({acc_state[a].term: a \in qo}) + IN + \E qn \in PAS!AllMinQuorums(conf_store.newMembers): + \A a \in qn: + /\ PAS!TermLsnGE([term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)], sync_pos) + /\ acc_state[a].term >= sync_term + \* The same note as above about strict equality applies here. + /\ conf_store.generation = acc_conf[a].generation + /\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.newMembers, newMembers |-> NULL] + /\ UNCHANGED <> + +\* Do CAS on the conf store, aborting the change in progress. +AbortChange == + \* have joint conf + /\ conf_store.newMembers /= NULL + /\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> NULL] + /\ UNCHANGED <> + +\* Acceptor a switches to higher configuration from the conf store +\* or from some proposer. +AccSwitchConf(a) == + /\ \E c \in ({conf_store} \union {prop_conf[p]: p \in proposers}): + /\ acc_conf[a].generation < c.generation + /\ acc_conf' = [acc_conf EXCEPT ![a] = c] + /\ UNCHANGED <> + +\* Nuke all acceptor state if it is not a member of its current conf. Models +\* cleanup after migration/abort. +AccReset(a) == + /\ \/ (acc_conf[a].newMembers = NULL) /\ (a \notin acc_conf[a].members) + \/ (acc_conf[a].newMembers /= NULL) /\ (a \notin (acc_conf[a].members \union acc_conf[a].newMembers)) + /\ acc_state' = [acc_state EXCEPT ![a] = PAS!InitAcc] + \* Set nextSendLsn to `a` to NULL everywhere. nextSendLsn serves as a mark + \* that elected proposer performed TruncateWal on the acceptor, which isn't + \* true anymore after state reset. In the impl local deletion is expected to + \* terminate all existing connections. 
+ /\ prop_state' = [p \in proposers |-> [prop_state[p] EXCEPT !.nextSendLsn[a] = NULL]] + /\ UNCHANGED <> + +\******************************************************************************* +\* Final spec +\******************************************************************************* + +Next == + \/ \E p \in proposers: RestartProposer(p) + \/ \E p \in proposers: \E a \in acceptors: Vote(p, a) + \/ \E p \in proposers: BecomeLeader(p) + \/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a) + \/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a) + \/ \E p \in proposers: NewEntry(p) + \/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a) + \/ \E p \in proposers: CommitEntries(p) + \/ \E new_members \in SUBSET acceptors: StartChange(new_members) + \/ FinishChange + \/ AbortChange + \/ \E p \in proposers: ProposerSwitchConf(p) + \/ \E a \in acceptors: AccSwitchConf(a) + \/ \E a \in acceptors: AccReset(a) + +Spec == Init /\ [][Next]_<> + +\******************************************************************************** +\* Invariants +\******************************************************************************** + +AllConfs == + {conf_store} \union {prop_conf[p]: p \in proposers} \union {acc_conf[a]: a \in acceptors} + +\* Fairly trivial (given the conf store) invariant that different configurations +\* with the same generation are never issued. +ConfigSafety == + \A c1, c2 \in AllConfs: + (c1.generation = c2.generation) => (c1 = c2) + +ElectionSafety == PAS!ElectionSafety + +ElectionSafetyFull == PAS!ElectionSafetyFull + +LogIsMonotonic == PAS!LogIsMonotonic + +LogSafety == PAS!LogSafety + +\******************************************************************************** +\* Invariants which don't need to hold, but useful for playing/debugging. +\******************************************************************************** + +\* Check that we ever switch into non joint conf. +MaxAccConf == ~ \E a \in acceptors: + /\ acc_conf[a].generation = 3 + /\ acc_conf[a].newMembers /= NULL + +CommittedNotTruncated == PAS!CommittedNotTruncated + +MaxTerm == PAS!MaxTerm + +MaxStoreConf == conf_store.generation <= 1 + +MaxAccWalLen == PAS!MaxAccWalLen + +MaxCommitLsn == PAS!MaxCommitLsn + +==== diff --git a/safekeeper/spec/ProposerAcceptorStatic.tla b/safekeeper/spec/ProposerAcceptorStatic.tla index b2d2f005db..fab085bc2e 100644 --- a/safekeeper/spec/ProposerAcceptorStatic.tla +++ b/safekeeper/spec/ProposerAcceptorStatic.tla @@ -18,7 +18,7 @@ \* - old WAL is immediately copied to proposer on its election, without on-demand fetch later. \* Some ideas how to break it to play around to get a feeling: -\* - replace Quorums with BadQuorums. +\* - replace Quorum with BadQuorum. \* - remove 'don't commit entries from previous terms separately' rule in \* CommitEntries and observe figure 8 from the raft paper. \* With p2a3t4l4 32 steps error was found in 1h on 80 cores. @@ -69,16 +69,26 @@ Upsert(f, k, v, l(_)) == \***************** -NumAccs == Cardinality(acceptors) +\* Does set of acceptors `acc_set` form the quorum in the member set `members`? +\* Acceptors not from `members` are excluded (matters only for reconfig). +FormsQuorum(acc_set, members) == + Cardinality(acc_set \intersect members) >= (Cardinality(members) \div 2 + 1) -\* does acc_set form the quorum? -Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1) -\* all quorums of acceptors -Quorums == {subset \in SUBSET acceptors: Quorum(subset)} +\* Like FormsQuorum, but for minimal quorum. 
+FormsMinQuorum(acc_set, members) == + Cardinality(acc_set \intersect members) = (Cardinality(members) \div 2 + 1) -\* For substituting Quorums and seeing what happens. -BadQuorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2) -BadQuorums == {subset \in SUBSET acceptors: BadQuorum(subset)} +\* All sets of acceptors forming minimal quorums in the member set `members`. +AllQuorums(members) == {subset \in SUBSET members: FormsQuorum(subset, members)} +AllMinQuorums(members) == {subset \in SUBSET acceptors: FormsMinQuorum(subset, members)} + +\* For substituting Quorum and seeing what happens. +FormsBadQuorum(acc_set, members) == + Cardinality(acc_set \intersect members) >= (Cardinality(members) \div 2) +FormsMinBadQuorum(acc_set, members) == + Cardinality(acc_set \intersect members) = (Cardinality(members) \div 2) +AllBadQuorums(members) == {subset \in SUBSET acceptors: FormsBadQuorum(subset, members)} +AllMinBadQuorums(members) == {subset \in SUBSET acceptors: FormsMinBadQuorum(subset, members)} \* flushLsn (end of WAL, i.e. index of next entry) of acceptor a. FlushLsn(a) == Len(acc_state[a].wal) + 1 @@ -135,10 +145,11 @@ TypeOk == /\ IsWal(prop_state[p].wal) \* Map of acceptor -> next lsn to send. It is set when truncate_wal is \* done so sending entries is allowed only after that. In the impl TCP - \* ensures this ordering. + \* ensures this ordering. We use NULL instead of missing value to use + \* EXCEPT in AccReset. /\ \A a \in DOMAIN prop_state[p].nextSendLsn: /\ a \in acceptors - /\ prop_state[p].nextSendLsn[a] \in Lsns + /\ prop_state[p].nextSendLsn[a] \in Lsns \union {NULL} /\ \A a \in acceptors: /\ DOMAIN acc_state[a] = {"term", "termHistory", "wal"} /\ acc_state[a].term \in Terms @@ -167,6 +178,19 @@ TypeOk == \* Initial \******************************************************************************** +InitAcc == + [ + \* There will be no leader in zero term, 1 is the first + \* real. + term |-> 0, + \* Again, leader in term 0 doesn't exist, but we initialize + \* term histories with it to always have common point in + \* them. Lsn is 1 because TLA+ sequences are indexed from 1 + \* (we don't want to truncate WAL out of range). + termHistory |-> << [term |-> 0, lsn |-> 1] >>, + wal |-> << >> + ] + Init == /\ prop_state = [p \in proposers |-> [ state |-> "campaign", @@ -174,19 +198,9 @@ Init == votes |-> EmptyF, termHistory |-> << >>, wal |-> << >>, - nextSendLsn |-> EmptyF + nextSendLsn |-> [a \in acceptors |-> NULL] ]] - /\ acc_state = [a \in acceptors |-> [ - \* There will be no leader in zero term, 1 is the first - \* real. - term |-> 0, - \* Again, leader in term 0 doesn't exist, but we initialize - \* term histories with it to always have common point in - \* them. Lsn is 1 because TLA+ sequences are indexed from 1 - \* (we don't want to truncate WAL out of range). - termHistory |-> << [term |-> 0, lsn |-> 1] >>, - wal |-> << >> - ]] + /\ acc_state = [a \in acceptors |-> InitAcc] /\ committed = {} /\ elected_history = EmptyF @@ -195,23 +209,35 @@ Init == \* Actions \******************************************************************************** -\* Proposer loses all state. +RestartProposerWithTerm(p, new_term) == + /\ prop_state' = [prop_state EXCEPT ![p].state = "campaign", + ![p].term = new_term, + ![p].votes = EmptyF, + ![p].termHistory = << >>, + ![p].wal = << >>, + ![p].nextSendLsn = [a \in acceptors |-> NULL]] + /\ UNCHANGED <> + +\* Proposer p loses all state, restarting. 
\* For simplicity (and to reduct state space), we assume it immediately gets \* current state from quorum q of acceptors determining the term he will request \* to vote for. -RestartProposer(p, q) == - /\ Quorum(q) - /\ LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN - /\ prop_state' = [prop_state EXCEPT ![p].state = "campaign", - ![p].term = new_term, - ![p].votes = EmptyF, - ![p].termHistory = << >>, - ![p].wal = << >>, - ![p].nextSendLsn = EmptyF] - /\ UNCHANGED <> +RestartProposer(p) == + \E q \in AllQuorums(acceptors): + LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN + RestartProposerWithTerm(p, new_term) \* Term history of acceptor a's WAL: the one saved truncated to contain only <= -\* local FlushLsn entries. +\* local FlushLsn entries. Note that FlushLsn is the end LSN of the last entry +\* (and begin LSN of the next). The mental model for non strict comparison is +\* that once proposer is elected it immediately writes log record with zero +\* length. This allows leader to commit existing log without writing any new +\* entries. For example, assume acceptor has WAL +\* 1.1, 1.2 +\* written by prop with term 1; its current +\* is <1, 3>. Now prop with term 2 and max vote from this acc is elected. +\* Once TruncateWAL is done, becomes <2, 3> +\* without any new records explicitly written. AcceptorTermHistory(a) == SelectSeq(acc_state[a].termHistory, LAMBDA th_entry: th_entry.lsn <= FlushLsn(a)) @@ -230,35 +256,52 @@ Vote(p, a) == \* Get lastLogTerm from term history th. LastLogTerm(th) == th[Len(th)].term +\* Compares pairs: returns true if tl1 >= tl2. +TermLsnGE(tl1, tl2) == + /\ tl1.term >= tl2.term + /\ (tl1.term = tl2.term => tl1.lsn >= tl2.lsn) + +\* Choose max pair in the non empty set of them. +MaxTermLsn(term_lsn_set) == + CHOOSE max_tl \in term_lsn_set: \A tl \in term_lsn_set: TermLsnGE(max_tl, tl) + +\* Find acceptor with the highest vote in proposer p's votes. +MaxVoteAcc(p) == + CHOOSE a \in DOMAIN prop_state[p].votes: + LET a_vote == prop_state[p].votes[a] + a_vote_term_lsn == [term |-> LastLogTerm(a_vote.termHistory), lsn |-> a_vote.flushLsn] + vote_term_lsns == {[term |-> LastLogTerm(v.termHistory), lsn |-> v.flushLsn]: v \in Range(prop_state[p].votes)} + IN + a_vote_term_lsn = MaxTermLsn(vote_term_lsns) + +\* Workhorse for BecomeLeader. +\* Assumes the check prop_state[p] votes is quorum has been done *outside*. +DoBecomeLeader(p) == + LET + \* Find acceptor with the highest vote. + max_vote_acc == MaxVoteAcc(p) + max_vote == prop_state[p].votes[max_vote_acc] + prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn]) + IN + \* We copy all log preceding proposer's term from the max vote node so + \* make sure it is still on one term with us. This is a model + \* simplification which can be removed, in impl we fetch WAL on demand + \* from safekeeper which has it later. Note though that in case of on + \* demand fetch we must check on donor not only term match, but that + \* truncate_wal had already been done (if it is not max_vote_acc). + /\ acc_state[max_vote_acc].term = prop_state[p].term + /\ prop_state' = [prop_state EXCEPT ![p].state = "leader", + ![p].termHistory = prop_th, + ![p].wal = acc_state[max_vote_acc].wal + ] + /\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1) + /\ UNCHANGED <> + \* Proposer p gets elected. BecomeLeader(p) == /\ prop_state[p].state = "campaign" - /\ Quorum(DOMAIN prop_state[p].votes) - /\ LET - \* Find acceptor with the highest vote. 
- max_vote_acc == - CHOOSE a \in DOMAIN prop_state[p].votes: - LET v == prop_state[p].votes[a] - IN \A v2 \in Range(prop_state[p].votes): - /\ LastLogTerm(v.termHistory) >= LastLogTerm(v2.termHistory) - /\ (LastLogTerm(v.termHistory) = LastLogTerm(v2.termHistory) => v.flushLsn >= v2.flushLsn) - max_vote == prop_state[p].votes[max_vote_acc] - prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn]) - IN - \* We copy all log preceding proposer's term from the max vote node so - \* make sure it is still on one term with us. This is a model - \* simplification which can be removed, in impl we fetch WAL on demand - \* from safekeeper which has it later. Note though that in case of on - \* demand fetch we must check on donor not only term match, but that - \* truncate_wal had already been done (if it is not max_vote_acc). - /\ acc_state[max_vote_acc].term = prop_state[p].term - /\ prop_state' = [prop_state EXCEPT ![p].state = "leader", - ![p].termHistory = prop_th, - ![p].wal = acc_state[max_vote_acc].wal - ] - /\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1) - /\ UNCHANGED <> - + /\ FormsQuorum(DOMAIN prop_state[p].votes, acceptors) + /\ DoBecomeLeader(p) \* Acceptor a learns about elected proposer p's term. In impl it matches to \* VoteRequest/VoteResponse exchange when leader is already elected and is not @@ -287,10 +330,11 @@ FindHighestCommonPoint(prop_th, acc_th, acc_flush_lsn) == IN [term |-> last_common_term, lsn |-> Min(acc_common_term_end, prop_common_term_end)] -\* Elected proposer p immediately truncates WAL (and term history) of acceptor a -\* before starting streaming. Establishes nextSendLsn for a. +\* Elected proposer p immediately truncates WAL (and sets term history) of +\* acceptor a before starting streaming. Establishes nextSendLsn for a. \* -\* In impl this happens at each reconnection, here we also allow to do it multiple times. +\* In impl this happens at each reconnection, here we also allow to do it +\* multiple times. TruncateWal(p, a) == /\ prop_state[p].state = "leader" /\ acc_state[a].term = prop_state[p].term @@ -321,8 +365,8 @@ NewEntry(p) == AppendEntry(p, a) == /\ prop_state[p].state = "leader" /\ acc_state[a].term = prop_state[p].term - /\ a \in DOMAIN prop_state[p].nextSendLsn \* did TruncateWal - /\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have smth to send + /\ prop_state[p].nextSendLsn[a] /= NULL \* did TruncateWal + /\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have smth to send /\ LET send_lsn == prop_state[p].nextSendLsn[a] entry == prop_state[p].wal[send_lsn] @@ -337,41 +381,65 @@ AppendEntry(p, a) == PropStartLsn(p) == IF prop_state[p].state = "leader" THEN prop_state[p].termHistory[Len(prop_state[p].termHistory)].lsn ELSE NULL -\* Proposer p commits all entries it can using quorum q. Note that unlike -\* will62794/logless-reconfig this allows to commit entries from previous terms -\* (when conditions for that are met). -CommitEntries(p, q) == - /\ prop_state[p].state = "leader" - /\ \A a \in q: +\* LSN which can be committed by proposer p using min quorum q (check that q +\* forms quorum must have been done outside). NULL if there is none. +QuorumCommitLsn(p, q) == + IF + /\ prop_state[p].state = "leader" + /\ \A a \in q: + \* Without explicit responses to appends this ensures that append + \* up to FlushLsn has been accepted. 
/\ acc_state[a].term = prop_state[p].term \* nextSendLsn existence means TruncateWal has happened, it ensures \* acceptor's WAL (and FlushLsn) are from proper proposer's history. \* Alternatively we could compare LastLogTerm here, but that's closer to \* what we do in the impl (we check flushLsn in AppendResponse, but \* AppendRequest is processed only if HandleElected handling was good). - /\ a \in DOMAIN prop_state[p].nextSendLsn - \* Now find the LSN present on all the quorum. - /\ LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN - \* This is the basic Raft rule of not committing entries from previous - \* terms except along with current term entry (commit them only when - \* quorum recovers, i.e. last_log_term on it reaches leader's term). - /\ quorum_lsn >= PropStartLsn(p) - /\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(quorum_lsn - 1)} - /\ UNCHANGED <> + /\ prop_state[p].nextSendLsn[a] /= NULL + THEN + \* Now find the LSN present on all the quorum. + LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN + \* This is the basic Raft rule of not committing entries from previous + \* terms except along with current term entry (commit them only when + \* quorum recovers, i.e. last_log_term on it reaches leader's term). + IF quorum_lsn >= PropStartLsn(p) THEN + quorum_lsn + ELSE + NULL + ELSE + NULL + +\* Commit all entries on proposer p with record lsn < commit_lsn. +DoCommitEntries(p, commit_lsn) == + /\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(commit_lsn - 1)} + /\ UNCHANGED <> + +\* Proposer p commits all entries it can using some quorum. Note that unlike +\* will62794/logless-reconfig this allows to commit entries from previous terms +\* (when conditions for that are met). +CommitEntries(p) == + /\ prop_state[p].state = "leader" + \* Using min quorums here is better because 1) QuorumCommitLsn for + \* simplicity checks min across all accs in q. 2) it probably makes + \* evaluation faster. + /\ \E q \in AllMinQuorums(acceptors): + LET commit_lsn == QuorumCommitLsn(p, q) IN + /\ commit_lsn /= NULL + /\ DoCommitEntries(p, commit_lsn) \******************************************************************************* \* Final spec \******************************************************************************* Next == - \/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q) + \/ \E p \in proposers: RestartProposer(p) \/ \E p \in proposers: \E a \in acceptors: Vote(p, a) \/ \E p \in proposers: BecomeLeader(p) \/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a) \/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a) \/ \E p \in proposers: NewEntry(p) \/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a) - \/ \E q \in Quorums: \E p \in proposers: CommitEntries(p, q) + \/ \E p \in proposers: CommitEntries(p) Spec == Init /\ [][Next]_<> diff --git a/safekeeper/spec/modelcheck.sh b/safekeeper/spec/modelcheck.sh index 21ead7dad8..0084a8c638 100755 --- a/safekeeper/spec/modelcheck.sh +++ b/safekeeper/spec/modelcheck.sh @@ -2,6 +2,7 @@ # Usage: ./modelcheck.sh , e.g. 
# ./modelcheck.sh models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg MCProposerAcceptorStatic.tla +# ./modelcheck.sh models/MCProposerAcceptorReconfig_p2_a3_t3_l3_c3.cfg MCProposerAcceptorReconfig.tla CONFIG=$1 SPEC=$2 @@ -12,6 +13,7 @@ mkdir -p "tlc-results" CONFIG_FILE=$(basename -- "$CONFIG") outfilename="$SPEC-${CONFIG_FILE}-$(date --utc +%Y-%m-%d--%H-%M-%S)".log outfile="tlc-results/$outfilename" +echo "saving results to $outfile" touch $outfile # Save some info about the run. @@ -45,5 +47,6 @@ echo "" >> $outfile # https://docs.tlapl.us/codebase:architecture#fingerprint_sets_fpsets # # Add -simulate to run in infinite simulation mode. +# -coverage 1 is useful for profiling (check how many times actions are taken). java -Xmx$MEM -XX:MaxDirectMemorySize=$MEM -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet \ -cp "${TOOLSPATH}" tlc2.TLC $SPEC -config $CONFIG -workers auto -gzip | tee -a $outfile diff --git a/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg new file mode 100644 index 0000000000..8d34751083 --- /dev/null +++ b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg @@ -0,0 +1,21 @@ +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2} +max_term = 2 +max_entries = 2 +max_generation = 3 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ConfigSafety +ElectionSafetyFull +LogIsMonotonic +LogSafety +\* As its comment explains generally it is not expected to hold, but +\* in such small model it is true. +CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias diff --git a/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg new file mode 100644 index 0000000000..eb7e0768ff --- /dev/null +++ b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg @@ -0,0 +1,19 @@ +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2} +max_term = 2 +max_entries = 2 +max_generation = 5 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ConfigSafety +ElectionSafetyFull +LogIsMonotonic +LogSafety +CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias diff --git a/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg new file mode 100644 index 0000000000..b5fae13880 --- /dev/null +++ b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a3_t2_l2_c3.cfg @@ -0,0 +1,20 @@ +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3} +max_term = 2 +max_entries = 2 +max_generation = 3 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ConfigSafety +ElectionSafetyFull +LogIsMonotonic +LogSafety +CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + diff --git a/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg new file mode 100644 index 0000000000..71af9fa367 --- /dev/null +++ b/safekeeper/spec/models/MCProposerAcceptorReconfig_p2_a4_t2_l2_c3.cfg @@ -0,0 +1,19 @@ +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2, a3, a4} +max_term = 2 +max_entries = 2 +max_generation = 3 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafetyFull +LogIsMonotonic +LogSafety 
+CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + diff --git a/safekeeper/spec/remove_interm_progress.awk b/safekeeper/spec/remove_interm_progress.awk new file mode 100644 index 0000000000..6203f6fa4f --- /dev/null +++ b/safekeeper/spec/remove_interm_progress.awk @@ -0,0 +1,25 @@ +# Print all lines, but thin out lines starting with Progress: +# leave only first and last 5 ones in the beginning, and only 1 of 1440 +# of others (once a day). +# Also remove checkpointing logs. +{ + lines[NR] = $0 +} +$0 ~ /^Progress/ { + ++pcount +} +END { + progress_idx = 0 + for (i = 1; i <= NR; i++) { + if (lines[i] ~ /^Progress/) { + if (progress_idx < 5 || progress_idx >= pcount - 5 || progress_idx % 1440 == 0) { + print lines[i] + } + progress_idx++ + } + else if (lines[i] ~ /^Checkpointing/) {} + else { + print lines[i] + } + } +} \ No newline at end of file diff --git a/safekeeper/spec/remove_interm_progress.sh b/safekeeper/spec/remove_interm_progress.sh new file mode 100755 index 0000000000..a8724a2b92 --- /dev/null +++ b/safekeeper/spec/remove_interm_progress.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +awk -f remove_interm_progress.awk $1 > $1.thin \ No newline at end of file diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log b/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log new file mode 100644 index 0000000000..8aac9eb58c --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg-2024-12-11--04-24-12.log @@ -0,0 +1,65 @@ +git revision: 9e386917a +Platform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov 3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux +CPU Info Linux: Neoverse-N1 +CPU Cores Linux: 80 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorReconfig.tla +Config: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg +---- +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2} +max_term = 2 +max_entries = 2 +max_generation = 3 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafetyFull +LogIsMonotonic +LogSafety +\* CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + +---- + +TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71) +Running breadth-first search Model-Checking with fp 99 and seed -9189733667206762985 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 391272] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue). 
+Parsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla +Parsing file /tmp/tlc-3211535543066978921/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla +Parsing file /tmp/tlc-3211535543066978921/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-3211535543066978921/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-3211535543066978921/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-3211535543066978921/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-3211535543066978921/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-3211535543066978921/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module ProposerAcceptorReconfig +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorReconfig +Starting... (2024-12-11 04:24:13) +Computing initial states... +Finished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:24:15. +Progress(16) at 2024-12-11 04:24:18: 1,427,589 states generated (1,427,589 s/min), 142,472 distinct states found (142,472 ds/min), 47,162 states left on queue. +Model checking completed. No error has been found. + Estimates of the probability that TLC did not check all reachable states + because two distinct states had the same fingerprint: + calculated (optimistic): val = 1.0E-6 + based on the actual fingerprints: val = 4.2E-8 +17746857 states generated, 1121659 distinct states found, 0 states left on queue. +The depth of the complete state graph search is 37. +The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 9 and the 95th percentile is 3). 
+Finished in 33s at (2024-12-11 04:24:46) diff --git a/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log b/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log new file mode 100644 index 0000000000..40e7611ae3 --- /dev/null +++ b/safekeeper/spec/tlc-results/MCProposerAcceptorReconfig.tla-MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg-2024-12-11--04-26-11.log @@ -0,0 +1,64 @@ +git revision: 9e386917a +Platform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov 3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux +CPU Info Linux: Neoverse-N1 +CPU Cores Linux: 80 +CPU Info Mac: +CPU Cores Mac: +Spec: MCProposerAcceptorReconfig.tla +Config: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg +---- +CONSTANTS +NULL = NULL +proposers = {p1, p2} +acceptors = {a1, a2} +max_term = 2 +max_entries = 2 +max_generation = 5 +SPECIFICATION Spec +CONSTRAINT StateConstraint +INVARIANT +TypeOk +ElectionSafetyFull +LogIsMonotonic +LogSafety +\* CommittedNotTruncated +SYMMETRY ProposerAcceptorSymmetry +CHECK_DEADLOCK FALSE +ALIAS Alias + +---- + +TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71) +Running breadth-first search Model-Checking with fp 114 and seed -8099467489737745861 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 392020] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue). +Parsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla +Parsing file /tmp/tlc-11757875725969857497/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla) +Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla +Parsing file /tmp/tlc-11757875725969857497/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla) +Parsing file /tmp/tlc-11757875725969857497/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla) +Parsing file /tmp/tlc-11757875725969857497/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla) +Parsing file /tmp/tlc-11757875725969857497/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla) +Parsing file /tmp/tlc-11757875725969857497/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla) +Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla +Parsing file /tmp/tlc-11757875725969857497/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla) +Semantic processing of module Naturals +Semantic processing of module Sequences +Semantic processing of module FiniteSets +Semantic processing of module TLC +Semantic processing of module Integers +Semantic processing of module ProposerAcceptorStatic +Semantic processing of module ProposerAcceptorReconfig +Semantic processing of module TLCExt +Semantic processing of module _TLCTrace +Semantic processing of module MCProposerAcceptorReconfig +Starting... (2024-12-11 04:26:12) +Computing initial states... +Finished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:26:14. +Progress(14) at 2024-12-11 04:26:17: 1,519,385 states generated (1,519,385 s/min), 231,263 distinct states found (231,263 ds/min), 121,410 states left on queue. 
+Progress(20) at 2024-12-11 04:27:17: 42,757,204 states generated (41,237,819 s/min), 4,198,386 distinct states found (3,967,123 ds/min), 1,308,109 states left on queue. +Progress(22) at 2024-12-11 04:28:17: 83,613,929 states generated (40,856,725 s/min), 7,499,873 distinct states found (3,301,487 ds/min), 1,929,464 states left on queue. +Progress(23) at 2024-12-11 04:29:17: 124,086,758 states generated (40,472,829 s/min), 10,569,712 distinct states found (3,069,839 ds/min), 2,386,988 states left on queue. +Progress(24) at 2024-12-11 04:30:17: 163,412,538 states generated (39,325,780 s/min), 13,314,303 distinct states found (2,744,591 ds/min), 2,610,637 states left on queue. +Progress(25) at 2024-12-11 04:31:17: 202,643,708 states generated (39,231,170 s/min), 15,960,583 distinct states found (2,646,280 ds/min), 2,759,681 states left on queue. +Progress(26) at 2024-12-11 04:32:17: 240,681,633 states generated (38,037,925 s/min), 18,443,440 distinct states found (2,482,857 ds/min), 2,852,177 states left on queue. +Progress(27) at 2024-12-11 04:33:17: 278,559,134 states generated (37,877,501 s/min), 20,878,067 distinct states found (2,434,627 ds/min), 2,904,400 states left on queue. +Progress(28) at 2024-12-11 04:34:17: 316,699,911 states generated (38,140,777 s/min), 23,212,229 distinct states found (2,334,162 ds/min), 2,864,969 states left on queue. From f4739d49e3f20fae6a88b7a050084ab73c6319e4 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 9 Jan 2025 12:31:02 +0000 Subject: [PATCH 34/44] pageserver: tweak interpreted ingest record metrics (#10291) ## Problem The filtered record metric doesn't make sense for interpreted ingest. ## Summary of changes While of dubious utility in the first place, this patch replaces them with records received and records observed metrics for interpreted ingest: * received records cause the pageserver to do _something_: write a key, value pair to storage, update some metadata or flush pending modifications * observed records are a shard 0 concept and contain only key metadata used in tracking relation sizes (received records include observed records) --- libs/wal_decoder/src/models.rs | 8 +++ libs/wal_decoder/src/serialized_batch.rs | 5 ++ pageserver/src/metrics.rs | 10 ++- .../walreceiver/walreceiver_connection.rs | 72 +++++++------------ test_runner/regress/test_sharding.py | 54 +++++++++----- 5 files changed, 83 insertions(+), 66 deletions(-) diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs index af22de5d95..6576dd0eba 100644 --- a/libs/wal_decoder/src/models.rs +++ b/libs/wal_decoder/src/models.rs @@ -95,6 +95,14 @@ impl InterpretedWalRecord { && self.metadata_record.is_none() && matches!(self.flush_uncommitted, FlushUncommittedRecords::No) } + + /// Checks if the WAL record is observed (i.e. 
contains only metadata + /// for observed values) + pub fn is_observed(&self) -> bool { + self.batch.is_observed() + && self.metadata_record.is_none() + && matches!(self.flush_uncommitted, FlushUncommittedRecords::No) + } } /// The interpreted part of the Postgres WAL record which requires metadata diff --git a/libs/wal_decoder/src/serialized_batch.rs b/libs/wal_decoder/src/serialized_batch.rs index 41294da7a0..af2b179e05 100644 --- a/libs/wal_decoder/src/serialized_batch.rs +++ b/libs/wal_decoder/src/serialized_batch.rs @@ -501,6 +501,11 @@ impl SerializedValueBatch { !self.has_data() && self.metadata.is_empty() } + /// Checks if the batch contains only observed values + pub fn is_observed(&self) -> bool { + !self.has_data() && !self.metadata.is_empty() + } + /// Checks if the batch contains data /// /// Note that if this returns false, it may still contain observed values or diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b4e20cb8b9..e825d538a2 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -2337,13 +2337,15 @@ macro_rules! redo_bytes_histogram_count_buckets { pub(crate) struct WalIngestMetrics { pub(crate) bytes_received: IntCounter, pub(crate) records_received: IntCounter, + pub(crate) records_observed: IntCounter, pub(crate) records_committed: IntCounter, pub(crate) records_filtered: IntCounter, pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter, pub(crate) clear_vm_bits_unknown: IntCounterVec, } -pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| WalIngestMetrics { +pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| { + WalIngestMetrics { bytes_received: register_int_counter!( "pageserver_wal_ingest_bytes_received", "Bytes of WAL ingested from safekeepers", @@ -2354,6 +2356,11 @@ pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| WalIngestMet "Number of WAL records received from safekeepers" ) .expect("failed to define a metric"), + records_observed: register_int_counter!( + "pageserver_wal_ingest_records_observed", + "Number of WAL records observed from safekeepers. These are metadata only records for shard 0." 
+ ) + .expect("failed to define a metric"), records_committed: register_int_counter!( "pageserver_wal_ingest_records_committed", "Number of WAL records which resulted in writes to pageserver storage" @@ -2375,6 +2382,7 @@ pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| WalIngestMet &["entity"], ) .expect("failed to define a metric"), +} }); pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy = Lazy::new(|| { diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 3f10eeda60..d74faa1af5 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -319,27 +319,11 @@ pub(super) async fn handle_walreceiver_connection( return Ok(()); } - async fn commit( - modification: &mut DatadirModification<'_>, - uncommitted: &mut u64, - filtered: &mut u64, - ctx: &RequestContext, - ) -> anyhow::Result<()> { - WAL_INGEST - .records_committed - .inc_by(*uncommitted - *filtered); - modification.commit(ctx).await?; - *uncommitted = 0; - *filtered = 0; - Ok(()) - } - let status_update = match replication_message { ReplicationMessage::RawInterpretedWalRecords(raw) => { WAL_INGEST.bytes_received.inc_by(raw.data().len() as u64); let mut uncommitted_records = 0; - let mut filtered_records = 0; // This is the end LSN of the raw WAL from which the records // were interpreted. @@ -380,31 +364,23 @@ pub(super) async fn handle_walreceiver_connection( if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) && uncommitted_records > 0 { - commit( - &mut modification, - &mut uncommitted_records, - &mut filtered_records, - &ctx, - ) - .await?; + modification.commit(&ctx).await?; + uncommitted_records = 0; } let local_next_record_lsn = interpreted.next_record_lsn; - let ingested = walingest + + if interpreted.is_observed() { + WAL_INGEST.records_observed.inc(); + } + + walingest .ingest_record(interpreted, &mut modification, &ctx) .await .with_context(|| { format!("could not ingest record at {local_next_record_lsn}") })?; - if !ingested { - tracing::debug!( - "ingest: filtered out record @ LSN {local_next_record_lsn}" - ); - WAL_INGEST.records_filtered.inc(); - filtered_records += 1; - } - uncommitted_records += 1; // FIXME: this cannot be made pausable_failpoint without fixing the @@ -418,13 +394,8 @@ pub(super) async fn handle_walreceiver_connection( || modification.approx_pending_bytes() > DatadirModification::MAX_PENDING_BYTES { - commit( - &mut modification, - &mut uncommitted_records, - &mut filtered_records, - &ctx, - ) - .await?; + modification.commit(&ctx).await?; + uncommitted_records = 0; } } @@ -442,13 +413,7 @@ pub(super) async fn handle_walreceiver_connection( if uncommitted_records > 0 || needs_last_record_lsn_advance { // Commit any uncommitted records - commit( - &mut modification, - &mut uncommitted_records, - &mut filtered_records, - &ctx, - ) - .await?; + modification.commit(&ctx).await?; } if !caught_up && streaming_lsn >= end_of_wal { @@ -469,6 +434,21 @@ pub(super) async fn handle_walreceiver_connection( } ReplicationMessage::XLogData(xlog_data) => { + async fn commit( + modification: &mut DatadirModification<'_>, + uncommitted: &mut u64, + filtered: &mut u64, + ctx: &RequestContext, + ) -> anyhow::Result<()> { + WAL_INGEST + .records_committed + .inc_by(*uncommitted - *filtered); + modification.commit(ctx).await?; + *uncommitted = 0; + *filtered = 0; + Ok(()) + } + // 
Pass the WAL data to the decoder, and see if we can decode // more records as a result. let data = xlog_data.data(); diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 4c381b563f..673904a1cd 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -561,11 +561,17 @@ def test_sharding_split_smoke( workload.write_rows(256) # Note which pageservers initially hold a shard after tenant creation - pre_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)] - log.info("Pre-split pageservers: {pre_split_pageserver_ids}") + pre_split_pageserver_ids = dict() + for loc in env.storage_controller.locate(tenant_id): + shard_no = TenantShardId.parse(loc["shard_id"]).shard_number + pre_split_pageserver_ids[loc["node_id"]] = shard_no + log.info(f"Pre-split pageservers: {pre_split_pageserver_ids}") # For pageservers holding a shard, validate their ingest statistics # reflect a proper splitting of the WAL. + + observed_on_shard_zero = 0 + received_on_non_zero_shard = 0 for pageserver in env.pageservers: if pageserver.id not in pre_split_pageserver_ids: continue @@ -573,28 +579,38 @@ def test_sharding_split_smoke( metrics = pageserver.http_client().get_metrics_values( [ "pageserver_wal_ingest_records_received_total", - "pageserver_wal_ingest_records_committed_total", - "pageserver_wal_ingest_records_filtered_total", + "pageserver_wal_ingest_records_observed_total", ] ) log.info(f"Pageserver {pageserver.id} metrics: {metrics}") - # Not everything received was committed - assert ( - metrics["pageserver_wal_ingest_records_received_total"] - > metrics["pageserver_wal_ingest_records_committed_total"] - ) + received = metrics["pageserver_wal_ingest_records_received_total"] + observed = metrics["pageserver_wal_ingest_records_observed_total"] - # Something was committed - assert metrics["pageserver_wal_ingest_records_committed_total"] > 0 + shard_number: int | None = pre_split_pageserver_ids.get(pageserver.id, None) + if shard_number is None: + assert received == 0 + assert observed == 0 + elif shard_number == 0: + # Shard 0 receives its own records and observes records of other shards + # for relation size tracking. + assert observed > 0 + assert received > 0 + observed_on_shard_zero = int(observed) + else: + # Non zero shards do not observe any records, but only receive their own. + assert observed == 0 + assert received > 0 + received_on_non_zero_shard += int(received) - # Counts are self consistent - assert ( - metrics["pageserver_wal_ingest_records_received_total"] - == metrics["pageserver_wal_ingest_records_committed_total"] - + metrics["pageserver_wal_ingest_records_filtered_total"] - ) + # Some records are sent to multiple shards and some shard 0 records include both value observations + # and other metadata. Hence, we do a sanity check below that shard 0 observes the majority of records + # received by other shards. + assert ( + observed_on_shard_zero <= received_on_non_zero_shard + and observed_on_shard_zero >= received_on_non_zero_shard // 2 + ) # TODO: validate that shards have different sizes @@ -633,7 +649,7 @@ def test_sharding_split_smoke( # We should have split into 8 shards, on the same 4 pageservers we started on. 
assert len(post_split_pageserver_ids) == split_shard_count
assert len(set(post_split_pageserver_ids)) == shard_count
- assert set(post_split_pageserver_ids) == set(pre_split_pageserver_ids)
+ assert set(post_split_pageserver_ids) == set(pre_split_pageserver_ids.keys())
# The old parent shards should no longer exist on disk
assert not shards_on_disk(old_shard_ids)
@@ -739,7 +755,7 @@ def test_sharding_split_smoke(
# all the pageservers that originally held an attached shard should still hold one, otherwise
# it would indicate that we had done some unnecessary migration.
log.info(f"attached: {attached}")
- for ps_id in pre_split_pageserver_ids:
+ for ps_id in pre_split_pageserver_ids.keys():
log.info("Pre-split pageserver {ps_id} should still hold an attached location")
assert ps_id in attached

From 20c40eb733f401903adfa1c1da47dbed54098cae Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Thu, 9 Jan 2025 15:12:04 +0200
Subject: [PATCH 35/44] Add response tag to getpage request in V3 protocol version (#8686)

## Problem

We have had several serious data corruption incidents caused by mismatched getpage requests and responses:
https://neondb.slack.com/archives/C07FJS4QF7V/p1723032720164359

We hope that the problem is fixed now, but it is better to prevent such problems in the future.

Part of https://github.com/neondatabase/cloud/issues/16472

## Summary of changes

This PR introduces a new V3 version of the compute<->pageserver protocol, adding a tag to the getpage response. Compute is now able to check that it really received the response for the requested page.

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above checklist

---------

Co-authored-by: Konstantin Knizhnik
Co-authored-by: Heikki Linnakangas
---
 libs/pageserver_api/src/models.rs | 395 ++++++++++++----
 pageserver/client/src/page_service.rs | 2 +-
 .../pagebench/src/cmd/getpage_latest_lsn.rs | 15 +-
 pageserver/src/metrics.rs | 1 +
 pageserver/src/page_service.rs | 181 ++++---
 pgxn/neon/libpagestore.c | 9 +-
 pgxn/neon/pagestore_client.h | 51 +-
 pgxn/neon/pagestore_smgr.c | 445 ++++++++++++------
 8 files changed, 784 insertions(+), 315 deletions(-)

diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index f3fc9fad76..39390d7647 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1460,75 +1460,91 @@ impl TryFrom for PagestreamBeMessageTag {
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
// difference in the responses between V1 and V2.
//
-#[derive(Clone, Copy)]
+// V3 version of protocol adds request ID to all requests. This request ID is also included in response
+// as well as other fields from requests, which allows to verify that we receive response for our request.
+// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
+// and local counter, so in principle there can be duplicated request IDs if process PID is reused.
+// +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum PagestreamProtocolVersion { V2, + V3, } -#[derive(Debug, PartialEq, Eq)] +pub type RequestId = u64; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct PagestreamRequest { + pub reqid: RequestId, + pub request_lsn: Lsn, + pub not_modified_since: Lsn, +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PagestreamExistsRequest { - pub request_lsn: Lsn, - pub not_modified_since: Lsn, + pub hdr: PagestreamRequest, pub rel: RelTag, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PagestreamNblocksRequest { - pub request_lsn: Lsn, - pub not_modified_since: Lsn, + pub hdr: PagestreamRequest, pub rel: RelTag, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PagestreamGetPageRequest { - pub request_lsn: Lsn, - pub not_modified_since: Lsn, + pub hdr: PagestreamRequest, pub rel: RelTag, pub blkno: u32, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PagestreamDbSizeRequest { - pub request_lsn: Lsn, - pub not_modified_since: Lsn, + pub hdr: PagestreamRequest, pub dbnode: u32, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct PagestreamGetSlruSegmentRequest { - pub request_lsn: Lsn, - pub not_modified_since: Lsn, + pub hdr: PagestreamRequest, pub kind: u8, pub segno: u32, } #[derive(Debug)] pub struct PagestreamExistsResponse { + pub req: PagestreamExistsRequest, pub exists: bool, } #[derive(Debug)] pub struct PagestreamNblocksResponse { + pub req: PagestreamNblocksRequest, pub n_blocks: u32, } #[derive(Debug)] pub struct PagestreamGetPageResponse { + pub req: PagestreamGetPageRequest, pub page: Bytes, } #[derive(Debug)] pub struct PagestreamGetSlruSegmentResponse { + pub req: PagestreamGetSlruSegmentRequest, pub segment: Bytes, } #[derive(Debug)] pub struct PagestreamErrorResponse { + pub req: PagestreamRequest, pub message: String, } #[derive(Debug)] pub struct PagestreamDbSizeResponse { + pub req: PagestreamDbSizeRequest, pub db_size: i64, } @@ -1545,15 +1561,16 @@ pub struct TenantHistorySize { impl PagestreamFeMessage { /// Serialize a compute -> pageserver message. This is currently only used in testing - /// tools. Always uses protocol version 2. + /// tools. Always uses protocol version 3. 
pub fn serialize(&self) -> Bytes { let mut bytes = BytesMut::new(); match self { Self::Exists(req) => { bytes.put_u8(0); - bytes.put_u64(req.request_lsn.0); - bytes.put_u64(req.not_modified_since.0); + bytes.put_u64(req.hdr.reqid); + bytes.put_u64(req.hdr.request_lsn.0); + bytes.put_u64(req.hdr.not_modified_since.0); bytes.put_u32(req.rel.spcnode); bytes.put_u32(req.rel.dbnode); bytes.put_u32(req.rel.relnode); @@ -1562,8 +1579,9 @@ impl PagestreamFeMessage { Self::Nblocks(req) => { bytes.put_u8(1); - bytes.put_u64(req.request_lsn.0); - bytes.put_u64(req.not_modified_since.0); + bytes.put_u64(req.hdr.reqid); + bytes.put_u64(req.hdr.request_lsn.0); + bytes.put_u64(req.hdr.not_modified_since.0); bytes.put_u32(req.rel.spcnode); bytes.put_u32(req.rel.dbnode); bytes.put_u32(req.rel.relnode); @@ -1572,8 +1590,9 @@ impl PagestreamFeMessage { Self::GetPage(req) => { bytes.put_u8(2); - bytes.put_u64(req.request_lsn.0); - bytes.put_u64(req.not_modified_since.0); + bytes.put_u64(req.hdr.reqid); + bytes.put_u64(req.hdr.request_lsn.0); + bytes.put_u64(req.hdr.not_modified_since.0); bytes.put_u32(req.rel.spcnode); bytes.put_u32(req.rel.dbnode); bytes.put_u32(req.rel.relnode); @@ -1583,15 +1602,17 @@ impl PagestreamFeMessage { Self::DbSize(req) => { bytes.put_u8(3); - bytes.put_u64(req.request_lsn.0); - bytes.put_u64(req.not_modified_since.0); + bytes.put_u64(req.hdr.reqid); + bytes.put_u64(req.hdr.request_lsn.0); + bytes.put_u64(req.hdr.not_modified_since.0); bytes.put_u32(req.dbnode); } Self::GetSlruSegment(req) => { bytes.put_u8(4); - bytes.put_u64(req.request_lsn.0); - bytes.put_u64(req.not_modified_since.0); + bytes.put_u64(req.hdr.reqid); + bytes.put_u64(req.hdr.request_lsn.0); + bytes.put_u64(req.hdr.not_modified_since.0); bytes.put_u8(req.kind); bytes.put_u32(req.segno); } @@ -1600,21 +1621,35 @@ impl PagestreamFeMessage { bytes.into() } - pub fn parse(body: &mut R) -> anyhow::Result { + pub fn parse( + body: &mut R, + protocol_version: PagestreamProtocolVersion, + ) -> anyhow::Result { // these correspond to the NeonMessageTag enum in pagestore_client.h // // TODO: consider using protobuf or serde bincode for less error prone // serialization. 
let msg_tag = body.read_u8()?; - - // these two fields are the same for every request type - let request_lsn = Lsn::from(body.read_u64::()?); - let not_modified_since = Lsn::from(body.read_u64::()?); + let (reqid, request_lsn, not_modified_since) = match protocol_version { + PagestreamProtocolVersion::V2 => ( + 0, + Lsn::from(body.read_u64::()?), + Lsn::from(body.read_u64::()?), + ), + PagestreamProtocolVersion::V3 => ( + body.read_u64::()?, + Lsn::from(body.read_u64::()?), + Lsn::from(body.read_u64::()?), + ), + }; match msg_tag { 0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest { - request_lsn, - not_modified_since, + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, rel: RelTag { spcnode: body.read_u32::()?, dbnode: body.read_u32::()?, @@ -1623,8 +1658,11 @@ impl PagestreamFeMessage { }, })), 1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest { - request_lsn, - not_modified_since, + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, rel: RelTag { spcnode: body.read_u32::()?, dbnode: body.read_u32::()?, @@ -1633,8 +1671,11 @@ impl PagestreamFeMessage { }, })), 2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest { - request_lsn, - not_modified_since, + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, rel: RelTag { spcnode: body.read_u32::()?, dbnode: body.read_u32::()?, @@ -1644,14 +1685,20 @@ impl PagestreamFeMessage { blkno: body.read_u32::()?, })), 3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest { - request_lsn, - not_modified_since, + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, dbnode: body.read_u32::()?, })), 4 => Ok(PagestreamFeMessage::GetSlruSegment( PagestreamGetSlruSegmentRequest { - request_lsn, - not_modified_since, + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, kind: body.read_u8()?, segno: body.read_u32::()?, }, @@ -1662,43 +1709,114 @@ impl PagestreamFeMessage { } impl PagestreamBeMessage { - pub fn serialize(&self) -> Bytes { + pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes { let mut bytes = BytesMut::new(); use PagestreamBeMessageTag as Tag; - match self { - Self::Exists(resp) => { - bytes.put_u8(Tag::Exists as u8); - bytes.put_u8(resp.exists as u8); - } + match protocol_version { + PagestreamProtocolVersion::V2 => { + match self { + Self::Exists(resp) => { + bytes.put_u8(Tag::Exists as u8); + bytes.put_u8(resp.exists as u8); + } - Self::Nblocks(resp) => { - bytes.put_u8(Tag::Nblocks as u8); - bytes.put_u32(resp.n_blocks); - } + Self::Nblocks(resp) => { + bytes.put_u8(Tag::Nblocks as u8); + bytes.put_u32(resp.n_blocks); + } - Self::GetPage(resp) => { - bytes.put_u8(Tag::GetPage as u8); - bytes.put(&resp.page[..]); - } + Self::GetPage(resp) => { + bytes.put_u8(Tag::GetPage as u8); + bytes.put(&resp.page[..]) + } - Self::Error(resp) => { - bytes.put_u8(Tag::Error as u8); - bytes.put(resp.message.as_bytes()); - bytes.put_u8(0); // null terminator - } - Self::DbSize(resp) => { - bytes.put_u8(Tag::DbSize as u8); - bytes.put_i64(resp.db_size); - } + Self::Error(resp) => { + bytes.put_u8(Tag::Error as u8); + bytes.put(resp.message.as_bytes()); + bytes.put_u8(0); // null terminator + } + Self::DbSize(resp) => { + bytes.put_u8(Tag::DbSize as u8); + bytes.put_i64(resp.db_size); + } - Self::GetSlruSegment(resp) => { - bytes.put_u8(Tag::GetSlruSegment as u8); - bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32); - bytes.put(&resp.segment[..]); + 
Self::GetSlruSegment(resp) => { + bytes.put_u8(Tag::GetSlruSegment as u8); + bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32); + bytes.put(&resp.segment[..]); + } + } + } + PagestreamProtocolVersion::V3 => { + match self { + Self::Exists(resp) => { + bytes.put_u8(Tag::Exists as u8); + bytes.put_u64(resp.req.hdr.reqid); + bytes.put_u64(resp.req.hdr.request_lsn.0); + bytes.put_u64(resp.req.hdr.not_modified_since.0); + bytes.put_u32(resp.req.rel.spcnode); + bytes.put_u32(resp.req.rel.dbnode); + bytes.put_u32(resp.req.rel.relnode); + bytes.put_u8(resp.req.rel.forknum); + bytes.put_u8(resp.exists as u8); + } + + Self::Nblocks(resp) => { + bytes.put_u8(Tag::Nblocks as u8); + bytes.put_u64(resp.req.hdr.reqid); + bytes.put_u64(resp.req.hdr.request_lsn.0); + bytes.put_u64(resp.req.hdr.not_modified_since.0); + bytes.put_u32(resp.req.rel.spcnode); + bytes.put_u32(resp.req.rel.dbnode); + bytes.put_u32(resp.req.rel.relnode); + bytes.put_u8(resp.req.rel.forknum); + bytes.put_u32(resp.n_blocks); + } + + Self::GetPage(resp) => { + bytes.put_u8(Tag::GetPage as u8); + bytes.put_u64(resp.req.hdr.reqid); + bytes.put_u64(resp.req.hdr.request_lsn.0); + bytes.put_u64(resp.req.hdr.not_modified_since.0); + bytes.put_u32(resp.req.rel.spcnode); + bytes.put_u32(resp.req.rel.dbnode); + bytes.put_u32(resp.req.rel.relnode); + bytes.put_u8(resp.req.rel.forknum); + bytes.put_u32(resp.req.blkno); + bytes.put(&resp.page[..]) + } + + Self::Error(resp) => { + bytes.put_u8(Tag::Error as u8); + bytes.put_u64(resp.req.reqid); + bytes.put_u64(resp.req.request_lsn.0); + bytes.put_u64(resp.req.not_modified_since.0); + bytes.put(resp.message.as_bytes()); + bytes.put_u8(0); // null terminator + } + Self::DbSize(resp) => { + bytes.put_u8(Tag::DbSize as u8); + bytes.put_u64(resp.req.hdr.reqid); + bytes.put_u64(resp.req.hdr.request_lsn.0); + bytes.put_u64(resp.req.hdr.not_modified_since.0); + bytes.put_u32(resp.req.dbnode); + bytes.put_i64(resp.db_size); + } + + Self::GetSlruSegment(resp) => { + bytes.put_u8(Tag::GetSlruSegment as u8); + bytes.put_u64(resp.req.hdr.reqid); + bytes.put_u64(resp.req.hdr.request_lsn.0); + bytes.put_u64(resp.req.hdr.not_modified_since.0); + bytes.put_u8(resp.req.kind); + bytes.put_u32(resp.req.segno); + bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32); + bytes.put(&resp.segment[..]); + } + } } } - bytes.into() } @@ -1710,38 +1828,131 @@ impl PagestreamBeMessage { let ok = match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? { Tag::Exists => { - let exists = buf.read_u8()?; + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); + let rel = RelTag { + spcnode: buf.read_u32::()?, + dbnode: buf.read_u32::()?, + relnode: buf.read_u32::()?, + forknum: buf.read_u8()?, + }; + let exists = buf.read_u8()? 
!= 0; Self::Exists(PagestreamExistsResponse { - exists: exists != 0, + req: PagestreamExistsRequest { + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, + rel, + }, + exists, }) } Tag::Nblocks => { + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); + let rel = RelTag { + spcnode: buf.read_u32::()?, + dbnode: buf.read_u32::()?, + relnode: buf.read_u32::()?, + forknum: buf.read_u8()?, + }; let n_blocks = buf.read_u32::()?; - Self::Nblocks(PagestreamNblocksResponse { n_blocks }) + Self::Nblocks(PagestreamNblocksResponse { + req: PagestreamNblocksRequest { + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, + rel, + }, + n_blocks, + }) } Tag::GetPage => { + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); + let rel = RelTag { + spcnode: buf.read_u32::()?, + dbnode: buf.read_u32::()?, + relnode: buf.read_u32::()?, + forknum: buf.read_u8()?, + }; + let blkno = buf.read_u32::()?; let mut page = vec![0; 8192]; // TODO: use MaybeUninit buf.read_exact(&mut page)?; - PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() }) + Self::GetPage(PagestreamGetPageResponse { + req: PagestreamGetPageRequest { + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, + rel, + blkno, + }, + page: page.into(), + }) } Tag::Error => { + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); let mut msg = Vec::new(); buf.read_until(0, &mut msg)?; let cstring = std::ffi::CString::from_vec_with_nul(msg)?; let rust_str = cstring.to_str()?; - PagestreamBeMessage::Error(PagestreamErrorResponse { + Self::Error(PagestreamErrorResponse { + req: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, message: rust_str.to_owned(), }) } Tag::DbSize => { + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); + let dbnode = buf.read_u32::()?; let db_size = buf.read_i64::()?; - Self::DbSize(PagestreamDbSizeResponse { db_size }) + Self::DbSize(PagestreamDbSizeResponse { + req: PagestreamDbSizeRequest { + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, + dbnode, + }, + db_size, + }) } Tag::GetSlruSegment => { + let reqid = buf.read_u64::()?; + let request_lsn = Lsn(buf.read_u64::()?); + let not_modified_since = Lsn(buf.read_u64::()?); + let kind = buf.read_u8()?; + let segno = buf.read_u32::()?; let n_blocks = buf.read_u32::()?; let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize]; buf.read_exact(&mut segment)?; Self::GetSlruSegment(PagestreamGetSlruSegmentResponse { + req: PagestreamGetSlruSegmentRequest { + hdr: PagestreamRequest { + reqid, + request_lsn, + not_modified_since, + }, + kind, + segno, + }, segment: segment.into(), }) } @@ -1780,8 +1991,11 @@ mod tests { // Test serialization/deserialization of PagestreamFeMessage let messages = vec![ PagestreamFeMessage::Exists(PagestreamExistsRequest { - request_lsn: Lsn(4), - not_modified_since: Lsn(3), + hdr: PagestreamRequest { + reqid: 0, + request_lsn: Lsn(4), + not_modified_since: Lsn(3), + }, rel: RelTag { forknum: 1, spcnode: 2, @@ -1790,8 +2004,11 @@ mod tests { }, }), PagestreamFeMessage::Nblocks(PagestreamNblocksRequest { - request_lsn: Lsn(4), - not_modified_since: Lsn(4), + hdr: PagestreamRequest { + reqid: 0, + request_lsn: Lsn(4), 
+ not_modified_since: Lsn(4), + }, rel: RelTag { forknum: 1, spcnode: 2, @@ -1800,8 +2017,11 @@ mod tests { }, }), PagestreamFeMessage::GetPage(PagestreamGetPageRequest { - request_lsn: Lsn(4), - not_modified_since: Lsn(3), + hdr: PagestreamRequest { + reqid: 0, + request_lsn: Lsn(4), + not_modified_since: Lsn(3), + }, rel: RelTag { forknum: 1, spcnode: 2, @@ -1811,14 +2031,19 @@ mod tests { blkno: 7, }), PagestreamFeMessage::DbSize(PagestreamDbSizeRequest { - request_lsn: Lsn(4), - not_modified_since: Lsn(3), + hdr: PagestreamRequest { + reqid: 0, + request_lsn: Lsn(4), + not_modified_since: Lsn(3), + }, dbnode: 7, }), ]; for msg in messages { let bytes = msg.serialize(); - let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap(); + let reconstructed = + PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3) + .unwrap(); assert!(msg == reconstructed); } } diff --git a/pageserver/client/src/page_service.rs b/pageserver/client/src/page_service.rs index f9507fc47a..207ec4166c 100644 --- a/pageserver/client/src/page_service.rs +++ b/pageserver/client/src/page_service.rs @@ -60,7 +60,7 @@ impl Client { ) -> anyhow::Result { let copy_both: tokio_postgres::CopyBothDuplex = self .client - .copy_both_simple(&format!("pagestream_v2 {tenant_id} {timeline_id}")) + .copy_both_simple(&format!("pagestream_v3 {tenant_id} {timeline_id}")) .await?; let Client { cancel_on_client_drop, diff --git a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs index b2df01714d..9f3984f1bd 100644 --- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs +++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs @@ -2,7 +2,7 @@ use anyhow::Context; use camino::Utf8PathBuf; use pageserver_api::key::Key; use pageserver_api::keyspace::KeySpaceAccum; -use pageserver_api::models::PagestreamGetPageRequest; +use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest}; use pageserver_api::shard::TenantShardId; use tokio_util::sync::CancellationToken; @@ -322,12 +322,15 @@ async fn main_impl( .to_rel_block() .expect("we filter non-rel-block keys out above"); PagestreamGetPageRequest { - request_lsn: if rng.gen_bool(args.req_latest_probability) { - Lsn::MAX - } else { - r.timeline_lsn + hdr: PagestreamRequest { + reqid: 0, + request_lsn: if rng.gen_bool(args.req_latest_probability) { + Lsn::MAX + } else { + r.timeline_lsn + }, + not_modified_since: r.timeline_lsn, }, - not_modified_since: r.timeline_lsn, rel: rel_tag, blkno: block_no, } diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index e825d538a2..a313a64080 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1854,6 +1854,7 @@ pub(crate) static LIVE_CONNECTIONS: Lazy = Lazy::new(|| { #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)] pub(crate) enum ComputeCommandKind { + PageStreamV3, PageStreamV2, Basebackup, Fullbackup, diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index d00ec11a76..0c4a1b18f5 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -17,7 +17,7 @@ use pageserver_api::models::{ PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, - PagestreamProtocolVersion, + PagestreamProtocolVersion, PagestreamRequest, }; use pageserver_api::shard::TenantShardId; use 
postgres_backend::{ @@ -67,7 +67,7 @@ use crate::tenant::PageReconstructError; use crate::tenant::Timeline; use crate::{basebackup, timed_after_cancellation}; use pageserver_api::key::rel_block_to_key; -use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::reltag::SlruKind; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; @@ -537,6 +537,23 @@ impl From for QueryError { } } +#[derive(thiserror::Error, Debug)] +struct BatchedPageStreamError { + req: PagestreamRequest, + err: PageStreamError, +} + +impl std::fmt::Display for BatchedPageStreamError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.err.fmt(f) + } +} + +struct BatchedGetPageRequest { + req: PagestreamGetPageRequest, + timer: SmgrOpTimer, +} + enum BatchedFeMessage { Exists { span: Span, @@ -554,7 +571,7 @@ enum BatchedFeMessage { span: Span, shard: timeline::handle::Handle, effective_request_lsn: Lsn, - pages: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>, + pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>, }, DbSize { span: Span, @@ -570,7 +587,7 @@ enum BatchedFeMessage { }, RespondError { span: Span, - error: PageStreamError, + error: BatchedPageStreamError, }, } @@ -595,7 +612,7 @@ impl BatchedFeMessage { BatchedFeMessage::GetPage { shard, pages, .. } => ( shard, pages.len(), - itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)), + itertools::Either::Right(pages.iter_mut().map(|p| &mut p.timer)), ), BatchedFeMessage::RespondError { .. } => return Ok(()), }; @@ -654,6 +671,7 @@ impl PageServerHandler { ) } + #[allow(clippy::too_many_arguments)] async fn pagestream_read_message( pgb: &mut PostgresBackendReader, tenant_id: TenantId, @@ -661,6 +679,7 @@ impl PageServerHandler { timeline_handles: &mut TimelineHandles, cancel: &CancellationToken, ctx: &RequestContext, + protocol_version: PagestreamProtocolVersion, parent_span: Span, ) -> Result, QueryError> where @@ -695,11 +714,12 @@ impl PageServerHandler { fail::fail_point!("ps::handle-pagerequest-message"); // parse request - let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?; + let neon_fe_msg = + PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?; let batched_msg = match neon_fe_msg { PagestreamFeMessage::Exists(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn); + let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field @@ -715,7 +735,7 @@ impl PageServerHandler { } } PagestreamFeMessage::Nblocks(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn); + let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field @@ -731,7 +751,7 @@ impl PageServerHandler { } } PagestreamFeMessage::DbSize(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn); + let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, 
req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field @@ -747,7 +767,7 @@ impl PageServerHandler { } } PagestreamFeMessage::GetSlruSegment(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn); + let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) .instrument(span.clone()) // sets `shard_id` field @@ -762,25 +782,23 @@ impl PageServerHandler { req, } } - PagestreamFeMessage::GetPage(PagestreamGetPageRequest { - request_lsn, - not_modified_since, - rel, - blkno, - }) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %request_lsn); + PagestreamFeMessage::GetPage(req) => { + let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %req.hdr.request_lsn); macro_rules! respond_error { ($error:expr) => {{ let error = BatchedFeMessage::RespondError { span, - error: $error, + error: BatchedPageStreamError { + req: req.hdr, + err: $error, + }, }; Ok(Some(error)) }}; } - let key = rel_block_to_key(rel, blkno); + let key = rel_block_to_key(req.rel, req.blkno); let shard = match timeline_handles .get(tenant_id, timeline_id, ShardSelector::Page(key)) .instrument(span.clone()) // sets `shard_id` field @@ -814,8 +832,8 @@ impl PageServerHandler { let effective_request_lsn = match Self::wait_or_get_last_lsn( &shard, - request_lsn, - not_modified_since, + req.hdr.request_lsn, + req.hdr.not_modified_since, &shard.get_latest_gc_cutoff_lsn(), ctx, ) @@ -831,7 +849,7 @@ impl PageServerHandler { span, shard, effective_request_lsn, - pages: smallvec::smallvec![(rel, blkno, timer)], + pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }], } } }; @@ -910,6 +928,7 @@ impl PageServerHandler { pgb_writer: &mut PostgresBackend, batch: BatchedFeMessage, cancel: &CancellationToken, + protocol_version: PagestreamProtocolVersion, ctx: &RequestContext, ) -> Result<(), QueryError> where @@ -917,7 +936,7 @@ impl PageServerHandler { { // invoke handler function let (handler_results, span): ( - Vec>, + Vec>, _, ) = match batch { BatchedFeMessage::Exists { @@ -932,7 +951,8 @@ impl PageServerHandler { .handle_get_rel_exists_request(&shard, &req, ctx) .instrument(span.clone()) .await - .map(|msg| (msg, timer))], + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], span, ) } @@ -948,7 +968,8 @@ impl PageServerHandler { .handle_get_nblocks_request(&shard, &req, ctx) .instrument(span.clone()) .await - .map(|msg| (msg, timer))], + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], span, ) } @@ -990,7 +1011,8 @@ impl PageServerHandler { .handle_db_size_request(&shard, &req, ctx) .instrument(span.clone()) .await - .map(|msg| (msg, timer))], + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], span, ) } @@ -1006,7 +1028,8 @@ impl PageServerHandler { .handle_get_slru_segment_request(&shard, &req, ctx) .instrument(span.clone()) .await - .map(|msg| (msg, timer))], + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], span, ) } @@ -1022,7 +1045,7 @@ impl PageServerHandler { 
// Other handler errors are sent back as an error message and we stay in pagestream protocol. for handler_result in handler_results { let (response_msg, timer) = match handler_result { - Err(e) => match &e { + Err(e) => match &e.err { PageStreamError::Shutdown => { // If we fail to fulfil a request during shutdown, which may be _because_ of // shutdown, then do not send the error to the client. Instead just drop the @@ -1041,13 +1064,14 @@ impl PageServerHandler { // print the all details to the log with {:#}, but for the client the // error message is enough. Do not log if shutting down, as the anyhow::Error // here includes cancellation which is not an error. - let full = utils::error::report_compact_sources(&e); + let full = utils::error::report_compact_sources(&e.err); span.in_scope(|| { error!("error reading relation or page version: {full:#}") }); ( PagestreamBeMessage::Error(PagestreamErrorResponse { - message: e.to_string(), + req: e.req, + message: e.err.to_string(), }), None, // TODO: measure errors ) @@ -1060,7 +1084,9 @@ impl PageServerHandler { // marshal & transmit response message // - pgb_writer.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; + pgb_writer.write_message_noflush(&BeMessage::CopyData( + &response_msg.serialize(protocol_version), + ))?; // We purposefully don't count flush time into the timer. // @@ -1123,7 +1149,7 @@ impl PageServerHandler { pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, - _protocol_version: PagestreamProtocolVersion, + protocol_version: PagestreamProtocolVersion, ctx: RequestContext, ) -> Result<(), QueryError> where @@ -1163,6 +1189,7 @@ impl PageServerHandler { timeline_handles, request_span, pipelining_config, + protocol_version, &ctx, ) .await @@ -1175,6 +1202,7 @@ impl PageServerHandler { timeline_id, timeline_handles, request_span, + protocol_version, &ctx, ) .await @@ -1201,6 +1229,7 @@ impl PageServerHandler { timeline_id: TimelineId, mut timeline_handles: TimelineHandles, request_span: Span, + protocol_version: PagestreamProtocolVersion, ctx: &RequestContext, ) -> ( (PostgresBackendReader, TimelineHandles), @@ -1218,6 +1247,7 @@ impl PageServerHandler { &mut timeline_handles, &cancel, ctx, + protocol_version, request_span.clone(), ) .await; @@ -1238,7 +1268,7 @@ impl PageServerHandler { } let err = self - .pagesteam_handle_batched_message(pgb_writer, msg, &cancel, ctx) + .pagesteam_handle_batched_message(pgb_writer, msg, &cancel, protocol_version, ctx) .await; match err { Ok(()) => {} @@ -1261,6 +1291,7 @@ impl PageServerHandler { mut timeline_handles: TimelineHandles, request_span: Span, pipelining_config: PageServicePipeliningConfigPipelined, + protocol_version: PagestreamProtocolVersion, ctx: &RequestContext, ) -> ( (PostgresBackendReader, TimelineHandles), @@ -1358,6 +1389,7 @@ impl PageServerHandler { &mut timeline_handles, &cancel_batcher, &ctx, + protocol_version, request_span.clone(), ) .await; @@ -1403,8 +1435,14 @@ impl PageServerHandler { batch .throttle_and_record_start_processing(&self.cancel) .await?; - self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx) - .await?; + self.pagesteam_handle_batched_message( + pgb_writer, + batch, + &cancel, + protocol_version, + &ctx, + ) + .await?; } } }); @@ -1578,8 +1616,8 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, - req.request_lsn, - req.not_modified_since, + req.hdr.request_lsn, + req.hdr.not_modified_since, 
&latest_gc_cutoff_lsn, ctx, ) @@ -1590,6 +1628,7 @@ impl PageServerHandler { .await?; Ok(PagestreamBeMessage::Exists(PagestreamExistsResponse { + req: *req, exists, })) } @@ -1604,8 +1643,8 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, - req.request_lsn, - req.not_modified_since, + req.hdr.request_lsn, + req.hdr.not_modified_since, &latest_gc_cutoff_lsn, ctx, ) @@ -1616,6 +1655,7 @@ impl PageServerHandler { .await?; Ok(PagestreamBeMessage::Nblocks(PagestreamNblocksResponse { + req: *req, n_blocks, })) } @@ -1630,8 +1670,8 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, - req.request_lsn, - req.not_modified_since, + req.hdr.request_lsn, + req.hdr.not_modified_since, &latest_gc_cutoff_lsn, ctx, ) @@ -1643,6 +1683,7 @@ impl PageServerHandler { let db_size = total_blocks as i64 * BLCKSZ as i64; Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse { + req: *req, db_size, })) } @@ -1652,9 +1693,9 @@ impl PageServerHandler { &mut self, timeline: &Timeline, effective_lsn: Lsn, - requests: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>, + requests: smallvec::SmallVec<[BatchedGetPageRequest; 1]>, ctx: &RequestContext, - ) -> Vec> { + ) -> Vec> { debug_assert_current_span_has_tenant_and_timeline_id(); timeline @@ -1663,7 +1704,7 @@ impl PageServerHandler { let results = timeline .get_rel_page_at_lsn_batched( - requests.iter().map(|(reltag, blkno, _)| (reltag, blkno)), + requests.iter().map(|p| (&p.req.rel, &p.req.blkno)), effective_lsn, ctx, ) @@ -1675,16 +1716,20 @@ impl PageServerHandler { requests .into_iter() .zip(results.into_iter()) - .map(|((_, _, timer), res)| { + .map(|(req, res)| { res.map(|page| { ( PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { + req: req.req, page, }), - timer, + req.timer, ) }) - .map_err(PageStreamError::from) + .map_err(|e| BatchedPageStreamError { + err: PageStreamError::from(e), + req: req.req.hdr, + }) }), ) } @@ -1699,8 +1744,8 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, - req.request_lsn, - req.not_modified_since, + req.hdr.request_lsn, + req.hdr.not_modified_since, &latest_gc_cutoff_lsn, ctx, ) @@ -1711,7 +1756,7 @@ impl PageServerHandler { let segment = timeline.get_slru_segment(kind, req.segno, lsn, ctx).await?; Ok(PagestreamBeMessage::GetSlruSegment( - PagestreamGetSlruSegmentResponse { segment }, + PagestreamGetSlruSegmentResponse { req: *req, segment }, )) } @@ -1906,6 +1951,7 @@ struct FullBackupCmd { struct PageStreamCmd { tenant_id: TenantId, timeline_id: TimelineId, + protocol_version: PagestreamProtocolVersion, } /// `lease lsn tenant timeline lsn` @@ -1926,7 +1972,7 @@ enum PageServiceCmd { } impl PageStreamCmd { - fn parse(query: &str) -> anyhow::Result { + fn parse(query: &str, protocol_version: PagestreamProtocolVersion) -> anyhow::Result { let parameters = query.split_whitespace().collect_vec(); if parameters.len() != 2 { bail!( @@ -1941,6 +1987,7 @@ impl PageStreamCmd { Ok(Self { tenant_id, timeline_id, + protocol_version, }) } } @@ -2078,7 +2125,14 @@ impl PageServiceCmd { bail!("cannot parse query: {query}") }; match cmd.to_ascii_lowercase().as_str() { - "pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse(other)?)), + "pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse( + other, + PagestreamProtocolVersion::V2, + )?)), 
+ "pagestream_v3" => Ok(Self::PageStream(PageStreamCmd::parse( + other, + PagestreamProtocolVersion::V3, + )?)), "basebackup" => Ok(Self::BaseBackup(BaseBackupCmd::parse(other)?)), "fullbackup" => Ok(Self::FullBackup(FullBackupCmd::parse(other)?)), "lease" => { @@ -2160,25 +2214,21 @@ where PageServiceCmd::PageStream(PageStreamCmd { tenant_id, timeline_id, + protocol_version, }) => { tracing::Span::current() .record("tenant_id", field::display(tenant_id)) .record("timeline_id", field::display(timeline_id)); self.check_permission(Some(tenant_id))?; + let command_kind = match protocol_version { + PagestreamProtocolVersion::V2 => ComputeCommandKind::PageStreamV2, + PagestreamProtocolVersion::V3 => ComputeCommandKind::PageStreamV3, + }; + COMPUTE_COMMANDS_COUNTERS.for_command(command_kind).inc(); - COMPUTE_COMMANDS_COUNTERS - .for_command(ComputeCommandKind::PageStreamV2) - .inc(); - - self.handle_pagerequests( - pgb, - tenant_id, - timeline_id, - PagestreamProtocolVersion::V2, - ctx, - ) - .await?; + self.handle_pagerequests(pgb, tenant_id, timeline_id, protocol_version, ctx) + .await?; } PageServiceCmd::BaseBackup(BaseBackupCmd { tenant_id, @@ -2357,7 +2407,8 @@ mod tests { cmd, PageServiceCmd::PageStream(PageStreamCmd { tenant_id, - timeline_id + timeline_id, + protocol_version: PagestreamProtocolVersion::V2, }) ); let cmd = PageServiceCmd::parse(&format!("basebackup {tenant_id} {timeline_id}")).unwrap(); diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index fa2a570ea8..769befb4e5 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -556,6 +556,9 @@ pageserver_connect(shardno_t shard_no, int elevel) switch (neon_protocol_version) { + case 3: + pagestream_query = psprintf("pagestream_v3 %s %s", neon_tenant, neon_timeline); + break; case 2: pagestream_query = psprintf("pagestream_v2 %s %s", neon_tenant, neon_timeline); break; @@ -1135,9 +1138,9 @@ pg_init_libpagestore(void) "Version of compute<->page server protocol", NULL, &neon_protocol_version, - 2, /* use protocol version 2 */ - 2, /* min */ - 2, /* max */ + 2, /* use protocol version 2 */ + 2, /* min */ + 3, /* max */ PGC_SU_BACKEND, 0, /* no flags required */ NULL, NULL, NULL); diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index f905e3b0fa..37bc4f7886 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -44,10 +44,15 @@ typedef enum T_NeonGetSlruSegmentResponse, } NeonMessageTag; +typedef uint64 NeonRequestId; + /* base struct for c-style inheritance */ typedef struct { NeonMessageTag tag; + NeonRequestId reqid; + XLogRecPtr lsn; + XLogRecPtr not_modified_since; } NeonMessage; #define messageTag(m) (((const NeonMessage *)(m))->tag) @@ -67,6 +72,7 @@ typedef enum { SLRU_MULTIXACT_OFFSETS } SlruKind; + /*-- * supertype of all the Neon*Request structs below. * @@ -87,37 +93,37 @@ typedef enum { * * These structs describe the V2 of these requests. (The old now-defunct V1 * protocol contained just one LSN and a boolean 'latest' flag.) + * + * V3 version of protocol adds request ID to all requests. This request ID is also included in response + * as well as other fields from requests, which allows to verify that we receive response for our request. + * We copy fields from request to response to make checking more reliable: request ID is formed from process ID + * and local counter, so in principle there can be duplicated requests IDs if process PID is reused. 
*/ -typedef struct -{ - NeonMessageTag tag; - XLogRecPtr lsn; - XLogRecPtr not_modified_since; -} NeonRequest; +typedef NeonMessage NeonRequest; typedef struct { - NeonRequest req; + NeonRequest hdr; NRelFileInfo rinfo; ForkNumber forknum; } NeonExistsRequest; typedef struct { - NeonRequest req; + NeonRequest hdr; NRelFileInfo rinfo; ForkNumber forknum; } NeonNblocksRequest; typedef struct { - NeonRequest req; + NeonRequest hdr; Oid dbNode; } NeonDbSizeRequest; typedef struct { - NeonRequest req; + NeonRequest hdr; NRelFileInfo rinfo; ForkNumber forknum; BlockNumber blkno; @@ -125,32 +131,29 @@ typedef struct typedef struct { - NeonRequest req; - SlruKind kind; - int segno; + NeonRequest hdr; + SlruKind kind; + int segno; } NeonGetSlruSegmentRequest; /* supertype of all the Neon*Response structs below */ -typedef struct -{ - NeonMessageTag tag; -} NeonResponse; +typedef NeonMessage NeonResponse; typedef struct { - NeonMessageTag tag; + NeonExistsRequest req; bool exists; } NeonExistsResponse; typedef struct { - NeonMessageTag tag; + NeonNblocksRequest req; uint32 n_blocks; } NeonNblocksResponse; typedef struct { - NeonMessageTag tag; + NeonGetPageRequest req; char page[FLEXIBLE_ARRAY_MEMBER]; } NeonGetPageResponse; @@ -158,21 +161,21 @@ typedef struct typedef struct { - NeonMessageTag tag; + NeonDbSizeRequest req; int64 db_size; } NeonDbSizeResponse; typedef struct { - NeonMessageTag tag; + NeonResponse req; char message[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated error * message */ } NeonErrorResponse; typedef struct { - NeonMessageTag tag; - int n_blocks; + NeonGetSlruSegmentRequest req; + int n_blocks; char data[BLCKSZ * SLRU_PAGES_PER_SEGMENT]; } NeonGetSlruSegmentResponse; diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index b733807026..7a4c0ef487 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -120,6 +120,9 @@ static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum); +static uint32 local_request_counter; +#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter) + /* * Prefetch implementation: * @@ -188,15 +191,11 @@ typedef struct PrefetchRequest uint8 status; /* see PrefetchStatus for valid values */ uint8 flags; /* see PrefetchRequestFlags */ neon_request_lsns request_lsns; + NeonRequestId reqid; NeonResponse *response; /* may be null */ uint64 my_ring_index; } PrefetchRequest; -StaticAssertDecl(sizeof(PrefetchRequest) == 64, - "We prefer to have a power-of-2 size for this struct. 
Please" - " try to find an alternative solution before reaching to" - " increase the expected size here"); - /* prefetch buffer lookup hash table */ typedef struct PrfHashEntry @@ -365,6 +364,7 @@ compact_prefetch_buffers(void) target_slot->shard_no = source_slot->shard_no; target_slot->status = source_slot->status; target_slot->response = source_slot->response; + target_slot->reqid = source_slot->reqid; target_slot->request_lsns = source_slot->request_lsns; target_slot->my_ring_index = empty_ring_index; @@ -798,7 +798,8 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index; NeonGetPageRequest request = { - .req.tag = T_NeonGetPageRequest, + .hdr.tag = T_NeonGetPageRequest, + .hdr.reqid = GENERATE_REQUEST_ID(), /* lsn and not_modified_since are filled in below */ .rinfo = BufTagGetNRelFileInfo(slot->buftag), .forknum = slot->buftag.forkNum, @@ -807,14 +808,16 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns Assert(mySlotNo == MyPState->ring_unused); + slot->reqid = request.hdr.reqid; + if (force_request_lsns) slot->request_lsns = *force_request_lsns; else neon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag), slot->buftag.forkNum, slot->buftag.blockNum, &slot->request_lsns, 1, NULL); - request.req.lsn = slot->request_lsns.request_lsn; - request.req.not_modified_since = slot->request_lsns.not_modified_since; + request.hdr.lsn = slot->request_lsns.request_lsn; + request.hdr.not_modified_since = slot->request_lsns.not_modified_since; Assert(slot->response == NULL); Assert(slot->my_ring_index == MyPState->ring_unused); @@ -1102,6 +1105,12 @@ Retry: return min_ring_index; } +static bool +equal_requests(NeonRequest* a, NeonRequest* b) +{ + return a->reqid == b->reqid && a->lsn == b->lsn && a->not_modified_since == b->not_modified_since; +} + /* * Note: this function can get canceled and use a long jump to the next catch @@ -1184,6 +1193,10 @@ nm_pack_request(NeonRequest *msg) initStringInfo(&s); pq_sendbyte(&s, msg->tag); + if (neon_protocol_version >= 3) + { + pq_sendint64(&s, msg->reqid); + } pq_sendint64(&s, msg->lsn); pq_sendint64(&s, msg->not_modified_since); @@ -1261,8 +1274,16 @@ NeonResponse * nm_unpack_response(StringInfo s) { NeonMessageTag tag = pq_getmsgbyte(s); + NeonResponse resp_hdr = {0}; /* make valgrind happy */ NeonResponse *resp = NULL; + resp_hdr.tag = tag; + if (neon_protocol_version >= 3) + { + resp_hdr.reqid = pq_getmsgint64(s); + resp_hdr.lsn = pq_getmsgint64(s); + resp_hdr.not_modified_since = pq_getmsgint64(s); + } switch (tag) { /* pagestore -> pagestore_client */ @@ -1270,7 +1291,14 @@ nm_unpack_response(StringInfo s) { NeonExistsResponse *msg_resp = palloc0(sizeof(NeonExistsResponse)); - msg_resp->tag = tag; + if (neon_protocol_version >= 3) + { + NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + msg_resp->req.forknum = pq_getmsgbyte(s); + } + msg_resp->req.hdr = resp_hdr; msg_resp->exists = pq_getmsgbyte(s); pq_getmsgend(s); @@ -1282,7 +1310,14 @@ nm_unpack_response(StringInfo s) { NeonNblocksResponse *msg_resp = palloc0(sizeof(NeonNblocksResponse)); - msg_resp->tag = tag; + if (neon_protocol_version >= 3) + { + NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + 
msg_resp->req.forknum = pq_getmsgbyte(s); + } + msg_resp->req.hdr = resp_hdr; msg_resp->n_blocks = pq_getmsgint(s, 4); pq_getmsgend(s); @@ -1295,12 +1330,20 @@ nm_unpack_response(StringInfo s) NeonGetPageResponse *msg_resp; msg_resp = MemoryContextAllocZero(MyPState->bufctx, PS_GETPAGERESPONSE_SIZE); - msg_resp->tag = tag; + if (neon_protocol_version >= 3) + { + NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4); + msg_resp->req.forknum = pq_getmsgbyte(s); + msg_resp->req.blkno = pq_getmsgint(s, 4); + } + msg_resp->req.hdr = resp_hdr; /* XXX: should be varlena */ memcpy(msg_resp->page, pq_getmsgbytes(s, BLCKSZ), BLCKSZ); pq_getmsgend(s); - Assert(msg_resp->tag == T_NeonGetPageResponse); + Assert(msg_resp->req.hdr.tag == T_NeonGetPageResponse); resp = (NeonResponse *) msg_resp; break; @@ -1310,7 +1353,11 @@ nm_unpack_response(StringInfo s) { NeonDbSizeResponse *msg_resp = palloc0(sizeof(NeonDbSizeResponse)); - msg_resp->tag = tag; + if (neon_protocol_version >= 3) + { + msg_resp->req.dbNode = pq_getmsgint(s, 4); + } + msg_resp->req.hdr = resp_hdr; msg_resp->db_size = pq_getmsgint64(s); pq_getmsgend(s); @@ -1328,7 +1375,7 @@ nm_unpack_response(StringInfo s) msglen = strlen(msgtext); msg_resp = palloc0(sizeof(NeonErrorResponse) + msglen + 1); - msg_resp->tag = tag; + msg_resp->req = resp_hdr; memcpy(msg_resp->message, msgtext, msglen + 1); pq_getmsgend(s); @@ -1339,9 +1386,17 @@ nm_unpack_response(StringInfo s) case T_NeonGetSlruSegmentResponse: { NeonGetSlruSegmentResponse *msg_resp; - int n_blocks = pq_getmsgint(s, 4); - msg_resp = palloc(sizeof(NeonGetSlruSegmentResponse)); - msg_resp->tag = tag; + int n_blocks; + msg_resp = palloc0(sizeof(NeonGetSlruSegmentResponse)); + + if (neon_protocol_version >= 3) + { + msg_resp->req.kind = pq_getmsgbyte(s); + msg_resp->req.segno = pq_getmsgint(s, 4); + } + msg_resp->req.hdr = resp_hdr; + + n_blocks = pq_getmsgint(s, 4); msg_resp->n_blocks = n_blocks; memcpy(msg_resp->data, pq_getmsgbytes(s, n_blocks * BLCKSZ), n_blocks * BLCKSZ); pq_getmsgend(s); @@ -1386,8 +1441,8 @@ nm_to_string(NeonMessage *msg) appendStringInfoString(&s, "{\"type\": \"NeonExistsRequest\""); appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo)); appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); - appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); - appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since)); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn)); + appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since)); appendStringInfoChar(&s, '}'); break; } @@ -1399,8 +1454,8 @@ nm_to_string(NeonMessage *msg) appendStringInfoString(&s, "{\"type\": \"NeonNblocksRequest\""); appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo)); appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); - appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); - appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since)); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn)); + appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since)); appendStringInfoChar(&s, '}'); break; } @@ -1413,8 
+1468,8 @@ nm_to_string(NeonMessage *msg) appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo)); appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno); - appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); - appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since)); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn)); + appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since)); appendStringInfoChar(&s, '}'); break; } @@ -1424,8 +1479,8 @@ nm_to_string(NeonMessage *msg) appendStringInfoString(&s, "{\"type\": \"NeonDbSizeRequest\""); appendStringInfo(&s, ", \"dbnode\": \"%u\"", msg_req->dbNode); - appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); - appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since)); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn)); + appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since)); appendStringInfoChar(&s, '}'); break; } @@ -1436,8 +1491,8 @@ nm_to_string(NeonMessage *msg) appendStringInfoString(&s, "{\"type\": \"NeonGetSlruSegmentRequest\""); appendStringInfo(&s, ", \"kind\": %u", msg_req->kind); appendStringInfo(&s, ", \"segno\": %u", msg_req->segno); - appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); - appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since)); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn)); + appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since)); appendStringInfoChar(&s, '}'); break; } @@ -2312,39 +2367,64 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum) REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL); { NeonExistsRequest request = { - .req.tag = T_NeonExistsRequest, - .req.lsn = request_lsns.request_lsn, - .req.not_modified_since = request_lsns.not_modified_since, + .hdr.tag = T_NeonExistsRequest, + .hdr.reqid = GENERATE_REQUEST_ID(), + .hdr.lsn = request_lsns.request_lsn, + .hdr.not_modified_since = request_lsns.not_modified_since, .rinfo = InfoFromSMgrRel(reln), .forknum = forkNum }; resp = page_server_request(&request); + + switch (resp->tag) + { + case T_NeonExistsResponse: + { + NeonExistsResponse* exists_resp = (NeonExistsResponse *) resp; + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr) || + !RelFileInfoEquals(exists_resp->req.rinfo, request.rinfo) || + exists_resp->req.forknum != request.forknum) + { + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum, + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum); + } + } + exists = exists_resp->exists; + break; + } + case T_NeonErrorResponse: + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr)) + { + elog(WARNING, NEON_TAG "Error message 
{reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since)); + } + } + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg(NEON_TAG "[reqid %lx] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X", + resp->reqid, + RelFileInfoFmt(InfoFromSMgrRel(reln)), + forkNum, + LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), + errdetail("page server returned error: %s", + ((NeonErrorResponse *) resp)->message))); + break; + + default: + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x", + T_NeonExistsResponse, T_NeonErrorResponse, resp->tag); + } + pfree(resp); } - - switch (resp->tag) - { - case T_NeonExistsResponse: - exists = ((NeonExistsResponse *) resp)->exists; - break; - - case T_NeonErrorResponse: - ereport(ERROR, - (errcode(ERRCODE_IO_ERROR), - errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X", - RelFileInfoFmt(InfoFromSMgrRel(reln)), - forkNum, - LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), - errdetail("page server returned error: %s", - ((NeonErrorResponse *) resp)->message))); - break; - - default: - NEON_PANIC_CONNECTION_STATE(-1, PANIC, - "Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x", - T_NeonExistsResponse, T_NeonErrorResponse, resp->tag); - } - pfree(resp); return exists; } @@ -2952,15 +3032,43 @@ Retry: switch (resp->tag) { case T_NeonGetPageResponse: - memcpy(buffer, ((NeonGetPageResponse *) resp)->page, BLCKSZ); + { + NeonGetPageResponse* getpage_resp = (NeonGetPageResponse *) resp; + if (neon_protocol_version >= 3) + { + if (resp->reqid != slot->reqid || + resp->lsn != slot->request_lsns.request_lsn || + resp->not_modified_since != slot->request_lsns.not_modified_since || + !RelFileInfoEquals(getpage_resp->req.rinfo, rinfo) || + getpage_resp->req.forknum != forkNum || + getpage_resp->req.blkno != base_blockno + i) + { + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno, + slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), forkNum, base_blockno + i); + } + } + memcpy(buffer, getpage_resp->page, BLCKSZ); lfc_write(rinfo, forkNum, blockno, buffer); break; - + } case T_NeonErrorResponse: + if (neon_protocol_version >= 3) + { + if (resp->reqid != slot->reqid || + resp->lsn != slot->request_lsns.request_lsn || + resp->not_modified_since != slot->request_lsns.not_modified_since) + { + elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), + slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since)); + } + } ereport(ERROR, (errcode(ERRCODE_IO_ERROR), - errmsg(NEON_TAG "[shard 
%d] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X", - slot->shard_no, blockno, RelFileInfoFmt(rinfo), + errmsg(NEON_TAG "[shard %d, reqid %lx] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X", + slot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo), forkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)), errdetail("page server returned error: %s", ((NeonErrorResponse *) resp)->message))); @@ -3443,47 +3551,72 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum) { NeonNblocksRequest request = { - .req.tag = T_NeonNblocksRequest, - .req.lsn = request_lsns.request_lsn, - .req.not_modified_since = request_lsns.not_modified_since, + .hdr.tag = T_NeonNblocksRequest, + .hdr.reqid = GENERATE_REQUEST_ID(), + .hdr.lsn = request_lsns.request_lsn, + .hdr.not_modified_since = request_lsns.not_modified_since, .rinfo = InfoFromSMgrRel(reln), .forknum = forknum, }; resp = page_server_request(&request); + + switch (resp->tag) + { + case T_NeonNblocksResponse: + { + NeonNblocksResponse * relsize_resp = (NeonNblocksResponse *) resp; + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr) || + !RelFileInfoEquals(relsize_resp->req.rinfo, request.rinfo) || + relsize_resp->req.forknum != forknum) + { + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum, + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum); + } + } + n_blocks = relsize_resp->n_blocks; + break; + } + case T_NeonErrorResponse: + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr)) + { + elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since)); + } + } + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg(NEON_TAG "[reqid %lx] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X", + resp->reqid, + RelFileInfoFmt(InfoFromSMgrRel(reln)), + forknum, + LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), + errdetail("page server returned error: %s", + ((NeonErrorResponse *) resp)->message))); + break; + + default: + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x", + T_NeonNblocksResponse, T_NeonErrorResponse, resp->tag); + } + update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks); + + neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks", + RelFileInfoFmt(InfoFromSMgrRel(reln)), + forknum, + LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), + n_blocks); + + pfree(resp); } - - switch (resp->tag) - { - case T_NeonNblocksResponse: - n_blocks = ((NeonNblocksResponse *) resp)->n_blocks; - break; - - case T_NeonErrorResponse: - ereport(ERROR, - (errcode(ERRCODE_IO_ERROR), - errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X", - RelFileInfoFmt(InfoFromSMgrRel(reln)), - forknum, - 
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), - errdetail("page server returned error: %s", - ((NeonErrorResponse *) resp)->message))); - break; - - default: - NEON_PANIC_CONNECTION_STATE(-1, PANIC, - "Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x", - T_NeonNblocksResponse, T_NeonErrorResponse, resp->tag); - } - update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks); - - neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks", - RelFileInfoFmt(InfoFromSMgrRel(reln)), - forknum, - LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), - n_blocks); - - pfree(resp); return n_blocks; } @@ -3503,40 +3636,64 @@ neon_dbsize(Oid dbNode) { NeonDbSizeRequest request = { - .req.tag = T_NeonDbSizeRequest, - .req.lsn = request_lsns.request_lsn, - .req.not_modified_since = request_lsns.not_modified_since, + .hdr.tag = T_NeonDbSizeRequest, + .hdr.reqid = GENERATE_REQUEST_ID(), + .hdr.lsn = request_lsns.request_lsn, + .hdr.not_modified_since = request_lsns.not_modified_since, .dbNode = dbNode, }; resp = page_server_request(&request); + + switch (resp->tag) + { + case T_NeonDbSizeResponse: + { + NeonDbSizeResponse* dbsize_resp = (NeonDbSizeResponse *) resp; + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr) || + dbsize_resp->req.dbNode != dbNode) + { + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode, + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode); + } + } + db_size = dbsize_resp->db_size; + break; + } + case T_NeonErrorResponse: + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr)) + { + elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since)); + } + } + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg(NEON_TAG "[reqid %lx] could not read db size of db %u from page server at lsn %X/%08X", + resp->reqid, + dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), + errdetail("page server returned error: %s", + ((NeonErrorResponse *) resp)->message))); + break; + + default: + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Expected DbSize (0x%02x) or Error (0x%02x) response to DbSizeRequest, but got 0x%02x", + T_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag); + } + + neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes", + dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size); + + pfree(resp); } - - switch (resp->tag) - { - case T_NeonDbSizeResponse: - db_size = ((NeonDbSizeResponse *) resp)->db_size; - break; - - case T_NeonErrorResponse: - ereport(ERROR, - (errcode(ERRCODE_IO_ERROR), - errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X", - dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)), - errdetail("page server returned error: %s", - ((NeonErrorResponse *) resp)->message))); - break; - - default: - NEON_PANIC_CONNECTION_STATE(-1, PANIC, - "Expected DbSize (0x%02x) or Error (0x%02x) response 
to DbSizeRequest, but got 0x%02x", - T_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag); - } - - neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes", - dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size); - - pfree(resp); return db_size; } @@ -3868,16 +4025,17 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf return -1; request = (NeonGetSlruSegmentRequest) { - .req.tag = T_NeonGetSlruSegmentRequest, - .req.lsn = request_lsn, - .req.not_modified_since = not_modified_since, + .hdr.tag = T_NeonGetSlruSegmentRequest, + .hdr.reqid = GENERATE_REQUEST_ID(), + .hdr.lsn = request_lsn, + .hdr.not_modified_since = not_modified_since, .kind = kind, .segno = segno }; do { - while (!page_server->send(shard_no, &request.req) || !page_server->flush(shard_no)); + while (!page_server->send(shard_no, &request.hdr) || !page_server->flush(shard_no)); consume_prefetch_responses(); @@ -3887,14 +4045,38 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf switch (resp->tag) { case T_NeonGetSlruSegmentResponse: - n_blocks = ((NeonGetSlruSegmentResponse *) resp)->n_blocks; - memcpy(buffer, ((NeonGetSlruSegmentResponse *) resp)->data, n_blocks*BLCKSZ); + { + NeonGetSlruSegmentResponse* slru_resp = (NeonGetSlruSegmentResponse *) resp; + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr) || + slru_resp->req.kind != kind || + slru_resp->req.segno != segno) + { + NEON_PANIC_CONNECTION_STATE(-1, PANIC, + "Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno, + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, segno); + } + } + n_blocks = slru_resp->n_blocks; + memcpy(buffer, slru_resp->data, n_blocks*BLCKSZ); break; - + } case T_NeonErrorResponse: + if (neon_protocol_version >= 3) + { + if (!equal_requests(resp, &request.hdr)) + { + elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}", + resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), + request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since)); + } + } ereport(ERROR, (errcode(ERRCODE_IO_ERROR), - errmsg(NEON_TAG "could not read SLRU %d segment %d at lsn %X/%08X", + errmsg(NEON_TAG "[reqid %lx] could not read SLRU %d segment %d at lsn %X/%08X", + resp->reqid, kind, segno, LSN_FORMAT_ARGS(request_lsn)), @@ -4033,8 +4215,9 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, NeonResponse *response; NeonNblocksResponse *nbresponse; NeonNblocksRequest request = { - .req = (NeonRequest) { + .hdr = (NeonRequest) { .tag = T_NeonNblocksRequest, + .reqid = GENERATE_REQUEST_ID(), .lsn = end_recptr, .not_modified_since = end_recptr, }, From 640ac4fc9efcdadad442f7dfafe15e7f9d816906 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:43:20 -0500 Subject: [PATCH 36/44] fix(pageserver): report timestamp is in the past if the key is missing (#10210) ## Problem If for some reasons we already garbage-collected the data under an LSN but the caller uses a past LSN for the find_time_cutoff function, now we will report a missing key error and GC will never proceed. Note that missing key error can also happen if the key is really missing (i.e., during the past offload incidents) ## Summary of changes Make sure GC proceeds by bumping the LSN. When time_cutoff=None, we will not increase the time_cutoff (it will be set to latest_gc_cutoff). If we really need to bump the GC LSN for maintenance purpose, we need a separate API to do that. Signed-off-by: Alex Chi Z --- pageserver/src/pgdatadir_mapping.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 14c7e0d2f8..b65fe6cf7c 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -627,7 +627,7 @@ impl Timeline { // cannot overflow, high and low are both smaller than u64::MAX / 2 let mid = (high + low) / 2; - let cmp = self + let cmp = match self .is_latest_commit_timestamp_ge_than( search_timestamp, Lsn(mid * 8), @@ -635,7 +635,16 @@ impl Timeline { &mut found_larger, ctx, ) - .await?; + .await + { + Ok(res) => res, + Err(PageReconstructError::MissingKey(e)) => { + warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e); + // Return that we didn't find any requests smaller than the LSN, and logging the error. + return Ok(LsnForTimestamp::Past(min_lsn)); + } + Err(e) => return Err(e), + }; if cmp { high = mid; @@ -643,6 +652,7 @@ impl Timeline { low = mid + 1; } } + // If `found_smaller == true`, `low = t + 1` where `t` is the target LSN, // so the LSN of the last commit record before or at `search_timestamp`. // Remove one from `low` to get `t`. From ac6cca17acae975ab261c9e82c8fb1bc8a06bcce Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 9 Jan 2025 15:33:44 +0000 Subject: [PATCH 37/44] storcon: don't log a heartbeat error during shutdown (#10325) ## Problem Occasionally we see an unexpected error like: ``` ERROR spawn_heartbeat_driver: Failed to update node state 1 after heartbeat round: Shutting down\n') Hint: use scripts/check_allowed_errors.sh to test any new allowed_error you add ``` https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10324/12690404952/index.html#/testresult/63406a0687bf6eca ## Summary of changes - Explicitly handle ApiError::ShuttingDown as a no-op when mutating node status --- storage_controller/src/service.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index fd4ee7fd10..cf2d1ef97b 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1047,6 +1047,9 @@ impl Service { // on a snapshot of the nodes. tracing::info!("Node {} was not found after heartbeat round", node_id); } + Err(ApiError::ShuttingDown) => { + // No-op: we're shutting down, no need to try and update any nodes' statuses + } Err(err) => { // Transition to active involves reconciling: if a node responds to a heartbeat then // becomes unavailable again, we may get an error here. 
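For readers skimming the hunk above, a condensed, self-contained sketch of the intended pattern follows. `ApiError` and `handle_heartbeat_result` here are illustrative stand-ins, not the storage controller's real types or signatures; only the treatment of the `ShuttingDown` case mirrors the change.

```rust
// Minimal sketch (assumed names): swallow ShuttingDown instead of logging it
// as an error, while still surfacing every other failure.
#[derive(Debug)]
enum ApiError {
    ShuttingDown,
    Other(String),
}

fn handle_heartbeat_result(node_id: u64, result: Result<(), ApiError>) {
    match result {
        Ok(()) => {
            // Node status updated successfully after the heartbeat round.
        }
        Err(ApiError::ShuttingDown) => {
            // No-op: shutdown is in progress, so a failed status update is
            // expected and must not be reported as an error.
        }
        Err(err) => {
            eprintln!("Failed to update node state {node_id} after heartbeat round: {err:?}");
        }
    }
}

fn main() {
    handle_heartbeat_result(1, Err(ApiError::ShuttingDown));
    handle_heartbeat_result(2, Err(ApiError::Other("node unavailable".into())));
}
```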
From ad51622568a24b6be53ff165f02d32e004476a73 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 9 Jan 2025 15:34:06 +0000 Subject: [PATCH 38/44] remote_storage: enable Azure connection pooling by default (#10324) ## Problem Initially we defaulted this to zero to reduce risk. We have now been using pooling in staging for some time without issues, so let's make it the default for anyone using this software without setting the config explicitly. Closes: https://github.com/neondatabase/cloud/issues/20971 ## Summary of changes - Set Azure blob storage connection pool size to 8 by default --- libs/remote_storage/src/config.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index dd49d4d5e7..49b1d9dc87 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -115,13 +115,15 @@ fn default_max_keys_per_list_response() -> Option { } fn default_azure_conn_pool_size() -> usize { - // Conservative default: no connection pooling. At time of writing this is the Azure - // SDK's default as well, due to historic reports of hard-to-reproduce issues + // By default, the Azure SDK does no connection pooling, due to historic reports of hard-to-reproduce issues // (https://github.com/hyperium/hyper/issues/2312) // // However, using connection pooling is important to avoid exhausting client ports when // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971) - 0 + // + // We therefore enable a modest pool size by default: this may be configured to zero if + // issues like the alleged upstream hyper issue appear. + 8 } impl Debug for S3Config { From bebc46e71336104a3282e9f2497c2889583caffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 9 Jan 2025 16:55:02 +0100 Subject: [PATCH 39/44] Add scheduling_policy column to safekeepers table (#10205) Add a `scheduling_policy` column to the safekeepers table of the storage controller. 
Part of #9981 --- control_plane/storcon_cli/src/main.rs | 13 +++- libs/pageserver_api/src/controller_api.rs | 33 +++++++++ .../down.sql | 1 + .../up.sql | 1 + storage_controller/src/http.rs | 4 +- storage_controller/src/persistence.rs | 69 ++++++++++++++----- storage_controller/src/schema.rs | 1 + storage_controller/src/service.rs | 9 ++- .../regress/test_storage_controller.py | 2 +- 9 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/down.sql create mode 100644 storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/up.sql diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 6ee1044c18..617b2cd1ba 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -1035,7 +1035,15 @@ async fn main() -> anyhow::Result<()> { resp.sort_by(|a, b| a.id.cmp(&b.id)); let mut table = comfy_table::Table::new(); - table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]); + table.set_header([ + "Id", + "Version", + "Host", + "Port", + "Http Port", + "AZ Id", + "Scheduling", + ]); for sk in resp { table.add_row([ format!("{}", sk.id), @@ -1043,7 +1051,8 @@ async fn main() -> anyhow::Result<()> { sk.host, format!("{}", sk.port), format!("{}", sk.http_port), - sk.availability_zone_id.to_string(), + sk.availability_zone_id.clone(), + String::from(sk.scheduling_policy), ]); } println!("{table}"); diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index faf11e487c..7eb3547183 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -320,6 +320,38 @@ impl From for String { } } +#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)] +pub enum SkSchedulingPolicy { + Active, + Disabled, + Decomissioned, +} + +impl FromStr for SkSchedulingPolicy { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Ok(match s { + "active" => Self::Active, + "disabled" => Self::Disabled, + "decomissioned" => Self::Decomissioned, + _ => return Err(anyhow::anyhow!("Unknown scheduling state '{s}'")), + }) + } +} + +impl From for String { + fn from(value: SkSchedulingPolicy) -> String { + use SkSchedulingPolicy::*; + match value { + Active => "active", + Disabled => "disabled", + Decomissioned => "decomissioned", + } + .to_string() + } +} + /// Controls how tenant shards are mapped to locations on pageservers, e.g. whether /// to create secondary locations. 
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] @@ -387,6 +419,7 @@ pub struct SafekeeperDescribeResponse { pub port: i32, pub http_port: i32, pub availability_zone_id: String, + pub scheduling_policy: SkSchedulingPolicy, } #[cfg(test)] diff --git a/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/down.sql b/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/down.sql new file mode 100644 index 0000000000..e26bff798f --- /dev/null +++ b/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/down.sql @@ -0,0 +1 @@ +ALTER TABLE safekeepers DROP scheduling_policy; diff --git a/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/up.sql b/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/up.sql new file mode 100644 index 0000000000..d83cc6cc46 --- /dev/null +++ b/storage_controller/migrations/2024-12-12-212515_safekeepers_scheduling_policy/up.sql @@ -0,0 +1 @@ +ALTER TABLE safekeepers ADD scheduling_policy VARCHAR NOT NULL DEFAULT 'disabled'; diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 24fd4c341a..5385e4ee0b 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -3,7 +3,7 @@ use crate::metrics::{ HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup, METRICS_REGISTRY, }; -use crate::persistence::SafekeeperPersistence; +use crate::persistence::SafekeeperUpsert; use crate::reconciler::ReconcileError; use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT}; use anyhow::Context; @@ -1249,7 +1249,7 @@ async fn handle_get_safekeeper(req: Request) -> Result, Api async fn handle_upsert_safekeeper(mut req: Request) -> Result, ApiError> { check_permissions(&req, Scope::Infra)?; - let body = json_request::(&mut req).await?; + let body = json_request::(&mut req).await?; let id = parse_request_param::(&req, "id")?; if id != body.id { diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index c5eb106f24..cebf3e9594 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -13,6 +13,7 @@ use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::controller_api::MetadataHealthRecord; use pageserver_api::controller_api::SafekeeperDescribeResponse; use pageserver_api::controller_api::ShardSchedulingPolicy; +use pageserver_api::controller_api::SkSchedulingPolicy; use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; use pageserver_api::models::TenantConfig; use pageserver_api::shard::ShardConfigError; @@ -1075,12 +1076,14 @@ impl Persistence { pub(crate) async fn safekeeper_upsert( &self, - record: SafekeeperPersistence, + record: SafekeeperUpsert, ) -> Result<(), DatabaseError> { use crate::schema::safekeepers::dsl::*; self.with_conn(move |conn| -> DatabaseResult<()> { - let bind = record.as_insert_or_update(); + let bind = record + .as_insert_or_update() + .map_err(|e| DatabaseError::Logical(format!("{e}")))?; let inserted_updated = diesel::insert_into(safekeepers) .values(&bind) @@ -1243,6 +1246,7 @@ pub(crate) struct ControllerPersistence { pub(crate) started_at: chrono::DateTime, } +// What we store in the database #[derive(Serialize, Deserialize, Queryable, Selectable, Eq, PartialEq, Debug, Clone)] #[diesel(table_name = crate::schema::safekeepers)] pub(crate) struct SafekeeperPersistence { @@ -1257,11 +1261,51 @@ 
pub(crate) struct SafekeeperPersistence { pub(crate) active: bool, pub(crate) http_port: i32, pub(crate) availability_zone_id: String, + pub(crate) scheduling_policy: String, } impl SafekeeperPersistence { - fn as_insert_or_update(&self) -> InsertUpdateSafekeeper<'_> { - InsertUpdateSafekeeper { + pub(crate) fn as_describe_response(&self) -> Result { + let scheduling_policy = + SkSchedulingPolicy::from_str(&self.scheduling_policy).map_err(|e| { + DatabaseError::Logical(format!("can't construct SkSchedulingPolicy: {e:?}")) + })?; + // omit the `active` flag on purpose: it is deprecated. + Ok(SafekeeperDescribeResponse { + id: NodeId(self.id as u64), + region_id: self.region_id.clone(), + version: self.version, + host: self.host.clone(), + port: self.port, + http_port: self.http_port, + availability_zone_id: self.availability_zone_id.clone(), + scheduling_policy, + }) + } +} + +/// What we expect from the upsert http api +#[derive(Serialize, Deserialize, Eq, PartialEq, Debug, Clone)] +pub(crate) struct SafekeeperUpsert { + pub(crate) id: i64, + pub(crate) region_id: String, + /// 1 is special, it means just created (not currently posted to storcon). + /// Zero or negative is not really expected. + /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag. + pub(crate) version: i64, + pub(crate) host: String, + pub(crate) port: i32, + pub(crate) active: bool, + pub(crate) http_port: i32, + pub(crate) availability_zone_id: String, +} + +impl SafekeeperUpsert { + fn as_insert_or_update(&self) -> anyhow::Result> { + if self.version < 0 { + anyhow::bail!("negative version: {}", self.version); + } + Ok(InsertUpdateSafekeeper { id: self.id, region_id: &self.region_id, version: self.version, @@ -1270,19 +1314,9 @@ impl SafekeeperPersistence { active: self.active, http_port: self.http_port, availability_zone_id: &self.availability_zone_id, - } - } - pub(crate) fn as_describe_response(&self) -> SafekeeperDescribeResponse { - // omit the `active` flag on purpose: it is deprecated. - SafekeeperDescribeResponse { - id: NodeId(self.id as u64), - region_id: self.region_id.clone(), - version: self.version, - host: self.host.clone(), - port: self.port, - http_port: self.http_port, - availability_zone_id: self.availability_zone_id.clone(), - } + // None means a wish to not update this column. We expose abilities to update it via other means. + scheduling_policy: None, + }) } } @@ -1297,4 +1331,5 @@ struct InsertUpdateSafekeeper<'a> { active: bool, http_port: i32, availability_zone_id: &'a str, + scheduling_policy: Option<&'a str>, } diff --git a/storage_controller/src/schema.rs b/storage_controller/src/schema.rs index 9e005ab932..44c91619ab 100644 --- a/storage_controller/src/schema.rs +++ b/storage_controller/src/schema.rs @@ -39,6 +39,7 @@ diesel::table! { active -> Bool, http_port -> Int4, availability_zone_id -> Text, + scheduling_policy -> Varchar, } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index cf2d1ef97b..265b2798d2 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -7350,13 +7350,12 @@ impl Service { pub(crate) async fn safekeepers_list( &self, ) -> Result, DatabaseError> { - Ok(self - .persistence + self.persistence .list_safekeepers() .await? 
.into_iter() .map(|v| v.as_describe_response()) - .collect::>()) + .collect::, _>>() } pub(crate) async fn get_safekeeper( @@ -7366,12 +7365,12 @@ impl Service { self.persistence .safekeeper_get(id) .await - .map(|v| v.as_describe_response()) + .and_then(|v| v.as_describe_response()) } pub(crate) async fn upsert_safekeeper( &self, - record: crate::persistence::SafekeeperPersistence, + record: crate::persistence::SafekeeperUpsert, ) -> Result<(), DatabaseError> { self.persistence.safekeeper_upsert(record).await } diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 207f55a214..da6d5b8622 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3019,7 +3019,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder): def eq_safekeeper_records(a: dict[str, Any], b: dict[str, Any]) -> bool: compared = [dict(a), dict(b)] - masked_keys = ["created_at", "updated_at", "active"] + masked_keys = ["created_at", "updated_at", "active", "scheduling_policy"] for d in compared: # keep deleting these in case we are comparing the body as it will be uploaded by real scripts From f37eeb56addb3c7946ac6299cd7e415f4dc47a9d Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 9 Jan 2025 17:39:53 +0100 Subject: [PATCH 40/44] fix(compute_ctl): Resolve issues with dropping roles having dangling permissions (#10299) ## Problem In Postgres, one cannot drop a role if it has any dependent objects in the DB. In `compute_ctl`, we automatically reassign all dependent objects in every DB to the corresponding DB owner. Yet, it seems that it doesn't help with some implicit permissions. The issue is reproduced by installing a `postgis` extension because it creates some views and tables in the public schema. ## Summary of changes Added a repro test without using a `postgis`: i) create a role via `compute_ctl` (with `neon_superuser` grant); ii) create a test role, a table in schema public, and grant permissions via the role in `neon_superuser`. To fix the issue, I added a new `compute_ctl` code that removes such dangling permissions before dropping the role. It's done in the least invasive way, i.e., only touches the schema public, because i) that's the problem we had with PostGIS; ii) it creates a smaller chance of messing anything up and getting a stuck operation again, just for a different reason. Properly, any API-based catalog operations should fail gracefully and provide an actionable error and status code to the control plane, allowing the latter to unwind the operation and propagate an error message and hint to the user. In this sense, it's aligned with another feature request https://github.com/neondatabase/cloud/issues/21611 Resolve neondatabase/cloud#13582 --- compute_tools/src/spec_apply.rs | 14 ++- .../sql/pre_drop_role_revoke_privileges.sql | 28 +++++ test_runner/regress/test_compute_catalog.py | 115 ++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 compute_tools/src/sql/pre_drop_role_revoke_privileges.sql diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index 695a722d6d..7401de2e60 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -75,7 +75,7 @@ pub struct MutableApplyContext { pub dbs: HashMap, } -/// Appply the operations that belong to the given spec apply phase. +/// Apply the operations that belong to the given spec apply phase. 
/// /// Commands within a single phase are executed in order of Iterator yield. /// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database @@ -498,7 +498,19 @@ async fn get_operations<'a>( ), comment: None, }, + // Revoke some potentially blocking privileges (Neon-specific currently) + Operation { + query: format!( + include_str!("sql/pre_drop_role_revoke_privileges.sql"), + role_name = quoted, + ), + comment: None, + }, // This now will only drop privileges of the role + // TODO: this is obviously not 100% true because of the above case, + // there could be still some privileges that are not revoked. Maybe this + // only drops privileges that were granted *by this* role, not *to this* role, + // but this has to be checked. Operation { query: format!("DROP OWNED BY {}", quoted), comment: None, diff --git a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql new file mode 100644 index 0000000000..cdaa7071d3 --- /dev/null +++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql @@ -0,0 +1,28 @@ +SET SESSION ROLE neon_superuser; + +DO $$ +DECLARE + schema TEXT; + revoke_query TEXT; +BEGIN + FOR schema IN + SELECT schema_name + FROM information_schema.schemata + -- So far, we only had issues with 'public' schema. Probably, because we do some additional grants, + -- e.g., make DB owner the owner of 'public' schema automatically (when created via API). + -- See https://github.com/neondatabase/cloud/issues/13582 for the context. + -- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema, + -- ii) it's easy to add more schemas to the list if needed. + WHERE schema_name IN ('public') + LOOP + revoke_query := format( + 'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;', + schema + ); + + EXECUTE revoke_query; + END LOOP; +END; +$$; + +RESET ROLE; diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index e411aad97d..f0878b2631 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -250,3 +250,118 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): assert curr_db is not None assert len(curr_db) == 1 assert curr_db[0] == "publisher_db" + + +def test_compute_drop_role(neon_simple_env: NeonEnv): + """ + Test that compute_ctl can drop a role even if it has some depending objects + like permissions in one of the databases. + Reproduction test for https://github.com/neondatabase/cloud/issues/13582 + """ + env = neon_simple_env + TEST_DB_NAME = "db_with_permissions" + + endpoint = env.endpoints.create_start("main") + + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "roles": [ + { + # We need to create role via compute_ctl, because in this case it will receive + # additional grants equivalent to our real environment, so we can repro some + # issues. + "name": "neon", + # Some autocomplete-suggested hash, no specific meaning. + "encrypted_password": "SCRAM-SHA-256$4096:hBT22QjqpydQWqEulorfXA==$miBogcoj68JWYdsNB5PW1X6PjSLBEcNuctuhtGkb4PY=:hxk2gxkwxGo6P7GCtfpMlhA9zwHvPMsCz+NQf2HfvWk=", + "options": [], + }, + ], + "databases": [ + { + "name": TEST_DB_NAME, + "owner": "neon", + }, + ], + }, + } + ) + endpoint.reconfigure() + + with endpoint.cursor(dbname=TEST_DB_NAME) as cursor: + # Create table and view as `cloud_admin`. 
This is the case when, for example, + # PostGIS extensions creates tables in `public` schema. + cursor.execute("create table test_table (id int)") + cursor.execute("create view test_view as select * from test_table") + + with endpoint.cursor(dbname=TEST_DB_NAME, user="neon") as cursor: + cursor.execute("create role readonly") + # We (`compute_ctl`) make 'neon' the owner of schema 'public' in the owned database. + # Postgres has all sorts of permissions and grants that we may not handle well, + # but this is the shortest repro grant for the issue + # https://github.com/neondatabase/cloud/issues/13582 + cursor.execute("grant select on all tables in schema public to readonly") + + # Check that role was created + with endpoint.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'") + role = cursor.fetchone() + assert role is not None + + # Confirm that we actually have some permissions for 'readonly' role + # that may block our ability to drop the role. + with endpoint.cursor(dbname=TEST_DB_NAME) as cursor: + cursor.execute( + "select grantor from information_schema.role_table_grants where grantee = 'readonly'" + ) + res = cursor.fetchall() + assert len(res) == 2, f"Expected 2 table grants, got {len(res)}" + for row in res: + assert row[0] == "neon_superuser" + + # Drop role via compute_ctl + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "delta_operations": [ + { + "action": "delete_role", + "name": "readonly", + }, + ], + } + ) + endpoint.reconfigure() + + # Check that role is dropped + with endpoint.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'") + role = cursor.fetchone() + assert role is None + + # + # Drop schema 'public' and check that we can still drop the role + # + with endpoint.cursor(dbname=TEST_DB_NAME) as cursor: + cursor.execute("create role readonly2") + cursor.execute("grant select on all tables in schema public to readonly2") + cursor.execute("drop schema public cascade") + + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "delta_operations": [ + { + "action": "delete_role", + "name": "readonly2", + }, + ], + } + ) + endpoint.reconfigure() + + with endpoint.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly2'") + role = cursor.fetchone() + assert role is None From 99b5a6705f1dfa41a6cc7ae60b74b2f769d0eb85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 9 Jan 2025 19:29:09 +0100 Subject: [PATCH 41/44] Update rust to 1.84.0 (#10328) We keep the practice of keeping the compiler up to date, pointing to the latest release. This is done by many other projects in the Rust ecosystem as well. [Release notes](https://releases.rs/docs/1.84.0/). Prior update was in #9926. 
--- build-tools.Dockerfile | 2 +- rust-toolchain.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index fa84e467ad..79210a2e1b 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -258,7 +258,7 @@ WORKDIR /home/nonroot # Rust # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`) -ENV RUSTC_VERSION=1.83.0 +ENV RUSTC_VERSION=1.84.0 ENV RUSTUP_HOME="/home/nonroot/.rustup" ENV PATH="/home/nonroot/.cargo/bin:${PATH}" ARG RUSTFILT_VERSION=0.2.1 diff --git a/rust-toolchain.toml b/rust-toolchain.toml index f0661a32e0..06746d3e1d 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "1.83.0" +channel = "1.84.0" profile = "default" # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy. # https://rust-lang.github.io/rustup/concepts/profiles.html From 49756a0d01530c6df00b6095da15ad05027c149d Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 9 Jan 2025 14:08:26 -0600 Subject: [PATCH 42/44] Implement compute_ctl management API in Axum (#10099) This is a refactor to create better abstractions related to our management server. It cleans up the code, and prepares everything for authorized communication to and from the control plane. Signed-off-by: Tristan Partin --- Cargo.lock | 105 ++- Cargo.toml | 6 +- compute_tools/Cargo.toml | 5 +- compute_tools/src/bin/compute_ctl.rs | 9 +- compute_tools/src/catalog.rs | 6 +- compute_tools/src/http/api.rs | 606 ------------------ compute_tools/src/http/extract/json.rs | 48 ++ compute_tools/src/http/extract/mod.rs | 7 + compute_tools/src/http/extract/path.rs | 48 ++ compute_tools/src/http/extract/query.rs | 48 ++ compute_tools/src/http/mod.rs | 57 +- compute_tools/src/http/openapi_spec.yaml | 2 +- .../src/http/routes/check_writability.rs | 20 + compute_tools/src/http/routes/configure.rs | 91 +++ .../src/http/routes/database_schema.rs | 34 + .../src/http/routes/dbs_and_roles.rs | 16 + .../src/http/routes/extension_server.rs | 67 ++ compute_tools/src/http/routes/extensions.rs | 45 ++ compute_tools/src/http/routes/failpoints.rs | 35 + compute_tools/src/http/routes/grants.rs | 48 ++ compute_tools/src/http/routes/info.rs | 11 + compute_tools/src/http/routes/insights.rs | 18 + .../src/http/routes/installed_extensions.rs | 33 + compute_tools/src/http/routes/metrics.rs | 32 + compute_tools/src/http/routes/metrics_json.rs | 12 + compute_tools/src/http/routes/mod.rs | 38 ++ compute_tools/src/http/routes/status.rs | 14 + compute_tools/src/http/routes/terminate.rs | 58 ++ compute_tools/src/http/server.rs | 165 +++++ compute_tools/src/lib.rs | 2 - control_plane/src/endpoint.rs | 4 +- libs/compute_api/src/responses.rs | 23 +- workspace_hack/Cargo.toml | 3 +- 33 files changed, 1065 insertions(+), 651 deletions(-) delete mode 100644 compute_tools/src/http/api.rs create mode 100644 compute_tools/src/http/extract/json.rs create mode 100644 compute_tools/src/http/extract/mod.rs create mode 100644 compute_tools/src/http/extract/path.rs create mode 100644 compute_tools/src/http/extract/query.rs create mode 100644 compute_tools/src/http/routes/check_writability.rs create mode 100644 compute_tools/src/http/routes/configure.rs create mode 100644 compute_tools/src/http/routes/database_schema.rs create mode 100644 compute_tools/src/http/routes/dbs_and_roles.rs create mode 100644 compute_tools/src/http/routes/extension_server.rs create mode 100644 
compute_tools/src/http/routes/extensions.rs create mode 100644 compute_tools/src/http/routes/failpoints.rs create mode 100644 compute_tools/src/http/routes/grants.rs create mode 100644 compute_tools/src/http/routes/info.rs create mode 100644 compute_tools/src/http/routes/insights.rs create mode 100644 compute_tools/src/http/routes/installed_extensions.rs create mode 100644 compute_tools/src/http/routes/metrics.rs create mode 100644 compute_tools/src/http/routes/metrics_json.rs create mode 100644 compute_tools/src/http/routes/mod.rs create mode 100644 compute_tools/src/http/routes/status.rs create mode 100644 compute_tools/src/http/routes/terminate.rs create mode 100644 compute_tools/src/http/server.rs diff --git a/Cargo.lock b/Cargo.lock index 9e0e343996..44143fa0da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,13 +718,13 @@ dependencies = [ [[package]] name = "axum" -version = "0.7.5" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", - "base64 0.21.1", + "base64 0.22.1", "bytes", "futures-util", "http 1.1.0", @@ -746,8 +746,8 @@ dependencies = [ "sha1", "sync_wrapper 1.0.1", "tokio", - "tokio-tungstenite", - "tower", + "tokio-tungstenite 0.24.0", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -1267,6 +1267,7 @@ dependencies = [ "aws-config", "aws-sdk-kms", "aws-sdk-s3", + "axum", "base64 0.13.1", "bytes", "camino", @@ -1277,7 +1278,7 @@ dependencies = [ "fail", "flate2", "futures", - "hyper 0.14.30", + "http 1.1.0", "metrics", "nix 0.27.1", "notify", @@ -1303,6 +1304,8 @@ dependencies = [ "tokio-postgres", "tokio-stream", "tokio-util", + "tower 0.5.2", + "tower-http", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -2720,7 +2723,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", + "tower 0.4.13", "tower-service", "tracing", ] @@ -3260,9 +3263,9 @@ checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40" [[package]] name = "matchit" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "md-5" @@ -4758,7 +4761,7 @@ dependencies = [ "tokio-postgres", "tokio-postgres2", "tokio-rustls 0.26.0", - "tokio-tungstenite", + "tokio-tungstenite 0.21.0", "tokio-util", "tracing", "tracing-subscriber", @@ -5186,7 +5189,7 @@ dependencies = [ "async-trait", "getrandom 0.2.11", "http 1.1.0", - "matchit 0.8.2", + "matchit 0.8.4", "opentelemetry", "reqwest", "reqwest-middleware", @@ -6800,7 +6803,19 @@ dependencies = [ "futures-util", "log", "tokio", - "tungstenite", + "tungstenite 0.21.0", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.24.0", ] [[package]] @@ -6881,7 +6896,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.0", "tokio-stream", - "tower", + "tower 0.4.13", "tower-layer", "tower-service", "tracing", @@ -6922,16 +6937,49 @@ dependencies = [ ] [[package]] -name = "tower-layer" -version = "0.3.2" +name = "tower" +version = "0.5.2" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 1.0.1", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697" +dependencies = [ + "bitflags 2.4.1", + "bytes", + "http 1.1.0", + "http-body 1.0.0", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", + "uuid", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -7086,6 +7134,24 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.1.0", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror", + "utf-8", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -7867,7 +7933,8 @@ dependencies = [ "tokio-util", "toml_edit", "tonic", - "tower", + "tower 0.4.13", + "tower 0.5.2", "tracing", "tracing-core", "url", diff --git a/Cargo.toml b/Cargo.toml index 197808d5ae..39898e1c8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,7 +65,7 @@ aws-smithy-types = "1.2" aws-credential-types = "1.2.0" aws-sigv4 = { version = "1.2", features = ["sign-http"] } aws-types = "1.3" -axum = { version = "0.7.5", features = ["ws"] } +axum = { version = "0.7.9", features = ["ws"] } base64 = "0.13.0" bincode = "1.3" bindgen = "0.70" @@ -187,7 +187,9 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] } toml = "0.8" toml_edit = "0.22" tonic = {version = "0.12.3", features = ["tls", "tls-roots"]} -tower-service = "0.3.2" +tower = { version = "0.5.2", default-features = false } +tower-http = { version = "0.6.2", features = ["request-id", "trace"] } +tower-service = "0.3.3" tracing = "0.1" tracing-error = "0.2" tracing-opentelemetry = "0.27" diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index 9525b27818..33892813c4 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -15,6 +15,7 @@ aws-config.workspace = true aws-sdk-s3.workspace = true aws-sdk-kms.workspace = true anyhow.workspace = true +axum = { workspace = true, features = [] } camino.workspace = true chrono.workspace = true cfg-if.workspace = true @@ -22,7 +23,7 @@ clap.workspace = true fail.workspace = true flate2.workspace = true futures.workspace = true -hyper0 = { workspace = true, features = ["full"] } +http.workspace = true metrics.workspace = true nix.workspace = true notify.workspace = true @@ -37,6 +38,8 @@ serde_with.workspace = true serde_json.workspace = true signal-hook.workspace = true tar.workspace = true +tower.workspace = true +tower-http.workspace = 
true reqwest = { workspace = true, features = ["json"] } tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio-postgres.workspace = true diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 6ede5fdceb..04432ad0f3 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -60,7 +60,7 @@ use compute_tools::compute::{ }; use compute_tools::configurator::launch_configurator; use compute_tools::extension_server::get_pg_version_string; -use compute_tools::http::api::launch_http_server; +use compute_tools::http::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; @@ -493,7 +493,10 @@ fn start_postgres( let mut pg = None; if !prestartup_failed { pg = match compute.start_compute() { - Ok(pg) => Some(pg), + Ok(pg) => { + info!(postmaster_pid = %pg.0.id(), "Postgres was started"); + Some(pg) + } Err(err) => { error!("could not start the compute node: {:#}", err); compute.set_failed_status(err); @@ -591,6 +594,8 @@ fn wait_postgres(pg: Option) -> Result { // propagate to Postgres and it will be shut down as well. let mut exit_code = None; if let Some((mut pg, logs_handle)) = pg { + info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit"); + let ecode = pg .wait() .expect("failed to start waiting on Postgres process"); diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 72198a9479..4a297cfacf 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -36,11 +36,11 @@ pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result ComputeStatusResponse { - ComputeStatusResponse { - start_time: state.start_time, - tenant: state - .pspec - .as_ref() - .map(|pspec| pspec.tenant_id.to_string()), - timeline: state - .pspec - .as_ref() - .map(|pspec| pspec.timeline_id.to_string()), - status: state.status, - last_active: state.last_active, - error: state.error.clone(), - } -} - -// Service function to handle all available routes. -async fn routes(req: Request, compute: &Arc) -> Response { - // - // NOTE: The URI path is currently included in traces. That's OK because - // it doesn't contain any variable parts or sensitive information. But - // please keep that in mind if you change the routing here. - // - match (req.method(), req.uri().path()) { - // Serialized compute state. - (&Method::GET, "/status") => { - debug!("serving /status GET request"); - let state = compute.state.lock().unwrap(); - let status_response = status_response_from_state(&state); - Response::new(Body::from(serde_json::to_string(&status_response).unwrap())) - } - - // Startup metrics in JSON format. Keep /metrics reserved for a possible - // future use for Prometheus metrics format. - (&Method::GET, "/metrics.json") => { - info!("serving /metrics.json GET request"); - let metrics = compute.state.lock().unwrap().metrics.clone(); - Response::new(Body::from(serde_json::to_string(&metrics).unwrap())) - } - - // Prometheus metrics - (&Method::GET, "/metrics") => { - debug!("serving /metrics GET request"); - - // When we call TextEncoder::encode() below, it will immediately - // return an error if a metric family has no metrics, so we need to - // preemptively filter out metric families with no metrics. 
- let metrics = installed_extensions::collect() - .into_iter() - .filter(|m| !m.get_metric().is_empty()) - .collect::>(); - - let encoder = TextEncoder::new(); - let mut buffer = vec![]; - - if let Err(err) = encoder.encode(&metrics, &mut buffer) { - let msg = format!("error handling /metrics request: {err}"); - error!(msg); - return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR); - } - - match Response::builder() - .status(StatusCode::OK) - .header(CONTENT_TYPE, encoder.format_type()) - .body(Body::from(buffer)) - { - Ok(response) => response, - Err(err) => { - let msg = format!("error handling /metrics request: {err}"); - error!(msg); - render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR) - } - } - } - // Collect Postgres current usage insights - (&Method::GET, "/insights") => { - info!("serving /insights GET request"); - let status = compute.get_status(); - if status != ComputeStatus::Running { - let msg = format!("compute is not running, current status: {:?}", status); - error!(msg); - return Response::new(Body::from(msg)); - } - - let insights = compute.collect_insights().await; - Response::new(Body::from(insights)) - } - - (&Method::POST, "/check_writability") => { - info!("serving /check_writability POST request"); - let status = compute.get_status(); - if status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for check_writability request: {:?}", - status - ); - error!(msg); - return Response::new(Body::from(msg)); - } - - let res = crate::checker::check_writability(compute).await; - match res { - Ok(_) => Response::new(Body::from("true")), - Err(e) => { - error!("check_writability failed: {}", e); - Response::new(Body::from(e.to_string())) - } - } - } - - (&Method::POST, "/extensions") => { - info!("serving /extensions POST request"); - let status = compute.get_status(); - if status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for extensions request: {:?}", - status - ); - error!(msg); - return render_json_error(&msg, StatusCode::PRECONDITION_FAILED); - } - - let request = hyper::body::to_bytes(req.into_body()).await.unwrap(); - let request = serde_json::from_slice::(&request).unwrap(); - let res = compute - .install_extension(&request.extension, &request.database, request.version) - .await; - match res { - Ok(version) => render_json(Body::from( - serde_json::to_string(&ExtensionInstallResult { - extension: request.extension, - version, - }) - .unwrap(), - )), - Err(e) => { - error!("install_extension failed: {}", e); - render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR) - } - } - } - - (&Method::GET, "/info") => { - let num_cpus = num_cpus::get_physical(); - info!("serving /info GET request. num_cpus: {}", num_cpus); - Response::new(Body::from( - serde_json::json!({ - "num_cpus": num_cpus, - }) - .to_string(), - )) - } - - // Accept spec in JSON format and request compute configuration. If - // anything goes wrong after we set the compute status to `ConfigurationPending` - // and update compute state with new spec, we basically leave compute - // in the potentially wrong state. That said, it's control-plane's - // responsibility to watch compute state after reconfiguration request - // and to clean restart in case of errors. 
- (&Method::POST, "/configure") => { - info!("serving /configure POST request"); - match handle_configure_request(req, compute).await { - Ok(msg) => Response::new(Body::from(msg)), - Err((msg, code)) => { - error!("error handling /configure request: {msg}"); - render_json_error(&msg, code) - } - } - } - - (&Method::POST, "/terminate") => { - info!("serving /terminate POST request"); - match handle_terminate_request(compute).await { - Ok(()) => Response::new(Body::empty()), - Err((msg, code)) => { - error!("error handling /terminate request: {msg}"); - render_json_error(&msg, code) - } - } - } - - (&Method::GET, "/dbs_and_roles") => { - info!("serving /dbs_and_roles GET request",); - match get_dbs_and_roles(compute).await { - Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())), - Err(_) => { - render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR) - } - } - } - - (&Method::GET, "/database_schema") => { - let database = match must_get_query_param(&req, "database") { - Err(e) => return e.into_response(), - Ok(database) => database, - }; - info!("serving /database_schema GET request with database: {database}",); - match get_database_schema(compute, &database).await { - Ok(res) => render_plain(Body::wrap_stream(res)), - Err(SchemaDumpError::DatabaseDoesNotExist) => { - render_json_error("database does not exist", StatusCode::NOT_FOUND) - } - Err(e) => { - error!("can't get schema dump: {}", e); - render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR) - } - } - } - - (&Method::POST, "/grants") => { - info!("serving /grants POST request"); - let status = compute.get_status(); - if status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for set_role_grants request: {:?}", - status - ); - error!(msg); - return render_json_error(&msg, StatusCode::PRECONDITION_FAILED); - } - - let request = hyper::body::to_bytes(req.into_body()).await.unwrap(); - let request = serde_json::from_slice::(&request).unwrap(); - - let res = compute - .set_role_grants( - &request.database, - &request.schema, - &request.privileges, - &request.role, - ) - .await; - match res { - Ok(()) => render_json(Body::from( - serde_json::to_string(&SetRoleGrantsResponse { - database: request.database, - schema: request.schema, - role: request.role, - privileges: request.privileges, - }) - .unwrap(), - )), - Err(e) => render_json_error( - &format!("could not grant role privileges to the schema: {e}"), - // TODO: can we filter on role/schema not found errors - // and return appropriate error code? 
- StatusCode::INTERNAL_SERVER_ERROR, - ), - } - } - - // get the list of installed extensions - // currently only used in python tests - // TODO: call it from cplane - (&Method::GET, "/installed_extensions") => { - info!("serving /installed_extensions GET request"); - let status = compute.get_status(); - if status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for extensions request: {:?}", - status - ); - error!(msg); - return Response::new(Body::from(msg)); - } - - let conf = compute.get_conn_conf(None); - let res = - task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf)) - .await - .unwrap(); - - match res { - Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())), - Err(e) => render_json_error( - &format!("could not get list of installed extensions: {}", e), - StatusCode::INTERNAL_SERVER_ERROR, - ), - } - } - - (&Method::POST, "/failpoints") if cfg!(feature = "testing") => { - match failpoints_handler(req, CancellationToken::new()).await { - Ok(r) => r, - Err(ApiError::BadRequest(e)) => { - render_json_error(&e.to_string(), StatusCode::BAD_REQUEST) - } - Err(_) => { - render_json_error("Internal server error", StatusCode::INTERNAL_SERVER_ERROR) - } - } - } - - // download extension files from remote extension storage on demand - (&Method::POST, route) if route.starts_with("/extension_server/") => { - info!("serving {:?} POST request", route); - info!("req.uri {:?}", req.uri()); - - // don't even try to download extensions - // if no remote storage is configured - if compute.ext_remote_storage.is_none() { - info!("no extensions remote storage configured"); - let mut resp = Response::new(Body::from("no remote storage configured")); - *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; - return resp; - } - - let mut is_library = false; - if let Some(params) = req.uri().query() { - info!("serving {:?} POST request with params: {}", route, params); - if params == "is_library=true" { - is_library = true; - } else { - let mut resp = Response::new(Body::from("Wrong request parameters")); - *resp.status_mut() = StatusCode::BAD_REQUEST; - return resp; - } - } - let filename = route.split('/').last().unwrap().to_string(); - info!("serving /extension_server POST request, filename: {filename:?} is_library: {is_library}"); - - // get ext_name and path from spec - // don't lock compute_state for too long - let ext = { - let compute_state = compute.state.lock().unwrap(); - let pspec = compute_state.pspec.as_ref().expect("spec must be set"); - let spec = &pspec.spec; - - // debug only - info!("spec: {:?}", spec); - - let remote_extensions = match spec.remote_extensions.as_ref() { - Some(r) => r, - None => { - info!("no remote extensions spec was provided"); - let mut resp = Response::new(Body::from("no remote storage configured")); - *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; - return resp; - } - }; - - remote_extensions.get_ext( - &filename, - is_library, - &compute.build_tag, - &compute.pgversion, - ) - }; - - match ext { - Ok((ext_name, ext_path)) => { - match compute.download_extension(ext_name, ext_path).await { - Ok(_) => Response::new(Body::from("OK")), - Err(e) => { - error!("extension download failed: {}", e); - let mut resp = Response::new(Body::from(e.to_string())); - *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; - resp - } - } - } - Err(e) => { - warn!("extension download failed to find extension: {}", e); - let mut resp = Response::new(Body::from("failed to find file")); - 
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; - resp - } - } - } - - // Return the `404 Not Found` for any other routes. - _ => { - let mut not_found = Response::new(Body::from("404 Not Found")); - *not_found.status_mut() = StatusCode::NOT_FOUND; - not_found - } - } -} - -async fn handle_configure_request( - req: Request, - compute: &Arc, -) -> Result { - if !compute.live_config_allowed { - return Err(( - "live configuration is not allowed for this compute node".to_string(), - StatusCode::PRECONDITION_FAILED, - )); - } - - let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap(); - let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap(); - if let Ok(request) = serde_json::from_str::(&spec_raw) { - let spec = request.spec; - - let parsed_spec = match ParsedSpec::try_from(spec) { - Ok(ps) => ps, - Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)), - }; - - // XXX: wrap state update under lock in code blocks. Otherwise, - // we will try to `Send` `mut state` into the spawned thread - // bellow, which will cause error: - // ``` - // error: future cannot be sent between threads safely - // ``` - { - let mut state = compute.state.lock().unwrap(); - if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for configuration request: {:?}", - state.status.clone() - ); - return Err((msg, StatusCode::PRECONDITION_FAILED)); - } - state.pspec = Some(parsed_spec); - state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed); - drop(state); - info!("set new spec and notified waiters"); - } - - // Spawn a blocking thread to wait for compute to become Running. - // This is needed to do not block the main pool of workers and - // be able to serve other requests while some particular request - // is waiting for compute to finish configuration. - let c = compute.clone(); - task::spawn_blocking(move || { - let mut state = c.state.lock().unwrap(); - while state.status != ComputeStatus::Running { - state = c.state_changed.wait(state).unwrap(); - info!( - "waiting for compute to become Running, current status: {:?}", - state.status - ); - - if state.status == ComputeStatus::Failed { - let err = state.error.as_ref().map_or("unknown error", |x| x); - let msg = format!("compute configuration failed: {:?}", err); - return Err((msg, StatusCode::INTERNAL_SERVER_ERROR)); - } - } - - Ok(()) - }) - .await - .unwrap()?; - - // Return current compute state if everything went well. 
- let state = compute.state.lock().unwrap().clone(); - let status_response = status_response_from_state(&state); - Ok(serde_json::to_string(&status_response).unwrap()) - } else { - Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST)) - } -} - -fn render_json_error(e: &str, status: StatusCode) -> Response { - let error = GenericAPIError { - error: e.to_string(), - }; - Response::builder() - .status(status) - .header(CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_string(&error).unwrap())) - .unwrap() -} - -fn render_json(body: Body) -> Response { - Response::builder() - .header(CONTENT_TYPE, "application/json") - .body(body) - .unwrap() -} - -fn render_plain(body: Body) -> Response { - Response::builder() - .header(CONTENT_TYPE, "text/plain") - .body(body) - .unwrap() -} - -async fn handle_terminate_request(compute: &Arc) -> Result<(), (String, StatusCode)> { - { - let mut state = compute.state.lock().unwrap(); - if state.status == ComputeStatus::Terminated { - return Ok(()); - } - if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { - let msg = format!( - "invalid compute status for termination request: {}", - state.status - ); - return Err((msg, StatusCode::PRECONDITION_FAILED)); - } - state.set_status(ComputeStatus::TerminationPending, &compute.state_changed); - drop(state); - } - - forward_termination_signal(); - info!("sent signal and notified waiters"); - - // Spawn a blocking thread to wait for compute to become Terminated. - // This is needed to do not block the main pool of workers and - // be able to serve other requests while some particular request - // is waiting for compute to finish configuration. - let c = compute.clone(); - task::spawn_blocking(move || { - let mut state = c.state.lock().unwrap(); - while state.status != ComputeStatus::Terminated { - state = c.state_changed.wait(state).unwrap(); - info!( - "waiting for compute to become {}, current status: {:?}", - ComputeStatus::Terminated, - state.status - ); - } - - Ok(()) - }) - .await - .unwrap()?; - info!("terminated Postgres"); - Ok(()) -} - -// Main Hyper HTTP server function that runs it and blocks waiting on it forever. -#[tokio::main] -async fn serve(port: u16, state: Arc) { - // this usually binds to both IPv4 and IPv6 on linux - // see e.g. https://github.com/rust-lang/rust/pull/34440 - let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port); - - let make_service = make_service_fn(move |_conn| { - let state = state.clone(); - async move { - Ok::<_, Infallible>(service_fn(move |req: Request| { - let state = state.clone(); - async move { - Ok::<_, Infallible>( - // NOTE: We include the URI path in the string. It - // doesn't contain any variable parts or sensitive - // information in this API. - tracing_utils::http::tracing_handler( - req, - |req| routes(req, &state), - OtelName::UriPath, - ) - .await, - ) - } - })) - } - }); - - info!("starting HTTP server on {}", addr); - - let server = Server::bind(&addr).serve(make_service); - - // Run this server forever - if let Err(e) = server.await { - error!("server error: {}", e); - } -} - -/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`. -pub fn launch_http_server(port: u16, state: &Arc) -> Result> { - let state = Arc::clone(state); - - Ok(thread::Builder::new() - .name("http-endpoint".into()) - .spawn(move || serve(port, state))?) 
-} diff --git a/compute_tools/src/http/extract/json.rs b/compute_tools/src/http/extract/json.rs new file mode 100644 index 0000000000..41f13625ad --- /dev/null +++ b/compute_tools/src/http/extract/json.rs @@ -0,0 +1,48 @@ +use std::ops::{Deref, DerefMut}; + +use axum::{ + async_trait, + extract::{rejection::JsonRejection, FromRequest, Request}, +}; +use compute_api::responses::GenericAPIError; +use http::StatusCode; + +/// Custom `Json` extractor, so that we can format errors into +/// `JsonResponse`. +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct Json(pub T); + +#[async_trait] +impl FromRequest for Json +where + axum::Json: FromRequest, + S: Send + Sync, +{ + type Rejection = (StatusCode, axum::Json); + + async fn from_request(req: Request, state: &S) -> Result { + match axum::Json::::from_request(req, state).await { + Ok(value) => Ok(Self(value.0)), + Err(rejection) => Err(( + rejection.status(), + axum::Json(GenericAPIError { + error: rejection.body_text().to_lowercase(), + }), + )), + } + } +} + +impl Deref for Json { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Json { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/compute_tools/src/http/extract/mod.rs b/compute_tools/src/http/extract/mod.rs new file mode 100644 index 0000000000..1b690e444d --- /dev/null +++ b/compute_tools/src/http/extract/mod.rs @@ -0,0 +1,7 @@ +pub(crate) mod json; +pub(crate) mod path; +pub(crate) mod query; + +pub(crate) use json::Json; +pub(crate) use path::Path; +pub(crate) use query::Query; diff --git a/compute_tools/src/http/extract/path.rs b/compute_tools/src/http/extract/path.rs new file mode 100644 index 0000000000..95edc657f2 --- /dev/null +++ b/compute_tools/src/http/extract/path.rs @@ -0,0 +1,48 @@ +use std::ops::{Deref, DerefMut}; + +use axum::{ + async_trait, + extract::{rejection::PathRejection, FromRequestParts}, +}; +use compute_api::responses::GenericAPIError; +use http::{request::Parts, StatusCode}; + +/// Custom `Path` extractor, so that we can format errors into +/// `JsonResponse`. +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct Path(pub T); + +#[async_trait] +impl FromRequestParts for Path +where + axum::extract::Path: FromRequestParts, + S: Send + Sync, +{ + type Rejection = (StatusCode, axum::Json); + + async fn from_request_parts(parts: &mut Parts, state: &S) -> Result { + match axum::extract::Path::::from_request_parts(parts, state).await { + Ok(value) => Ok(Self(value.0)), + Err(rejection) => Err(( + rejection.status(), + axum::Json(GenericAPIError { + error: rejection.body_text().to_ascii_lowercase(), + }), + )), + } + } +} + +impl Deref for Path { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Path { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/compute_tools/src/http/extract/query.rs b/compute_tools/src/http/extract/query.rs new file mode 100644 index 0000000000..a1f1b0cef0 --- /dev/null +++ b/compute_tools/src/http/extract/query.rs @@ -0,0 +1,48 @@ +use std::ops::{Deref, DerefMut}; + +use axum::{ + async_trait, + extract::{rejection::QueryRejection, FromRequestParts}, +}; +use compute_api::responses::GenericAPIError; +use http::{request::Parts, StatusCode}; + +/// Custom `Query` extractor, so that we can format errors into +/// `JsonResponse`. 
+#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct Query(pub T); + +#[async_trait] +impl FromRequestParts for Query +where + axum::extract::Query: FromRequestParts, + S: Send + Sync, +{ + type Rejection = (StatusCode, axum::Json); + + async fn from_request_parts(parts: &mut Parts, state: &S) -> Result { + match axum::extract::Query::::from_request_parts(parts, state).await { + Ok(value) => Ok(Self(value.0)), + Err(rejection) => Err(( + rejection.status(), + axum::Json(GenericAPIError { + error: rejection.body_text().to_ascii_lowercase(), + }), + )), + } + } +} + +impl Deref for Query { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Query { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs index e5fdf85eed..a596bea504 100644 --- a/compute_tools/src/http/mod.rs +++ b/compute_tools/src/http/mod.rs @@ -1 +1,56 @@ -pub mod api; +use axum::{body::Body, response::Response}; +use compute_api::responses::{ComputeStatus, GenericAPIError}; +use http::{header::CONTENT_TYPE, StatusCode}; +use serde::Serialize; +use tracing::error; + +pub use server::launch_http_server; + +mod extract; +mod routes; +mod server; + +/// Convenience response builder for JSON responses +struct JsonResponse; + +impl JsonResponse { + /// Helper for actually creating a response + fn create_response(code: StatusCode, body: impl Serialize) -> Response { + Response::builder() + .status(code) + .header(CONTENT_TYPE.as_str(), "application/json") + .body(Body::from(serde_json::to_string(&body).unwrap())) + .unwrap() + } + + /// Create a successful error response + pub(self) fn success(code: StatusCode, body: impl Serialize) -> Response { + assert!({ + let code = code.as_u16(); + + (200..300).contains(&code) + }); + + Self::create_response(code, body) + } + + /// Create an error response + pub(self) fn error(code: StatusCode, error: impl ToString) -> Response { + assert!(code.as_u16() >= 400); + + let message = error.to_string(); + error!(message); + + Self::create_response(code, &GenericAPIError { error: message }) + } + + /// Create an error response related to the compute being in an invalid state + pub(self) fn invalid_status(status: ComputeStatus) -> Response { + Self::create_response( + StatusCode::PRECONDITION_FAILED, + &GenericAPIError { + error: format!("invalid compute status: {status}"), + }, + ) + } +} diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index 24a67cac71..50319cdd85 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -37,7 +37,7 @@ paths: schema: $ref: "#/components/schemas/ComputeMetrics" - /metrics + /metrics: get: tags: - Info diff --git a/compute_tools/src/http/routes/check_writability.rs b/compute_tools/src/http/routes/check_writability.rs new file mode 100644 index 0000000000..d7feb055e9 --- /dev/null +++ b/compute_tools/src/http/routes/check_writability.rs @@ -0,0 +1,20 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::responses::ComputeStatus; +use http::StatusCode; + +use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse}; + +/// Check that the compute is currently running. 
+pub(in crate::http) async fn is_writable(State(compute): State>) -> Response { + let status = compute.get_status(); + if status != ComputeStatus::Running { + return JsonResponse::invalid_status(status); + } + + match check_writability(&compute).await { + Ok(_) => JsonResponse::success(StatusCode::OK, true), + Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e), + } +} diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs new file mode 100644 index 0000000000..2546cbc344 --- /dev/null +++ b/compute_tools/src/http/routes/configure.rs @@ -0,0 +1,91 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::{ + requests::ConfigurationRequest, + responses::{ComputeStatus, ComputeStatusResponse}, +}; +use http::StatusCode; +use tokio::task; +use tracing::info; + +use crate::{ + compute::{ComputeNode, ParsedSpec}, + http::{extract::Json, JsonResponse}, +}; + +// Accept spec in JSON format and request compute configuration. If anything +// goes wrong after we set the compute status to `ConfigurationPending` and +// update compute state with new spec, we basically leave compute in the +// potentially wrong state. That said, it's control-plane's responsibility to +// watch compute state after reconfiguration request and to clean restart in +// case of errors. +pub(in crate::http) async fn configure( + State(compute): State>, + request: Json, +) -> Response { + if !compute.live_config_allowed { + return JsonResponse::error( + StatusCode::PRECONDITION_FAILED, + "live configuration is not allowed for this compute node".to_string(), + ); + } + + let pspec = match ParsedSpec::try_from(request.spec.clone()) { + Ok(p) => p, + Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e), + }; + + // XXX: wrap state update under lock in a code block. Otherwise, we will try + // to `Send` `mut state` into the spawned thread bellow, which will cause + // the following rustc error: + // + // error: future cannot be sent between threads safely + { + let mut state = compute.state.lock().unwrap(); + if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) { + return JsonResponse::invalid_status(state.status); + } + + state.pspec = Some(pspec); + state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed); + drop(state); + } + + // Spawn a blocking thread to wait for compute to become Running. This is + // needed to do not block the main pool of workers and be able to serve + // other requests while some particular request is waiting for compute to + // finish configuration. + let c = compute.clone(); + let completed = task::spawn_blocking(move || { + let mut state = c.state.lock().unwrap(); + while state.status != ComputeStatus::Running { + state = c.state_changed.wait(state).unwrap(); + info!( + "waiting for compute to become {}, current status: {}", + ComputeStatus::Running, + state.status + ); + + if state.status == ComputeStatus::Failed { + let err = state.error.as_ref().map_or("unknown error", |x| x); + let msg = format!("compute configuration failed: {:?}", err); + return Err(msg); + } + } + + Ok(()) + }) + .await + .unwrap(); + + if let Err(e) = completed { + return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e); + } + + // Return current compute state if everything went well. 
+ let state = compute.state.lock().unwrap().clone(); + let body = ComputeStatusResponse::from(&state); + + JsonResponse::success(StatusCode::OK, body) +} diff --git a/compute_tools/src/http/routes/database_schema.rs b/compute_tools/src/http/routes/database_schema.rs new file mode 100644 index 0000000000..fd716272dc --- /dev/null +++ b/compute_tools/src/http/routes/database_schema.rs @@ -0,0 +1,34 @@ +use std::sync::Arc; + +use axum::{body::Body, extract::State, response::Response}; +use http::{header::CONTENT_TYPE, StatusCode}; +use serde::Deserialize; + +use crate::{ + catalog::{get_database_schema, SchemaDumpError}, + compute::ComputeNode, + http::{extract::Query, JsonResponse}, +}; + +#[derive(Debug, Clone, Deserialize)] +pub(in crate::http) struct DatabaseSchemaParams { + database: String, +} + +/// Get a schema dump of the requested database. +pub(in crate::http) async fn get_schema_dump( + params: Query, + State(compute): State>, +) -> Response { + match get_database_schema(&compute, ¶ms.database).await { + Ok(schema) => Response::builder() + .status(StatusCode::OK) + .header(CONTENT_TYPE.as_str(), "application/json") + .body(Body::from_stream(schema)) + .unwrap(), + Err(SchemaDumpError::DatabaseDoesNotExist) => { + JsonResponse::error(StatusCode::NOT_FOUND, SchemaDumpError::DatabaseDoesNotExist) + } + Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e), + } +} diff --git a/compute_tools/src/http/routes/dbs_and_roles.rs b/compute_tools/src/http/routes/dbs_and_roles.rs new file mode 100644 index 0000000000..4843c3fab4 --- /dev/null +++ b/compute_tools/src/http/routes/dbs_and_roles.rs @@ -0,0 +1,16 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use http::StatusCode; + +use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse}; + +/// Get the databases and roles from the compute. +pub(in crate::http) async fn get_catalog_objects( + State(compute): State>, +) -> Response { + match get_dbs_and_roles(&compute).await { + Ok(catalog_objects) => JsonResponse::success(StatusCode::OK, catalog_objects), + Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e), + } +} diff --git a/compute_tools/src/http/routes/extension_server.rs b/compute_tools/src/http/routes/extension_server.rs new file mode 100644 index 0000000000..ee5bc675ba --- /dev/null +++ b/compute_tools/src/http/routes/extension_server.rs @@ -0,0 +1,67 @@ +use std::sync::Arc; + +use axum::{ + extract::State, + response::{IntoResponse, Response}, +}; +use http::StatusCode; +use serde::Deserialize; + +use crate::{ + compute::ComputeNode, + http::{ + extract::{Path, Query}, + JsonResponse, + }, +}; + +#[derive(Debug, Clone, Deserialize)] +pub(in crate::http) struct ExtensionServerParams { + is_library: Option, +} + +/// Download a remote extension. 
+pub(in crate::http) async fn download_extension( + Path(filename): Path, + params: Query, + State(compute): State>, +) -> Response { + // Don't even try to download extensions if no remote storage is configured + if compute.ext_remote_storage.is_none() { + return JsonResponse::error( + StatusCode::PRECONDITION_FAILED, + "remote storage is not configured", + ); + } + + let ext = { + let state = compute.state.lock().unwrap(); + let pspec = state.pspec.as_ref().unwrap(); + let spec = &pspec.spec; + + let remote_extensions = match spec.remote_extensions.as_ref() { + Some(r) => r, + None => { + return JsonResponse::error( + StatusCode::CONFLICT, + "information about remote extensions is unavailable", + ); + } + }; + + remote_extensions.get_ext( + &filename, + params.is_library.unwrap_or(false), + &compute.build_tag, + &compute.pgversion, + ) + }; + + match ext { + Ok((ext_name, ext_path)) => match compute.download_extension(ext_name, ext_path).await { + Ok(_) => StatusCode::OK.into_response(), + Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e), + }, + Err(e) => JsonResponse::error(StatusCode::NOT_FOUND, e), + } +} diff --git a/compute_tools/src/http/routes/extensions.rs b/compute_tools/src/http/routes/extensions.rs new file mode 100644 index 0000000000..1fc03b9109 --- /dev/null +++ b/compute_tools/src/http/routes/extensions.rs @@ -0,0 +1,45 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::{ + requests::ExtensionInstallRequest, + responses::{ComputeStatus, ExtensionInstallResponse}, +}; +use http::StatusCode; + +use crate::{ + compute::ComputeNode, + http::{extract::Json, JsonResponse}, +}; + +/// Install a extension. +pub(in crate::http) async fn install_extension( + State(compute): State>, + request: Json, +) -> Response { + let status = compute.get_status(); + if status != ComputeStatus::Running { + return JsonResponse::invalid_status(status); + } + + match compute + .install_extension( + &request.extension, + &request.database, + request.version.to_string(), + ) + .await + { + Ok(version) => JsonResponse::success( + StatusCode::CREATED, + Some(ExtensionInstallResponse { + extension: request.extension.clone(), + version, + }), + ), + Err(e) => JsonResponse::error( + StatusCode::INTERNAL_SERVER_ERROR, + format!("failed to install extension: {e}"), + ), + } +} diff --git a/compute_tools/src/http/routes/failpoints.rs b/compute_tools/src/http/routes/failpoints.rs new file mode 100644 index 0000000000..2ec4511676 --- /dev/null +++ b/compute_tools/src/http/routes/failpoints.rs @@ -0,0 +1,35 @@ +use axum::response::{IntoResponse, Response}; +use http::StatusCode; +use tracing::info; +use utils::failpoint_support::{apply_failpoint, ConfigureFailpointsRequest}; + +use crate::http::{extract::Json, JsonResponse}; + +/// Configure failpoints for testing purposes. 
+pub(in crate::http) async fn configure_failpoints( + failpoints: Json, +) -> Response { + if !fail::has_failpoints() { + return JsonResponse::error( + StatusCode::PRECONDITION_FAILED, + "Cannot manage failpoints because neon was compiled without failpoints support", + ); + } + + for fp in &*failpoints { + info!("cfg failpoint: {} {}", fp.name, fp.actions); + + // We recognize one extra "action" that's not natively recognized + // by the failpoints crate: exit, to immediately kill the process + let cfg_result = apply_failpoint(&fp.name, &fp.actions); + + if let Err(e) = cfg_result { + return JsonResponse::error( + StatusCode::BAD_REQUEST, + format!("failed to configure failpoints: {e}"), + ); + } + } + + StatusCode::OK.into_response() +} diff --git a/compute_tools/src/http/routes/grants.rs b/compute_tools/src/http/routes/grants.rs new file mode 100644 index 0000000000..3f67f011e5 --- /dev/null +++ b/compute_tools/src/http/routes/grants.rs @@ -0,0 +1,48 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::{ + requests::SetRoleGrantsRequest, + responses::{ComputeStatus, SetRoleGrantsResponse}, +}; +use http::StatusCode; + +use crate::{ + compute::ComputeNode, + http::{extract::Json, JsonResponse}, +}; + +/// Add grants for a role. +pub(in crate::http) async fn add_grant( + State(compute): State>, + request: Json, +) -> Response { + let status = compute.get_status(); + if status != ComputeStatus::Running { + return JsonResponse::invalid_status(status); + } + + match compute + .set_role_grants( + &request.database, + &request.schema, + &request.privileges, + &request.role, + ) + .await + { + Ok(()) => JsonResponse::success( + StatusCode::CREATED, + Some(SetRoleGrantsResponse { + database: request.database.clone(), + schema: request.schema.clone(), + role: request.role.clone(), + privileges: request.privileges.clone(), + }), + ), + Err(e) => JsonResponse::error( + StatusCode::INTERNAL_SERVER_ERROR, + format!("failed to grant role privileges to the schema: {e}"), + ), + } +} diff --git a/compute_tools/src/http/routes/info.rs b/compute_tools/src/http/routes/info.rs new file mode 100644 index 0000000000..32d6fea74c --- /dev/null +++ b/compute_tools/src/http/routes/info.rs @@ -0,0 +1,11 @@ +use axum::response::Response; +use compute_api::responses::InfoResponse; +use http::StatusCode; + +use crate::http::JsonResponse; + +/// Get information about the physical characteristics about the compute. +pub(in crate::http) async fn get_info() -> Response { + let num_cpus = num_cpus::get_physical(); + JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus }) +} diff --git a/compute_tools/src/http/routes/insights.rs b/compute_tools/src/http/routes/insights.rs new file mode 100644 index 0000000000..6b03a461c3 --- /dev/null +++ b/compute_tools/src/http/routes/insights.rs @@ -0,0 +1,18 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::responses::ComputeStatus; +use http::StatusCode; + +use crate::{compute::ComputeNode, http::JsonResponse}; + +/// Collect current Postgres usage insights. 
+pub(in crate::http) async fn get_insights(State(compute): State>) -> Response { + let status = compute.get_status(); + if status != ComputeStatus::Running { + return JsonResponse::invalid_status(status); + } + + let insights = compute.collect_insights().await; + JsonResponse::success(StatusCode::OK, insights) +} diff --git a/compute_tools/src/http/routes/installed_extensions.rs b/compute_tools/src/http/routes/installed_extensions.rs new file mode 100644 index 0000000000..db74a6b195 --- /dev/null +++ b/compute_tools/src/http/routes/installed_extensions.rs @@ -0,0 +1,33 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use compute_api::responses::ComputeStatus; +use http::StatusCode; +use tokio::task; + +use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions}; + +/// Get a list of installed extensions. +pub(in crate::http) async fn get_installed_extensions( + State(compute): State>, +) -> Response { + let status = compute.get_status(); + if status != ComputeStatus::Running { + return JsonResponse::invalid_status(status); + } + + let conf = compute.get_conn_conf(None); + let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf)) + .await + .unwrap(); + + match res { + Ok(installed_extensions) => { + JsonResponse::success(StatusCode::OK, Some(installed_extensions)) + } + Err(e) => JsonResponse::error( + StatusCode::INTERNAL_SERVER_ERROR, + format!("failed to get list of installed extensions: {e}"), + ), + } +} diff --git a/compute_tools/src/http/routes/metrics.rs b/compute_tools/src/http/routes/metrics.rs new file mode 100644 index 0000000000..40d71b5de7 --- /dev/null +++ b/compute_tools/src/http/routes/metrics.rs @@ -0,0 +1,32 @@ +use axum::{body::Body, response::Response}; +use http::header::CONTENT_TYPE; +use http::StatusCode; +use metrics::proto::MetricFamily; +use metrics::Encoder; +use metrics::TextEncoder; + +use crate::{http::JsonResponse, installed_extensions}; + +/// Expose Prometheus metrics. +pub(in crate::http) async fn get_metrics() -> Response { + // When we call TextEncoder::encode() below, it will immediately return an + // error if a metric family has no metrics, so we need to preemptively + // filter out metric families with no metrics. + let metrics = installed_extensions::collect() + .into_iter() + .filter(|m| !m.get_metric().is_empty()) + .collect::>(); + + let encoder = TextEncoder::new(); + let mut buffer = vec![]; + + if let Err(e) = encoder.encode(&metrics, &mut buffer) { + return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e); + } + + Response::builder() + .status(StatusCode::OK) + .header(CONTENT_TYPE, encoder.format_type()) + .body(Body::from(buffer)) + .unwrap() +} diff --git a/compute_tools/src/http/routes/metrics_json.rs b/compute_tools/src/http/routes/metrics_json.rs new file mode 100644 index 0000000000..0709db5011 --- /dev/null +++ b/compute_tools/src/http/routes/metrics_json.rs @@ -0,0 +1,12 @@ +use std::sync::Arc; + +use axum::{extract::State, response::Response}; +use http::StatusCode; + +use crate::{compute::ComputeNode, http::JsonResponse}; + +/// Get startup metrics. 
+pub(in crate::http) async fn get_metrics(State(compute): State>) -> Response { + let metrics = compute.state.lock().unwrap().metrics.clone(); + JsonResponse::success(StatusCode::OK, metrics) +} diff --git a/compute_tools/src/http/routes/mod.rs b/compute_tools/src/http/routes/mod.rs new file mode 100644 index 0000000000..3efa1153ad --- /dev/null +++ b/compute_tools/src/http/routes/mod.rs @@ -0,0 +1,38 @@ +use compute_api::responses::ComputeStatusResponse; + +use crate::compute::ComputeState; + +pub(in crate::http) mod check_writability; +pub(in crate::http) mod configure; +pub(in crate::http) mod database_schema; +pub(in crate::http) mod dbs_and_roles; +pub(in crate::http) mod extension_server; +pub(in crate::http) mod extensions; +pub(in crate::http) mod failpoints; +pub(in crate::http) mod grants; +pub(in crate::http) mod info; +pub(in crate::http) mod insights; +pub(in crate::http) mod installed_extensions; +pub(in crate::http) mod metrics; +pub(in crate::http) mod metrics_json; +pub(in crate::http) mod status; +pub(in crate::http) mod terminate; + +impl From<&ComputeState> for ComputeStatusResponse { + fn from(state: &ComputeState) -> Self { + ComputeStatusResponse { + start_time: state.start_time, + tenant: state + .pspec + .as_ref() + .map(|pspec| pspec.tenant_id.to_string()), + timeline: state + .pspec + .as_ref() + .map(|pspec| pspec.timeline_id.to_string()), + status: state.status, + last_active: state.last_active, + error: state.error.clone(), + } + } +} diff --git a/compute_tools/src/http/routes/status.rs b/compute_tools/src/http/routes/status.rs new file mode 100644 index 0000000000..d64d53a58f --- /dev/null +++ b/compute_tools/src/http/routes/status.rs @@ -0,0 +1,14 @@ +use std::{ops::Deref, sync::Arc}; + +use axum::{extract::State, http::StatusCode, response::Response}; +use compute_api::responses::ComputeStatusResponse; + +use crate::{compute::ComputeNode, http::JsonResponse}; + +/// Retrieve the state of the comute. +pub(in crate::http) async fn get_status(State(compute): State>) -> Response { + let state = compute.state.lock().unwrap(); + let body = ComputeStatusResponse::from(state.deref()); + + JsonResponse::success(StatusCode::OK, body) +} diff --git a/compute_tools/src/http/routes/terminate.rs b/compute_tools/src/http/routes/terminate.rs new file mode 100644 index 0000000000..7acd84f236 --- /dev/null +++ b/compute_tools/src/http/routes/terminate.rs @@ -0,0 +1,58 @@ +use std::sync::Arc; + +use axum::{ + extract::State, + response::{IntoResponse, Response}, +}; +use compute_api::responses::ComputeStatus; +use http::StatusCode; +use tokio::task; +use tracing::info; + +use crate::{ + compute::{forward_termination_signal, ComputeNode}, + http::JsonResponse, +}; + +/// Terminate the compute. +pub(in crate::http) async fn terminate(State(compute): State>) -> Response { + { + let mut state = compute.state.lock().unwrap(); + if state.status == ComputeStatus::Terminated { + return StatusCode::CREATED.into_response(); + } + + if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) { + return JsonResponse::invalid_status(state.status); + } + + state.set_status(ComputeStatus::TerminationPending, &compute.state_changed); + drop(state); + } + + forward_termination_signal(); + info!("sent signal and notified waiters"); + + // Spawn a blocking thread to wait for compute to become Terminated. 
+ // This is needed to do not block the main pool of workers and + // be able to serve other requests while some particular request + // is waiting for compute to finish configuration. + let c = compute.clone(); + task::spawn_blocking(move || { + let mut state = c.state.lock().unwrap(); + while state.status != ComputeStatus::Terminated { + state = c.state_changed.wait(state).unwrap(); + info!( + "waiting for compute to become {}, current status: {:?}", + ComputeStatus::Terminated, + state.status + ); + } + }) + .await + .unwrap(); + + info!("terminated Postgres"); + + StatusCode::OK.into_response() +} diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs new file mode 100644 index 0000000000..33d4b489a0 --- /dev/null +++ b/compute_tools/src/http/server.rs @@ -0,0 +1,165 @@ +use std::{ + net::{IpAddr, Ipv6Addr, SocketAddr}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + thread, + time::Duration, +}; + +use anyhow::Result; +use axum::{ + response::{IntoResponse, Response}, + routing::{get, post}, + Router, +}; +use http::StatusCode; +use tokio::net::TcpListener; +use tower::ServiceBuilder; +use tower_http::{ + request_id::{MakeRequestId, PropagateRequestIdLayer, RequestId, SetRequestIdLayer}, + trace::TraceLayer, +}; +use tracing::{debug, error, info, Span}; + +use super::routes::{ + check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions, + grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status, + terminate, +}; +use crate::compute::ComputeNode; + +async fn handle_404() -> Response { + StatusCode::NOT_FOUND.into_response() +} + +#[derive(Clone, Default)] +struct ComputeMakeRequestId(Arc); + +impl MakeRequestId for ComputeMakeRequestId { + fn make_request_id( + &mut self, + _request: &http::Request, + ) -> Option { + let request_id = self + .0 + .fetch_add(1, Ordering::SeqCst) + .to_string() + .parse() + .unwrap(); + + Some(RequestId::new(request_id)) + } +} + +/// Run the HTTP server and wait on it forever. 
+#[tokio::main] +async fn serve(port: u16, compute: Arc) { + const X_REQUEST_ID: &str = "x-request-id"; + + let mut app = Router::new() + .route("/check_writability", post(check_writability::is_writable)) + .route("/configure", post(configure::configure)) + .route("/database_schema", get(database_schema::get_schema_dump)) + .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) + .route( + "/extension_server/*filename", + post(extension_server::download_extension), + ) + .route("/extensions", post(extensions::install_extension)) + .route("/grants", post(grants::add_grant)) + .route("/info", get(info_route::get_info)) + .route("/insights", get(insights::get_insights)) + .route( + "/installed_extensions", + get(installed_extensions::get_installed_extensions), + ) + .route("/metrics", get(metrics::get_metrics)) + .route("/metrics.json", get(metrics_json::get_metrics)) + .route("/status", get(status::get_status)) + .route("/terminate", post(terminate::terminate)) + .fallback(handle_404) + .layer( + ServiceBuilder::new() + .layer(SetRequestIdLayer::x_request_id( + ComputeMakeRequestId::default(), + )) + .layer( + TraceLayer::new_for_http() + .on_request(|request: &http::Request<_>, _span: &Span| { + let request_id = request + .headers() + .get(X_REQUEST_ID) + .unwrap() + .to_str() + .unwrap(); + + match request.uri().path() { + "/metrics" => { + debug!(%request_id, "{} {}", request.method(), request.uri()) + } + _ => info!(%request_id, "{} {}", request.method(), request.uri()), + }; + }) + .on_response( + |response: &http::Response<_>, latency: Duration, _span: &Span| { + let request_id = response + .headers() + .get(X_REQUEST_ID) + .unwrap() + .to_str() + .unwrap(); + + info!( + %request_id, + code = response.status().as_u16(), + latency = latency.as_millis() + ) + }, + ), + ) + .layer(PropagateRequestIdLayer::x_request_id()), + ) + .with_state(compute); + + // Add in any testing support + if cfg!(feature = "testing") { + use super::routes::failpoints; + + app = app.route("/failpoints", post(failpoints::configure_failpoints)) + } + + // This usually binds to both IPv4 and IPv6 on Linux, see + // https://github.com/rust-lang/rust/pull/34440 for more information + let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port); + let listener = match TcpListener::bind(&addr).await { + Ok(listener) => listener, + Err(e) => { + error!( + "failed to bind the compute_ctl HTTP server to port {}: {}", + port, e + ); + return; + } + }; + + if let Ok(local_addr) = listener.local_addr() { + info!("compute_ctl HTTP server listening on {}", local_addr); + } else { + info!("compute_ctl HTTP server listening on port {}", port); + } + + if let Err(e) = axum::serve(listener, app).await { + error!("compute_ctl HTTP server error: {}", e); + } +} + +/// Launch a separate HTTP server thread and return its `JoinHandle`. +pub fn launch_http_server(port: u16, state: &Arc) -> Result> { + let state = Arc::clone(state); + + Ok(thread::Builder::new() + .name("http-server".into()) + .spawn(move || serve(port, state))?) 
+} diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index ee4cf2dfa5..12fea4e61a 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -3,8 +3,6 @@ #![deny(unsafe_code)] #![deny(clippy::undocumented_unsafe_blocks)] -extern crate hyper0 as hyper; - pub mod checker; pub mod config; pub mod configurator; diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 5e47ec4811..b8027abf7c 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -62,7 +62,7 @@ use crate::local_env::LocalEnv; use crate::postgresql_conf::PostgresConf; use crate::storage_controller::StorageController; -use compute_api::responses::{ComputeState, ComputeStatus}; +use compute_api::responses::{ComputeStatus, ComputeStatusResponse}; use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec}; // contents of a endpoint.json file @@ -739,7 +739,7 @@ impl Endpoint { } // Call the /status HTTP API - pub async fn get_status(&self) -> Result { + pub async fn get_status(&self) -> Result { let client = reqwest::Client::new(); let response = client diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 0d65f6a38d..9ce605089b 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -15,6 +15,17 @@ pub struct GenericAPIError { pub error: String, } +#[derive(Debug, Clone, Serialize)] +pub struct InfoResponse { + pub num_cpus: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ExtensionInstallResponse { + pub extension: PgIdent, + pub version: ExtVersion, +} + /// Response of the /status API #[derive(Serialize, Debug, Deserialize)] #[serde(rename_all = "snake_case")] @@ -28,16 +39,6 @@ pub struct ComputeStatusResponse { pub error: Option, } -#[derive(Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub struct ComputeState { - pub status: ComputeStatus, - /// Timestamp of the last Postgres activity - #[serde(serialize_with = "rfc3339_serialize")] - pub last_active: Option>, - pub error: Option, -} - #[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum ComputeStatus { @@ -78,7 +79,7 @@ impl Display for ComputeStatus { } } -fn rfc3339_serialize(x: &Option>, s: S) -> Result +pub fn rfc3339_serialize(x: &Option>, s: S) -> Result where S: Serializer, { diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 33bdc25785..0ffeeead18 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -91,7 +91,8 @@ tokio-stream = { version = "0.1", features = ["net"] } tokio-util = { version = "0.7", features = ["codec", "compat", "io", "rt"] } toml_edit = { version = "0.22", features = ["serde"] } tonic = { version = "0.12", features = ["tls-roots"] } -tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "log", "util"] } +tower-9fbad63c4bcf4a8f = { package = "tower", version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "util"] } +tower-d8f496e17d97b5cb = { package = "tower", version = "0.5", default-features = false, features = ["log", "make", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } url = { version = "2", features = ["serde"] } From 6149ac88343c455fc99b6fb8e49c5d8e92e7dec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 9 Jan 2025 21:41:49 +0100 Subject: [PATCH 43/44] Handle race between auto-offload and unarchival (#10305) ## 
Problem

Auto-offloading as requested by the compaction task races with unarchival: the compaction task might attempt to offload a timeline that has just been unarchived. By that point it will already have set the timeline to the `Stopping` state, however, which makes the timeline unusable for any purpose. For example:

1. compaction task decides to offload a timeline
2. the timeline gets unarchived
3. `offload_timeline` gets called by the compaction task
   * sets the timeline's state to `Stopping`
   * realizes that the timeline is no longer archived and can't be offloaded, errors out
4. an endpoint can't be started as the timeline is `Stopping` and thus 'can't be found'

A future iteration of the compaction task can't "heal" this state either, as the timeline will still not be archived; the same goes for the other automatic mechanisms. The only ways to heal this are a tenant detach+attach, or alternatively a pageserver restart.

Furthermore, the compaction task is especially prone to such races: it first stores `can_offload` in a variable, then figures out whether compaction is needed (which takes some time), and only then attempts the offload operation, so the window between "check" and "use" is not trivially small. To make it worse, we start the compaction task right after a tenant is attached, and it is a common pattern for pageserver users to attach a tenant and then immediately unarchive a timeline so that an endpoint can be started.

## Solutions not adopted

The simplest solution is to move the `can_offload` check to right before the offload attempt. But this is not a good solution, as no lock is held between that check and timeline shutdown, so races would still be possible, just less likely.

I also explored using the timeline state for this, i.e. adding an additional enum variant, but `Timeline::set_state` is racy (#10297).

## Adopted solution

We use the lock on the timeline's upload queue as an arbiter: either unarchival gets to it first and sours the state for auto-offloading, or auto-offloading shuts the queue down, which stops any parallel unarchival in its tracks. The key part is not releasing the upload queue's lock between checking whether the timeline is archived and shutting the queue down (the actual implementation only sets `shutting_down`, but that has the same effect on `initialized_mut()` as a full shutdown). The rest of the patch follows from this.

We also move setting the timeline state to `Stopping` to after the arbiter has decided the fate of the timeline. For deletions, we do keep it inside `DeleteTimelineFlow::prepare`, however, so that it is called with all of the locks the function acquires held (most importantly the timelines lock). This is only a precautionary measure, as I didn't want to analyze the deletion-related code for possible races.

## Future changes

It might make sense to move the `can_offload` check to right before the offload attempt; maybe some other properties might have changed by then as well. This will not be perfect either, as no lock is held, and I want to keep it out of this change to emphasize that such a move is not the main reason we are race-free now.
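To make the adopted solution concrete, here is a minimal, self-contained sketch of the "decide under one lock" pattern described above, assuming a toy `UploadQueue` with just an `archived` and a `shutting_down` flag. The types and the two free functions are illustrative stand-ins, not the actual pageserver structures; the real logic lives in `RemoteTimelineClient::shutdown_if_archived` and the upload-queue handling in the diff below.

```rust
// Toy model of the arbiter: both the archival check and the shutdown mark
// happen under a single lock acquisition, so unarchival and offloading can
// never both "win".
use std::sync::{Arc, Mutex};
use std::thread;

#[derive(Debug)]
struct UploadQueue {
    archived: bool,
    shutting_down: bool,
}

#[derive(Debug, PartialEq)]
enum OffloadOutcome {
    Offloaded,
    NotArchived,
}

/// Offload side: check "is the timeline still archived?" and set
/// `shutting_down` without releasing the lock in between. Any unarchival
/// that arrives afterwards sees `shutting_down == true` and bails out.
fn shutdown_if_archived(queue: &Arc<Mutex<UploadQueue>>) -> OffloadOutcome {
    let mut q = queue.lock().unwrap();
    if !q.archived {
        // Unarchival won the race; leave the queue alone.
        return OffloadOutcome::NotArchived;
    }
    q.shutting_down = true;
    OffloadOutcome::Offloaded
}

/// Unarchival side: refuses to touch a queue that offloading has already
/// claimed, mirroring the "upload queue not initialized" error in the real code.
fn unarchive(queue: &Arc<Mutex<UploadQueue>>) -> Result<(), &'static str> {
    let mut q = queue.lock().unwrap();
    if q.shutting_down {
        return Err("upload queue is shutting down");
    }
    q.archived = false;
    Ok(())
}

fn main() {
    let queue = Arc::new(Mutex::new(UploadQueue {
        archived: true,
        shutting_down: false,
    }));

    // Race the two operations; exactly one of them gets to the arbiter first.
    let q1 = Arc::clone(&queue);
    let q2 = Arc::clone(&queue);
    let offload = thread::spawn(move || shutdown_if_archived(&q1));
    let unarchival = thread::spawn(move || unarchive(&q2));

    let offload_outcome = offload.join().unwrap();
    let unarchival_outcome = unarchival.join().unwrap();

    // Either the timeline stays live or it gets offloaded, never both.
    assert!(
        (offload_outcome == OffloadOutcome::NotArchived && unarchival_outcome.is_ok())
            || (offload_outcome == OffloadOutcome::Offloaded && unarchival_outcome.is_err()),
        "inconsistent outcome: {offload_outcome:?}, {unarchival_outcome:?}"
    );
}
```

Because both the check and the mark happen under one `MutexGuard`, only the two consistent interleavings covered by the assertion are possible, which is the property the real implementation relies on.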
Fixes #10220 --- pageserver/src/tenant.rs | 15 ++- .../src/tenant/remote_timeline_client.rs | 58 ++++++++++ pageserver/src/tenant/timeline/delete.rs | 11 +- pageserver/src/tenant/timeline/offload.rs | 32 +++--- test_runner/regress/test_timeline_archive.py | 100 ++++++++++++++++++ 5 files changed, 195 insertions(+), 21 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e3dab2fc1d..8e61d09de7 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -48,6 +48,7 @@ use timeline::compaction::GcCompactJob; use timeline::compaction::ScheduledCompactionTask; use timeline::import_pgdata; use timeline::offload::offload_timeline; +use timeline::offload::OffloadError; use timeline::CompactFlags; use timeline::CompactOptions; use timeline::CompactionError; @@ -2039,7 +2040,7 @@ impl Tenant { ) -> Result, TimelineArchivalError> { info!("unoffloading timeline"); - // We activate the timeline below manually, so this must be called on an active timeline. + // We activate the timeline below manually, so this must be called on an active tenant. // We expect callers of this function to ensure this. match self.current_state() { TenantState::Activating { .. } @@ -3100,9 +3101,17 @@ impl Tenant { }; has_pending_task |= pending_task_left.unwrap_or(false); if pending_task_left == Some(false) && *can_offload { - offload_timeline(self, timeline) + pausable_failpoint!("before-timeline-auto-offload"); + match offload_timeline(self, timeline) .instrument(info_span!("offload_timeline", %timeline_id)) - .await?; + .await + { + Err(OffloadError::NotArchived) => { + // Ignore this, we likely raced with unarchival + Ok(()) + } + other => other, + }?; } } diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index b27ac3e933..813111245d 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -304,6 +304,15 @@ pub enum WaitCompletionError { #[derive(Debug, thiserror::Error)] #[error("Upload queue either in unexpected state or hasn't downloaded manifest yet")] pub struct UploadQueueNotReadyError; + +#[derive(Debug, thiserror::Error)] +pub enum ShutdownIfArchivedError { + #[error(transparent)] + NotInitialized(NotInitialized), + #[error("timeline is not archived")] + NotArchived, +} + /// Behavioral modes that enable seamless live migration. /// /// See docs/rfcs/028-pageserver-migration.md to understand how these fit in. @@ -816,6 +825,55 @@ impl RemoteTimelineClient { Ok(need_wait) } + /// Shuts the timeline client down, but only if the timeline is archived. + /// + /// This function and [`Self::schedule_index_upload_for_timeline_archival_state`] use the + /// same lock to prevent races between unarchival and offloading: unarchival requires the + /// upload queue to be initialized, and leaves behind an upload queue where either dirty + /// or clean has archived_at of `None`. offloading leaves behind an uninitialized upload + /// queue. 
+ pub(crate) async fn shutdown_if_archived( + self: &Arc, + ) -> Result<(), ShutdownIfArchivedError> { + { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = guard + .initialized_mut() + .map_err(ShutdownIfArchivedError::NotInitialized)?; + + match ( + upload_queue.dirty.archived_at.is_none(), + upload_queue.clean.0.archived_at.is_none(), + ) { + // The expected case: the timeline is archived and we don't want to unarchive + (false, false) => {} + (true, false) => { + tracing::info!("can't shut down timeline: timeline slated for unarchival"); + return Err(ShutdownIfArchivedError::NotArchived); + } + (dirty_archived, true) => { + tracing::info!(%dirty_archived, "can't shut down timeline: timeline not archived in remote storage"); + return Err(ShutdownIfArchivedError::NotArchived); + } + } + + // Set the shutting_down flag while the guard from the archival check is held. + // This prevents a race with unarchival, as initialized_mut will not return + // an upload queue from this point. + // Also launch the queued tasks like shutdown() does. + if !upload_queue.shutting_down { + upload_queue.shutting_down = true; + upload_queue.queued_operations.push_back(UploadOp::Shutdown); + // this operation is not counted similar to Barrier + self.launch_queued_tasks(upload_queue); + } + } + + self.shutdown().await; + + Ok(()) + } + /// Launch an index-file upload operation in the background, setting `import_pgdata` field. pub(crate) fn schedule_index_upload_for_import_pgdata_state_update( self: &Arc, diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 47a93b19d2..ae44af3fad 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -194,7 +194,9 @@ impl DeleteTimelineFlow { super::debug_assert_current_span_has_tenant_and_timeline_id(); let allow_offloaded_children = false; - let (timeline, mut guard) = Self::prepare(tenant, timeline_id, allow_offloaded_children)?; + let set_stopping = true; + let (timeline, mut guard) = + Self::prepare(tenant, timeline_id, allow_offloaded_children, set_stopping)?; guard.mark_in_progress()?; @@ -334,6 +336,7 @@ impl DeleteTimelineFlow { tenant: &Tenant, timeline_id: TimelineId, allow_offloaded_children: bool, + set_stopping: bool, ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> { // Note the interaction between this guard and deletion guard. // Here we attempt to lock deletion guard when we're holding a lock on timelines. 
@@ -389,8 +392,10 @@ impl DeleteTimelineFlow { } }; - if let TimelineOrOffloaded::Timeline(timeline) = &timeline { - timeline.set_state(TimelineState::Stopping); + if set_stopping { + if let TimelineOrOffloaded::Timeline(timeline) = &timeline { + timeline.set_state(TimelineState::Stopping); + } } Ok((timeline, delete_lock_guard)) diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 15628a9645..6c6b19e8b1 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -1,10 +1,11 @@ use std::sync::Arc; -use pageserver_api::models::TenantState; +use pageserver_api::models::{TenantState, TimelineState}; use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard}; use super::Timeline; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::remote_timeline_client::ShutdownIfArchivedError; use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded}; #[derive(thiserror::Error, Debug)] @@ -36,28 +37,29 @@ pub(crate) async fn offload_timeline( tracing::info!("offloading archived timeline"); let allow_offloaded_children = true; - let (timeline, guard) = - DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children) - .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?; + let set_stopping = false; + let (timeline, guard) = DeleteTimelineFlow::prepare( + tenant, + timeline.timeline_id, + allow_offloaded_children, + set_stopping, + ) + .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?; let TimelineOrOffloaded::Timeline(timeline) = timeline else { tracing::error!("timeline already offloaded, but given timeline object"); return Ok(()); }; - let is_archived = timeline.is_archived(); - match is_archived { - Some(true) => (), - Some(false) => { - tracing::warn!("tried offloading a non-archived timeline"); - return Err(OffloadError::NotArchived); - } - None => { - // This is legal: calls to this function can race with the timeline shutting down - tracing::info!("tried offloading a timeline whose remote storage is not initialized"); - return Err(OffloadError::Cancelled); + match timeline.remote_client.shutdown_if_archived().await { + Ok(()) => {} + Err(ShutdownIfArchivedError::NotInitialized(_)) => { + // Either the timeline is being deleted, the operation is being retried, or we are shutting down. + // Don't return cancelled here to keep it idempotent. } + Err(ShutdownIfArchivedError::NotArchived) => return Err(OffloadError::NotArchived), } + timeline.set_state(TimelineState::Stopping); // Now that the Timeline is in Stopping state, request all the related tasks to shut down. 
timeline.shutdown(super::ShutdownMode::Reload).await; diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 9b3a48add9..bec8270582 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -959,3 +959,103 @@ def test_timeline_offload_generations(neon_env_builder: NeonEnvBuilder): assert gc_summary["remote_storage_errors"] == 0 assert gc_summary["indices_deleted"] > 0 assert gc_summary["tenant_manifests_deleted"] > 0 + + +@pytest.mark.parametrize("end_with_offloaded", [False, True]) +def test_timeline_offload_race_unarchive( + neon_env_builder: NeonEnvBuilder, end_with_offloaded: bool +): + """ + Ensure that unarchive and timeline offload don't race each other + """ + # Regression test for issue https://github.com/neondatabase/neon/issues/10220 + # (automatic) timeline offloading defaults to false for now + neon_env_builder.pageserver_config_override = "timeline_offloading = true" + + failpoint = "before-timeline-auto-offload" + + env = neon_env_builder.init_start() + ps_http = env.pageserver.http_client() + + # Turn off gc and compaction loops: we want to issue them manually for better reliability + tenant_id, initial_timeline_id = env.create_tenant( + conf={ + "gc_period": "0s", + "compaction_period": "1s", + } + ) + + # Create a branch + leaf_timeline_id = env.create_branch("test_ancestor_branch_archive", tenant_id) + + # write some stuff to the leaf + with env.endpoints.create_start( + "test_ancestor_branch_archive", tenant_id=tenant_id + ) as endpoint: + endpoint.safe_psql_many( + [ + "CREATE TABLE foo(key serial primary key, t text default 'data_content')", + "INSERT INTO foo SELECT FROM generate_series(1,1000)", + ] + ) + sum = endpoint.safe_psql("SELECT sum(key) from foo where key % 7 = 1") + + ps_http.configure_failpoints((failpoint, "pause")) + + ps_http.timeline_archival_config( + tenant_id, + leaf_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + leaf_detail = ps_http.timeline_detail( + tenant_id, + leaf_timeline_id, + ) + assert leaf_detail["is_archived"] is True + + # The actual race: get the compaction task to right before + # offloading the timeline and attempt to unarchive it + wait_until(lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}")) + + # This unarchival should go through + ps_http.timeline_archival_config( + tenant_id, + leaf_timeline_id, + state=TimelineArchivalState.UNARCHIVED, + ) + + def timeline_offloaded_api(timeline_id: TimelineId) -> bool: + # TODO add a proper API to check if a timeline has been offloaded or not + return not any( + timeline["timeline_id"] == str(timeline_id) + for timeline in ps_http.timeline_list(tenant_id=tenant_id) + ) + + def leaf_offloaded(): + assert timeline_offloaded_api(leaf_timeline_id) + + # Ensure that we've hit the failed offload attempt + ps_http.configure_failpoints((failpoint, "off")) + wait_until( + lambda: env.pageserver.assert_log_contains( + f".*compaction_loop.*offload_timeline.*{leaf_timeline_id}.*can't shut down timeline.*" + ) + ) + + with env.endpoints.create_start( + "test_ancestor_branch_archive", tenant_id=tenant_id + ) as endpoint: + sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key % 7 = 1") + assert sum == sum_again + + if end_with_offloaded: + # Ensure that offloading still works after all of this + ps_http.timeline_archival_config( + tenant_id, + leaf_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + wait_until(leaf_offloaded) + else: + # Test 
that deletion of leaf timeline works + ps_http.timeline_delete(tenant_id, leaf_timeline_id) From 32f58f822816e0bb81cf4b69e11d31e501380f38 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 06:02:00 +0000 Subject: [PATCH 44/44] Storage release 2025-01-10