From aaf980f70d38d6a6a54494aea1b6b16e5328abf5 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Wed, 18 Dec 2024 11:34:38 +0200
Subject: [PATCH 01/63] Online checkpoint replication state (#9976)

## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1733180965970089

Replication state is checkpointed only by the shutdown checkpoint.
This means that replication snapshots are not removed until compute
shutdown.

## Summary of changes

Checkpoint replication state during online checkpoint

Related Postgres PR:
https://github.com/neondatabase/postgres/pull/546

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 vendor/postgres-v17   | 2 +-
 vendor/revisions.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vendor/postgres-v17 b/vendor/postgres-v17
index 65c4e46baf..7e3f3974bc 160000
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
@@ -1 +1 @@
-Subproject commit 65c4e46baf56ec05412c7dd63d62faff0b33dcfb
+Subproject commit 7e3f3974bc8895938308f94d0e96879ffae638cd
diff --git a/vendor/revisions.json b/vendor/revisions.json
index c8db81c73f..bff2f70931 100644
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,7 +1,7 @@
 {
   "v17": [
     "17.2",
-    "65c4e46baf56ec05412c7dd63d62faff0b33dcfb"
+    "7e3f3974bc8895938308f94d0e96879ffae638cd"
   ],
   "v16": [
     "16.6",

From 85696297c5dc15b74384441f318c9381d99086e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arpad=20M=C3=BCller?= <arpad-m@users.noreply.github.com>
Date: Wed, 18 Dec 2024 13:47:56 +0100
Subject: [PATCH 02/63] Add safekeepers command to storcon_cli for listing
 (#10151)

Add a `safekeepers` subcommand to `storcon_cli` that allows listing the
safekeepers.

```
$ curl -X POST --url http://localhost:1234/control/v1/safekeeper/42 --data \
  '{"active":true, "id":42, "created_at":"2023-10-25T09:11:25Z", "updated_at":"2024-08-28T11:32:43Z","region_id":"neon_local","host":"localhost","port":5454,"http_port":0,"version":123,"availability_zone_id":"us-east-2b"}'
$ cargo run --bin storcon_cli  -- --api http://localhost:1234 safekeepers
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.38s
     Running `target/debug/storcon_cli --api 'http://localhost:1234' safekeepers`
+----+---------+-----------+------+-----------+------------+
| Id | Version | Host      | Port | Http Port | AZ Id      |
+==========================================================+
| 42 | 123     | localhost | 5454 | 0         | us-east-2b |
+----+---------+-----------+------+-----------+------------+
```

Also:

* Don't return the raw `SafekeeperPersistence` struct that contains the
raw database representation, but instead a new
`SafekeeperDescribeResponse` struct.
* The `SafekeeperDescribeResponse` struct leaves out the `active` field on
purpose because we want to deprecate it and replace it with a
`scheduling_policy` one.

Part of https://github.com/neondatabase/neon/issues/9981
---
 control_plane/storcon_cli/src/main.rs         | 30 ++++++++++++++++++-
 libs/pageserver_api/src/controller_api.rs     | 17 +++++++++++
 storage_controller/src/persistence.rs         | 13 ++++++++
 storage_controller/src/service.rs             | 26 +++++++++++-----
 .../regress/test_storage_controller.py        |  2 +-
 5 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs
index df07216fde..6ee1044c18 100644
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -5,7 +5,8 @@ use clap::{Parser, Subcommand};
 use pageserver_api::{
     controller_api::{
         AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
+        SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
+        TenantDescribeResponse, TenantPolicyRequest,
     },
     models::{
         EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -211,6 +212,8 @@ enum Command {
         #[arg(long)]
         timeout: humantime::Duration,
     },
+    /// List safekeepers known to the storage controller
+    Safekeepers {},
 }
 
 #[derive(Parser)]
@@ -1020,6 +1023,31 @@ async fn main() -> anyhow::Result<()> {
                 "Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
             );
         }
+        Command::Safekeepers {} => {
+            let mut resp = storcon_client
+                .dispatch::<(), Vec<SafekeeperDescribeResponse>>(
+                    Method::GET,
+                    "control/v1/safekeeper".to_string(),
+                    None,
+                )
+                .await?;
+
+            resp.sort_by(|a, b| a.id.cmp(&b.id));
+
+            let mut table = comfy_table::Table::new();
+            table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]);
+            for sk in resp {
+                table.add_row([
+                    format!("{}", sk.id),
+                    format!("{}", sk.version),
+                    sk.host,
+                    format!("{}", sk.port),
+                    format!("{}", sk.http_port),
+                    sk.availability_zone_id.to_string(),
+                ]);
+            }
+            println!("{table}");
+        }
     }
 
     Ok(())
diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index ec7b81423a..faf11e487c 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -372,6 +372,23 @@ pub struct MetadataHealthListOutdatedResponse {
     pub health_records: Vec<MetadataHealthRecord>,
 }
 
+/// Publicly exposed safekeeper description
+///
+/// The `active` flag which we have in the DB is not included on purpose: it is deprecated.
+#[derive(Serialize, Deserialize, Clone)]
+pub struct SafekeeperDescribeResponse {
+    pub id: NodeId,
+    pub region_id: String,
+    /// 1 is special, it means just created (not currently posted to storcon).
+    /// Zero or negative is not really expected.
+    /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.
+    pub version: i64,
+    pub host: String,
+    pub port: i32,
+    pub http_port: i32,
+    pub availability_zone_id: String,
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs
index e17fe78d25..cc377e606e 100644
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -11,6 +11,7 @@ use diesel::Connection;
 use itertools::Itertools;
 use pageserver_api::controller_api::AvailabilityZone;
 use pageserver_api::controller_api::MetadataHealthRecord;
+use pageserver_api::controller_api::SafekeeperDescribeResponse;
 use pageserver_api::controller_api::ShardSchedulingPolicy;
 use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy};
 use pageserver_api::models::TenantConfig;
@@ -1241,6 +1242,18 @@ impl SafekeeperPersistence {
             availability_zone_id: &self.availability_zone_id,
         }
     }
+    pub(crate) fn as_describe_response(&self) -> SafekeeperDescribeResponse {
+        // omit the `active` flag on purpose: it is deprecated.
+        SafekeeperDescribeResponse {
+            id: NodeId(self.id as u64),
+            region_id: self.region_id.clone(),
+            version: self.version,
+            host: self.host.clone(),
+            port: self.port,
+            http_port: self.http_port,
+            availability_zone_id: self.availability_zone_id.clone(),
+        }
+    }
 }
 
 #[derive(Insertable, AsChangeset)]
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 42b50835f8..c0c5bc371a 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -46,10 +46,11 @@ use pageserver_api::{
     controller_api::{
         AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability,
         NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy,
-        ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
-        TenantCreateRequest, TenantCreateResponse, TenantCreateResponseShard,
-        TenantDescribeResponse, TenantDescribeResponseShard, TenantLocateResponse,
-        TenantPolicyRequest, TenantShardMigrateRequest, TenantShardMigrateResponse,
+        SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest,
+        ShardsPreferredAzsResponse, TenantCreateRequest, TenantCreateResponse,
+        TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard,
+        TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest,
+        TenantShardMigrateResponse,
     },
     models::{
         SecondaryProgress, TenantConfigPatchRequest, TenantConfigRequest,
@@ -7169,15 +7170,24 @@ impl Service {
 
     pub(crate) async fn safekeepers_list(
         &self,
-    ) -> Result<Vec<crate::persistence::SafekeeperPersistence>, DatabaseError> {
-        self.persistence.list_safekeepers().await
+    ) -> Result<Vec<SafekeeperDescribeResponse>, DatabaseError> {
+        Ok(self
+            .persistence
+            .list_safekeepers()
+            .await?
+            .into_iter()
+            .map(|v| v.as_describe_response())
+            .collect::<Vec<_>>())
     }
 
     pub(crate) async fn get_safekeeper(
         &self,
         id: i64,
-    ) -> Result<crate::persistence::SafekeeperPersistence, DatabaseError> {
-        self.persistence.safekeeper_get(id).await
+    ) -> Result<SafekeeperDescribeResponse, DatabaseError> {
+        self.persistence
+            .safekeeper_get(id)
+            .await
+            .map(|v| v.as_describe_response())
     }
 
     pub(crate) async fn upsert_safekeeper(
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 0be800d103..7062c35e05 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3009,7 +3009,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
 def eq_safekeeper_records(a: dict[str, Any], b: dict[str, Any]) -> bool:
     compared = [dict(a), dict(b)]
 
-    masked_keys = ["created_at", "updated_at"]
+    masked_keys = ["created_at", "updated_at", "active"]
 
     for d in compared:
         # keep deleting these in case we are comparing the body as it will be uploaded by real scripts

From 1d12efc42886bcb204db4c764ea64413da5c8dba Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Wed, 18 Dec 2024 10:37:26 -0500
Subject: [PATCH 03/63] fix(pageserver): allow repartition errors during
 gc-compaction smoke tests (#10164)

## Problem

part of https://github.com/neondatabase/neon/issues/9114

In https://github.com/neondatabase/neon/pull/10127 we fixed the race,
but we didn't add the errors to the allowlist.

## Summary of changes

* Allow repartition errors in the gc-compaction smoke test.

I think it might be worth refactoring the code to allow multiple threads
to get a copy of the repartition status (i.e., using Rcu) in the future.

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 pageserver/src/tenant/timeline/compaction.rs | 2 +-
 test_runner/regress/test_compaction.py       | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 8b6cc8ed84..a4e8f39522 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1823,7 +1823,7 @@ impl Timeline {
         // by estimating the amount of files read for a compaction job. We should also partition on LSN.
         let ((dense_ks, sparse_ks), _) = {
             let Ok(partition) = self.partitioning.try_lock() else {
-                bail!("failed to acquire partition lock");
+                bail!("failed to acquire partition lock during gc-compaction");
             };
             partition.clone()
         };
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index 88873c63c2..aef9a825ee 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -134,6 +134,10 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
     }
 
     env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
+    env.pageserver.allowed_errors.append(
+        r".*failed to acquire partition lock during gc-compaction.*"
+    )
+    env.pageserver.allowed_errors.append(r".*repartition() called concurrently.*")
 
     tenant_id = env.initial_tenant
     timeline_id = env.initial_timeline

From 1668d39b7cbe7d1bbe48152cb1b4024a6e2da90a Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Wed, 18 Dec 2024 16:51:53 +0100
Subject: [PATCH 04/63] safekeeper: fix typo in allowlist for `/profile/heap`
 (#10186)

---
 safekeeper/src/http/routes.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs
index 9bc1bf3409..6186f4c3ba 100644
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -564,7 +564,7 @@ pub fn make_router(
     if conf.http_auth.is_some() {
         router = router.middleware(auth_middleware(|request| {
             const ALLOWLIST_ROUTES: &[&str] =
-                &["/v1/status", "/metrics", "/profile/cpu", "profile/heap"];
+                &["/v1/status", "/metrics", "/profile/cpu", "/profile/heap"];
             if ALLOWLIST_ROUTES.contains(&request.uri().path()) {
                 None
             } else {

From d63602cc7822eb5f9670d7e7926bb412eba1ff3a Mon Sep 17 00:00:00 2001
From: Conrad Ludgate <conrad@neon.tech>
Date: Wed, 18 Dec 2024 16:03:14 +0000
Subject: [PATCH 05/63] chore(proxy): fully remove allow-self-signed-compute
 flag (#10168)

When https://github.com/neondatabase/cloud/pull/21856 is merged, this
flag is no longer necessary.
---
 proxy/src/bin/local_proxy.rs          |  1 -
 proxy/src/bin/proxy.rs                |  7 ---
 proxy/src/cancellation.rs             | 44 +++++++----------
 proxy/src/compute.rs                  | 69 +++------------------------
 proxy/src/config.rs                   |  1 -
 proxy/src/console_redirect_proxy.rs   |  1 -
 proxy/src/control_plane/mod.rs        |  5 +-
 proxy/src/proxy/connect_compute.rs    |  9 +---
 proxy/src/proxy/mod.rs                |  2 -
 test_runner/fixtures/neon_fixtures.py |  1 -
 10 files changed, 25 insertions(+), 115 deletions(-)

diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs
index 968682cf0f..56bbd94850 100644
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -271,7 +271,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
     Ok(Box::leak(Box::new(ProxyConfig {
         tls_config: None,
         metric_collection: None,
-        allow_self_signed_compute: false,
         http_config,
         authentication_config: AuthenticationConfig {
             jwks_cache: JwkCache::default(),
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index e90555e250..3dcf9ca060 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -129,9 +129,6 @@ struct ProxyCliArgs {
     /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
     #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
     connect_compute_lock: String,
-    /// Allow self-signed certificates for compute nodes (for testing)
-    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
-    allow_self_signed_compute: bool,
     #[clap(flatten)]
     sql_over_http: SqlOverHttpArgs,
     /// timeout for scram authentication protocol
@@ -564,9 +561,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         _ => bail!("either both or neither tls-key and tls-cert must be specified"),
     };
 
-    if args.allow_self_signed_compute {
-        warn!("allowing self-signed compute certificates");
-    }
     let backup_metric_collection_config = config::MetricBackupCollectionConfig {
         interval: args.metric_backup_collection_interval,
         remote_storage_config: args.metric_backup_collection_remote_storage.clone(),
@@ -641,7 +635,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
     let config = ProxyConfig {
         tls_config,
         metric_collection,
-        allow_self_signed_compute: args.allow_self_signed_compute,
         http_config,
         authentication_config,
         proxy_protocol_v2: args.proxy_protocol_v2,
diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs
index a58e3961da..ebaea173ae 100644
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -4,7 +4,8 @@ use std::sync::Arc;
 use dashmap::DashMap;
 use ipnet::{IpNet, Ipv4Net, Ipv6Net};
 use once_cell::sync::OnceCell;
-use postgres_client::{tls::MakeTlsConnect, CancelToken};
+use postgres_client::tls::MakeTlsConnect;
+use postgres_client::CancelToken;
 use pq_proto::CancelKeyData;
 use rustls::crypto::ring;
 use thiserror::Error;
@@ -14,17 +15,16 @@ use tracing::{debug, info};
 use uuid::Uuid;
 
 use crate::auth::{check_peer_addr_is_in_list, IpPattern};
+use crate::compute::load_certs;
 use crate::error::ReportableError;
 use crate::ext::LockExt;
 use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
+use crate::postgres_rustls::MakeRustlsConnect;
 use crate::rate_limiter::LeakyBucketRateLimiter;
 use crate::redis::cancellation_publisher::{
     CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
 };
 
-use crate::compute::{load_certs, AcceptEverythingVerifier};
-use crate::postgres_rustls::MakeRustlsConnect;
-
 pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
 pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
 pub(crate) type CancellationHandlerMainInternal = Option<Arc<Mutex<RedisPublisherClient>>>;
@@ -240,7 +240,6 @@ pub struct CancelClosure {
     cancel_token: CancelToken,
     ip_allowlist: Vec<IpPattern>,
     hostname: String, // for pg_sni router
-    allow_self_signed_compute: bool,
 }
 
 impl CancelClosure {
@@ -249,45 +248,34 @@ impl CancelClosure {
         cancel_token: CancelToken,
         ip_allowlist: Vec<IpPattern>,
         hostname: String,
-        allow_self_signed_compute: bool,
     ) -> Self {
         Self {
             socket_addr,
             cancel_token,
             ip_allowlist,
             hostname,
-            allow_self_signed_compute,
         }
     }
     /// Cancels the query running on user's compute node.
     pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> {
         let socket = TcpStream::connect(self.socket_addr).await?;
 
-        let client_config = if self.allow_self_signed_compute {
-            // Allow all certificates for creating the connection. Used only for tests
-            let verifier = Arc::new(AcceptEverythingVerifier);
-            rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
-                .with_safe_default_protocol_versions()
-                .expect("ring should support the default protocol versions")
-                .dangerous()
-                .with_custom_certificate_verifier(verifier)
-        } else {
-            let root_store = TLS_ROOTS
-                .get_or_try_init(load_certs)
-                .map_err(|_e| {
-                    CancelError::IO(std::io::Error::new(
-                        std::io::ErrorKind::Other,
-                        "TLS root store initialization failed".to_string(),
-                    ))
-                })?
-                .clone();
+        let root_store = TLS_ROOTS
+            .get_or_try_init(load_certs)
+            .map_err(|_e| {
+                CancelError::IO(std::io::Error::new(
+                    std::io::ErrorKind::Other,
+                    "TLS root store initialization failed".to_string(),
+                ))
+            })?
+            .clone();
+
+        let client_config =
             rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
                 .with_safe_default_protocol_versions()
                 .expect("ring should support the default protocol versions")
                 .with_root_certificates(root_store)
-        };
-
-        let client_config = client_config.with_no_client_auth();
+                .with_no_client_auth();
 
         let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config);
         let tls = <MakeRustlsConnect as MakeTlsConnect<tokio::net::TcpStream>>::make_tls_connect(
diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index 42df5ff5e3..8dc9b59e81 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -10,7 +10,6 @@ use postgres_client::tls::MakeTlsConnect;
 use postgres_client::{CancelToken, RawConnection};
 use postgres_protocol::message::backend::NoticeResponseBody;
 use pq_proto::StartupMessageParams;
-use rustls::client::danger::ServerCertVerifier;
 use rustls::crypto::ring;
 use rustls::pki_types::InvalidDnsNameError;
 use thiserror::Error;
@@ -251,7 +250,6 @@ impl ConnCfg {
     pub(crate) async fn connect(
         &self,
         ctx: &RequestContext,
-        allow_self_signed_compute: bool,
         aux: MetricsAuxInfo,
         timeout: Duration,
     ) -> Result<PostgresConnection, ConnectionError> {
@@ -259,25 +257,17 @@ impl ConnCfg {
         let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
         drop(pause);
 
-        let client_config = if allow_self_signed_compute {
-            // Allow all certificates for creating the connection
-            let verifier = Arc::new(AcceptEverythingVerifier);
-            rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
-                .with_safe_default_protocol_versions()
-                .expect("ring should support the default protocol versions")
-                .dangerous()
-                .with_custom_certificate_verifier(verifier)
-        } else {
-            let root_store = TLS_ROOTS
-                .get_or_try_init(load_certs)
-                .map_err(ConnectionError::TlsCertificateError)?
-                .clone();
+        let root_store = TLS_ROOTS
+            .get_or_try_init(load_certs)
+            .map_err(ConnectionError::TlsCertificateError)?
+            .clone();
+
+        let client_config =
             rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
                 .with_safe_default_protocol_versions()
                 .expect("ring should support the default protocol versions")
                 .with_root_certificates(root_store)
-        };
-        let client_config = client_config.with_no_client_auth();
+                .with_no_client_auth();
 
         let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config);
         let tls = <MakeRustlsConnect as MakeTlsConnect<tokio::net::TcpStream>>::make_tls_connect(
@@ -320,7 +310,6 @@ impl ConnCfg {
             },
             vec![],
             host.to_string(),
-            allow_self_signed_compute,
         );
 
         let connection = PostgresConnection {
@@ -365,50 +354,6 @@ pub(crate) fn load_certs() -> Result<Arc<rustls::RootCertStore>, Vec<rustls_nati
 }
 static TLS_ROOTS: OnceCell<Arc<rustls::RootCertStore>> = OnceCell::new();
 
-#[derive(Debug)]
-pub(crate) struct AcceptEverythingVerifier;
-impl ServerCertVerifier for AcceptEverythingVerifier {
-    fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
-        use rustls::SignatureScheme;
-        // The schemes for which `SignatureScheme::supported_in_tls13` returns true.
-        vec![
-            SignatureScheme::ECDSA_NISTP521_SHA512,
-            SignatureScheme::ECDSA_NISTP384_SHA384,
-            SignatureScheme::ECDSA_NISTP256_SHA256,
-            SignatureScheme::RSA_PSS_SHA512,
-            SignatureScheme::RSA_PSS_SHA384,
-            SignatureScheme::RSA_PSS_SHA256,
-            SignatureScheme::ED25519,
-        ]
-    }
-    fn verify_server_cert(
-        &self,
-        _end_entity: &rustls::pki_types::CertificateDer<'_>,
-        _intermediates: &[rustls::pki_types::CertificateDer<'_>],
-        _server_name: &rustls::pki_types::ServerName<'_>,
-        _ocsp_response: &[u8],
-        _now: rustls::pki_types::UnixTime,
-    ) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
-        Ok(rustls::client::danger::ServerCertVerified::assertion())
-    }
-    fn verify_tls12_signature(
-        &self,
-        _message: &[u8],
-        _cert: &rustls::pki_types::CertificateDer<'_>,
-        _dss: &rustls::DigitallySignedStruct,
-    ) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
-        Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
-    }
-    fn verify_tls13_signature(
-        &self,
-        _message: &[u8],
-        _cert: &rustls::pki_types::CertificateDer<'_>,
-        _dss: &rustls::DigitallySignedStruct,
-    ) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
-        Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/proxy/src/config.rs b/proxy/src/config.rs
index debd77ac32..33d1d2e9e4 100644
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -25,7 +25,6 @@ use crate::types::Host;
 pub struct ProxyConfig {
     pub tls_config: Option<TlsConfig>,
     pub metric_collection: Option<MetricCollectionConfig>,
-    pub allow_self_signed_compute: bool,
     pub http_config: HttpConfig,
     pub authentication_config: AuthenticationConfig,
     pub proxy_protocol_v2: ProxyProtocolV2,
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs
index 02398fb777..c477822e85 100644
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -213,7 +213,6 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
             params_compat: true,
             params: &params,
             locks: &config.connect_compute_locks,
-            allow_self_signed_compute: config.allow_self_signed_compute,
         },
         &user_info,
         config.wake_compute_retry_config,
diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs
index c0718920b4..0ca1a6aae0 100644
--- a/proxy/src/control_plane/mod.rs
+++ b/proxy/src/control_plane/mod.rs
@@ -73,12 +73,9 @@ impl NodeInfo {
     pub(crate) async fn connect(
         &self,
         ctx: &RequestContext,
-        allow_self_signed_compute: bool,
         timeout: Duration,
     ) -> Result<compute::PostgresConnection, compute::ConnectionError> {
-        self.config
-            .connect(ctx, allow_self_signed_compute, self.aux.clone(), timeout)
-            .await
+        self.config.connect(ctx, self.aux.clone(), timeout).await
     }
 
     pub(crate) fn reuse_settings(&mut self, other: Self) {
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs
index 6da4c90a53..4a30d23985 100644
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -73,9 +73,6 @@ pub(crate) struct TcpMechanism<'a> {
 
     /// connect_to_compute concurrency lock
     pub(crate) locks: &'static ApiLocks<Host>,
-
-    /// Whether we should accept self-signed certificates (for testing)
-    pub(crate) allow_self_signed_compute: bool,
 }
 
 #[async_trait]
@@ -93,11 +90,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
     ) -> Result<PostgresConnection, Self::Error> {
         let host = node_info.config.get_host();
         let permit = self.locks.get_permit(&host).await?;
-        permit.release_result(
-            node_info
-                .connect(ctx, self.allow_self_signed_compute, timeout)
-                .await,
-        )
+        permit.release_result(node_info.connect(ctx, timeout).await)
     }
 
     fn update_connect_config(&self, config: &mut compute::ConnCfg) {
diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs
index 4e5ecda237..dbe174cab7 100644
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -348,8 +348,6 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
             params_compat,
             params: &params,
             locks: &config.connect_compute_locks,
-            // only used for console redirect testing.
-            allow_self_signed_compute: false,
         },
         &user_info,
         config.wake_compute_retry_config,
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 2553a0c99a..9f78ad120b 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -3222,7 +3222,6 @@ class NeonProxy(PgProtocol):
                 # Link auth backend params
                 *["--auth-backend", "link"],
                 *["--uri", NeonProxy.link_auth_uri],
-                *["--allow-self-signed-compute", "true"],
             ]
 
     class ProxyV1(AuthBackend):

From 835287ba3aa1e6a4fea2c1929fbd601de9354218 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Wed, 18 Dec 2024 16:29:47 +0000
Subject: [PATCH 06/63] neon_local: add a `flock` to protect against concurrent
 execution (#10185)

## Problem

`neon_local` has always been unsafe to run concurrently with itself: it
uses simple text files for persistent state, and concurrent runs will
step on each other.

In some test environments we intentionally handle this with mutexes in
python land, but it's fragile to try and always remember to do that.

## Summary of changes

- Add a `flock` based mutex around the `main` function of neon_local,
using the repo directory as the file to lock
- Remove one layer of `Option<>` around `control_plane_api`. This is a
drive-by change, because it was one of the fields that behaved weirdly
when earlier concurrent runs stamped on it.
---
 control_plane/src/bin/neon_local.rs     | 59 ++++++++++++++++---------
 control_plane/src/local_env.rs          | 10 ++---
 control_plane/src/pageserver.rs         | 26 +++++------
 control_plane/src/storage_controller.rs |  4 +-
 4 files changed, 57 insertions(+), 42 deletions(-)

diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs
index 1ea443b026..c73debae4c 100644
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -19,6 +19,7 @@ use control_plane::storage_controller::{
     NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
+use nix::fcntl::{flock, FlockArg};
 use pageserver_api::config::{
     DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
     DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -36,6 +37,8 @@ use safekeeper_api::{
 };
 use std::borrow::Cow;
 use std::collections::{BTreeSet, HashMap};
+use std::fs::File;
+use std::os::fd::AsRawFd;
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
@@ -689,6 +692,21 @@ struct TimelineTreeEl {
     pub children: BTreeSet<TimelineId>,
 }
 
+/// A flock-based guard over the neon_local repository directory
+struct RepoLock {
+    _file: File,
+}
+
+impl RepoLock {
+    fn new() -> Result<Self> {
+        let repo_dir = File::open(local_env::base_path())?;
+        let repo_dir_fd = repo_dir.as_raw_fd();
+        flock(repo_dir_fd, FlockArg::LockExclusive)?;
+
+        Ok(Self { _file: repo_dir })
+    }
+}
+
 // Main entry point for the 'neon_local' CLI utility
 //
 // This utility helps to manage neon installation. That includes following:
@@ -700,9 +718,14 @@ fn main() -> Result<()> {
     let cli = Cli::parse();
 
     // Check for 'neon init' command first.
-    let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
-        handle_init(&args).map(|env| Some(Cow::Owned(env)))
+    let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {
+        (handle_init(&args).map(|env| Some(Cow::Owned(env))), None)
     } else {
+        // This tool uses a collection of simple files to store its state, and consequently
+        // it is not generally safe to run multiple commands concurrently.  Rather than expect
+        // all callers to know this, use a lock file to protect against concurrent execution.
+        let _repo_lock = RepoLock::new().unwrap();
+
         // all other commands need an existing config
         let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
         let original_env = env.clone();
@@ -728,11 +751,12 @@ fn main() -> Result<()> {
             NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
         };
 
-        if &original_env != env {
+        let subcommand_result = if &original_env != env {
             subcommand_result.map(|()| Some(Cow::Borrowed(env)))
         } else {
             subcommand_result.map(|()| None)
-        }
+        };
+        (subcommand_result, Some(_repo_lock))
     };
 
     match subcommand_result {
@@ -922,7 +946,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
     } else {
         // User (likely interactive) did not provide a description of the environment, give them the default
         NeonLocalInitConf {
-            control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
+            control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
             broker: NeonBroker {
                 listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
             },
@@ -1718,18 +1742,15 @@ async fn handle_start_all_impl(
             broker::start_broker_process(env, &retry_timeout).await
         });
 
-        // Only start the storage controller if the pageserver is configured to need it
-        if env.control_plane_api.is_some() {
-            js.spawn(async move {
-                let storage_controller = StorageController::from_env(env);
-                storage_controller
-                    .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                        retry_timeout,
-                    ))
-                    .await
-                    .map_err(|e| e.context("start storage_controller"))
-            });
-        }
+        js.spawn(async move {
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
+                .start(NeonStorageControllerStartArgs::with_default_instance_id(
+                    retry_timeout,
+                ))
+                .await
+                .map_err(|e| e.context("start storage_controller"))
+        });
 
         for ps_conf in &env.pageservers {
             js.spawn(async move {
@@ -1774,10 +1795,6 @@ async fn neon_start_status_check(
     const RETRY_INTERVAL: Duration = Duration::from_millis(100);
     const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
 
-    if env.control_plane_api.is_none() {
-        return Ok(());
-    }
-
     let storcon = StorageController::from_env(env);
 
     let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs
index 032c88a829..489f9c8509 100644
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -76,7 +76,7 @@ pub struct LocalEnv {
 
     // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will
     // be propagated into each pageserver's configuration.
-    pub control_plane_api: Option<Url>,
+    pub control_plane_api: Url,
 
     // Control plane upcall API for storage controller.  If set, this will be propagated into the
     // storage controller's configuration.
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf {
     pub storage_controller: Option<NeonStorageControllerConf>,
     pub pageservers: Vec<NeonLocalInitPageserverConf>,
     pub safekeepers: Vec<SafekeeperConf>,
-    pub control_plane_api: Option<Option<Url>>,
+    pub control_plane_api: Option<Url>,
     pub control_plane_compute_hook_api: Option<Option<Url>>,
 }
 
@@ -535,7 +535,7 @@ impl LocalEnv {
                 storage_controller,
                 pageservers,
                 safekeepers,
-                control_plane_api,
+                control_plane_api: control_plane_api.unwrap(),
                 control_plane_compute_hook_api,
                 branch_name_mappings,
             }
@@ -638,7 +638,7 @@ impl LocalEnv {
                 storage_controller: self.storage_controller.clone(),
                 pageservers: vec![], // it's skip_serializing anyway
                 safekeepers: self.safekeepers.clone(),
-                control_plane_api: self.control_plane_api.clone(),
+                control_plane_api: Some(self.control_plane_api.clone()),
                 control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
                 branch_name_mappings: self.branch_name_mappings.clone(),
             },
@@ -768,7 +768,7 @@ impl LocalEnv {
             storage_controller: storage_controller.unwrap_or_default(),
             pageservers: pageservers.iter().map(Into::into).collect(),
             safekeepers,
-            control_plane_api: control_plane_api.unwrap_or_default(),
+            control_plane_api: control_plane_api.unwrap(),
             control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
             branch_name_mappings: Default::default(),
         };
diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs
index 9d3f018345..ef5b3d6593 100644
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -95,21 +95,19 @@ impl PageServerNode {
 
         let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
 
-        if let Some(control_plane_api) = &self.env.control_plane_api {
-            overrides.push(format!(
-                "control_plane_api='{}'",
-                control_plane_api.as_str()
-            ));
+        overrides.push(format!(
+            "control_plane_api='{}'",
+            self.env.control_plane_api.as_str()
+        ));
 
-            // Storage controller uses the same auth as pageserver: if JWT is enabled
-            // for us, we will also need it to talk to them.
-            if matches!(conf.http_auth_type, AuthType::NeonJWT) {
-                let jwt_token = self
-                    .env
-                    .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
-                    .unwrap();
-                overrides.push(format!("control_plane_api_token='{}'", jwt_token));
-            }
+        // Storage controller uses the same auth as pageserver: if JWT is enabled
+        // for us, we will also need it to talk to them.
+        if matches!(conf.http_auth_type, AuthType::NeonJWT) {
+            let jwt_token = self
+                .env
+                .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
+                .unwrap();
+            overrides.push(format!("control_plane_api_token='{}'", jwt_token));
         }
 
         if !conf.other.contains_key("remote_storage") {
diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs
index b70bd2e1b5..22d2420ed4 100644
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -338,7 +338,7 @@ impl StorageController {
                         .port(),
                 )
             } else {
-                let listen_url = self.env.control_plane_api.clone().unwrap();
+                let listen_url = self.env.control_plane_api.clone();
 
                 let listen = format!(
                     "{}:{}",
@@ -708,7 +708,7 @@ impl StorageController {
         } else {
             // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
             // for general purpose API access.
-            let listen_url = self.env.control_plane_api.clone().unwrap();
+            let listen_url = self.env.control_plane_api.clone();
             Url::from_str(&format!(
                 "http://{}:{}/{path}",
                 listen_url.host_str().unwrap(),

From 3d1c3a80ae0fd2babe42d7fc2293cb2a058ff8cb Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Wed, 18 Dec 2024 13:09:02 -0500
Subject: [PATCH 07/63] feat(pageserver): add compact queue http endpoint
 (#10173)

## Problem

There is no way to get the size of the compaction queue or to inspect its contents.

Part of #9114

## Summary of changes

* Add an API endpoint to get the compaction queue.
* gc_compaction test case now waits until the compaction finishes.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 libs/pageserver_api/src/models.rs       | 64 +++++++++++++++++++++++++
 pageserver/src/http/routes.rs           | 36 +++++++++++++-
 pageserver/src/tenant.rs                | 23 +++++++--
 pageserver/src/tenant/timeline.rs       | 63 ++----------------------
 test_runner/fixtures/pageserver/http.py | 13 ++++-
 test_runner/regress/test_compaction.py  |  6 +++
 6 files changed, 139 insertions(+), 66 deletions(-)

diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 5690b643f0..f3fc9fad76 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -6,6 +6,7 @@ pub mod utilization;
 use camino::Utf8PathBuf;
 pub use utilization::PageserverUtilization;
 
+use core::ops::Range;
 use std::{
     collections::HashMap,
     fmt::Display,
@@ -28,6 +29,7 @@ use utils::{
 };
 
 use crate::{
+    key::Key,
     reltag::RelTag,
     shard::{ShardCount, ShardStripeSize, TenantShardId},
 };
@@ -210,6 +212,68 @@ pub enum TimelineState {
     Broken { reason: String, backtrace: String },
 }
 
+#[serde_with::serde_as]
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub struct CompactLsnRange {
+    pub start: Lsn,
+    pub end: Lsn,
+}
+
+#[serde_with::serde_as]
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub struct CompactKeyRange {
+    #[serde_as(as = "serde_with::DisplayFromStr")]
+    pub start: Key,
+    #[serde_as(as = "serde_with::DisplayFromStr")]
+    pub end: Key,
+}
+
+impl From<Range<Lsn>> for CompactLsnRange {
+    fn from(range: Range<Lsn>) -> Self {
+        Self {
+            start: range.start,
+            end: range.end,
+        }
+    }
+}
+
+impl From<Range<Key>> for CompactKeyRange {
+    fn from(range: Range<Key>) -> Self {
+        Self {
+            start: range.start,
+            end: range.end,
+        }
+    }
+}
+
+impl From<CompactLsnRange> for Range<Lsn> {
+    fn from(range: CompactLsnRange) -> Self {
+        range.start..range.end
+    }
+}
+
+impl From<CompactKeyRange> for Range<Key> {
+    fn from(range: CompactKeyRange) -> Self {
+        range.start..range.end
+    }
+}
+
+impl CompactLsnRange {
+    pub fn above(lsn: Lsn) -> Self {
+        Self {
+            start: lsn,
+            end: Lsn::MAX,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct CompactInfoResponse {
+    pub compact_key_range: Option<CompactKeyRange>,
+    pub compact_lsn_range: Option<CompactLsnRange>,
+    pub sub_compaction: bool,
+}
+
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TimelineCreateRequest {
     pub new_timeline_id: TimelineId,
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index db7d293856..60ef4c3702 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -97,8 +97,8 @@ use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
 use crate::DEFAULT_PG_VERSION;
 use crate::{disk_usage_eviction_task, tenant};
 use pageserver_api::models::{
-    StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
-    TimelineInfo,
+    CompactInfoResponse, StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest,
+    TimelineGcRequest, TimelineInfo,
 };
 use utils::{
     auth::SwappableJwtAuth,
@@ -2039,6 +2039,34 @@ async fn timeline_cancel_compact_handler(
     .await
 }
 
+// Get compact info of a timeline
+async fn timeline_compact_info_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+    let state = get_state(&request);
+    async {
+        let tenant = state
+            .tenant_manager
+            .get_attached_tenant_shard(tenant_shard_id)?;
+        let res = tenant.get_scheduled_compaction_tasks(timeline_id);
+        let mut resp = Vec::new();
+        for item in res {
+            resp.push(CompactInfoResponse {
+                compact_key_range: item.compact_key_range,
+                compact_lsn_range: item.compact_lsn_range,
+                sub_compaction: item.sub_compaction,
+            });
+        }
+        json_response(StatusCode::OK, resp)
+    }
+    .instrument(info_span!("timeline_compact_info", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
+    .await
+}
+
 // Run compaction immediately on given timeline.
 async fn timeline_compact_handler(
     mut request: Request<Body>,
@@ -3400,6 +3428,10 @@ pub fn make_router(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
             |r| api_handler(r, timeline_gc_handler),
         )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
+            |r| api_handler(r, timeline_compact_info_handler),
+        )
         .put(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
             |r| api_handler(r, timeline_compact_handler),
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 99289d5f15..2e4c47c6e4 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3122,6 +3122,23 @@ impl Tenant {
         }
     }
 
+    pub(crate) fn get_scheduled_compaction_tasks(
+        &self,
+        timeline_id: TimelineId,
+    ) -> Vec<CompactOptions> {
+        use itertools::Itertools;
+        let guard = self.scheduled_compaction_tasks.lock().unwrap();
+        guard
+            .get(&timeline_id)
+            .map(|tline_pending_tasks| {
+                tline_pending_tasks
+                    .iter()
+                    .map(|x| x.options.clone())
+                    .collect_vec()
+            })
+            .unwrap_or_default()
+    }
+
     /// Schedule a compaction task for a timeline.
     pub(crate) async fn schedule_compaction(
         &self,
@@ -5759,13 +5776,13 @@ mod tests {
     use timeline::{CompactOptions, DeltaLayerTestDesc};
     use utils::id::TenantId;
 
+    #[cfg(feature = "testing")]
+    use models::CompactLsnRange;
     #[cfg(feature = "testing")]
     use pageserver_api::record::NeonWalRecord;
     #[cfg(feature = "testing")]
     use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
     #[cfg(feature = "testing")]
-    use timeline::CompactLsnRange;
-    #[cfg(feature = "testing")]
     use timeline::GcInfo;
 
     static TEST_KEY: Lazy<Key> =
@@ -9634,7 +9651,7 @@ mod tests {
     #[cfg(feature = "testing")]
     #[tokio::test]
     async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> {
-        use timeline::CompactLsnRange;
+        use models::CompactLsnRange;
 
         let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?;
         let (tenant, ctx) = harness.load().await;
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 87f5a03382..e71cb4db80 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -31,9 +31,9 @@ use pageserver_api::{
     },
     keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning},
     models::{
-        CompactionAlgorithm, CompactionAlgorithmSettings, DownloadRemoteLayersTaskInfo,
-        DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, InMemoryLayerInfo, LayerMapInfo,
-        LsnLease, TimelineState,
+        CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
+        DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
+        InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState,
     },
     reltag::BlockNumber,
     shard::{ShardIdentity, ShardNumber, TenantShardId},
@@ -788,63 +788,6 @@ pub(crate) struct CompactRequest {
     pub sub_compaction_max_job_size_mb: Option<u64>,
 }
 
-#[serde_with::serde_as]
-#[derive(Debug, Clone, serde::Deserialize)]
-pub(crate) struct CompactLsnRange {
-    pub start: Lsn,
-    pub end: Lsn,
-}
-
-#[serde_with::serde_as]
-#[derive(Debug, Clone, serde::Deserialize)]
-pub(crate) struct CompactKeyRange {
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub start: Key,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub end: Key,
-}
-
-impl From<Range<Lsn>> for CompactLsnRange {
-    fn from(range: Range<Lsn>) -> Self {
-        Self {
-            start: range.start,
-            end: range.end,
-        }
-    }
-}
-
-impl From<Range<Key>> for CompactKeyRange {
-    fn from(range: Range<Key>) -> Self {
-        Self {
-            start: range.start,
-            end: range.end,
-        }
-    }
-}
-
-impl From<CompactLsnRange> for Range<Lsn> {
-    fn from(range: CompactLsnRange) -> Self {
-        range.start..range.end
-    }
-}
-
-impl From<CompactKeyRange> for Range<Key> {
-    fn from(range: CompactKeyRange) -> Self {
-        range.start..range.end
-    }
-}
-
-impl CompactLsnRange {
-    #[cfg(test)]
-    #[cfg(feature = "testing")]
-    pub fn above(lsn: Lsn) -> Self {
-        Self {
-            start: lsn,
-            end: Lsn::MAX,
-        }
-    }
-}
-
 #[derive(Debug, Clone, Default)]
 pub(crate) struct CompactOptions {
     pub flags: EnumSet<CompactFlags>,
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index eabdeb1053..378e568622 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -738,6 +738,18 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
         res_json = res.json()
         assert res_json is None
 
+    def timeline_compact_info(
+        self,
+        tenant_id: TenantId | TenantShardId,
+        timeline_id: TimelineId,
+    ) -> Any:
+        res = self.get(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact",
+        )
+        self.verbose_error(res)
+        res_json = res.json()
+        return res_json
+
     def timeline_compact(
         self,
         tenant_id: TenantId | TenantShardId,
@@ -749,7 +761,6 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
         enhanced_gc_bottom_most_compaction=False,
         body: dict[str, Any] | None = None,
     ):
-        self.is_testing_enabled_or_skip()
         query = {}
         if force_repartition:
             query["force_repartition"] = "true"
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index aef9a825ee..ae48a8fc27 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -176,6 +176,12 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
 
         workload.churn_rows(row_count, env.pageserver.id)
 
+    def compaction_finished():
+        queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))
+        assert queue_depth == 0
+
+    wait_until(compaction_finished, timeout=60)
+
     # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
     env.pageserver.assert_log_contains(
         "scheduled_compact_timeline.*picked .* layers for compaction"

From 6d3e8096fcad394d387b59fa300f07fb19613760 Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Wed, 18 Dec 2024 13:10:05 -0500
Subject: [PATCH 08/63] refactor(test): tighten up test_gc_feedback (#10126)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

In https://github.com/neondatabase/neon/pull/8103 we changed the test
case to have more test coverage of gc_compaction. Now that we have
`test_gc_compaction_smoke`, we can revert this test case to serve its
original purpose and revert the parameter changes.

part of https://github.com/neondatabase/neon/issues/9114

## Summary of changes

* Revert pitr_interval from 60s to 10s.
* Assert the physical/logical size ratio in the benchmark.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
---
 test_runner/performance/test_gc_feedback.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/test_runner/performance/test_gc_feedback.py b/test_runner/performance/test_gc_feedback.py
index 07f244da0c..acb7b56fd0 100644
--- a/test_runner/performance/test_gc_feedback.py
+++ b/test_runner/performance/test_gc_feedback.py
@@ -22,7 +22,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
             "checkpoint_distance": f"{1024 ** 2}",
             "compaction_target_size": f"{1024 ** 2}",
             # set PITR interval to be small, so we can do GC
-            "pitr_interval": "60 s",
+            "pitr_interval": "10 s",
             # "compaction_threshold": "3",
             # "image_creation_threshold": "2",
         }
@@ -32,6 +32,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
     n_steps = 10
     n_update_iters = 100
     step_size = 10000
+    branch_created = 0
     with endpoint.cursor() as cur:
         cur.execute("SET statement_timeout='1000s'")
         cur.execute(
@@ -66,6 +67,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
         if mode == "with_snapshots":
             if step == n_steps / 2:
                 env.create_branch("child")
+                branch_created += 1
 
     max_num_of_deltas_above_image = 0
     max_total_num_of_deltas = 0
@@ -142,6 +144,15 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
     with layer_map_path.open("w") as f:
         f.write(json.dumps(client.timeline_layer_map_info(tenant_id, timeline_id)))
 
+    # We should have collected all garbage
+    if mode == "normal":
+        # in theory we should get physical size ~= logical size, but given that gc interval is 10s,
+        # and the layer has indexes that might contribute to the fluctuation, we allow a small margin
+        # of 1 here, and the end ratio we are asserting is 1 (margin) + 1 (expected) = 2.
+        assert physical_size / logical_size < 2
+    elif mode == "with_snapshots":
+        assert physical_size / logical_size < (2 + branch_created)
+
 
 @pytest.mark.timeout(10000)
 def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):

From 61fcf64c22b7464aa9beb5f22ec9ded96891a12f Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Wed, 18 Dec 2024 21:15:38 +0200
Subject: [PATCH 09/63] Fix flakiness of
 test_physical_and_logical_replicaiton.py (#10176)

## Problem

See https://github.com/neondatabase/neon/issues/10037
test_physical_and_logical_replicaiton.py sometimes fails.

## Summary of changes

Add `wait_replica_caughtup` to wait for replica sync

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 test_runner/regress/test_physical_and_logical_replicaiton.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test_runner/regress/test_physical_and_logical_replicaiton.py b/test_runner/regress/test_physical_and_logical_replicaiton.py
index ad2d0871b8..3f9824ee67 100644
--- a/test_runner/regress/test_physical_and_logical_replicaiton.py
+++ b/test_runner/regress/test_physical_and_logical_replicaiton.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import time
 
-from fixtures.neon_fixtures import NeonEnv, logical_replication_sync
+from fixtures.neon_fixtures import NeonEnv, logical_replication_sync, wait_replica_caughtup
 
 
 def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonEnv, vanilla_pg):
@@ -38,6 +38,8 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE
     for pk in range(n_records):
         p_cur.execute("insert into t (pk) values (%s)", (pk,))
 
+    wait_replica_caughtup(primary, secondary)
+
     s_cur.execute("select count(*) from t")
     assert s_cur.fetchall()[0][0] == n_records
 

From cc138b56f983c8fc66e737c5e02898121fdd72ec Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Thu, 19 Dec 2024 04:45:06 -0500
Subject: [PATCH 10/63] fix(pageserver): run psql in thread to avoid blocking
 (#10177)

## Problem

ref https://github.com/neondatabase/neon/issues/10170
ref https://github.com/neondatabase/neon/issues/9994

The psql command blocks the main thread, causing other async tasks
to time out (e.g., HTTP connect). Therefore, we need to move it to an I/O
executor thread.

## Summary of changes

* run psql connection in a thread

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: John Spray <john@neon.tech>
---
 .../regress/test_pageserver_layer_rolling.py    | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/test_runner/regress/test_pageserver_layer_rolling.py b/test_runner/regress/test_pageserver_layer_rolling.py
index 706da1e35e..fcc465f90a 100644
--- a/test_runner/regress/test_pageserver_layer_rolling.py
+++ b/test_runner/regress/test_pageserver_layer_rolling.py
@@ -22,7 +22,10 @@ CHECKPOINT_TIMEOUT_SECONDS = 60
 
 
 async def run_worker_for_tenant(
-    env: NeonEnv, entries: int, tenant: TenantId, offset: int | None = None
+    env: NeonEnv,
+    entries: int,
+    tenant: TenantId,
+    offset: int | None = None,
 ) -> Lsn:
     if offset is None:
         offset = 0
@@ -37,12 +40,20 @@ async def run_worker_for_tenant(
         finally:
             await conn.close(timeout=10)
 
-        last_flush_lsn = Lsn(ep.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
+        loop = asyncio.get_running_loop()
+        sql = await loop.run_in_executor(
+            None, lambda ep: ep.safe_psql("SELECT pg_current_wal_flush_lsn()"), ep
+        )
+        last_flush_lsn = Lsn(sql[0][0])
         return last_flush_lsn
 
 
 async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> tuple[TenantId, TimelineId, Lsn]:
-    tenant, timeline = env.create_tenant(conf=tenant_conf)
+    loop = asyncio.get_running_loop()
+    # capture tenant_conf by specifying `tenant_conf=tenant_conf`, otherwise it will be evaluated to some random value
+    tenant, timeline = await loop.run_in_executor(
+        None, lambda tenant_conf, env: env.create_tenant(conf=tenant_conf), tenant_conf, env
+    )
     last_flush_lsn = await run_worker_for_tenant(env, entries, tenant)
     return tenant, timeline, last_flush_lsn
 

From a1b0558493930dca4f5c86db658ad295b9beabd6 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <christian@neon.tech>
Date: Thu, 19 Dec 2024 11:04:17 +0100
Subject: [PATCH 11/63] fast import: importer: use aws s3 cli (#10162)

## Problem

s5cmd doesn't pick up the pod service account

```
2024/12/16 16:26:01 Ignoring, HTTP credential provider invalid endpoint host, "169.254.170.23", only loopback hosts are allowed. <nil>
ERROR "ls s3://neon-dev-bulk-import-us-east-2/import-pgdata/fast-import/v1/br-wandering-hall-w2xobawv": NoCredentialProviders: no valid providers in chain. Deprecated. For verbose messaging see aws.Config.CredentialsChainVerboseErrors
```

## Summary of changes

Switch to the official AWS CLI.


## Testing

Tested the pre-merge image in staging, using `job_image` override in
project settings.


https://neondb.slack.com/archives/C033RQ5SPDH/p1734554944391949?thread_ts=1734368383.258759&cid=C033RQ5SPDH

## Future Work

Switch back to s5cmd once https://github.com/peak/s5cmd/pull/769 gets
merged.

## Refs

- fixes https://github.com/neondatabase/cloud/issues/21876

---------

Co-authored-by: Gleb Novikov <NanoBjorn@users.noreply.github.com>
---
 compute/compute-node.Dockerfile               | 24 ++++++++++---------
 compute_tools/src/bin/fast_import.rs          | 10 ++++----
 .../fast_import/{s5cmd.rs => aws_s3_sync.rs}  | 13 ++++------
 3 files changed, 23 insertions(+), 24 deletions(-)
 rename compute_tools/src/bin/fast_import/{s5cmd.rs => aws_s3_sync.rs} (50%)

diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile
index 9f1f3b7343..5e7b4e8287 100644
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1556,28 +1556,30 @@ RUN apt update && \
         locales \
         procps \
         ca-certificates \
+        curl \
+        unzip \
         $VERSION_INSTALLS && \
     apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
     localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
 
-# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
-# used by fast_import
+# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step)
 ARG TARGETARCH
-ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
 RUN set -ex; \
-    \
-    # Determine the expected checksum based on TARGETARCH
     if [ "${TARGETARCH}" = "amd64" ]; then \
-        CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
+        TARGETARCH_ALT="x86_64"; \
+        CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
     elif [ "${TARGETARCH}" = "arm64" ]; then \
-        CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
+        TARGETARCH_ALT="aarch64"; \
+        CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
     else \
         echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
     fi; \
-    \
-    # Compute and validate the checksum
-    echo "${CHECKSUM}  /tmp/s5cmd.deb" | sha256sum -c -
-RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
+    curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
+    echo "${CHECKSUM}  /tmp/awscliv2.zip" | sha256sum -c -; \
+    unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
+    /tmp/awscliv2/aws/install; \
+    rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \
+    true
 
 ENV LANG=en_US.utf8
 USER postgres
diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs
index b6db3eb11a..793ec4cf10 100644
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -34,12 +34,12 @@ use nix::unistd::Pid;
 use tracing::{info, info_span, warn, Instrument};
 use utils::fs_ext::is_directory_empty;
 
+#[path = "fast_import/aws_s3_sync.rs"]
+mod aws_s3_sync;
 #[path = "fast_import/child_stdio_to_log.rs"]
 mod child_stdio_to_log;
 #[path = "fast_import/s3_uri.rs"]
 mod s3_uri;
-#[path = "fast_import/s5cmd.rs"]
-mod s5cmd;
 
 #[derive(clap::Parser)]
 struct Args {
@@ -326,7 +326,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
     }
 
     info!("upload pgdata");
-    s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
+    aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
         .await
         .context("sync dump directory to destination")?;
 
@@ -334,10 +334,10 @@ pub(crate) async fn main() -> anyhow::Result<()> {
     {
         let status_dir = working_directory.join("status");
         std::fs::create_dir(&status_dir).context("create status directory")?;
-        let status_file = status_dir.join("status");
+        let status_file = status_dir.join("pgdata");
         std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
             .context("write status file")?;
-        s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
+        aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
             .await
             .context("sync status directory to destination")?;
     }
diff --git a/compute_tools/src/bin/fast_import/s5cmd.rs b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
similarity index 50%
rename from compute_tools/src/bin/fast_import/s5cmd.rs
rename to compute_tools/src/bin/fast_import/aws_s3_sync.rs
index d2d9a79736..5fa58c8f87 100644
--- a/compute_tools/src/bin/fast_import/s5cmd.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -4,24 +4,21 @@ use camino::Utf8Path;
 use super::s3_uri::S3Uri;
 
 pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
-    let mut builder = tokio::process::Command::new("s5cmd");
-    // s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
-    if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
-        builder.arg("--endpoint-url").arg(val);
-    }
+    let mut builder = tokio::process::Command::new("aws");
     builder
+        .arg("s3")
         .arg("sync")
         .arg(local.as_str())
         .arg(remote.to_string());
     let st = builder
         .spawn()
-        .context("spawn s5cmd")?
+        .context("spawn aws s3 sync")?
         .wait()
         .await
-        .context("wait for s5cmd")?;
+        .context("wait for aws s3 sync")?;
     if st.success() {
         Ok(())
     } else {
-        Err(anyhow::anyhow!("s5cmd failed"))
+        Err(anyhow::anyhow!("aws s3 sync failed"))
     }
 }

From 43dc03459d42ec4c7ca028e48f5a0d8994ecf983 Mon Sep 17 00:00:00 2001
From: Peter Bendel <peterbendel@neon.tech>
Date: Thu, 19 Dec 2024 11:25:44 +0100
Subject: [PATCH 12/63] Run pgbench on 10 GB scale factor on database with n
 relations (e.g. 10k) (#10172)

## Problem

We want to verify whether, and by how much, pgbench throughput and latency
on Neon suffer when the database also contains many other relations.

## Summary of changes

Modify the benchmarking.yml pgbench-compare job to
- create an additional project at scale factor 10 GiB
- before running pgbench add n tables (initially 10k) to the database
- then compare the pgbench throughput and latency to the existing
pgbench-compare at 10 GiB scale factor

We use a realistic template for the n relations that is a partitioned
table with some realistic data types, indexes and constraints - similar
to a table that we use internally.

Example run:
https://github.com/neondatabase/neon/actions/runs/12377565956/job/34547386959
---
 .github/workflows/benchmarking.yml            |  25 ++-
 .../many_relations/create_many_relations.sql  | 199 ++++++++++++++++++
 .../performance/test_perf_many_relations.py   |  66 ++++++
 3 files changed, 288 insertions(+), 2 deletions(-)
 create mode 100644 test_runner/performance/many_relations/create_many_relations.sql
 create mode 100644 test_runner/performance/test_perf_many_relations.py

diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index bbdcf5ef49..ab0f2a6155 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -308,6 +308,7 @@ jobs:
           "image": [ "'"$image_default"'" ],
           "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier",       "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
                       { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                       { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                       { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
                       { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-new",      "db_size": "10gb","runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
@@ -410,7 +411,7 @@ jobs:
         aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
 
     - name: Create Neon Project
-      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
+      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
       id: create-neon-project
       uses: ./.github/actions/neon-project-create
       with:
@@ -429,7 +430,7 @@ jobs:
           neonvm-captest-sharding-reuse)
             CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
             ;;
-          neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
+          neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
             CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
             ;;
           rds-aurora)
@@ -446,6 +447,26 @@ jobs:
 
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
+    # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB 
+    # without (neonvm-captest-new)
+    # and with (neonvm-captest-new-many-tables) many relations in the database
+    - name: Create many relations before the run
+      if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        TEST_NUM_RELATIONS: 10000
+
     - name: Benchmark init
       uses: ./.github/actions/run-python-test-set
       with:
diff --git a/test_runner/performance/many_relations/create_many_relations.sql b/test_runner/performance/many_relations/create_many_relations.sql
new file mode 100644
index 0000000000..1b3673c9e1
--- /dev/null
+++ b/test_runner/performance/many_relations/create_many_relations.sql
@@ -0,0 +1,199 @@
+-- create a schema that simulates Neon control plane operations table
+-- however use partitioned operations tables with many (e.g. 500) child partition tables per table
+-- in summary we create multiple of these partitioned operations tables (with 500 childs each) - until we reach the requested number of tables
+
+
+-- first we need some other tables that can be referenced by the operations table
+
+--  Table for branches
+CREATE TABLE public.branches (
+    id text PRIMARY KEY
+);
+
+-- Table for endpoints
+CREATE TABLE public.endpoints (
+    id text PRIMARY KEY
+);
+
+-- Table for projects
+CREATE TABLE public.projects (
+    id text PRIMARY KEY
+);
+
+INSERT INTO public.branches (id)
+VALUES ('branch_1');
+
+-- Insert one row into endpoints
+INSERT INTO public.endpoints (id)
+VALUES ('endpoint_1');
+
+-- Insert one row into projects
+INSERT INTO public.projects (id)
+VALUES ('project_1');
+
+-- now we create a procedure that can create n operations tables
+-- we do that in a procedure to save roundtrip latency when scaling the test to many tables
+-- prefix is the base table name, e.g. 'operations_scale_1000' if we create 1000 tables
+CREATE OR REPLACE PROCEDURE create_partitioned_tables(prefix text, n INT)
+LANGUAGE plpgsql AS $$
+DECLARE
+    table_name TEXT;  -- Variable to hold table names dynamically
+    i INT;            -- Counter for the loop
+BEGIN
+    -- Loop to create n partitioned tables
+    FOR i IN 1..n LOOP
+        table_name := format('%s_%s', prefix, i);
+
+        -- Create the partitioned table
+        EXECUTE format(
+            'CREATE TABLE public.%s (
+                project_id character varying NOT NULL,
+                id uuid NOT NULL,
+                status integer,
+                action character varying NOT NULL,
+                error character varying,
+                created_at timestamp with time zone NOT NULL DEFAULT now(),
+                updated_at timestamp with time zone NOT NULL DEFAULT now(),
+                spec jsonb,
+                retry_at timestamp with time zone,
+                failures_count integer DEFAULT 0,
+                metadata jsonb NOT NULL DEFAULT ''{}''::jsonb,
+                executor_id text NOT NULL,
+                attempt_duration_ms integer,
+                metrics jsonb DEFAULT ''{}''::jsonb,
+                branch_id text,
+                endpoint_id text,
+                next_operation_id uuid,
+                compute_id text,
+                connection_attempt_at timestamp with time zone,
+                concurrency_key text,
+                queue_id text,
+                CONSTRAINT %s_pkey PRIMARY KEY (id, created_at),
+                CONSTRAINT %s_branch_id_fk FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE,
+                CONSTRAINT %s_endpoint_id_fk FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE,
+                CONSTRAINT %s_next_operation_id_fk FOREIGN KEY (next_operation_id, created_at) REFERENCES %s(id, created_at),
+                CONSTRAINT %s_project_id_fk FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
+            ) PARTITION BY RANGE (created_at)',
+            table_name, table_name, table_name, table_name, table_name, table_name, table_name
+        );
+
+        -- Add indexes for the partitioned table
+        EXECUTE format('CREATE INDEX index_%s_on_next_operation_id ON public.%s (next_operation_id)', table_name, table_name);
+        EXECUTE format('CREATE INDEX index_%s_on_project_id ON public.%s (project_id)', table_name, table_name);
+        EXECUTE format('CREATE INDEX %s_branch_id ON public.%s (branch_id)', table_name, table_name);
+        EXECUTE format('CREATE INDEX %s_branch_id_created_idx ON public.%s (branch_id, created_at)', table_name, table_name);
+        EXECUTE format('CREATE INDEX %s_created_at_idx ON public.%s (created_at)', table_name, table_name);
+        EXECUTE format('CREATE INDEX %s_created_at_project_id_id_cond_idx ON public.%s (created_at, project_id, id)', table_name, table_name);
+        EXECUTE format('CREATE INDEX %s_endpoint_id ON public.%s (endpoint_id)', table_name, table_name);
+        EXECUTE format(
+            'CREATE INDEX %s_for_redo_worker_idx ON public.%s (executor_id) WHERE status <> 1',
+            table_name, table_name
+        );
+        EXECUTE format(
+            'CREATE INDEX %s_project_id_status_index ON public.%s ((project_id::text), status)',
+            table_name, table_name
+        );
+        EXECUTE format(
+            'CREATE INDEX %s_status_not_finished ON public.%s (status) WHERE status <> 1',
+            table_name, table_name
+        );
+        EXECUTE format('CREATE INDEX %s_updated_at_desc_idx ON public.%s (updated_at DESC)', table_name, table_name);
+        EXECUTE format(
+            'CREATE INDEX %s_with_failures ON public.%s (failures_count) WHERE failures_count > 0',
+            table_name, table_name
+        );
+    END LOOP;
+END;
+$$;
+
+-- next we create a procedure that can add the child partitions (one per day) to each of the operations tables
+CREATE OR REPLACE PROCEDURE create_operations_partitions(
+    table_name TEXT, 
+    start_date DATE,
+    end_date DATE
+)
+LANGUAGE plpgsql AS $$
+DECLARE
+    partition_date DATE;
+    partition_name TEXT;
+    counter INT := 0;  -- Counter to track the number of tables created in the current transaction
+BEGIN
+    partition_date := start_date;
+
+    -- Create partitions in batches
+    WHILE partition_date < end_date LOOP
+        partition_name := format('%s_%s', table_name, to_char(partition_date,'YYYY_MM_DD'));
+
+        EXECUTE format(
+            'CREATE TABLE IF NOT EXISTS public.%s PARTITION OF public.%s
+             FOR VALUES FROM (''%s'') TO (''%s'')',
+            partition_name,
+            table_name,
+            partition_date,
+            partition_date + INTERVAL '1 day'
+        );
+
+        counter := counter + 1;
+
+        -- Commit and reset counter after every 100 partitions
+        IF counter >= 100 THEN
+            COMMIT;
+            counter := 0;  -- Reset the counter
+        END IF;
+
+        -- Advance to the next day
+        partition_date := partition_date + INTERVAL '1 day';
+    END LOOP;
+
+    -- Final commit for remaining partitions
+    IF counter > 0 THEN
+        COMMIT;
+    END IF;
+
+    -- Insert synthetic rows into each partition
+    EXECUTE format(
+        'INSERT INTO %I (
+            project_id,
+            branch_id,
+            endpoint_id,
+            id,
+            status,
+            action,
+            created_at,
+            updated_at,
+            spec,
+            metadata,
+            executor_id,
+            failures_count
+        )
+        SELECT 
+            ''project_1'',                                   -- project_id
+            ''branch_1'',                                    -- branch_id
+            ''endpoint_1'',                                  -- endpoint_id
+            ''e8bba687-0df9-4291-bfcd-7d5f6aa7c158'',          -- unique id
+            1,                                               -- status
+            ''SYNTHETIC_ACTION'',                            -- action
+            gs::timestamp + interval ''0 ms'',               -- created_at
+            gs::timestamp + interval ''1 minute'',           -- updated_at
+            ''{"key": "value"}'',                            -- spec (JSONB)
+            ''{"metadata_key": "metadata_value"}'',          -- metadata (JSONB)
+            ''executor_1'',                                  -- executor_id
+            0                                                -- failures_count
+        FROM generate_series(%L, %L::DATE - INTERVAL ''1 day'', INTERVAL ''1 day'') AS gs',
+        table_name, start_date, end_date
+    );
+    
+    -- Commit the inserted rows
+    COMMIT;
+END;
+$$;
+
+-- we can now create partitioned tables using something like
+-- CALL create_partitioned_tables('operations_scale_1000' ,10);
+
+-- and we can create the child partitions for a table using something like
+-- CALL create_operations_partitions(
+--     'operations_scale_1000_1',
+--     '2000-01-01',            -- Start date
+--     ('2000-01-01'::DATE + INTERVAL '1 day' * 500)::DATE  -- End date (start date + number of days)
+-- );
diff --git a/test_runner/performance/test_perf_many_relations.py b/test_runner/performance/test_perf_many_relations.py
new file mode 100644
index 0000000000..0ee0efe8b9
--- /dev/null
+++ b/test_runner/performance/test_perf_many_relations.py
@@ -0,0 +1,66 @@
+import os
+from pathlib import Path
+
+import pytest
+from fixtures.compare_fixtures import RemoteCompare
+from fixtures.log_helper import log
+
+
+def get_num_relations(default: int = 1000) -> list[int]:
+    # We parametrize each run with scale specifying the number of wanted child partitions.
+    # Databases are pre-created and passed through BENCHMARK_CONNSTR env variable.
+    scales = os.getenv("TEST_NUM_RELATIONS", default=str(default))
+    rv = []
+    for s in scales.split(","):
+        scale = int(s)
+        rv.append(scale)
+    return rv
+
+
+@pytest.mark.parametrize("num_relations", get_num_relations())
+@pytest.mark.remote_cluster
+def test_perf_many_relations(remote_compare: RemoteCompare, num_relations: int):
+    """
+    Test creating many relations in a single database.
+    We use partitioned tables with child tables, indexes and constraints to have a realistic schema.
+    Also we include some common data types like text, uuid, timestamp, JSONB, etc.
+
+    see many_relations/create_many_relations.sql
+    """
+    env = remote_compare
+
+    # prepare some base tables and the plpgsql procedures that we use to create the tables
+    sql_file = Path(__file__).parent / "many_relations" / "create_many_relations.sql"
+    env.pg_bin.run_capture(["psql", env.pg.connstr(), "-f", str(sql_file)])
+
+    num_parent_tables = num_relations // 500 + 1
+    log.info(f"Creating {num_relations} relations in {num_parent_tables} parent tables")
+
+    log.info(f"Creating {num_parent_tables} parent tables")
+    sql = f"CALL create_partitioned_tables('operations_scale_{num_relations}', {num_parent_tables})"
+    log.info(sql)
+    env.pg_bin.run_capture(["psql", env.pg.connstr(), "-c", sql])
+
+    current_table = 0
+    num_relations_remaining = num_relations
+
+    # now run and measure the actual relation creation
+    while num_relations_remaining > 0:
+        current_table += 1
+        parent_table_name = f"operations_scale_{num_relations}_{current_table}"
+        if num_relations_remaining > 500:
+            num_relations_to_create = 500
+        else:
+            num_relations_to_create = num_relations_remaining
+        num_relations_remaining -= num_relations_to_create
+        log.info(
+            f"Creating {num_relations_to_create} child tables in partitioned parent table '{parent_table_name}'"
+        )
+        sql = f"CALL create_operations_partitions( '{parent_table_name}', '2000-01-01', ('2000-01-01'::DATE + INTERVAL '1 day' * {num_relations_to_create})::DATE)"
+        log.info(sql)
+        with env.zenbenchmark.record_duration(
+            f"CREATE_TABLE/{current_table}/{num_relations_to_create}"
+        ):
+            env.pg_bin.run_capture(
+                ["psql", env.pg.connstr(options="-cstatement_timeout=1000s "), "-c", sql]
+            )

From b135194090369d8e5452c9ee1c6e7c37cc9ba8bd Mon Sep 17 00:00:00 2001
From: Folke Behrens <folke@neon.tech>
Date: Thu, 19 Dec 2024 11:37:08 +0100
Subject: [PATCH 13/63] proxy: Delay SASL complete message until auth is done
 (#10189)

The final SASL complete message can be bundled with the remainder of the
auth flow messages until ReadyForQuery.

neondatabase/cloud#19184
---
 proxy/src/auth/backend/mod.rs | 3 +++
 proxy/src/sasl/stream.rs      | 8 +++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs
index 50cb94bfa0..0c9a7f7825 100644
--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -678,6 +678,9 @@ mod tests {
         .await
         .unwrap();
 
+        // flush the final server message
+        stream.flush().await.unwrap();
+
         handle.await.unwrap();
     }
 
diff --git a/proxy/src/sasl/stream.rs b/proxy/src/sasl/stream.rs
index f1c916daa2..ac77556566 100644
--- a/proxy/src/sasl/stream.rs
+++ b/proxy/src/sasl/stream.rs
@@ -50,6 +50,12 @@ impl<S: AsyncWrite + Unpin> SaslStream<'_, S> {
         self.stream.write_message(&msg.to_reply()).await?;
         Ok(())
     }
+
+    // Queue a SASL message for the client.
+    fn send_noflush(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> {
+        self.stream.write_message_noflush(&msg.to_reply())?;
+        Ok(())
+    }
 }
 
 /// SASL authentication outcome.
@@ -85,7 +91,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> SaslStream<'_, S> {
                     continue;
                 }
                 Step::Success(result, reply) => {
-                    self.send(&ServerMessage::Final(&reply)).await?;
+                    self.send_noflush(&ServerMessage::Final(&reply))?;
                     Outcome::Success(result)
                 }
                 Step::Failure(reason) => Outcome::Failure(reason),

From 65042cbadd0426c43499bb7675e671b5c6e980e9 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Thu, 19 Dec 2024 10:58:49 +0000
Subject: [PATCH 14/63] tests: use high IO concurrency in
 `test_pgdata_import_smoke`, use `effective_io_concurrency=2` in tests by
 default (#10114)

## Problem

`test_pgdata_import_smoke` writes two gigabytes of pages and then reads
them back serially. This is CPU bottlenecked and results in a long
runtime, and sensitivity to CPU load from other tests on the same
machine.

Closes: https://github.com/neondatabase/neon/issues/10071

## Summary of changes

- Use effective_io_concurrency=32 when doing sequential scans through
2GiB of pages in test_pgdata_import_smoke. This is a ~10x runtime
decrease in the parts of the test that do sequential scans.
- Also set `effective_io_concurrency=2` for tests, as I noticed while
debugging that we were doing all getpage requests serially, which is bad
for checking the stability of the batching code.
---
 control_plane/src/endpoint.rs             |  4 ++++
 test_runner/regress/test_import_pgdata.py | 14 +++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 1fdf326051..5ebf842813 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -316,6 +316,10 @@ impl Endpoint {
         // and can cause errors like 'no unpinned buffers available', see
         // <https://github.com/neondatabase/neon/issues/9956>
         conf.append("shared_buffers", "1MB");
+        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
+        // batching logic.  Set this to 2 so that we exercise the code a bit without letting
+        // individual tests do a lot of concurrent work on underpowered test machines
+        conf.append("effective_io_concurrency", "2");
         conf.append("fsync", "off");
         conf.append("max_connections", "100");
         conf.append("wal_level", "logical");
diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py
index 29229b73c1..6ea2393a9d 100644
--- a/test_runner/regress/test_import_pgdata.py
+++ b/test_runner/regress/test_import_pgdata.py
@@ -84,6 +84,8 @@ def test_pgdata_import_smoke(
     elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD:
         target_relblock_size = (shard_count or 1) * stripe_size * 8192 * 2
     elif rel_block_size == RelBlockSize.MULTIPLE_RELATION_SEGMENTS:
+        # Postgres uses a 1GiB segment size, fixed at compile time, so we must use >2GB of data
+        # to exercise multiple segments.
         target_relblock_size = int(((2.333 * 1024 * 1024 * 1024) // 8192) * 8192)
     else:
         raise ValueError
@@ -111,9 +113,15 @@ def test_pgdata_import_smoke(
 
     def validate_vanilla_equivalence(ep):
         # TODO: would be nicer to just compare pgdump
-        assert ep.safe_psql("select count(*), sum(data::bigint)::bigint from t") == [
-            (expect_nrows, expect_sum)
-        ]
+
+        # Enable IO concurrency for batching on large sequential scan, to avoid making
+        # this test unnecessarily onerous on CPU
+        assert ep.safe_psql_many(
+            [
+                "set effective_io_concurrency=32;",
+                "select count(*), sum(data::bigint)::bigint from t",
+            ]
+        ) == [[], [(expect_nrows, expect_sum)]]
 
     validate_vanilla_equivalence(vanilla_pg)
 

From afda6d4700ca0b521ecb412f0bc81e80e5903dbd Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Thu, 19 Dec 2024 12:55:05 +0000
Subject: [PATCH 15/63] storage_scrubber: don't report half-created timelines
 as corruption (#10198)

## Problem

test_timeline_archival_chaos does timeline creation with failure
injection, and thereby sometimes leaves timelines in a partially created
state. This was being reported as corruption by the scrubber on test
teardown, because it considered a layer without an index to be an
invalid state. This was incorrect: the scrubber should accept this
state, it occurs legitimately during timeline creation.

Closes: https://github.com/neondatabase/neon/issues/9988

## Summary of changes

- Report a timeline with layers but no index as Relic rather than
MissingIndexPart.
- We retain the MissingIndexPart variant for the case where an index
_was_ found in the listing, but was not found by a subsequent GET, i.e.
racing with deletion.
---
 storage_scrubber/src/checks.rs | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs
index f759f54d19..32c86052ef 100644
--- a/storage_scrubber/src/checks.rs
+++ b/storage_scrubber/src/checks.rs
@@ -310,7 +310,7 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerName, Generation)>,
     },
-    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    /// The remains of an uncleanly deleted Timeline or aborted timeline creation(e.g. an initdb archive only, or some layer without an index)
     Relic,
     Incorrect {
         errors: Vec<String>,
@@ -346,7 +346,7 @@ pub(crate) async fn list_timeline_blobs(
     match res {
         ListTimelineBlobsResult::Ready(data) => Ok(data),
         ListTimelineBlobsResult::MissingIndexPart(_) => {
-            // Retry if index is missing.
+            // Retry if listing raced with removal of an index
             let data = list_timeline_blobs_impl(remote_client, id, root_target)
                 .await?
                 .into_data();
@@ -358,7 +358,7 @@ pub(crate) async fn list_timeline_blobs(
 enum ListTimelineBlobsResult {
     /// Blob data is ready to be intepreted.
     Ready(RemoteTimelineBlobData),
-    /// List timeline blobs has layer files but is missing [`IndexPart`].
+    /// The listing contained an index but when we tried to fetch it, we couldn't
     MissingIndexPart(RemoteTimelineBlobData),
 }
 
@@ -467,19 +467,19 @@ async fn list_timeline_blobs_impl(
     match index_part_object.as_ref() {
         Some(selected) => index_part_keys.retain(|k| k != selected),
         None => {
-            // It is possible that the branch gets deleted after we got some layer files listed
-            // and we no longer have the index file in the listing.
-            errors.push(
+            // This case does not indicate corruption, but it should be very unusual.  It can
+            // happen if:
+            // - timeline creation is in progress (first layer is written before index is written)
+            // - timeline deletion happened while a stale pageserver was still attached, it might upload
+            //   a layer after the deletion is done.
+            tracing::info!(
                 "S3 list response got no index_part.json file but still has layer files"
-                    .to_string(),
             );
-            return Ok(ListTimelineBlobsResult::MissingIndexPart(
-                RemoteTimelineBlobData {
-                    blob_data: BlobDataParseResult::Incorrect { errors, s3_layers },
-                    unused_index_keys: index_part_keys,
-                    unknown_keys,
-                },
-            ));
+            return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
+                blob_data: BlobDataParseResult::Relic,
+                unused_index_keys: index_part_keys,
+                unknown_keys,
+            }));
         }
     }
 

From 502d512fe2ca9f07392428d70d5262cf3f5103e2 Mon Sep 17 00:00:00 2001
From: Vlad Lazar <vlad@neon.tech>
Date: Thu, 19 Dec 2024 14:04:42 +0000
Subject: [PATCH 16/63] safekeeper: lift benchmarking utils into safekeeper
 crate (#10200)

## Problem

The benchmarking utilities are also useful for testing. We want to write
tests in the safekeeper crate.

## Summary of changes

This commit lifts the utils to the safekeeper crate. They are compiled
if the `benchmarking` feature is enabled or when building in test mode.
---
 libs/postgres_ffi/src/wal_generator.rs        |  6 ++--
 safekeeper/Cargo.toml                         |  2 ++
 safekeeper/benches/receive_wal.rs             | 23 +++++++--------
 safekeeper/src/lib.rs                         |  3 ++
 .../benchutils.rs => src/test_utils.rs}       | 28 ++++++++++---------
 .../tests/walproposer_sim/walproposer_disk.rs |  2 +-
 6 files changed, 36 insertions(+), 28 deletions(-)
 rename safekeeper/{benches/benchutils.rs => src/test_utils.rs} (78%)

diff --git a/libs/postgres_ffi/src/wal_generator.rs b/libs/postgres_ffi/src/wal_generator.rs
index 69cc4b771f..a72b035e17 100644
--- a/libs/postgres_ffi/src/wal_generator.rs
+++ b/libs/postgres_ffi/src/wal_generator.rs
@@ -106,11 +106,11 @@ impl<R: RecordGenerator> WalGenerator<R> {
     const TIMELINE_ID: u32 = 1;
 
     /// Creates a new WAL generator with the given record generator.
-    pub fn new(record_generator: R) -> WalGenerator<R> {
+    pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator<R> {
         Self {
             record_generator,
-            lsn: Lsn(0),
-            prev_lsn: Lsn(0),
+            lsn: start_lsn,
+            prev_lsn: start_lsn,
         }
     }
 
diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml
index 086407603f..3ebb7097f2 100644
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -9,6 +9,7 @@ default = []
 # Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
 # which adds some runtime cost to run tests on outage conditions
 testing = ["fail/failpoints"]
+benchmarking = []
 
 [dependencies]
 async-stream.workspace = true
@@ -77,3 +78,4 @@ tracing-subscriber = { workspace = true, features = ["json"] }
 [[bench]]
 name = "receive_wal"
 harness = false
+required-features = ["benchmarking"]
diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs
index 313d945b94..996c4d9b8c 100644
--- a/safekeeper/benches/receive_wal.rs
+++ b/safekeeper/benches/receive_wal.rs
@@ -1,11 +1,7 @@
 //! WAL ingestion benchmarks.
 
-#[path = "benchutils.rs"]
-mod benchutils;
-
 use std::io::Write as _;
 
-use benchutils::Env;
 use bytes::BytesMut;
 use camino_tempfile::tempfile;
 use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion};
@@ -16,6 +12,7 @@ use safekeeper::receive_wal::{self, WalAcceptor};
 use safekeeper::safekeeper::{
     AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage,
 };
+use safekeeper::test_utils::Env;
 use tokio::io::AsyncWriteExt as _;
 use utils::id::{NodeId, TenantTimelineId};
 use utils::lsn::Lsn;
@@ -76,12 +73,15 @@ fn bench_process_msg(c: &mut Criterion) {
         assert!(size >= prefixlen);
         let message = vec![0; size - prefixlen];
 
-        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message));
+        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0));
 
         // Set up the Safekeeper.
         let env = Env::new(fsync)?;
-        let mut safekeeper =
-            runtime.block_on(env.make_safekeeper(NodeId(1), TenantTimelineId::generate()))?;
+        let mut safekeeper = runtime.block_on(env.make_safekeeper(
+            NodeId(1),
+            TenantTimelineId::generate(),
+            Lsn(0),
+        ))?;
 
         b.iter_batched_ref(
             // Pre-construct WAL records and requests. Criterion will batch them.
@@ -134,7 +134,8 @@ fn bench_wal_acceptor(c: &mut Criterion) {
         let runtime = tokio::runtime::Runtime::new()?; // needs multithreaded
 
         let env = Env::new(fsync)?;
-        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(c"prefix", b"message"));
+        let walgen =
+            &mut WalGenerator::new(LogicalMessageGenerator::new(c"prefix", b"message"), Lsn(0));
 
         // Create buffered channels that can fit all requests, to avoid blocking on channels.
         let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(n);
@@ -145,7 +146,7 @@ fn bench_wal_acceptor(c: &mut Criterion) {
             // TODO: WalAcceptor doesn't actually need a full timeline, only
             // Safekeeper::process_msg(). Consider decoupling them to simplify the setup.
             let tli = env
-                .make_timeline(NodeId(1), TenantTimelineId::generate())
+                .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0))
                 .await?
                 .wal_residence_guard()
                 .await?;
@@ -239,7 +240,7 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) {
         assert!(size >= prefixlen);
         let message = vec![0; size - prefixlen];
 
-        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message));
+        let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0));
 
         // Construct and spawn the WalAcceptor task.
         let env = Env::new(fsync)?;
@@ -249,7 +250,7 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) {
 
         runtime.block_on(async {
             let tli = env
-                .make_timeline(NodeId(1), TenantTimelineId::generate())
+                .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0))
                 .await?
                 .wal_residence_guard()
                 .await?;
diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs
index abe6e00a66..7acf355e6a 100644
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -43,6 +43,9 @@ pub mod wal_reader_stream;
 pub mod wal_service;
 pub mod wal_storage;
 
+#[cfg(any(test, feature = "benchmarking"))]
+pub mod test_utils;
+
 mod timelines_global_map;
 use std::sync::Arc;
 pub use timelines_global_map::GlobalTimelines;
diff --git a/safekeeper/benches/benchutils.rs b/safekeeper/src/test_utils.rs
similarity index 78%
rename from safekeeper/benches/benchutils.rs
rename to safekeeper/src/test_utils.rs
index 48d796221b..c40a8bae5a 100644
--- a/safekeeper/benches/benchutils.rs
+++ b/safekeeper/src/test_utils.rs
@@ -1,18 +1,18 @@
 use std::sync::Arc;
 
+use crate::rate_limit::RateLimiter;
+use crate::safekeeper::{ProposerAcceptorMessage, ProposerElected, SafeKeeper, TermHistory};
+use crate::state::{TimelinePersistentState, TimelineState};
+use crate::timeline::{get_timeline_dir, SharedState, StateSK, Timeline};
+use crate::timelines_set::TimelinesSet;
+use crate::wal_backup::remote_timeline_path;
+use crate::{control_file, wal_storage, SafeKeeperConf};
 use camino_tempfile::Utf8TempDir;
-use safekeeper::rate_limit::RateLimiter;
-use safekeeper::safekeeper::{ProposerAcceptorMessage, ProposerElected, SafeKeeper, TermHistory};
-use safekeeper::state::{TimelinePersistentState, TimelineState};
-use safekeeper::timeline::{get_timeline_dir, SharedState, StateSK, Timeline};
-use safekeeper::timelines_set::TimelinesSet;
-use safekeeper::wal_backup::remote_timeline_path;
-use safekeeper::{control_file, wal_storage, SafeKeeperConf};
 use tokio::fs::create_dir_all;
 use utils::id::{NodeId, TenantTimelineId};
 use utils::lsn::Lsn;
 
-/// A Safekeeper benchmarking environment. Uses a tempdir for storage, removed on drop.
+/// A Safekeeper testing or benchmarking environment. Uses a tempdir for storage, removed on drop.
 pub struct Env {
     /// Whether to enable fsync.
     pub fsync: bool,
@@ -21,7 +21,7 @@ pub struct Env {
 }
 
 impl Env {
-    /// Creates a new benchmarking environment in a temporary directory. fsync controls whether to
+    /// Creates a new test or benchmarking environment in a temporary directory. fsync controls whether to
     /// enable fsyncing.
     pub fn new(fsync: bool) -> anyhow::Result<Self> {
         let tempdir = camino_tempfile::tempdir()?;
@@ -47,6 +47,7 @@ impl Env {
         &self,
         node_id: NodeId,
         ttid: TenantTimelineId,
+        start_lsn: Lsn,
     ) -> anyhow::Result<SafeKeeper<control_file::FileStorage, wal_storage::PhysicalStorage>> {
         let conf = self.make_conf(node_id);
 
@@ -67,9 +68,9 @@ impl Env {
         safekeeper
             .process_msg(&ProposerAcceptorMessage::Elected(ProposerElected {
                 term: 1,
-                start_streaming_at: Lsn(0),
-                term_history: TermHistory(vec![(1, Lsn(0)).into()]),
-                timeline_start_lsn: Lsn(0),
+                start_streaming_at: start_lsn,
+                term_history: TermHistory(vec![(1, start_lsn).into()]),
+                timeline_start_lsn: start_lsn,
             }))
             .await?;
 
@@ -82,12 +83,13 @@ impl Env {
         &self,
         node_id: NodeId,
         ttid: TenantTimelineId,
+        start_lsn: Lsn,
     ) -> anyhow::Result<Arc<Timeline>> {
         let conf = Arc::new(self.make_conf(node_id));
         let timeline_dir = get_timeline_dir(&conf, &ttid);
         let remote_path = remote_timeline_path(&ttid)?;
 
-        let safekeeper = self.make_safekeeper(node_id, ttid).await?;
+        let safekeeper = self.make_safekeeper(node_id, ttid, start_lsn).await?;
         let shared_state = SharedState::new(StateSK::Loaded(safekeeper));
 
         let timeline = Timeline::new(
diff --git a/safekeeper/tests/walproposer_sim/walproposer_disk.rs b/safekeeper/tests/walproposer_sim/walproposer_disk.rs
index aefb3919a1..7dc7f48548 100644
--- a/safekeeper/tests/walproposer_sim/walproposer_disk.rs
+++ b/safekeeper/tests/walproposer_sim/walproposer_disk.rs
@@ -18,7 +18,7 @@ impl DiskWalProposer {
                 internal_available_lsn: Lsn(0),
                 prev_lsn: Lsn(0),
                 disk: BlockStorage::new(),
-                wal_generator: WalGenerator::new(LogicalMessageGenerator::new(c"", &[])),
+                wal_generator: WalGenerator::new(LogicalMessageGenerator::new(c"", &[]), Lsn(0)),
             }),
         })
     }

From 628451d68ec30c0fb591e14a64f696b031d7ca88 Mon Sep 17 00:00:00 2001
From: Vlad Lazar <vlad@neon.tech>
Date: Thu, 19 Dec 2024 14:04:46 +0000
Subject: [PATCH 17/63] safekeeper: short-circuit interpreted wal sender
 (#10202)

## Problem

Safekeeper may currently send a batch to the pageserver even if it
hasn't decoded a new record.
I think this is quite unlikely in the field, but worth addressing.

## Summary of changes

Don't send anything if we haven't decoded a full record. Once this
merges and releases, the `InterpretedWalRecords` struct can be updated
to remove the Option wrapper for `next_record_lsn`.
---
 safekeeper/src/send_interpreted_wal.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs
index 2589030422..7d215176dd 100644
--- a/safekeeper/src/send_interpreted_wal.rs
+++ b/safekeeper/src/send_interpreted_wal.rs
@@ -94,9 +94,14 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> InterpretedWalSender<'_, IO> {
                         }
                     }
 
+                    let max_next_record_lsn = match max_next_record_lsn {
+                        Some(lsn) => lsn,
+                        None => { continue; }
+                    };
+
                     let batch = InterpretedWalRecords {
                         records,
-                        next_record_lsn: max_next_record_lsn
+                        next_record_lsn: Some(max_next_record_lsn),
                     };
 
                     tx.send(Batch {wal_end_lsn, available_wal_end_lsn, records: batch}).await.unwrap();

From 04517c6ff3db53b7145aff41a9de208648194a6d Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Thu, 19 Dec 2024 17:22:39 +0200
Subject: [PATCH 18/63] Do not reload config file on PS reconnect (#10204)

## Problem

See https://github.com/neondatabase/neon/issues/10184
and
https://neondb.slack.com/archives/C04DGM6SMTM/p1733997259898819

Reloading the config file inside a parallel worker causes its termination.

## Summary of changes

Remove the call to `HandleMainLoopInterrupts()`.
An update of the page server URL is propagated by the postmaster through
shared memory, so we should not reload the config for it.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 pgxn/neon/libpagestore.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index 6513ba4dd6..88d0a5292b 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -827,7 +827,6 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
 	{
 		while (!pageserver_connect(shard_no, shard->n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR))
 		{
-			HandleMainLoopInterrupts();
 			shard->n_reconnect_attempts += 1;
 		}
 		shard->n_reconnect_attempts = 0;

From b89e02f3e8efccfb900685f2d6c1fe18d13cb956 Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:04:53 -0500
Subject: [PATCH 19/63] fix(pageserver): consider partial compaction layer map
 in layer check (#10044)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

In https://github.com/neondatabase/neon/pull/9897 we temporarily
disabled the layer valid check because the current one only considers
the end result of all compaction algorithms, but partial gc-compaction
would temporarily produce an "invalid" layer map.

part of https://github.com/neondatabase/neon/issues/9114

## Summary of changes

Allow LSN splits to overlap in the slow path check. Currently, the valid
check is only used in storage scrubber (background job) and during
gc-compaction (without taking layer lock). Therefore, it's fine for such
checks to be a little bit inefficient but more accurate.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
---
 pageserver/src/tenant/checks.rs              | 47 +++++++++++++------
 pageserver/src/tenant/timeline/compaction.rs | 48 ++++++++++++++++----
 2 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs
index 1e8fa8d1d6..f98356242e 100644
--- a/pageserver/src/tenant/checks.rs
+++ b/pageserver/src/tenant/checks.rs
@@ -1,12 +1,15 @@
 use std::collections::BTreeSet;
 
 use itertools::Itertools;
+use pageserver_compaction::helpers::overlaps_with;
 
 use super::storage_layer::LayerName;
 
 /// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
 ///
-/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
+/// The function implements a fast path check and a slow path check.
+///
+/// The fast path checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
 ///
 /// ```plain
 /// |       |                 |       |
@@ -25,31 +28,47 @@ use super::storage_layer::LayerName;
 /// |       |    |   4   |    |       |
 ///
 /// If layer 2 and 4 contain the same single key, this is also a valid layer map.
+///
+/// However, if a partial compaction is still going on, it is possible that we get a layer map not satisfying the above condition.
+/// Therefore, we fallback to simply check if any of the two delta layers overlap. (See "A slow path...")
 pub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {
     let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
     let mut all_delta_layers = Vec::new();
     for name in metadata {
         if let LayerName::Delta(layer) = name {
-            if layer.key_range.start.next() != layer.key_range.end {
-                all_delta_layers.push(layer.clone());
-            }
+            all_delta_layers.push(layer.clone());
         }
     }
     for layer in &all_delta_layers {
-        let lsn_range = &layer.lsn_range;
-        lsn_split_point.insert(lsn_range.start);
-        lsn_split_point.insert(lsn_range.end);
+        if layer.key_range.start.next() != layer.key_range.end {
+            let lsn_range = &layer.lsn_range;
+            lsn_split_point.insert(lsn_range.start);
+            lsn_split_point.insert(lsn_range.end);
+        }
     }
-    for layer in &all_delta_layers {
+    for (idx, layer) in all_delta_layers.iter().enumerate() {
+        if layer.key_range.start.next() == layer.key_range.end {
+            continue;
+        }
         let lsn_range = layer.lsn_range.clone();
         let intersects = lsn_split_point.range(lsn_range).collect_vec();
         if intersects.len() > 1 {
-            let err = format!(
-                "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
-                layer,
-                intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
-            );
-            return Some(err);
+            // A slow path to check if the layer intersects with any other delta layer.
+            for (other_idx, other_layer) in all_delta_layers.iter().enumerate() {
+                if other_idx == idx {
+                    // do not check self intersects with self
+                    continue;
+                }
+                if overlaps_with(&layer.lsn_range, &other_layer.lsn_range)
+                    && overlaps_with(&layer.key_range, &other_layer.key_range)
+                {
+                    let err = format!(
+                            "layer violates the layer map LSN split assumption: layer {} intersects with layer {}",
+                            layer, other_layer
+                        );
+                    return Some(err);
+                }
+            }
         }
     }
     None
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index a4e8f39522..01f2a5b170 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -29,6 +29,7 @@ use utils::id::TimelineId;
 use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
 use crate::page_cache;
 use crate::statvfs::Statvfs;
+use crate::tenant::checks::check_valid_layermap;
 use crate::tenant::remote_timeline_client::WaitCompletionError;
 use crate::tenant::storage_layer::batch_split_writer::{
     BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter,
@@ -2156,15 +2157,14 @@ impl Timeline {
 
         // Step 1: construct a k-merge iterator over all layers.
         // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
-        // disable the check for now because we need to adjust the check for partial compactions, will enable later.
-        // let layer_names = job_desc
-        //     .selected_layers
-        //     .iter()
-        //     .map(|layer| layer.layer_desc().layer_name())
-        //     .collect_vec();
-        // if let Some(err) = check_valid_layermap(&layer_names) {
-        //     warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err);
-        // }
+        let layer_names = job_desc
+            .selected_layers
+            .iter()
+            .map(|layer| layer.layer_desc().layer_name())
+            .collect_vec();
+        if let Some(err) = check_valid_layermap(&layer_names) {
+            bail!("gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err);
+        }
         // The maximum LSN we are processing in this compaction loop
         let end_lsn = job_desc
             .selected_layers
@@ -2546,8 +2546,36 @@ impl Timeline {
         );
 
         // Step 3: Place back to the layer map.
+
+        // First, do a sanity check to ensure the newly-created layer map does not contain overlaps.
+        let all_layers = {
+            let guard = self.layers.read().await;
+            let layer_map = guard.layer_map()?;
+            layer_map.iter_historic_layers().collect_vec()
+        };
+
+        let mut final_layers = all_layers
+            .iter()
+            .map(|layer| layer.layer_name())
+            .collect::<HashSet<_>>();
+        for layer in &layer_selection {
+            final_layers.remove(&layer.layer_desc().layer_name());
+        }
+        for layer in &compact_to {
+            final_layers.insert(layer.layer_desc().layer_name());
+        }
+        let final_layers = final_layers.into_iter().collect_vec();
+
+        // TODO: move this check before we call `finish` on image layer writers. However, this will require us to get the layer name before we finish
+        // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are
+        // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails.
+        if let Some(err) = check_valid_layermap(&final_layers) {
+            bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err);
+        }
+
+        // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only
+        // operate on L1 layers.
         {
-            // TODO: sanity check if the layer map is valid (i.e., should not have overlaps)
             let mut guard = self.layers.write().await;
             guard
                 .open_mut()?

From 197a89ab3dee0ff90b060c92032a1a8e0b3213a8 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Thu, 19 Dec 2024 20:32:32 +0200
Subject: [PATCH 20/63] =?UTF-8?q?Increase=20default=20stotrage=20controlle?=
 =?UTF-8?q?r=20heartbeat=20interval=20from=20100msec=20=E2=80=A6=20(#10206?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

Currently the default value of the storage controller heartbeat interval is
100msec. This means that it establishes a connection to the PS 10 times per
second, which seems to be quite expensive.
At MacOS right now storage_controller consumes 70% CPU and trusts - 30%.
So together they completely utilize one core.
A lot of us have Macs. Let's save the environment a little bit and not
waste electricity or contribute to global warming.

By the way, on prod the interval is 10 seconds.

## Summary of changes

Increase heartbeat interval from 100msec to 1 second.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 control_plane/src/local_env.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs
index 489f9c8509..5b82acb3a5 100644
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -180,7 +180,7 @@ impl NeonStorageControllerConf {
     const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
 
     // Very tight heartbeat interval to speed up tests
-    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
+    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
 }
 
 impl Default for NeonStorageControllerConf {

From 9c53b41245e3aecba30c2e05df4eeabe45fd39ac Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:40:20 -0500
Subject: [PATCH 21/63] fix(pageserver): update remote latest_gc_cutoff after
 gc-compaction (#10209)

## Problem

close https://github.com/neondatabase/neon/issues/10208
part of #9114

## Summary of changes

* Ensure remote `latest_gc_cutoff` is up-to-date before removing any
files for gc-compaction.

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 pageserver/src/tenant/timeline/compaction.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 01f2a5b170..94c65631b2 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -2581,6 +2581,13 @@ impl Timeline {
                 .open_mut()?
                 .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics)
         };
+
+        // Schedule an index-only upload to update the `latest_gc_cutoff` in the index_part.json.
+        // Otherwise, after restart, the index_part only contains the old `latest_gc_cutoff` and
+        // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should
+        // be batched into `schedule_compaction_update`.
+        let disk_consistent_lsn = self.disk_consistent_lsn.load();
+        self.schedule_uploads(disk_consistent_lsn, None)?;
         self.remote_client
             .schedule_compaction_update(&layer_selection, &compact_to)?;
 

From f94248a5941dc0ba38ea8fc94ebc49016caef162 Mon Sep 17 00:00:00 2001
From: Conrad Ludgate <conrad@neon.tech>
Date: Thu, 2 Jan 2025 09:35:28 +0000
Subject: [PATCH 22/63] chore(libs/proxy): refactor tokio-postgres connection
 control flow (#10247)

In #10207 it was clear there was some confusion with the current
connection logic. To analyse the flow to make sure there was no poll
stalling, I ended up with the following refactor.

Notable changes:
1. Now all functions called `poll_xyz` and that have a `cx: &mut
Context` argument must return a `Poll<_>` type, and can only return
`Pending` iff an internal poll call also returned `Pending`
2. State management is handled entirely by `poll_messages`. There are
now only 2 states which makes it much easier to keep track of.

Each commit should be self-reviewable and should be simple to verify
that it keeps the same behaviour
---
 libs/proxy/tokio-postgres2/src/connection.rs | 147 ++++++++++---------
 1 file changed, 81 insertions(+), 66 deletions(-)

diff --git a/libs/proxy/tokio-postgres2/src/connection.rs b/libs/proxy/tokio-postgres2/src/connection.rs
index 0aa5c77e22..f478717e0d 100644
--- a/libs/proxy/tokio-postgres2/src/connection.rs
+++ b/libs/proxy/tokio-postgres2/src/connection.rs
@@ -33,10 +33,14 @@ pub struct Response {
 #[derive(PartialEq, Debug)]
 enum State {
     Active,
-    Terminating,
     Closing,
 }
 
+enum WriteReady {
+    Terminating,
+    WaitingOnRead,
+}
+
 /// A connection to a PostgreSQL database.
 ///
 /// This is one half of what is returned when a new connection is established. It performs the actual IO with the
@@ -51,7 +55,6 @@ pub struct Connection<S, T> {
     /// HACK: we need this in the Neon Proxy to forward params.
     pub parameters: HashMap<String, String>,
     receiver: mpsc::UnboundedReceiver<Request>,
-    pending_request: Option<RequestMessages>,
     pending_responses: VecDeque<BackendMessage>,
     responses: VecDeque<Response>,
     state: State,
@@ -72,7 +75,6 @@ where
             stream,
             parameters,
             receiver,
-            pending_request: None,
             pending_responses,
             responses: VecDeque::new(),
             state: State::Active,
@@ -93,26 +95,23 @@ where
             .map(|o| o.map(|r| r.map_err(Error::io)))
     }
 
-    fn poll_read(&mut self, cx: &mut Context<'_>) -> Result<Option<AsyncMessage>, Error> {
-        if self.state != State::Active {
-            trace!("poll_read: done");
-            return Ok(None);
-        }
-
+    /// Read and process messages from the connection to postgres.
+    /// client <- postgres
+    fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<AsyncMessage, Error>> {
         loop {
             let message = match self.poll_response(cx)? {
                 Poll::Ready(Some(message)) => message,
-                Poll::Ready(None) => return Err(Error::closed()),
+                Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
                 Poll::Pending => {
                     trace!("poll_read: waiting on response");
-                    return Ok(None);
+                    return Poll::Pending;
                 }
             };
 
             let (mut messages, request_complete) = match message {
                 BackendMessage::Async(Message::NoticeResponse(body)) => {
                     let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
-                    return Ok(Some(AsyncMessage::Notice(error)));
+                    return Poll::Ready(Ok(AsyncMessage::Notice(error)));
                 }
                 BackendMessage::Async(Message::NotificationResponse(body)) => {
                     let notification = Notification {
@@ -120,7 +119,7 @@ where
                         channel: body.channel().map_err(Error::parse)?.to_string(),
                         payload: body.message().map_err(Error::parse)?.to_string(),
                     };
-                    return Ok(Some(AsyncMessage::Notification(notification)));
+                    return Poll::Ready(Ok(AsyncMessage::Notification(notification)));
                 }
                 BackendMessage::Async(Message::ParameterStatus(body)) => {
                     self.parameters.insert(
@@ -139,8 +138,10 @@ where
             let mut response = match self.responses.pop_front() {
                 Some(response) => response,
                 None => match messages.next().map_err(Error::parse)? {
-                    Some(Message::ErrorResponse(error)) => return Err(Error::db(error)),
-                    _ => return Err(Error::unexpected_message()),
+                    Some(Message::ErrorResponse(error)) => {
+                        return Poll::Ready(Err(Error::db(error)))
+                    }
+                    _ => return Poll::Ready(Err(Error::unexpected_message())),
                 },
             };
 
@@ -164,18 +165,14 @@ where
                         request_complete,
                     });
                     trace!("poll_read: waiting on sender");
-                    return Ok(None);
+                    return Poll::Pending;
                 }
             }
         }
     }
 
+    /// Fetch the next client request and enqueue the response sender.
     fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
-        if let Some(messages) = self.pending_request.take() {
-            trace!("retrying pending request");
-            return Poll::Ready(Some(messages));
-        }
-
         if self.receiver.is_closed() {
             return Poll::Ready(None);
         }
@@ -193,74 +190,80 @@ where
         }
     }
 
-    fn poll_write(&mut self, cx: &mut Context<'_>) -> Result<bool, Error> {
+    /// Process client requests and write them to the postgres connection, flushing if necessary.
+    /// client -> postgres
+    fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
         loop {
-            if self.state == State::Closing {
-                trace!("poll_write: done");
-                return Ok(false);
-            }
-
             if Pin::new(&mut self.stream)
                 .poll_ready(cx)
                 .map_err(Error::io)?
                 .is_pending()
             {
                 trace!("poll_write: waiting on socket");
-                return Ok(false);
+
+                // poll_ready is self-flushing.
+                return Poll::Pending;
             }
 
-            let request = match self.poll_request(cx) {
-                Poll::Ready(Some(request)) => request,
-                Poll::Ready(None) if self.responses.is_empty() && self.state == State::Active => {
+            match self.poll_request(cx) {
+                // send the message to postgres
+                Poll::Ready(Some(RequestMessages::Single(request))) => {
+                    Pin::new(&mut self.stream)
+                        .start_send(request)
+                        .map_err(Error::io)?;
+                }
+                // No more messages from the client, and no more responses to wait for.
+                // Send a terminate message to postgres
+                Poll::Ready(None) if self.responses.is_empty() => {
                     trace!("poll_write: at eof, terminating");
-                    self.state = State::Terminating;
                     let mut request = BytesMut::new();
                     frontend::terminate(&mut request);
-                    RequestMessages::Single(FrontendMessage::Raw(request.freeze()))
+                    let request = FrontendMessage::Raw(request.freeze());
+
+                    Pin::new(&mut self.stream)
+                        .start_send(request)
+                        .map_err(Error::io)?;
+
+                    trace!("poll_write: sent eof, closing");
+                    trace!("poll_write: done");
+                    return Poll::Ready(Ok(WriteReady::Terminating));
                 }
+                // No more messages from the client, but there are still some responses to wait for.
                 Poll::Ready(None) => {
                     trace!(
                         "poll_write: at eof, pending responses {}",
                         self.responses.len()
                     );
-                    return Ok(true);
+                    ready!(self.poll_flush(cx))?;
+                    return Poll::Ready(Ok(WriteReady::WaitingOnRead));
                 }
+                // Still waiting for a message from the client.
                 Poll::Pending => {
                     trace!("poll_write: waiting on request");
-                    return Ok(true);
-                }
-            };
-
-            match request {
-                RequestMessages::Single(request) => {
-                    Pin::new(&mut self.stream)
-                        .start_send(request)
-                        .map_err(Error::io)?;
-                    if self.state == State::Terminating {
-                        trace!("poll_write: sent eof, closing");
-                        self.state = State::Closing;
-                    }
+                    ready!(self.poll_flush(cx))?;
+                    return Poll::Pending;
                 }
             }
         }
     }
 
-    fn poll_flush(&mut self, cx: &mut Context<'_>) -> Result<(), Error> {
+    fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
         match Pin::new(&mut self.stream)
             .poll_flush(cx)
             .map_err(Error::io)?
         {
-            Poll::Ready(()) => trace!("poll_flush: flushed"),
-            Poll::Pending => trace!("poll_flush: waiting on socket"),
+            Poll::Ready(()) => {
+                trace!("poll_flush: flushed");
+                Poll::Ready(Ok(()))
+            }
+            Poll::Pending => {
+                trace!("poll_flush: waiting on socket");
+                Poll::Pending
+            }
         }
-        Ok(())
     }
 
     fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
-        if self.state != State::Closing {
-            return Poll::Pending;
-        }
-
         match Pin::new(&mut self.stream)
             .poll_close(cx)
             .map_err(Error::io)?
@@ -289,18 +292,30 @@ where
         &mut self,
         cx: &mut Context<'_>,
     ) -> Poll<Option<Result<AsyncMessage, Error>>> {
-        let message = self.poll_read(cx)?;
-        let want_flush = self.poll_write(cx)?;
-        if want_flush {
-            self.poll_flush(cx)?;
+        if self.state != State::Closing {
+            // if the state is still active, try to read from and write to postgres.
+            let message = self.poll_read(cx)?;
+            let closing = self.poll_write(cx)?;
+            if let Poll::Ready(WriteReady::Terminating) = closing {
+                self.state = State::Closing;
+            }
+
+            if let Poll::Ready(message) = message {
+                return Poll::Ready(Some(Ok(message)));
+            }
+
+            // poll_read returned Pending.
+            // poll_write returned Pending or Ready(WriteReady::WaitingOnRead).
+            // if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres.
+            if self.state != State::Closing {
+                return Poll::Pending;
+            }
         }
-        match message {
-            Some(message) => Poll::Ready(Some(Ok(message))),
-            None => match self.poll_shutdown(cx) {
-                Poll::Ready(Ok(())) => Poll::Ready(None),
-                Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
-                Poll::Pending => Poll::Pending,
-            },
+
+        match self.poll_shutdown(cx) {
+            Poll::Ready(Ok(())) => Poll::Ready(None),
+            Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
+            Poll::Pending => Poll::Pending,
         }
     }
 }

From 38c7a2abfc40bbbe2c952ed634aaab32fb100fcc Mon Sep 17 00:00:00 2001
From: Conrad Ludgate <conrad@neon.tech>
Date: Thu, 2 Jan 2025 09:36:13 +0000
Subject: [PATCH 23/63] chore(proxy): pre-load native tls certificates and
 propagate compute client config (#10182)

Now that we construct the TLS client config for cancellation as well as
connect, it feels appropriate to construct the same config once and
re-use it elsewhere. It might also help should #7500 require any extra
setup, so we can easily add it to all the appropriate call sites.
---
 proxy/src/auth/flow.rs                        |   2 +-
 proxy/src/bin/local_proxy.rs                  |  16 +-
 proxy/src/bin/pg_sni_router.rs                |   2 +-
 proxy/src/bin/proxy.rs                        |  17 +-
 proxy/src/cancellation.rs                     |  77 +++--
 proxy/src/compute.rs                          |  41 +--
 proxy/src/config.rs                           | 288 +-----------------
 proxy/src/console_redirect_proxy.rs           |   4 +-
 proxy/src/control_plane/mod.rs                |   6 +-
 proxy/src/lib.rs                              |   2 +-
 proxy/src/proxy/connect_compute.rs            |  28 +-
 proxy/src/proxy/handshake.rs                  |   3 +-
 proxy/src/proxy/mod.rs                        |   4 +-
 proxy/src/proxy/passthrough.rs                |  13 +-
 proxy/src/proxy/tests/mod.rs                  |  91 +++---
 proxy/src/redis/notifications.rs              |   3 +
 proxy/src/scram/exchange.rs                   |  13 +-
 proxy/src/scram/mod.rs                        |   7 +-
 proxy/src/serverless/backend.rs               |  14 +-
 proxy/src/serverless/websocket.rs             |   2 +-
 proxy/src/stream.rs                           |   2 +-
 proxy/src/tls/client_config.rs                |  42 +++
 proxy/src/tls/mod.rs                          |  72 +++++
 .../mod.rs => tls/postgres_rustls.rs}         |  10 +-
 proxy/src/tls/server_config.rs                | 218 +++++++++++++
 25 files changed, 509 insertions(+), 468 deletions(-)
 create mode 100644 proxy/src/tls/client_config.rs
 create mode 100644 proxy/src/tls/mod.rs
 rename proxy/src/{postgres_rustls/mod.rs => tls/postgres_rustls.rs} (96%)
 create mode 100644 proxy/src/tls/server_config.rs

diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs
index 60d1962d7f..0992c6d875 100644
--- a/proxy/src/auth/flow.rs
+++ b/proxy/src/auth/flow.rs
@@ -10,7 +10,6 @@ use tracing::info;
 
 use super::backend::ComputeCredentialKeys;
 use super::{AuthError, PasswordHackPayload};
-use crate::config::TlsServerEndPoint;
 use crate::context::RequestContext;
 use crate::control_plane::AuthSecret;
 use crate::intern::EndpointIdInt;
@@ -18,6 +17,7 @@ use crate::sasl;
 use crate::scram::threadpool::ThreadPool;
 use crate::scram::{self};
 use crate::stream::{PqStream, Stream};
+use crate::tls::TlsServerEndPoint;
 
 /// Every authentication selector is supposed to implement this trait.
 pub(crate) trait AuthMethod {
diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs
index 56bbd94850..644f670f88 100644
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -13,7 +13,9 @@ use proxy::auth::backend::jwt::JwkCache;
 use proxy::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP};
 use proxy::auth::{self};
 use proxy::cancellation::CancellationHandlerMain;
-use proxy::config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig};
+use proxy::config::{
+    self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig,
+};
 use proxy::control_plane::locks::ApiLocks;
 use proxy::control_plane::messages::{EndpointJwksResponse, JwksSettings};
 use proxy::http::health_server::AppMetrics;
@@ -25,6 +27,7 @@ use proxy::rate_limiter::{
 use proxy::scram::threadpool::ThreadPool;
 use proxy::serverless::cancel_set::CancelSet;
 use proxy::serverless::{self, GlobalConnPoolOptions};
+use proxy::tls::client_config::compute_client_config_with_root_certs;
 use proxy::types::RoleName;
 use proxy::url::ApiUrl;
 
@@ -209,6 +212,7 @@ async fn main() -> anyhow::Result<()> {
         http_listener,
         shutdown.clone(),
         Arc::new(CancellationHandlerMain::new(
+            &config.connect_to_compute,
             Arc::new(DashMap::new()),
             None,
             proxy::metrics::CancellationSource::Local,
@@ -268,6 +272,12 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
         max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
     };
 
+    let compute_config = ComputeConfig {
+        retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?,
+        tls: Arc::new(compute_client_config_with_root_certs()?),
+        timeout: Duration::from_secs(2),
+    };
+
     Ok(Box::leak(Box::new(ProxyConfig {
         tls_config: None,
         metric_collection: None,
@@ -289,9 +299,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
         region: "local".into(),
         wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?,
         connect_compute_locks,
-        connect_to_compute_retry_config: RetryConfig::parse(
-            RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES,
-        )?,
+        connect_to_compute: compute_config,
     })))
 }
 
diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs
index 9538384b9e..97d870a83a 100644
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -10,12 +10,12 @@ use clap::Arg;
 use futures::future::Either;
 use futures::TryFutureExt;
 use itertools::Itertools;
-use proxy::config::TlsServerEndPoint;
 use proxy::context::RequestContext;
 use proxy::metrics::{Metrics, ThreadPoolMetrics};
 use proxy::protocol2::ConnectionInfo;
 use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource};
 use proxy::stream::{PqStream, Stream};
+use proxy::tls::TlsServerEndPoint;
 use rustls::crypto::ring;
 use rustls::pki_types::PrivateKeyDer;
 use tokio::io::{AsyncRead, AsyncWrite};
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index 3dcf9ca060..3b122d771c 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -1,6 +1,7 @@
 use std::net::SocketAddr;
 use std::pin::pin;
 use std::sync::Arc;
+use std::time::Duration;
 
 use anyhow::bail;
 use futures::future::Either;
@@ -8,7 +9,7 @@ use proxy::auth::backend::jwt::JwkCache;
 use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
 use proxy::cancellation::{CancelMap, CancellationHandler};
 use proxy::config::{
-    self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, HttpConfig,
+    self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig,
     ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2,
 };
 use proxy::context::parquet::ParquetUploadArgs;
@@ -23,6 +24,7 @@ use proxy::redis::{elasticache, notifications};
 use proxy::scram::threadpool::ThreadPool;
 use proxy::serverless::cancel_set::CancelSet;
 use proxy::serverless::GlobalConnPoolOptions;
+use proxy::tls::client_config::compute_client_config_with_root_certs;
 use proxy::{auth, control_plane, http, serverless, usage_metrics};
 use remote_storage::RemoteStorageConfig;
 use tokio::net::TcpListener;
@@ -397,6 +399,7 @@ async fn main() -> anyhow::Result<()> {
     let cancellation_handler = Arc::new(CancellationHandler::<
         Option<Arc<Mutex<RedisPublisherClient>>>,
     >::new(
+        &config.connect_to_compute,
         cancel_map.clone(),
         redis_publisher,
         proxy::metrics::CancellationSource::FromClient,
@@ -492,6 +495,7 @@ async fn main() -> anyhow::Result<()> {
                     let cache = api.caches.project_info.clone();
                     if let Some(client) = client1 {
                         maintenance_tasks.spawn(notifications::task_main(
+                            config,
                             client,
                             cache.clone(),
                             cancel_map.clone(),
@@ -500,6 +504,7 @@ async fn main() -> anyhow::Result<()> {
                     }
                     if let Some(client) = client2 {
                         maintenance_tasks.spawn(notifications::task_main(
+                            config,
                             client,
                             cache.clone(),
                             cancel_map.clone(),
@@ -632,6 +637,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,
     };
 
+    let compute_config = ComputeConfig {
+        retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?,
+        tls: Arc::new(compute_client_config_with_root_certs()?),
+        timeout: Duration::from_secs(2),
+    };
+
     let config = ProxyConfig {
         tls_config,
         metric_collection,
@@ -642,9 +653,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         region: args.region.clone(),
         wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
         connect_compute_locks,
-        connect_to_compute_retry_config: config::RetryConfig::parse(
-            &args.connect_to_compute_retry,
-        )?,
+        connect_to_compute: compute_config,
     };
 
     let config = Box::leak(Box::new(config));
diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs
index ebaea173ae..df618cf242 100644
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -3,11 +3,9 @@ use std::sync::Arc;
 
 use dashmap::DashMap;
 use ipnet::{IpNet, Ipv4Net, Ipv6Net};
-use once_cell::sync::OnceCell;
 use postgres_client::tls::MakeTlsConnect;
 use postgres_client::CancelToken;
 use pq_proto::CancelKeyData;
-use rustls::crypto::ring;
 use thiserror::Error;
 use tokio::net::TcpStream;
 use tokio::sync::Mutex;
@@ -15,15 +13,15 @@ use tracing::{debug, info};
 use uuid::Uuid;
 
 use crate::auth::{check_peer_addr_is_in_list, IpPattern};
-use crate::compute::load_certs;
+use crate::config::ComputeConfig;
 use crate::error::ReportableError;
 use crate::ext::LockExt;
 use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
-use crate::postgres_rustls::MakeRustlsConnect;
 use crate::rate_limiter::LeakyBucketRateLimiter;
 use crate::redis::cancellation_publisher::{
     CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
 };
+use crate::tls::postgres_rustls::MakeRustlsConnect;
 
 pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
 pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
@@ -35,6 +33,7 @@ type IpSubnetKey = IpNet;
 ///
 /// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.
 pub struct CancellationHandler<P> {
+    compute_config: &'static ComputeConfig,
     map: CancelMap,
     client: P,
     /// This field used for the monitoring purposes.
@@ -183,7 +182,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
             "cancelling query per user's request using key {key}, hostname {}, address: {}",
             cancel_closure.hostname, cancel_closure.socket_addr
         );
-        cancel_closure.try_cancel_query().await
+        cancel_closure.try_cancel_query(self.compute_config).await
     }
 
     #[cfg(test)]
@@ -198,8 +197,13 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
 }
 
 impl CancellationHandler<()> {
-    pub fn new(map: CancelMap, from: CancellationSource) -> Self {
+    pub fn new(
+        compute_config: &'static ComputeConfig,
+        map: CancelMap,
+        from: CancellationSource,
+    ) -> Self {
         Self {
+            compute_config,
             map,
             client: (),
             from,
@@ -214,8 +218,14 @@ impl CancellationHandler<()> {
 }
 
 impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
-    pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: CancellationSource) -> Self {
+    pub fn new(
+        compute_config: &'static ComputeConfig,
+        map: CancelMap,
+        client: Option<Arc<Mutex<P>>>,
+        from: CancellationSource,
+    ) -> Self {
         Self {
+            compute_config,
             map,
             client,
             from,
@@ -229,8 +239,6 @@ impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
     }
 }
 
-static TLS_ROOTS: OnceCell<Arc<rustls::RootCertStore>> = OnceCell::new();
-
 /// This should've been a [`std::future::Future`], but
 /// it's impossible to name a type of an unboxed future
 /// (we'd need something like `#![feature(type_alias_impl_trait)]`).
@@ -257,27 +265,14 @@ impl CancelClosure {
         }
     }
     /// Cancels the query running on user's compute node.
-    pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> {
+    pub(crate) async fn try_cancel_query(
+        self,
+        compute_config: &ComputeConfig,
+    ) -> Result<(), CancelError> {
         let socket = TcpStream::connect(self.socket_addr).await?;
 
-        let root_store = TLS_ROOTS
-            .get_or_try_init(load_certs)
-            .map_err(|_e| {
-                CancelError::IO(std::io::Error::new(
-                    std::io::ErrorKind::Other,
-                    "TLS root store initialization failed".to_string(),
-                ))
-            })?
-            .clone();
-
-        let client_config =
-            rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
-                .with_safe_default_protocol_versions()
-                .expect("ring should support the default protocol versions")
-                .with_root_certificates(root_store)
-                .with_no_client_auth();
-
-        let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config);
+        let mut mk_tls =
+            crate::tls::postgres_rustls::MakeRustlsConnect::new(compute_config.tls.clone());
         let tls = <MakeRustlsConnect as MakeTlsConnect<tokio::net::TcpStream>>::make_tls_connect(
             &mut mk_tls,
             &self.hostname,
@@ -329,11 +324,30 @@ impl<P> Drop for Session<P> {
 #[cfg(test)]
 #[expect(clippy::unwrap_used)]
 mod tests {
+    use std::time::Duration;
+
     use super::*;
+    use crate::config::RetryConfig;
+    use crate::tls::client_config::compute_client_config_with_certs;
+
+    fn config() -> ComputeConfig {
+        let retry = RetryConfig {
+            base_delay: Duration::from_secs(1),
+            max_retries: 5,
+            backoff_factor: 2.0,
+        };
+
+        ComputeConfig {
+            retry,
+            tls: Arc::new(compute_client_config_with_certs(std::iter::empty())),
+            timeout: Duration::from_secs(2),
+        }
+    }
 
     #[tokio::test]
     async fn check_session_drop() -> anyhow::Result<()> {
         let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
+            Box::leak(Box::new(config())),
             CancelMap::default(),
             CancellationSource::FromRedis,
         ));
@@ -349,8 +363,11 @@ mod tests {
 
     #[tokio::test]
     async fn cancel_session_noop_regression() {
-        let handler =
-            CancellationHandler::<()>::new(CancelMap::default(), CancellationSource::Local);
+        let handler = CancellationHandler::<()>::new(
+            Box::leak(Box::new(config())),
+            CancelMap::default(),
+            CancellationSource::Local,
+        );
         handler
             .cancel_session(
                 CancelKeyData {
diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index 8dc9b59e81..d60dfd0f80 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -1,16 +1,13 @@
 use std::io;
 use std::net::SocketAddr;
-use std::sync::Arc;
 use std::time::Duration;
 
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
-use once_cell::sync::OnceCell;
 use postgres_client::tls::MakeTlsConnect;
 use postgres_client::{CancelToken, RawConnection};
 use postgres_protocol::message::backend::NoticeResponseBody;
 use pq_proto::StartupMessageParams;
-use rustls::crypto::ring;
 use rustls::pki_types::InvalidDnsNameError;
 use thiserror::Error;
 use tokio::net::TcpStream;
@@ -18,14 +15,15 @@ use tracing::{debug, error, info, warn};
 
 use crate::auth::parse_endpoint_param;
 use crate::cancellation::CancelClosure;
+use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::client::ApiLockError;
 use crate::control_plane::errors::WakeComputeError;
 use crate::control_plane::messages::MetricsAuxInfo;
 use crate::error::{ReportableError, UserFacingError};
 use crate::metrics::{Metrics, NumDbConnectionsGuard};
-use crate::postgres_rustls::MakeRustlsConnect;
 use crate::proxy::neon_option;
+use crate::tls::postgres_rustls::MakeRustlsConnect;
 use crate::types::Host;
 
 pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
@@ -40,9 +38,6 @@ pub(crate) enum ConnectionError {
     #[error("{COULD_NOT_CONNECT}: {0}")]
     CouldNotConnect(#[from] io::Error),
 
-    #[error("Couldn't load native TLS certificates: {0:?}")]
-    TlsCertificateError(Vec<rustls_native_certs::Error>),
-
     #[error("{COULD_NOT_CONNECT}: {0}")]
     TlsError(#[from] InvalidDnsNameError),
 
@@ -89,7 +84,6 @@ impl ReportableError for ConnectionError {
             }
             ConnectionError::Postgres(_) => crate::error::ErrorKind::Compute,
             ConnectionError::CouldNotConnect(_) => crate::error::ErrorKind::Compute,
-            ConnectionError::TlsCertificateError(_) => crate::error::ErrorKind::Service,
             ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute,
             ConnectionError::WakeComputeError(e) => e.get_error_kind(),
             ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(),
@@ -251,25 +245,13 @@ impl ConnCfg {
         &self,
         ctx: &RequestContext,
         aux: MetricsAuxInfo,
-        timeout: Duration,
+        config: &ComputeConfig,
     ) -> Result<PostgresConnection, ConnectionError> {
         let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
-        let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
+        let (socket_addr, stream, host) = self.connect_raw(config.timeout).await?;
         drop(pause);
 
-        let root_store = TLS_ROOTS
-            .get_or_try_init(load_certs)
-            .map_err(ConnectionError::TlsCertificateError)?
-            .clone();
-
-        let client_config =
-            rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
-                .with_safe_default_protocol_versions()
-                .expect("ring should support the default protocol versions")
-                .with_root_certificates(root_store)
-                .with_no_client_auth();
-
-        let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config);
+        let mut mk_tls = crate::tls::postgres_rustls::MakeRustlsConnect::new(config.tls.clone());
         let tls = <MakeRustlsConnect as MakeTlsConnect<tokio::net::TcpStream>>::make_tls_connect(
             &mut mk_tls,
             host,
@@ -341,19 +323,6 @@ fn filtered_options(options: &str) -> Option<String> {
     Some(options)
 }
 
-pub(crate) fn load_certs() -> Result<Arc<rustls::RootCertStore>, Vec<rustls_native_certs::Error>> {
-    let der_certs = rustls_native_certs::load_native_certs();
-
-    if !der_certs.errors.is_empty() {
-        return Err(der_certs.errors);
-    }
-
-    let mut store = rustls::RootCertStore::empty();
-    store.add_parsable_certificates(der_certs.certs);
-    Ok(Arc::new(store))
-}
-static TLS_ROOTS: OnceCell<Arc<rustls::RootCertStore>> = OnceCell::new();
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/proxy/src/config.rs b/proxy/src/config.rs
index 33d1d2e9e4..8502edcfab 100644
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,17 +1,10 @@
-use std::collections::{HashMap, HashSet};
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
 
 use anyhow::{bail, ensure, Context, Ok};
 use clap::ValueEnum;
-use itertools::Itertools;
 use remote_storage::RemoteStorageConfig;
-use rustls::crypto::ring::{self, sign};
-use rustls::pki_types::{CertificateDer, PrivateKeyDer};
-use sha2::{Digest, Sha256};
-use tracing::{error, info};
-use x509_parser::oid_registry;
 
 use crate::auth::backend::jwt::JwkCache;
 use crate::auth::backend::AuthRateLimiter;
@@ -20,6 +13,7 @@ use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}
 use crate::scram::threadpool::ThreadPool;
 use crate::serverless::cancel_set::CancelSet;
 use crate::serverless::GlobalConnPoolOptions;
+pub use crate::tls::server_config::{configure_tls, TlsConfig};
 use crate::types::Host;
 
 pub struct ProxyConfig {
@@ -32,7 +26,13 @@ pub struct ProxyConfig {
     pub handshake_timeout: Duration,
     pub wake_compute_retry_config: RetryConfig,
     pub connect_compute_locks: ApiLocks<Host>,
-    pub connect_to_compute_retry_config: RetryConfig,
+    pub connect_to_compute: ComputeConfig,
+}
+
+pub struct ComputeConfig {
+    pub retry: RetryConfig,
+    pub tls: Arc<rustls::ClientConfig>,
+    pub timeout: Duration,
 }
 
 #[derive(Copy, Clone, Debug, ValueEnum, PartialEq)]
@@ -52,12 +52,6 @@ pub struct MetricCollectionConfig {
     pub backup_metric_collection_config: MetricBackupCollectionConfig,
 }
 
-pub struct TlsConfig {
-    pub config: Arc<rustls::ServerConfig>,
-    pub common_names: HashSet<String>,
-    pub cert_resolver: Arc<CertResolver>,
-}
-
 pub struct HttpConfig {
     pub accept_websockets: bool,
     pub pool_options: GlobalConnPoolOptions,
@@ -80,272 +74,6 @@ pub struct AuthenticationConfig {
     pub console_redirect_confirmation_timeout: tokio::time::Duration,
 }
 
-impl TlsConfig {
-    pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
-        self.config.clone()
-    }
-}
-
-/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L159>
-pub const PG_ALPN_PROTOCOL: &[u8] = b"postgresql";
-
-/// Configure TLS for the main endpoint.
-pub fn configure_tls(
-    key_path: &str,
-    cert_path: &str,
-    certs_dir: Option<&String>,
-    allow_tls_keylogfile: bool,
-) -> anyhow::Result<TlsConfig> {
-    let mut cert_resolver = CertResolver::new();
-
-    // add default certificate
-    cert_resolver.add_cert_path(key_path, cert_path, true)?;
-
-    // add extra certificates
-    if let Some(certs_dir) = certs_dir {
-        for entry in std::fs::read_dir(certs_dir)? {
-            let entry = entry?;
-            let path = entry.path();
-            if path.is_dir() {
-                // file names aligned with default cert-manager names
-                let key_path = path.join("tls.key");
-                let cert_path = path.join("tls.crt");
-                if key_path.exists() && cert_path.exists() {
-                    cert_resolver.add_cert_path(
-                        &key_path.to_string_lossy(),
-                        &cert_path.to_string_lossy(),
-                        false,
-                    )?;
-                }
-            }
-        }
-    }
-
-    let common_names = cert_resolver.get_common_names();
-
-    let cert_resolver = Arc::new(cert_resolver);
-
-    // allow TLS 1.2 to be compatible with older client libraries
-    let mut config =
-        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))
-            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])
-            .context("ring should support TLS1.2 and TLS1.3")?
-            .with_no_client_auth()
-            .with_cert_resolver(cert_resolver.clone());
-
-    config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()];
-
-    if allow_tls_keylogfile {
-        // KeyLogFile will check for the SSLKEYLOGFILE environment variable.
-        config.key_log = Arc::new(rustls::KeyLogFile::new());
-    }
-
-    Ok(TlsConfig {
-        config: Arc::new(config),
-        common_names,
-        cert_resolver,
-    })
-}
-
-/// Channel binding parameter
-///
-/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
-/// Description: The hash of the TLS server's certificate as it
-/// appears, octet for octet, in the server's Certificate message.  Note
-/// that the Certificate message contains a certificate_list, in which
-/// the first element is the server's certificate.
-///
-/// The hash function is to be selected as follows:
-///
-/// * if the certificate's signatureAlgorithm uses a single hash
-///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;
-///
-/// * if the certificate's signatureAlgorithm uses a single hash
-///   function and that hash function neither MD5 nor SHA-1, then use
-///   the hash function associated with the certificate's
-///   signatureAlgorithm;
-///
-/// * if the certificate's signatureAlgorithm uses no hash functions or
-///   uses multiple hash functions, then this channel binding type's
-///   channel bindings are undefined at this time (updates to is channel
-///   binding type may occur to address this issue if it ever arises).
-#[derive(Debug, Clone, Copy)]
-pub enum TlsServerEndPoint {
-    Sha256([u8; 32]),
-    Undefined,
-}
-
-impl TlsServerEndPoint {
-    pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result<Self> {
-        let sha256_oids = [
-            // I'm explicitly not adding MD5 or SHA1 here... They're bad.
-            oid_registry::OID_SIG_ECDSA_WITH_SHA256,
-            oid_registry::OID_PKCS1_SHA256WITHRSA,
-        ];
-
-        let pem = x509_parser::parse_x509_certificate(cert)
-            .context("Failed to parse PEM object from cerficiate")?
-            .1;
-
-        info!(subject = %pem.subject, "parsing TLS certificate");
-
-        let reg = oid_registry::OidRegistry::default().with_all_crypto();
-        let oid = pem.signature_algorithm.oid();
-        let alg = reg.get(oid);
-        if sha256_oids.contains(oid) {
-            let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
-            info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
-            Ok(Self::Sha256(tls_server_end_point))
-        } else {
-            error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
-            Ok(Self::Undefined)
-        }
-    }
-
-    pub fn supported(&self) -> bool {
-        !matches!(self, TlsServerEndPoint::Undefined)
-    }
-}
-
-#[derive(Default, Debug)]
-pub struct CertResolver {
-    certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
-    default: Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
-}
-
-impl CertResolver {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    fn add_cert_path(
-        &mut self,
-        key_path: &str,
-        cert_path: &str,
-        is_default: bool,
-    ) -> anyhow::Result<()> {
-        let priv_key = {
-            let key_bytes = std::fs::read(key_path)
-                .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?;
-            rustls_pemfile::private_key(&mut &key_bytes[..])
-                .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))?
-                .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))?
-        };
-
-        let cert_chain_bytes = std::fs::read(cert_path)
-            .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;
-
-        let cert_chain = {
-            rustls_pemfile::certs(&mut &cert_chain_bytes[..])
-                .try_collect()
-                .with_context(|| {
-                    format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.")
-                })?
-        };
-
-        self.add_cert(priv_key, cert_chain, is_default)
-    }
-
-    pub fn add_cert(
-        &mut self,
-        priv_key: PrivateKeyDer<'static>,
-        cert_chain: Vec<CertificateDer<'static>>,
-        is_default: bool,
-    ) -> anyhow::Result<()> {
-        let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
-
-        let first_cert = &cert_chain[0];
-        let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
-        let pem = x509_parser::parse_x509_certificate(first_cert)
-            .context("Failed to parse PEM object from cerficiate")?
-            .1;
-
-        let common_name = pem.subject().to_string();
-
-        // We need to get the canonical name for this certificate so we can match them against any domain names
-        // seen within the proxy codebase.
-        //
-        // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI.
-        // We need to remove the wildcard prefix for the purposes of certificate selection.
-        //
-        // auth-broker does not use SNI and instead uses the Neon-Connection-String header.
-        // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String.
-        //
-        // Console Redirect proxy does not use any wildcard domains and does not need any certificate selection or conn string
-        // validation, so let's we can continue with any common-name
-        let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") {
-            s.to_string()
-        } else if let Some(s) = common_name.strip_prefix("CN=apiauth.") {
-            s.to_string()
-        } else if let Some(s) = common_name.strip_prefix("CN=") {
-            s.to_string()
-        } else {
-            bail!("Failed to parse common name from certificate")
-        };
-
-        let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
-
-        if is_default {
-            self.default = Some((cert.clone(), tls_server_end_point));
-        }
-
-        self.certs.insert(common_name, (cert, tls_server_end_point));
-
-        Ok(())
-    }
-
-    pub fn get_common_names(&self) -> HashSet<String> {
-        self.certs.keys().map(|s| s.to_string()).collect()
-    }
-}
-
-impl rustls::server::ResolvesServerCert for CertResolver {
-    fn resolve(
-        &self,
-        client_hello: rustls::server::ClientHello<'_>,
-    ) -> Option<Arc<rustls::sign::CertifiedKey>> {
-        self.resolve(client_hello.server_name()).map(|x| x.0)
-    }
-}
-
-impl CertResolver {
-    pub fn resolve(
-        &self,
-        server_name: Option<&str>,
-    ) -> Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)> {
-        // loop here and cut off more and more subdomains until we find
-        // a match to get a proper wildcard support. OTOH, we now do not
-        // use nested domains, so keep this simple for now.
-        //
-        // With the current coding foo.com will match *.foo.com and that
-        // repeats behavior of the old code.
-        if let Some(mut sni_name) = server_name {
-            loop {
-                if let Some(cert) = self.certs.get(sni_name) {
-                    return Some(cert.clone());
-                }
-                if let Some((_, rest)) = sni_name.split_once('.') {
-                    sni_name = rest;
-                } else {
-                    return None;
-                }
-            }
-        } else {
-            // No SNI, use the default certificate, otherwise we can't get to
-            // options parameter which can be used to set endpoint name too.
-            // That means that non-SNI flow will not work for CNAME domains in
-            // verify-full mode.
-            //
-            // If that will be a problem we can:
-            //
-            // a) Instead of multi-cert approach use single cert with extra
-            //    domains listed in Subject Alternative Name (SAN).
-            // b) Deploy separate proxy instances for extra domains.
-            self.default.clone()
-        }
-    }
-}
-
 #[derive(Debug)]
 pub struct EndpointCacheConfig {
     /// Batch size to receive all endpoints on the startup.
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs
index c477822e85..25a549039c 100644
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -115,7 +115,7 @@ pub async fn task_main(
                 Ok(Some(p)) => {
                     ctx.set_success();
                     let _disconnect = ctx.log_connect();
-                    match p.proxy_pass().await {
+                    match p.proxy_pass(&config.connect_to_compute).await {
                         Ok(()) => {}
                         Err(ErrorSource::Client(e)) => {
                             error!(?session_id, "per-client task finished with an IO error from the client: {e:#}");
@@ -216,7 +216,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
         },
         &user_info,
         config.wake_compute_retry_config,
-        config.connect_to_compute_retry_config,
+        &config.connect_to_compute,
     )
     .or_else(|e| stream.throw_error(e))
     .await?;
diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs
index 0ca1a6aae0..c65041df0e 100644
--- a/proxy/src/control_plane/mod.rs
+++ b/proxy/src/control_plane/mod.rs
@@ -10,13 +10,13 @@ pub mod client;
 pub(crate) mod errors;
 
 use std::sync::Arc;
-use std::time::Duration;
 
 use crate::auth::backend::jwt::AuthRule;
 use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo};
 use crate::auth::IpPattern;
 use crate::cache::project_info::ProjectInfoCacheImpl;
 use crate::cache::{Cached, TimedLru};
+use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
 use crate::intern::ProjectIdInt;
@@ -73,9 +73,9 @@ impl NodeInfo {
     pub(crate) async fn connect(
         &self,
         ctx: &RequestContext,
-        timeout: Duration,
+        config: &ComputeConfig,
     ) -> Result<compute::PostgresConnection, compute::ConnectionError> {
-        self.config.connect(ctx, self.aux.clone(), timeout).await
+        self.config.connect(ctx, self.aux.clone(), config).await
     }
 
     pub(crate) fn reuse_settings(&mut self, other: Self) {
diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs
index a5a72f26d9..c56474edd7 100644
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -89,7 +89,6 @@ pub mod jemalloc;
 pub mod logging;
 pub mod metrics;
 pub mod parse;
-pub mod postgres_rustls;
 pub mod protocol2;
 pub mod proxy;
 pub mod rate_limiter;
@@ -99,6 +98,7 @@ pub mod scram;
 pub mod serverless;
 pub mod signals;
 pub mod stream;
+pub mod tls;
 pub mod types;
 pub mod url;
 pub mod usage_metrics;
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs
index 4a30d23985..8a80494860 100644
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -6,7 +6,7 @@ use tracing::{debug, info, warn};
 use super::retry::ShouldRetryWakeCompute;
 use crate::auth::backend::ComputeCredentialKeys;
 use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT};
-use crate::config::RetryConfig;
+use crate::config::{ComputeConfig, RetryConfig};
 use crate::context::RequestContext;
 use crate::control_plane::errors::WakeComputeError;
 use crate::control_plane::locks::ApiLocks;
@@ -19,8 +19,6 @@ use crate::proxy::retry::{retry_after, should_retry, CouldRetry};
 use crate::proxy::wake_compute::wake_compute;
 use crate::types::Host;
 
-const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
-
 /// If we couldn't connect, a cached connection info might be to blame
 /// (e.g. the compute node's address might've changed at the wrong time).
 /// Invalidate the cache entry (if any) to prevent subsequent errors.
@@ -49,7 +47,7 @@ pub(crate) trait ConnectMechanism {
         &self,
         ctx: &RequestContext,
         node_info: &control_plane::CachedNodeInfo,
-        timeout: time::Duration,
+        config: &ComputeConfig,
     ) -> Result<Self::Connection, Self::ConnectError>;
 
     fn update_connect_config(&self, conf: &mut compute::ConnCfg);
@@ -86,11 +84,11 @@ impl ConnectMechanism for TcpMechanism<'_> {
         &self,
         ctx: &RequestContext,
         node_info: &control_plane::CachedNodeInfo,
-        timeout: time::Duration,
+        config: &ComputeConfig,
     ) -> Result<PostgresConnection, Self::Error> {
         let host = node_info.config.get_host();
         let permit = self.locks.get_permit(&host).await?;
-        permit.release_result(node_info.connect(ctx, timeout).await)
+        permit.release_result(node_info.connect(ctx, config).await)
     }
 
     fn update_connect_config(&self, config: &mut compute::ConnCfg) {
@@ -105,7 +103,7 @@ pub(crate) async fn connect_to_compute<M: ConnectMechanism, B: ComputeConnectBac
     mechanism: &M,
     user_info: &B,
     wake_compute_retry_config: RetryConfig,
-    connect_to_compute_retry_config: RetryConfig,
+    compute: &ComputeConfig,
 ) -> Result<M::Connection, M::Error>
 where
     M::ConnectError: CouldRetry + ShouldRetryWakeCompute + std::fmt::Debug,
@@ -119,10 +117,7 @@ where
     mechanism.update_connect_config(&mut node_info.config);
 
     // try once
-    let err = match mechanism
-        .connect_once(ctx, &node_info, CONNECT_TIMEOUT)
-        .await
-    {
+    let err = match mechanism.connect_once(ctx, &node_info, compute).await {
         Ok(res) => {
             ctx.success();
             Metrics::get().proxy.retries_metric.observe(
@@ -142,7 +137,7 @@ where
     let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
         // If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
         // Do not need to retrieve a new node_info, just return the old one.
-        if should_retry(&err, num_retries, connect_to_compute_retry_config) {
+        if should_retry(&err, num_retries, compute.retry) {
             Metrics::get().proxy.retries_metric.observe(
                 RetriesMetricGroup {
                     outcome: ConnectOutcome::Failed,
@@ -172,10 +167,7 @@ where
     debug!("wake_compute success. attempting to connect");
     num_retries = 1;
     loop {
-        match mechanism
-            .connect_once(ctx, &node_info, CONNECT_TIMEOUT)
-            .await
-        {
+        match mechanism.connect_once(ctx, &node_info, compute).await {
             Ok(res) => {
                 ctx.success();
                 Metrics::get().proxy.retries_metric.observe(
@@ -190,7 +182,7 @@ where
                 return Ok(res);
             }
             Err(e) => {
-                if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
+                if !should_retry(&e, num_retries, compute.retry) {
                     // Don't log an error here, caller will print the error
                     Metrics::get().proxy.retries_metric.observe(
                         RetriesMetricGroup {
@@ -206,7 +198,7 @@ where
             }
         };
 
-        let wait_duration = retry_after(num_retries, connect_to_compute_retry_config);
+        let wait_duration = retry_after(num_retries, compute.retry);
         num_retries += 1;
 
         let pause = ctx.latency_timer_pause(crate::metrics::Waiting::RetryTimeout);
diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs
index e27c211932..955f754497 100644
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -8,12 +8,13 @@ use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};
 
 use crate::auth::endpoint_sni;
-use crate::config::{TlsConfig, PG_ALPN_PROTOCOL};
+use crate::config::TlsConfig;
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::Metrics;
 use crate::proxy::ERR_INSECURE_CONNECTION;
 use crate::stream::{PqStream, Stream, StreamUpgradeError};
+use crate::tls::PG_ALPN_PROTOCOL;
 
 #[derive(Error, Debug)]
 pub(crate) enum HandshakeError {
diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs
index dbe174cab7..3926c56fec 100644
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -152,7 +152,7 @@ pub async fn task_main(
                 Ok(Some(p)) => {
                     ctx.set_success();
                     let _disconnect = ctx.log_connect();
-                    match p.proxy_pass().await {
+                    match p.proxy_pass(&config.connect_to_compute).await {
                         Ok(()) => {}
                         Err(ErrorSource::Client(e)) => {
                             warn!(?session_id, "per-client task finished with an IO error from the client: {e:#}");
@@ -351,7 +351,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
         },
         &user_info,
         config.wake_compute_retry_config,
-        config.connect_to_compute_retry_config,
+        &config.connect_to_compute,
     )
     .or_else(|e| stream.throw_error(e))
     .await?;
diff --git a/proxy/src/proxy/passthrough.rs b/proxy/src/proxy/passthrough.rs
index dcaa81e5cd..a42f9aad39 100644
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -5,6 +5,7 @@ use utils::measured_stream::MeasuredStream;
 use super::copy_bidirectional::ErrorSource;
 use crate::cancellation;
 use crate::compute::PostgresConnection;
+use crate::config::ComputeConfig;
 use crate::control_plane::messages::MetricsAuxInfo;
 use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
 use crate::stream::Stream;
@@ -67,9 +68,17 @@ pub(crate) struct ProxyPassthrough<P, S> {
 }
 
 impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
-    pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {
+    pub(crate) async fn proxy_pass(
+        self,
+        compute_config: &ComputeConfig,
+    ) -> Result<(), ErrorSource> {
         let res = proxy_pass(self.client, self.compute.stream, self.aux).await;
-        if let Err(err) = self.compute.cancel_closure.try_cancel_query().await {
+        if let Err(err) = self
+            .compute
+            .cancel_closure
+            .try_cancel_query(compute_config)
+            .await
+        {
             tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
         }
         res
diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs
index 95c518fed9..10db2bcb30 100644
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -22,14 +22,16 @@ use super::*;
 use crate::auth::backend::{
     ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned,
 };
-use crate::config::{CertResolver, RetryConfig};
+use crate::config::{ComputeConfig, RetryConfig};
 use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
 use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
 use crate::control_plane::{
     self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, NodeInfoCache,
 };
 use crate::error::ErrorKind;
-use crate::postgres_rustls::MakeRustlsConnect;
+use crate::tls::client_config::compute_client_config_with_certs;
+use crate::tls::postgres_rustls::MakeRustlsConnect;
+use crate::tls::server_config::CertResolver;
 use crate::types::{BranchId, EndpointId, ProjectId};
 use crate::{sasl, scram};
 
@@ -67,7 +69,7 @@ fn generate_certs(
 }
 
 struct ClientConfig<'a> {
-    config: rustls::ClientConfig,
+    config: Arc<rustls::ClientConfig>,
     hostname: &'a str,
 }
 
@@ -110,16 +112,7 @@ fn generate_tls_config<'a>(
     };
 
     let client_config = {
-        let config =
-            rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
-                .with_safe_default_protocol_versions()
-                .context("ring should support the default protocol versions")?
-                .with_root_certificates({
-                    let mut store = rustls::RootCertStore::empty();
-                    store.add(ca)?;
-                    store
-                })
-                .with_no_client_auth();
+        let config = Arc::new(compute_client_config_with_certs([ca]));
 
         ClientConfig { config, hostname }
     };
@@ -468,7 +461,7 @@ impl ConnectMechanism for TestConnectMechanism {
         &self,
         _ctx: &RequestContext,
         _node_info: &control_plane::CachedNodeInfo,
-        _timeout: std::time::Duration,
+        _config: &ComputeConfig,
     ) -> Result<Self::Connection, Self::ConnectError> {
         let mut counter = self.counter.lock().unwrap();
         let action = self.sequence[*counter];
@@ -576,6 +569,20 @@ fn helper_create_connect_info(
     user_info
 }
 
+fn config() -> ComputeConfig {
+    let retry = RetryConfig {
+        base_delay: Duration::from_secs(1),
+        max_retries: 5,
+        backoff_factor: 2.0,
+    };
+
+    ComputeConfig {
+        retry,
+        tls: Arc::new(compute_client_config_with_certs(std::iter::empty())),
+        timeout: Duration::from_secs(2),
+    }
+}
+
 #[tokio::test]
 async fn connect_to_compute_success() {
     let _ = env_logger::try_init();
@@ -583,12 +590,8 @@ async fn connect_to_compute_success() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![Wake, Connect]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap();
     mechanism.verify();
@@ -601,12 +604,8 @@ async fn connect_to_compute_retry() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap();
     mechanism.verify();
@@ -620,12 +619,8 @@ async fn connect_to_compute_non_retry_1() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap_err();
     mechanism.verify();
@@ -639,12 +634,8 @@ async fn connect_to_compute_non_retry_2() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap();
     mechanism.verify();
@@ -665,17 +656,13 @@ async fn connect_to_compute_non_retry_3() {
         max_retries: 1,
         backoff_factor: 2.0,
     };
-    let connect_to_compute_retry_config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
+    let config = config();
     connect_to_compute(
         &ctx,
         &mechanism,
         &user_info,
         wake_compute_retry_config,
-        connect_to_compute_retry_config,
+        &config,
     )
     .await
     .unwrap_err();
@@ -690,12 +677,8 @@ async fn wake_retry() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap();
     mechanism.verify();
@@ -709,12 +692,8 @@ async fn wake_non_retry() {
     let ctx = RequestContext::test();
     let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]);
     let user_info = helper_create_connect_info(&mechanism);
-    let config = RetryConfig {
-        base_delay: Duration::from_secs(1),
-        max_retries: 5,
-        backoff_factor: 2.0,
-    };
-    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
+    let config = config();
+    connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
         .await
         .unwrap_err();
     mechanism.verify();
diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index d18dfd2465..80b93b6c4f 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -12,6 +12,7 @@ use uuid::Uuid;
 use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
 use crate::cache::project_info::ProjectInfoCache;
 use crate::cancellation::{CancelMap, CancellationHandler};
+use crate::config::ProxyConfig;
 use crate::intern::{ProjectIdInt, RoleNameInt};
 use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};
 
@@ -249,6 +250,7 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
 /// Handle console's invalidation messages.
 #[tracing::instrument(name = "redis_notifications", skip_all)]
 pub async fn task_main<C>(
+    config: &'static ProxyConfig,
     redis: ConnectionWithCredentialsProvider,
     cache: Arc<C>,
     cancel_map: CancelMap,
@@ -258,6 +260,7 @@ where
     C: ProjectInfoCache + Send + Sync + 'static,
 {
     let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
+        &config.connect_to_compute,
         cancel_map,
         crate::metrics::CancellationSource::FromRedis,
     ));
diff --git a/proxy/src/scram/exchange.rs b/proxy/src/scram/exchange.rs
index 6a13f645a5..77853db3db 100644
--- a/proxy/src/scram/exchange.rs
+++ b/proxy/src/scram/exchange.rs
@@ -13,7 +13,6 @@ use super::secret::ServerSecret;
 use super::signature::SignatureBuilder;
 use super::threadpool::ThreadPool;
 use super::ScramKey;
-use crate::config;
 use crate::intern::EndpointIdInt;
 use crate::sasl::{self, ChannelBinding, Error as SaslError};
 
@@ -59,14 +58,14 @@ enum ExchangeState {
 pub(crate) struct Exchange<'a> {
     state: ExchangeState,
     secret: &'a ServerSecret,
-    tls_server_end_point: config::TlsServerEndPoint,
+    tls_server_end_point: crate::tls::TlsServerEndPoint,
 }
 
 impl<'a> Exchange<'a> {
     pub(crate) fn new(
         secret: &'a ServerSecret,
         nonce: fn() -> [u8; SCRAM_RAW_NONCE_LEN],
-        tls_server_end_point: config::TlsServerEndPoint,
+        tls_server_end_point: crate::tls::TlsServerEndPoint,
     ) -> Self {
         Self {
             state: ExchangeState::Initial(SaslInitial { nonce }),
@@ -120,7 +119,7 @@ impl SaslInitial {
     fn transition(
         &self,
         secret: &ServerSecret,
-        tls_server_end_point: &config::TlsServerEndPoint,
+        tls_server_end_point: &crate::tls::TlsServerEndPoint,
         input: &str,
     ) -> sasl::Result<sasl::Step<SaslSentInner, Infallible>> {
         let client_first_message = ClientFirstMessage::parse(input)
@@ -155,7 +154,7 @@ impl SaslSentInner {
     fn transition(
         &self,
         secret: &ServerSecret,
-        tls_server_end_point: &config::TlsServerEndPoint,
+        tls_server_end_point: &crate::tls::TlsServerEndPoint,
         input: &str,
     ) -> sasl::Result<sasl::Step<Infallible, super::ScramKey>> {
         let Self {
@@ -168,8 +167,8 @@ impl SaslSentInner {
             .ok_or(SaslError::BadClientMessage("invalid client-final-message"))?;
 
         let channel_binding = cbind_flag.encode(|_| match tls_server_end_point {
-            config::TlsServerEndPoint::Sha256(x) => Ok(x),
-            config::TlsServerEndPoint::Undefined => Err(SaslError::MissingBinding),
+            crate::tls::TlsServerEndPoint::Sha256(x) => Ok(x),
+            crate::tls::TlsServerEndPoint::Undefined => Err(SaslError::MissingBinding),
         })?;
 
         // This might've been caused by a MITM attack
diff --git a/proxy/src/scram/mod.rs b/proxy/src/scram/mod.rs
index b49a9f32ee..cfa571cbe1 100644
--- a/proxy/src/scram/mod.rs
+++ b/proxy/src/scram/mod.rs
@@ -77,11 +77,8 @@ mod tests {
         const NONCE: [u8; 18] = [
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         ];
-        let mut exchange = Exchange::new(
-            &secret,
-            || NONCE,
-            crate::config::TlsServerEndPoint::Undefined,
-        );
+        let mut exchange =
+            Exchange::new(&secret, || NONCE, crate::tls::TlsServerEndPoint::Undefined);
 
         let client_first = "n,,n=user,r=rOprNGfwEbeRWgbNEkqO";
         let client_final = "c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0=";
diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs
index 449d50b6e7..b398c3ddd0 100644
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -22,7 +22,7 @@ use crate::compute;
 use crate::compute_ctl::{
     ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest,
 };
-use crate::config::ProxyConfig;
+use crate::config::{ComputeConfig, ProxyConfig};
 use crate::context::RequestContext;
 use crate::control_plane::client::ApiLockError;
 use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError};
@@ -196,7 +196,7 @@ impl PoolingBackend {
             },
             &backend,
             self.config.wake_compute_retry_config,
-            self.config.connect_to_compute_retry_config,
+            &self.config.connect_to_compute,
         )
         .await
     }
@@ -237,7 +237,7 @@ impl PoolingBackend {
             },
             &backend,
             self.config.wake_compute_retry_config,
-            self.config.connect_to_compute_retry_config,
+            &self.config.connect_to_compute,
         )
         .await
     }
@@ -502,7 +502,7 @@ impl ConnectMechanism for TokioMechanism {
         &self,
         ctx: &RequestContext,
         node_info: &CachedNodeInfo,
-        timeout: Duration,
+        compute_config: &ComputeConfig,
     ) -> Result<Self::Connection, Self::ConnectError> {
         let host = node_info.config.get_host();
         let permit = self.locks.get_permit(&host).await?;
@@ -511,7 +511,7 @@ impl ConnectMechanism for TokioMechanism {
         let config = config
             .user(&self.conn_info.user_info.user)
             .dbname(&self.conn_info.dbname)
-            .connect_timeout(timeout);
+            .connect_timeout(compute_config.timeout);
 
         let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
         let res = config.connect(postgres_client::NoTls).await;
@@ -552,7 +552,7 @@ impl ConnectMechanism for HyperMechanism {
         &self,
         ctx: &RequestContext,
         node_info: &CachedNodeInfo,
-        timeout: Duration,
+        config: &ComputeConfig,
     ) -> Result<Self::Connection, Self::ConnectError> {
         let host = node_info.config.get_host();
         let permit = self.locks.get_permit(&host).await?;
@@ -560,7 +560,7 @@ impl ConnectMechanism for HyperMechanism {
         let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
 
         let port = node_info.config.get_port();
-        let res = connect_http2(&host, port, timeout).await;
+        let res = connect_http2(&host, port, config.timeout).await;
         drop(pause);
         let (client, connection) = permit.release_result(res)?;
 
diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs
index 812fedaf04..47326c1181 100644
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -168,7 +168,7 @@ pub(crate) async fn serve_websocket(
         Ok(Some(p)) => {
             ctx.set_success();
             ctx.log_connect();
-            match p.proxy_pass().await {
+            match p.proxy_pass(&config.connect_to_compute).await {
                 Ok(()) => Ok(()),
                 Err(ErrorSource::Client(err)) => Err(err).context("client"),
                 Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs
index 11f426819d..ace27a7284 100644
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -11,9 +11,9 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
 use tokio_rustls::server::TlsStream;
 use tracing::debug;
 
-use crate::config::TlsServerEndPoint;
 use crate::error::{ErrorKind, ReportableError, UserFacingError};
 use crate::metrics::Metrics;
+use crate::tls::TlsServerEndPoint;
 
 /// Stream wrapper which implements libpq's protocol.
 ///
diff --git a/proxy/src/tls/client_config.rs b/proxy/src/tls/client_config.rs
new file mode 100644
index 0000000000..a2d695aae1
--- /dev/null
+++ b/proxy/src/tls/client_config.rs
@@ -0,0 +1,42 @@
+use std::sync::Arc;
+
+use anyhow::bail;
+use rustls::crypto::ring;
+
+pub(crate) fn load_certs() -> anyhow::Result<Arc<rustls::RootCertStore>> {
+    let der_certs = rustls_native_certs::load_native_certs();
+
+    if !der_certs.errors.is_empty() {
+        bail!("could not parse certificates: {:?}", der_certs.errors);
+    }
+
+    let mut store = rustls::RootCertStore::empty();
+    store.add_parsable_certificates(der_certs.certs);
+    Ok(Arc::new(store))
+}
+
+/// Loads the root certificates and constructs a client config suitable for connecting to the neon compute.
+/// This function is blocking.
+pub fn compute_client_config_with_root_certs() -> anyhow::Result<rustls::ClientConfig> {
+    Ok(
+        rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
+            .with_safe_default_protocol_versions()
+            .expect("ring should support the default protocol versions")
+            .with_root_certificates(load_certs()?)
+            .with_no_client_auth(),
+    )
+}
+
+#[cfg(test)]
+pub fn compute_client_config_with_certs(
+    certs: impl IntoIterator<Item = rustls::pki_types::CertificateDer<'static>>,
+) -> rustls::ClientConfig {
+    let mut store = rustls::RootCertStore::empty();
+    store.add_parsable_certificates(certs);
+
+    rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
+        .with_safe_default_protocol_versions()
+        .expect("ring should support the default protocol versions")
+        .with_root_certificates(store)
+        .with_no_client_auth()
+}
diff --git a/proxy/src/tls/mod.rs b/proxy/src/tls/mod.rs
new file mode 100644
index 0000000000..d6ce6bd9fc
--- /dev/null
+++ b/proxy/src/tls/mod.rs
@@ -0,0 +1,72 @@
+pub mod client_config;
+pub mod postgres_rustls;
+pub mod server_config;
+
+use anyhow::Context;
+use rustls::pki_types::CertificateDer;
+use sha2::{Digest, Sha256};
+use tracing::{error, info};
+use x509_parser::oid_registry;
+
+/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L159>
+pub const PG_ALPN_PROTOCOL: &[u8] = b"postgresql";
+
+/// Channel binding parameter
+///
+/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
+/// Description: The hash of the TLS server's certificate as it
+/// appears, octet for octet, in the server's Certificate message.  Note
+/// that the Certificate message contains a certificate_list, in which
+/// the first element is the server's certificate.
+///
+/// The hash function is to be selected as follows:
+///
+/// * if the certificate's signatureAlgorithm uses a single hash
+///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;
+///
+/// * if the certificate's signatureAlgorithm uses a single hash
+///   function and that hash function is neither MD5 nor SHA-1, then use
+///   the hash function associated with the certificate's
+///   signatureAlgorithm;
+///
+/// * if the certificate's signatureAlgorithm uses no hash functions or
+///   uses multiple hash functions, then this channel binding type's
+///   channel bindings are undefined at this time (updates to this channel
+///   binding type may occur to address this issue if it ever arises).
+#[derive(Debug, Clone, Copy)]
+pub enum TlsServerEndPoint {
+    Sha256([u8; 32]),
+    Undefined,
+}
+
+impl TlsServerEndPoint {
+    pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result<Self> {
+        let sha256_oids = [
+            // I'm explicitly not adding MD5 or SHA1 here... They're bad.
+            oid_registry::OID_SIG_ECDSA_WITH_SHA256,
+            oid_registry::OID_PKCS1_SHA256WITHRSA,
+        ];
+
+        let pem = x509_parser::parse_x509_certificate(cert)
+            .context("Failed to parse PEM object from cerficiate")?
+            .1;
+
+        info!(subject = %pem.subject, "parsing TLS certificate");
+
+        let reg = oid_registry::OidRegistry::default().with_all_crypto();
+        let oid = pem.signature_algorithm.oid();
+        let alg = reg.get(oid);
+        if sha256_oids.contains(oid) {
+            let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
+            info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
+            Ok(Self::Sha256(tls_server_end_point))
+        } else {
+            error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
+            Ok(Self::Undefined)
+        }
+    }
+
+    pub fn supported(&self) -> bool {
+        !matches!(self, TlsServerEndPoint::Undefined)
+    }
+}
diff --git a/proxy/src/postgres_rustls/mod.rs b/proxy/src/tls/postgres_rustls.rs
similarity index 96%
rename from proxy/src/postgres_rustls/mod.rs
rename to proxy/src/tls/postgres_rustls.rs
index 5ef20991c3..0ad279b635 100644
--- a/proxy/src/postgres_rustls/mod.rs
+++ b/proxy/src/tls/postgres_rustls.rs
@@ -18,7 +18,7 @@ mod private {
     use tokio_rustls::client::TlsStream;
     use tokio_rustls::TlsConnector;
 
-    use crate::config::TlsServerEndPoint;
+    use crate::tls::TlsServerEndPoint;
 
     pub struct TlsConnectFuture<S> {
         inner: tokio_rustls::Connect<S>,
@@ -126,16 +126,14 @@ mod private {
 /// That way you can connect to PostgreSQL using `rustls` as the TLS stack.
 #[derive(Clone)]
 pub struct MakeRustlsConnect {
-    config: Arc<ClientConfig>,
+    pub config: Arc<ClientConfig>,
 }
 
 impl MakeRustlsConnect {
     /// Creates a new `MakeRustlsConnect` from the provided `ClientConfig`.
     #[must_use]
-    pub fn new(config: ClientConfig) -> Self {
-        Self {
-            config: Arc::new(config),
-        }
+    pub fn new(config: Arc<ClientConfig>) -> Self {
+        Self { config }
     }
 }
 
diff --git a/proxy/src/tls/server_config.rs b/proxy/src/tls/server_config.rs
new file mode 100644
index 0000000000..2cc1657eea
--- /dev/null
+++ b/proxy/src/tls/server_config.rs
@@ -0,0 +1,218 @@
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use anyhow::{bail, Context};
+use itertools::Itertools;
+use rustls::crypto::ring::{self, sign};
+use rustls::pki_types::{CertificateDer, PrivateKeyDer};
+
+use super::{TlsServerEndPoint, PG_ALPN_PROTOCOL};
+
+pub struct TlsConfig {
+    pub config: Arc<rustls::ServerConfig>,
+    pub common_names: HashSet<String>,
+    pub cert_resolver: Arc<CertResolver>,
+}
+
+impl TlsConfig {
+    pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
+        self.config.clone()
+    }
+}
+
+/// Configure TLS for the main endpoint.
+pub fn configure_tls(
+    key_path: &str,
+    cert_path: &str,
+    certs_dir: Option<&String>,
+    allow_tls_keylogfile: bool,
+) -> anyhow::Result<TlsConfig> {
+    let mut cert_resolver = CertResolver::new();
+
+    // add default certificate
+    cert_resolver.add_cert_path(key_path, cert_path, true)?;
+
+    // add extra certificates
+    if let Some(certs_dir) = certs_dir {
+        for entry in std::fs::read_dir(certs_dir)? {
+            let entry = entry?;
+            let path = entry.path();
+            if path.is_dir() {
+                // file names aligned with default cert-manager names
+                let key_path = path.join("tls.key");
+                let cert_path = path.join("tls.crt");
+                if key_path.exists() && cert_path.exists() {
+                    cert_resolver.add_cert_path(
+                        &key_path.to_string_lossy(),
+                        &cert_path.to_string_lossy(),
+                        false,
+                    )?;
+                }
+            }
+        }
+    }
+
+    let common_names = cert_resolver.get_common_names();
+
+    let cert_resolver = Arc::new(cert_resolver);
+
+    // allow TLS 1.2 to be compatible with older client libraries
+    let mut config =
+        rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))
+            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])
+            .context("ring should support TLS1.2 and TLS1.3")?
+            .with_no_client_auth()
+            .with_cert_resolver(cert_resolver.clone());
+
+    config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()];
+
+    if allow_tls_keylogfile {
+        // KeyLogFile will check for the SSLKEYLOGFILE environment variable.
+        config.key_log = Arc::new(rustls::KeyLogFile::new());
+    }
+
+    Ok(TlsConfig {
+        config: Arc::new(config),
+        common_names,
+        cert_resolver,
+    })
+}
+
+#[derive(Default, Debug)]
+pub struct CertResolver {
+    certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
+    default: Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
+}
+
+impl CertResolver {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    fn add_cert_path(
+        &mut self,
+        key_path: &str,
+        cert_path: &str,
+        is_default: bool,
+    ) -> anyhow::Result<()> {
+        let priv_key = {
+            let key_bytes = std::fs::read(key_path)
+                .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?;
+            rustls_pemfile::private_key(&mut &key_bytes[..])
+                .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))?
+                .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))?
+        };
+
+        let cert_chain_bytes = std::fs::read(cert_path)
+            .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;
+
+        let cert_chain = {
+            rustls_pemfile::certs(&mut &cert_chain_bytes[..])
+                .try_collect()
+                .with_context(|| {
+                    format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.")
+                })?
+        };
+
+        self.add_cert(priv_key, cert_chain, is_default)
+    }
+
+    pub fn add_cert(
+        &mut self,
+        priv_key: PrivateKeyDer<'static>,
+        cert_chain: Vec<CertificateDer<'static>>,
+        is_default: bool,
+    ) -> anyhow::Result<()> {
+        let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
+
+        let first_cert = cert_chain.first().context("certificate chain is empty")?;
+        let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
+        let pem = x509_parser::parse_x509_certificate(first_cert)
+            .context("Failed to parse PEM object from cerficiate")?
+            .1;
+
+        let common_name = pem.subject().to_string();
+
+        // We need to get the canonical name for this certificate so we can match them against any domain names
+        // seen within the proxy codebase.
+        //
+        // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI.
+        // We need to remove the wildcard prefix for the purposes of certificate selection.
+        //
+        // auth-broker does not use SNI and instead uses the Neon-Connection-String header.
+        // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String.
+        //
+        // Console Redirect proxy does not use any wildcard domains and does not need any certificate selection or conn string
+        // validation, so we can continue with any common-name
+        let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") {
+            s.to_string()
+        } else if let Some(s) = common_name.strip_prefix("CN=apiauth.") {
+            s.to_string()
+        } else if let Some(s) = common_name.strip_prefix("CN=") {
+            s.to_string()
+        } else {
+            bail!("Failed to parse common name from certificate")
+        };
+
+        let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
+
+        if is_default {
+            self.default = Some((cert.clone(), tls_server_end_point));
+        }
+
+        self.certs.insert(common_name, (cert, tls_server_end_point));
+
+        Ok(())
+    }
+
+    pub fn get_common_names(&self) -> HashSet<String> {
+        self.certs.keys().map(|s| s.to_string()).collect()
+    }
+}
+
+impl rustls::server::ResolvesServerCert for CertResolver {
+    fn resolve(
+        &self,
+        client_hello: rustls::server::ClientHello<'_>,
+    ) -> Option<Arc<rustls::sign::CertifiedKey>> {
+        self.resolve(client_hello.server_name()).map(|x| x.0)
+    }
+}
+
+impl CertResolver {
+    pub fn resolve(
+        &self,
+        server_name: Option<&str>,
+    ) -> Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)> {
+        // loop here and cut off more and more subdomains until we find
+        // a match to get a proper wildcard support. OTOH, we now do not
+        // use nested domains, so keep this simple for now.
+        //
+        // With the current coding foo.com will match *.foo.com and that
+        // repeats behavior of the old code.
+        if let Some(mut sni_name) = server_name {
+            loop {
+                if let Some(cert) = self.certs.get(sni_name) {
+                    return Some(cert.clone());
+                }
+                if let Some((_, rest)) = sni_name.split_once('.') {
+                    sni_name = rest;
+                } else {
+                    return None;
+                }
+            }
+        } else {
+            // No SNI, use the default certificate, otherwise we can't get to
+            // options parameter which can be used to set endpoint name too.
+            // That means that non-SNI flow will not work for CNAME domains in
+            // verify-full mode.
+            //
+            // If that will be a problem we can:
+            //
+            // a) Instead of multi-cert approach use single cert with extra
+            //    domains listed in Subject Alternative Name (SAN).
+            // b) Deploy separate proxy instances for extra domains.
+            self.default.clone()
+        }
+    }
+}

From b3cd883f93e63d9dab6f3ac8e6e8e3f697c14c29 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Thu, 2 Jan 2025 14:28:15 +0300
Subject: [PATCH 24/63] Unlock LFC mutex when LFC cache is disabled (#10235)

## Problem

See https://github.com/neondatabase/neon/issues/10233
`lfc_cache_containsv` returns while still holding the lock when LFC is disabled.
This bug was introduced in commit  78938d1b59

## Summary of changes

Release lock before return.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 pgxn/neon/file_cache.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c
index f49415be68..ad5667cbab 100644
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -541,6 +541,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		}
 		else
 		{
+			LWLockRelease(lfc_lock);
 			return found;
 		}
 

From 26600f2973b9d9495889a347cdd21f2c3b7c99e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= <jc@neon.tech>
Date: Thu, 2 Jan 2025 12:33:42 +0100
Subject: [PATCH 25/63] Skip running clippy without default features (#10098)

## Problem

Running clippy with `cargo hack --feature-powerset` in CI isn't
particularly fast. This PR follows-up on
https://github.com/neondatabase/neon/pull/8912 to improve the speed of
our clippy runs.

Parallelism as suggested in
https://github.com/neondatabase/neon/issues/9901 was tested, but didn't
show consistent enough improvements to be worth it. It actually
increased the amount of work done, as there are fewer cache hits when
clippy runs are spread out over multiple target directories.
Additionally, parallelism makes it so caching needs to be thought about
more actively and copying around target directories to enable
parallelism eats the rest of the performance gains from parallel
execution.

After some discussion, the decision was to instead cut down on the
number of jobs that are running further. The easiest way to do this is
to not run clippy *without* default features. The list of default
features is empty for all crates, and I haven't found anything using
`cfg(feature = "default")` either, so this is likely not going to change
anything except speeding the runs up.

## Summary of changes

Reduce the amount of feature combinations tried by `cargo hack` (as
suggested in
https://github.com/neondatabase/neon/pull/8912#pullrequestreview-2286482368)
by never disabling default features.

## Alternatives

- We can split things out into different jobs which reduces the time
until everything is finished by running more things in parallel. This
does, however, decrease the number of cache hits and increase the amount
of time spent on overhead tasks like repo cloning and restoring caches
by doing those multiple times instead of once.
- We could replace `cargo hack [...] clippy` with `cargo clippy [...];
cargo clippy --features testing`. I'm not 100% sure how this compares to
the change here in the PR, but it does seem to run a bit faster. That
likely means it's doing less work, but without understanding what
exactly we lose by that I'd rather not do that for now. I'd appreciate
input on this though.
---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 55c4bf08b9..12b1ac98ac 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -212,7 +212,7 @@ jobs:
           fi
           echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
       - name: Run cargo clippy (debug)
-        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
+        run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
 
       - name: Check documentation generation
         run: cargo doc --workspace --no-deps --document-private-items

From ee22d4c9ef3d8dc59360e7675b0c36b5e5fd7be8 Mon Sep 17 00:00:00 2001
From: Folke Behrens <folke@neon.tech>
Date: Thu, 2 Jan 2025 14:32:24 +0100
Subject: [PATCH 26/63] proxy: Set TCP_NODELAY for compute connections (#10240)

neondatabase/cloud#19184
---
 proxy/src/compute.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index d60dfd0f80..89de6692ad 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -193,11 +193,15 @@ impl ConnCfg {
 
         let connect_once = |host, port| {
             debug!("trying to connect to compute node at {host}:{port}");
-            connect_with_timeout(host, port).and_then(|socket| async {
-                let socket_addr = socket.peer_addr()?;
+            connect_with_timeout(host, port).and_then(|stream| async {
+                let socket_addr = stream.peer_addr()?;
+                let socket = socket2::SockRef::from(&stream);
+                // Disable Nagle's algorithm to not introduce latency between
+                // client and compute.
+                socket.set_nodelay(true)?;
                 // This prevents load balancer from severing the connection.
-                socket2::SockRef::from(&socket).set_keepalive(true)?;
-                Ok((socket_addr, socket))
+                socket.set_keepalive(true)?;
+                Ok((socket_addr, stream))
             })
         };
 

From 8c7dcd259846646858b230a05fd3923361d8a717 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik <knizhnik@garret.ru>
Date: Thu, 2 Jan 2025 17:14:18 +0300
Subject: [PATCH 27/63] Set heartbeat interval for chaos test (#10222)

## Problem

See https://neondb.slack.com/archives/C033RQ5SPDH/p1734707873215729

test_timeline_archival_chaos becomes more flaky with increased heartbeat
interval

Resolves #10250.

## Summary of changes

Override the heartbeat interval for `test_timeline_archival_chaos`

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
---
 test_runner/regress/test_timeline_archive.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py
index 87579f9e92..9b3a48add9 100644
--- a/test_runner/regress/test_timeline_archive.py
+++ b/test_runner/regress/test_timeline_archive.py
@@ -398,6 +398,7 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder):
 
     # Offloading is off by default at time of writing: remove this line when it's on by default
     neon_env_builder.pageserver_config_override = "timeline_offloading = true"
+    neon_env_builder.storage_controller_config = {"heartbeat_interval": "100msec"}
     neon_env_builder.enable_pageserver_remote_storage(s3_storage())
 
     # We will exercise migrations, so need multiple pageservers

From 1622fd8bda096d1eefb81bf4970ee54542be4595 Mon Sep 17 00:00:00 2001
From: Raphael 'kena' Poss <knz@thaumogen.net>
Date: Thu, 2 Jan 2025 17:02:48 +0100
Subject: [PATCH 28/63] proxy: recognize but ignore the 3 new redis message
 types (#10197)

## Problem

https://neondb.slack.com/archives/C085MBDUSS2/p1734604792755369

## Summary of changes

Recognize and ignore the 3 new broadcast messages:
- `/block_public_or_vpc_access_updated`
- `/allowed_vpc_endpoints_updated_for_org`
- `/allowed_vpc_endpoints_updated_for_projects`
---
 proxy/src/redis/notifications.rs | 56 +++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index 80b93b6c4f..671305a300 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -40,6 +40,27 @@ pub(crate) enum Notification {
     AllowedIpsUpdate {
         allowed_ips_update: AllowedIpsUpdate,
     },
+    #[serde(
+        rename = "/block_public_or_vpc_access_updated",
+        deserialize_with = "deserialize_json_string"
+    )]
+    BlockPublicOrVpcAccessUpdated {
+        block_public_or_vpc_access_updated: BlockPublicOrVpcAccessUpdated,
+    },
+    #[serde(
+        rename = "/allowed_vpc_endpoints_updated_for_org",
+        deserialize_with = "deserialize_json_string"
+    )]
+    AllowedVpcEndpointsUpdatedForOrg {
+        allowed_vpc_endpoints_updated_for_org: AllowedVpcEndpointsUpdatedForOrg,
+    },
+    #[serde(
+        rename = "/allowed_vpc_endpoints_updated_for_projects",
+        deserialize_with = "deserialize_json_string"
+    )]
+    AllowedVpcEndpointsUpdatedForProjects {
+        allowed_vpc_endpoints_updated_for_projects: AllowedVpcEndpointsUpdatedForProjects,
+    },
     #[serde(
         rename = "/password_updated",
         deserialize_with = "deserialize_json_string"
@@ -52,6 +73,24 @@ pub(crate) enum Notification {
 pub(crate) struct AllowedIpsUpdate {
     project_id: ProjectIdInt,
 }
+
+#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
+pub(crate) struct BlockPublicOrVpcAccessUpdated {
+    project_id: ProjectIdInt,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
+pub(crate) struct AllowedVpcEndpointsUpdatedForOrg {
+    // TODO: change type once the implementation is more fully fledged.
+    // See e.g. https://github.com/neondatabase/neon/pull/10073.
+    account_id: ProjectIdInt,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
+pub(crate) struct AllowedVpcEndpointsUpdatedForProjects {
+    project_ids: Vec<ProjectIdInt>,
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub(crate) struct PasswordUpdate {
     project_id: ProjectIdInt,
@@ -165,7 +204,11 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                     }
                 }
             }
-            Notification::AllowedIpsUpdate { .. } | Notification::PasswordUpdate { .. } => {
+            Notification::AllowedIpsUpdate { .. }
+            | Notification::PasswordUpdate { .. }
+            | Notification::BlockPublicOrVpcAccessUpdated { .. }
+            | Notification::AllowedVpcEndpointsUpdatedForOrg { .. }
+            | Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
                 invalidate_cache(self.cache.clone(), msg.clone());
                 if matches!(msg, Notification::AllowedIpsUpdate { .. }) {
                     Metrics::get()
@@ -178,6 +221,8 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                         .redis_events_count
                         .inc(RedisEventsCount::PasswordUpdate);
                 }
+                // TODO: add additional metrics for the other event types.
+
                 // It might happen that the invalid entry is on the way to be cached.
                 // To make sure that the entry is invalidated, let's repeat the invalidation in INVALIDATION_LAG seconds.
                 // TODO: include the version (or the timestamp) in the message and invalidate only if the entry is cached before the message.
@@ -204,6 +249,15 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
                 password_update.role_name,
             ),
         Notification::Cancel(_) => unreachable!("cancel message should be handled separately"),
+        Notification::BlockPublicOrVpcAccessUpdated { .. } => {
+            // https://github.com/neondatabase/neon/pull/10073
+        }
+        Notification::AllowedVpcEndpointsUpdatedForOrg { .. } => {
+            // https://github.com/neondatabase/neon/pull/10073
+        }
+        Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
+            // https://github.com/neondatabase/neon/pull/10073
+        }
     }
 }
 

From 56e6ebfe172ce0c7fb0faf9a3e64e8e2b3902b37 Mon Sep 17 00:00:00 2001
From: Conrad Ludgate <conrad@neon.tech>
Date: Thu, 2 Jan 2025 16:05:14 +0000
Subject: [PATCH 29/63] chore: building compute_tools and local_proxy together
 (#10257)

## Problem

Building local_proxy and compute_tools features the same dependency
tree, but as they are currently built in separate clean layers all that
progress is wasted. For our arm builds that's an extra 10 minutes.

## Summary of changes

Combines the compute_tools and local_proxy build layers.
---
 compute/compute-node.Dockerfile | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile
index 5e7b4e8287..06aaf9e7f4 100644
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1285,7 +1285,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
 
 #########################################################################################
 #
-# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
+# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
 #
 #########################################################################################
 FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
@@ -1295,7 +1295,7 @@ ENV BUILD_TAG=$BUILD_TAG
 USER nonroot
 # Copy entire project to get Cargo.* files with proper dependencies for the whole project
 COPY --chown=nonroot . .
-RUN cd compute_tools && mold -run cargo build --locked --profile release-line-debug-size-lto
+RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy
 
 #########################################################################################
 #
@@ -1338,20 +1338,6 @@ RUN set -e \
     && make -j $(nproc) dist_man_MANS= \
     && make install dist_man_MANS=
 
-#########################################################################################
-#
-# Compile the Neon-specific `local_proxy` binary
-#
-#########################################################################################
-FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
-ARG BUILD_TAG
-ENV BUILD_TAG=$BUILD_TAG
-
-USER nonroot
-# Copy entire project to get Cargo.* files with proper dependencies for the whole project
-COPY --chown=nonroot . .
-RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
-
 #########################################################################################
 #
 # Layers "postgres-exporter" and "sql-exporter"
@@ -1491,7 +1477,7 @@ COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/
 COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
 
 # local_proxy and its config
-COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
+COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
 RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
 
 # Metrics exporter binaries and  configuration files

From 363ea97f691cbf9026309c08239300a0d351018d Mon Sep 17 00:00:00 2001
From: Tristan Partin <tristan@neon.tech>
Date: Thu, 2 Jan 2025 12:37:50 -0600
Subject: [PATCH 30/63] Add more substantial tests for compute migrations
 (#9811)

The previous tests really didn't do much. This set should be quite a bit
more encompassing.

Signed-off-by: Tristan Partin <tristan@neon.tech>
---
 Cargo.lock                                    |  1 +
 compute_tools/Cargo.toml                      |  3 +-
 compute_tools/src/bin/compute_ctl.rs          |  5 ++
 compute_tools/src/compute.rs                  | 15 +++-
 compute_tools/src/http/api.rs                 | 15 ++++
 compute_tools/src/migration.rs                | 41 ++++++++-
 .../tests/0001-neon_superuser_bypass_rls.sql  |  9 ++
 .../src/migrations/tests/0002-alter_roles.sql | 25 ++++++
 ..._create_subscription_to_neon_superuser.sql | 10 +++
 ...004-grant_pg_monitor_to_neon_superuser.sql | 19 ++++
 ...-grant_all_on_tables_to_neon_superuser.sql |  2 +
 ...ant_all_on_sequences_to_neon_superuser.sql |  2 +
 ...es_to_neon_superuser_with_grant_option.sql |  2 +
 ...es_to_neon_superuser_with_grant_option.sql |  2 +
 ...plication_for_previously_allowed_roles.sql |  2 +
 ...ynchronization_funcs_to_neon_superuser.sql | 13 +++
 ...cation_origin_status_to_neon_superuser.sql | 13 +++
 libs/utils/src/failpoint_support.rs           |  4 +-
 test_runner/conftest.py                       |  1 +
 test_runner/fixtures/compute_migrations.py    | 34 +++++++
 test_runner/fixtures/endpoint/http.py         | 14 +++
 test_runner/fixtures/neon_cli.py              |  5 +-
 test_runner/fixtures/neon_fixtures.py         | 12 ++-
 test_runner/fixtures/paths.py                 |  2 +-
 .../regress/test_compute_migrations.py        | 90 +++++++++++++++++++
 test_runner/regress/test_migrations.py        | 33 -------
 26 files changed, 327 insertions(+), 47 deletions(-)
 create mode 100644 compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
 create mode 100644 compute_tools/src/migrations/tests/0002-alter_roles.sql
 create mode 100644 compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
 create mode 100644 compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
 create mode 100644 compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
 create mode 100644 compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
 create mode 100644 compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
 create mode 100644 compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
 create mode 100644 compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
 create mode 100644 compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
 create mode 100644 compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
 create mode 100644 test_runner/fixtures/compute_migrations.py
 create mode 100644 test_runner/regress/test_compute_migrations.py
 delete mode 100644 test_runner/regress/test_migrations.py

diff --git a/Cargo.lock b/Cargo.lock
index d9ac167042..420def152d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1274,6 +1274,7 @@ dependencies = [
  "chrono",
  "clap",
  "compute_api",
+ "fail",
  "flate2",
  "futures",
  "hyper 0.14.30",
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index c0c390caef..9525b27818 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 [features]
 default = []
 # Enables test specific features.
-testing = []
+testing = ["fail/failpoints"]
 
 [dependencies]
 base64.workspace = true
@@ -19,6 +19,7 @@ camino.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
 clap.workspace = true
+fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
 hyper0 = { workspace = true, features = ["full"] }
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index bb248734a8..95ade9a87d 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -67,12 +67,15 @@ use compute_tools::params::*;
 use compute_tools::spec::*;
 use compute_tools::swap::resize_swap;
 use rlimit::{setrlimit, Resource};
+use utils::failpoint_support;
 
 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
 const BUILD_TAG_DEFAULT: &str = "latest";
 
 fn main() -> Result<()> {
+    let scenario = failpoint_support::init();
+
     let (build_tag, clap_args) = init()?;
 
     // enable core dumping for all child processes
@@ -100,6 +103,8 @@ fn main() -> Result<()> {
 
     maybe_delay_exit(delay_exit);
 
+    scenario.teardown();
+
     deinit_and_exit(wait_pg_result);
 }
 
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index d72a04f2f9..78f6033429 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1181,8 +1181,19 @@ impl ComputeNode {
             let mut conf = postgres::config::Config::from(conf);
             conf.application_name("compute_ctl:migrations");
 
-            let mut client = conf.connect(NoTls)?;
-            handle_migrations(&mut client).context("apply_config handle_migrations")
+            match conf.connect(NoTls) {
+                Ok(mut client) => {
+                    if let Err(e) = handle_migrations(&mut client) {
+                        error!("Failed to run migrations: {}", e);
+                    }
+                }
+                Err(e) => {
+                    error!(
+                        "Failed to connect to the compute for running migrations: {}",
+                        e
+                    );
+                }
+            };
         });
 
         Ok::<(), anyhow::Error>(())
diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs
index 7fa6426d8f..a4b1a63e6d 100644
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -24,8 +24,11 @@ use metrics::proto::MetricFamily;
 use metrics::Encoder;
 use metrics::TextEncoder;
 use tokio::task;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 use tracing_utils::http::OtelName;
+use utils::failpoint_support::failpoints_handler;
+use utils::http::error::ApiError;
 use utils::http::request::must_get_query_param;
 
 fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
@@ -310,6 +313,18 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
             }
         }
 
+        (&Method::POST, "/failpoints") if cfg!(feature = "testing") => {
+            match failpoints_handler(req, CancellationToken::new()).await {
+                Ok(r) => r,
+                Err(ApiError::BadRequest(e)) => {
+                    render_json_error(&e.to_string(), StatusCode::BAD_REQUEST)
+                }
+                Err(_) => {
+                    render_json_error("Internal server error", StatusCode::INTERNAL_SERVER_ERROR)
+                }
+            }
+        }
+
         // download extension files from remote extension storage on demand
         (&Method::POST, route) if route.starts_with("/extension_server/") => {
             info!("serving {:?} POST request", route);
diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs
index 22ab145eda..07d738abe9 100644
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -1,13 +1,16 @@
 use anyhow::{Context, Result};
+use fail::fail_point;
 use postgres::Client;
 use tracing::info;
 
+/// Runs a series of migrations on a target database
 pub(crate) struct MigrationRunner<'m> {
     client: &'m mut Client,
     migrations: &'m [&'m str],
 }
 
 impl<'m> MigrationRunner<'m> {
+    /// Create a new migration runner
     pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
         // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
         assert!(migrations.len() + 1 < i64::MAX as usize);
@@ -15,6 +18,7 @@ impl<'m> MigrationRunner<'m> {
         Self { client, migrations }
     }
 
+    /// Get the current value of neon_migration.migration_id
     fn get_migration_id(&mut self) -> Result<i64> {
         let query = "SELECT id FROM neon_migration.migration_id";
         let row = self
@@ -25,9 +29,34 @@ impl<'m> MigrationRunner<'m> {
         Ok(row.get::<&str, i64>("id"))
     }
 
+    /// Update the neon_migration.migration_id value
+    ///
+    /// This function has a fail point called compute-migration, which can be
+    /// used if you would like to fail the application of a series of migrations
+    /// at some point.
     fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
         let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
 
+        // We use this fail point in order to check that failing in the
+        // middle of applying a series of migrations fails in an expected
+        // manner
+        if cfg!(feature = "testing") {
+            let fail = (|| {
+                fail_point!("compute-migration", |fail_migration_id| {
+                    migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()
+                });
+
+                false
+            })();
+
+            if fail {
+                return Err(anyhow::anyhow!(format!(
+                    "migration {} was configured to fail because of a failpoint",
+                    migration_id
+                )));
+            }
+        }
+
         self.client
             .simple_query(&setval)
             .context("run_migrations update id")?;
@@ -35,7 +64,8 @@ impl<'m> MigrationRunner<'m> {
         Ok(())
     }
 
-    fn prepare_migrations(&mut self) -> Result<()> {
+    /// Prepare the migrations the target database for handling migrations
+    fn prepare_database(&mut self) -> Result<()> {
         let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
         self.client.simple_query(query)?;
 
@@ -54,8 +84,9 @@ impl<'m> MigrationRunner<'m> {
         Ok(())
     }
 
+    /// Run the configured set of migrations
     pub fn run_migrations(mut self) -> Result<()> {
-        self.prepare_migrations()?;
+        self.prepare_database()?;
 
         let mut current_migration = self.get_migration_id()? as usize;
         while current_migration < self.migrations.len() {
@@ -69,6 +100,11 @@ impl<'m> MigrationRunner<'m> {
 
             if migration.starts_with("-- SKIP") {
                 info!("Skipping migration id={}", migration_id!(current_migration));
+
+                // Even though we are skipping the migration, updating the
+                // migration ID should help keep logic easy to understand when
+                // trying to understand the state of a cluster.
+                self.update_migration_id(migration_id!(current_migration))?;
             } else {
                 info!(
                     "Running migration id={}:\n{}\n",
@@ -87,7 +123,6 @@ impl<'m> MigrationRunner<'m> {
                     )
                 })?;
 
-                // Migration IDs start at 1
                 self.update_migration_id(migration_id!(current_migration))?;
 
                 self.client
diff --git a/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
new file mode 100644
index 0000000000..0c81cef1c4
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
@@ -0,0 +1,9 @@
+DO $$
+DECLARE
+    bypassrls boolean;
+BEGIN
+    SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser';
+    IF NOT bypassrls THEN
+        RAISE EXCEPTION 'neon_superuser cannot bypass RLS';
+    END IF;
+END $$;
diff --git a/compute_tools/src/migrations/tests/0002-alter_roles.sql b/compute_tools/src/migrations/tests/0002-alter_roles.sql
new file mode 100644
index 0000000000..433f7b34f7
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0002-alter_roles.sql
@@ -0,0 +1,25 @@
+DO $$
+DECLARE
+    role record;
+BEGIN
+    FOR role IN
+        SELECT rolname AS name, rolinherit AS inherit
+        FROM pg_roles
+        WHERE pg_has_role(rolname, 'neon_superuser', 'member')
+    LOOP
+        IF NOT role.inherit THEN
+            RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);
+        END IF;
+    END LOOP;
+
+    FOR role IN
+        SELECT rolname AS name, rolbypassrls AS bypassrls
+        FROM pg_roles
+        WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member')
+            AND NOT starts_with(rolname, 'pg_')
+    LOOP
+        IF role.bypassrls THEN
+            RAISE EXCEPTION  '% can bypass RLS', quote_ident(role.name);
+        END IF;
+    END LOOP;
+END $$;
diff --git a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
new file mode 100644
index 0000000000..b164d61295
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
@@ -0,0 +1,10 @@
+DO $$
+BEGIN
+    IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN
+        RETURN;
+    END IF;
+
+    IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';
+    END IF;
+END $$;
diff --git a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
new file mode 100644
index 0000000000..acb8dd417d
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -0,0 +1,19 @@
+DO $$
+DECLARE
+    monitor record;
+BEGIN
+    SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
+            admin_option AS admin
+        INTO monitor
+        FROM pg_auth_members
+        WHERE roleid = 'pg_monitor'::regrole
+            AND member = 'neon_superuser'::regrole;
+
+    IF monitor.member IS NOT TRUE THEN
+        RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
+    END IF;
+
+    IF monitor.admin IS NOT TRUE THEN
+        RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
+    END IF;
+END $$;
diff --git a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
new file mode 100644
index 0000000000..f99101bd65
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
@@ -0,0 +1,2 @@
+-- This test was never written because at the time migration tests were added
+-- the accompanying migration was already skipped.
diff --git a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
new file mode 100644
index 0000000000..f99101bd65
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
@@ -0,0 +1,2 @@
+-- This test was never written because at the time migration tests were added
+-- the accompanying migration was already skipped.
diff --git a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
new file mode 100644
index 0000000000..f99101bd65
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
@@ -0,0 +1,2 @@
+-- This test was never written because at the time migration tests were added
+-- the accompanying migration was already skipped.
diff --git a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
new file mode 100644
index 0000000000..f99101bd65
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
@@ -0,0 +1,2 @@
+-- This test was never written because at the time migration tests were added
+-- the accompanying migration was already skipped.
diff --git a/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
new file mode 100644
index 0000000000..f99101bd65
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
@@ -0,0 +1,2 @@
+-- This test was never written because at the time migration tests were added
+-- the accompanying migration was already skipped.
diff --git a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
new file mode 100644
index 0000000000..af7f50e95d
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
@@ -0,0 +1,13 @@
+DO $$
+DECLARE
+    can_execute boolean;
+BEGIN
+    SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute'))
+       INTO can_execute
+       FROM pg_proc
+       WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')
+           AND pronamespace = 'pg_catalog'::regnamespace;
+    IF NOT can_execute THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';
+    END IF;
+END $$;
diff --git a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
new file mode 100644
index 0000000000..e55dcdc3b6
--- /dev/null
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -0,0 +1,13 @@
+DO $$
+DECLARE
+    can_execute boolean;
+BEGIN
+    SELECT has_function_privilege('neon_superuser', oid, 'execute')
+       INTO can_execute
+       FROM pg_proc
+       WHERE proname = 'pg_show_replication_origin_status'
+           AND pronamespace = 'pg_catalog'::regnamespace;
+    IF NOT can_execute THEN
+        RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status';
+    END IF;
+END $$;
diff --git a/libs/utils/src/failpoint_support.rs b/libs/utils/src/failpoint_support.rs
index 870684b399..701ba2d42c 100644
--- a/libs/utils/src/failpoint_support.rs
+++ b/libs/utils/src/failpoint_support.rs
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 
-/// Declare a failpoint that can use the `pause` failpoint action.
+/// Declare a failpoint that supports the `pause` failpoint action.
 /// We don't want to block the executor thread, hence, spawn_blocking + await.
 #[macro_export]
 macro_rules! pausable_failpoint {
@@ -181,7 +181,7 @@ pub async fn failpoints_handler(
 ) -> Result<Response<Body>, ApiError> {
     if !fail::has_failpoints() {
         return Err(ApiError::BadRequest(anyhow::anyhow!(
-            "Cannot manage failpoints because storage was compiled without failpoints support"
+            "Cannot manage failpoints because neon was compiled without failpoints support"
         )));
     }
 
diff --git a/test_runner/conftest.py b/test_runner/conftest.py
index 887bfef478..9e32469d69 100644
--- a/test_runner/conftest.py
+++ b/test_runner/conftest.py
@@ -8,6 +8,7 @@ pytest_plugins = (
     "fixtures.compute_reconfigure",
     "fixtures.storage_controller_proxy",
     "fixtures.paths",
+    "fixtures.compute_migrations",
     "fixtures.neon_fixtures",
     "fixtures.benchmark_fixture",
     "fixtures.pg_stats",
diff --git a/test_runner/fixtures/compute_migrations.py b/test_runner/fixtures/compute_migrations.py
new file mode 100644
index 0000000000..ea99785af0
--- /dev/null
+++ b/test_runner/fixtures/compute_migrations.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING
+
+import pytest
+
+from fixtures.paths import BASE_DIR
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from pathlib import Path
+
+COMPUTE_MIGRATIONS_DIR = BASE_DIR / "compute_tools" / "src" / "migrations"
+COMPUTE_MIGRATIONS_TEST_DIR = COMPUTE_MIGRATIONS_DIR / "tests"
+
+COMPUTE_MIGRATIONS = sorted(next(os.walk(COMPUTE_MIGRATIONS_DIR))[2])
+NUM_COMPUTE_MIGRATIONS = len(COMPUTE_MIGRATIONS)
+
+
+@pytest.fixture(scope="session")
+def compute_migrations_dir() -> Iterator[Path]:
+    """
+    Retrieve the path to the compute migrations directory.
+    """
+    yield COMPUTE_MIGRATIONS_DIR
+
+
+@pytest.fixture(scope="session")
+def compute_migrations_test_dir() -> Iterator[Path]:
+    """
+    Retrieve the path to the compute migrations test directory.
+    """
+    yield COMPUTE_MIGRATIONS_TEST_DIR
diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py
index 1cd9158c68..aa0d95fe80 100644
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -55,3 +55,17 @@ class EndpointHttpClient(requests.Session):
         res = self.get(f"http://localhost:{self.port}/metrics")
         res.raise_for_status()
         return res.text
+
+    def configure_failpoints(self, *args: tuple[str, str]) -> None:
+        body: list[dict[str, str]] = []
+
+        for fp in args:
+            body.append(
+                {
+                    "name": fp[0],
+                    "action": fp[1],
+                }
+            )
+
+        res = self.post(f"http://localhost:{self.port}/failpoints", json=body)
+        res.raise_for_status()
diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py
index a85a191455..adbd6414a7 100644
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -522,14 +522,15 @@ class NeonLocalCli(AbstractNeonCli):
         safekeepers: list[int] | None = None,
         remote_ext_config: str | None = None,
         pageserver_id: int | None = None,
-        allow_multiple=False,
+        allow_multiple: bool = False,
         basebackup_request_tries: int | None = None,
+        env: dict[str, str] | None = None,
     ) -> subprocess.CompletedProcess[str]:
         args = [
             "endpoint",
             "start",
         ]
-        extra_env_vars = {}
+        extra_env_vars = env or {}
         if basebackup_request_tries is not None:
             extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
         if remote_ext_config is not None:
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 9f78ad120b..a0c642163d 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -54,6 +54,7 @@ from fixtures.common_types import (
     TimelineArchivalState,
     TimelineId,
 )
+from fixtures.compute_migrations import NUM_COMPUTE_MIGRATIONS
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.h2server import H2Server
 from fixtures.log_helper import log
@@ -3855,6 +3856,7 @@ class Endpoint(PgProtocol, LogUtils):
         safekeepers: list[int] | None = None,
         allow_multiple: bool = False,
         basebackup_request_tries: int | None = None,
+        env: dict[str, str] | None = None,
     ) -> Self:
         """
         Start the Postgres instance.
@@ -3875,6 +3877,7 @@ class Endpoint(PgProtocol, LogUtils):
             pageserver_id=pageserver_id,
             allow_multiple=allow_multiple,
             basebackup_request_tries=basebackup_request_tries,
+            env=env,
         )
         self._running.release(1)
         self.log_config_value("shared_buffers")
@@ -3988,14 +3991,17 @@ class Endpoint(PgProtocol, LogUtils):
             log.info("Updating compute spec to: %s", json.dumps(data_dict, indent=4))
             json.dump(data_dict, file, indent=4)
 
-    # Please note: Migrations only run if pg_skip_catalog_updates is false
-    def wait_for_migrations(self, num_migrations: int = 11):
+    def wait_for_migrations(self, wait_for: int = NUM_COMPUTE_MIGRATIONS) -> None:
+        """
+        Wait for all compute migrations to be run. Remember that migrations only
+        run if "pg_skip_catalog_updates" is set in the compute spec to false.
+        """
         with self.cursor() as cur:
 
             def check_migrations_done():
                 cur.execute("SELECT id FROM neon_migration.migration_id")
                 migration_id: int = cur.fetchall()[0][0]
-                assert migration_id >= num_migrations
+                assert migration_id >= wait_for
 
             wait_until(check_migrations_done)
 
diff --git a/test_runner/fixtures/paths.py b/test_runner/fixtures/paths.py
index 80777d65e9..fc4fb3629b 100644
--- a/test_runner/fixtures/paths.py
+++ b/test_runner/fixtures/paths.py
@@ -21,8 +21,8 @@ if TYPE_CHECKING:
 
 
 BASE_DIR = Path(__file__).parents[2]
-COMPUTE_CONFIG_DIR = BASE_DIR / "compute" / "etc"
 DEFAULT_OUTPUT_DIR: str = "test_output"
+COMPUTE_CONFIG_DIR = BASE_DIR / "compute" / "etc"
 
 
 def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | None = None) -> Path:
diff --git a/test_runner/regress/test_compute_migrations.py b/test_runner/regress/test_compute_migrations.py
new file mode 100644
index 0000000000..803702a6f8
--- /dev/null
+++ b/test_runner/regress/test_compute_migrations.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, cast
+
+import pytest
+from fixtures.compute_migrations import COMPUTE_MIGRATIONS, NUM_COMPUTE_MIGRATIONS
+
+if TYPE_CHECKING:
+    from fixtures.neon_fixtures import NeonEnv
+
+
+def test_compute_migrations_retry(neon_simple_env: NeonEnv, compute_migrations_dir: Path):
+    """
+    Test that compute_ctl can recover from migration failures next time it
+    starts, and that the persisted migration ID is correct in such cases.
+    """
+    env = neon_simple_env
+
+    endpoint = env.endpoints.create("main")
+    endpoint.respec(skip_pg_catalog_updates=False)
+
+    for i in range(1, NUM_COMPUTE_MIGRATIONS + 1):
+        endpoint.start(env={"FAILPOINTS": f"compute-migration=return({i})"})
+
+        # Make sure that the migrations ran
+        endpoint.wait_for_migrations(wait_for=i - 1)
+
+        # Confirm that we correctly recorded that in the
+        # neon_migration.migration_id table
+        with endpoint.cursor() as cur:
+            cur.execute("SELECT id FROM neon_migration.migration_id")
+            migration_id = cast("int", cur.fetchall()[0][0])
+            assert migration_id == i - 1
+
+        endpoint.stop()
+
+    endpoint.start()
+
+    # Now wait for the rest of the migrations
+    endpoint.wait_for_migrations()
+
+    with endpoint.cursor() as cur:
+        cur.execute("SELECT id FROM neon_migration.migration_id")
+        migration_id = cast("int", cur.fetchall()[0][0])
+        assert migration_id == NUM_COMPUTE_MIGRATIONS
+
+    for i, m in enumerate(COMPUTE_MIGRATIONS, start=1):
+        migration_query = (compute_migrations_dir / m).read_text(encoding="utf-8")
+        if migration_query.startswith("-- SKIP"):
+            pattern = rf"Skipping migration id={i}"
+        else:
+            pattern = rf"Running migration id={i}"
+
+        endpoint.log_contains(pattern)
+
+
+@pytest.mark.parametrize(
+    "migration",
+    (pytest.param((i, m), id=str(i)) for i, m in enumerate(COMPUTE_MIGRATIONS, start=1)),
+)
+def test_compute_migrations_e2e(
+    neon_simple_env: NeonEnv,
+    compute_migrations_dir: Path,
+    compute_migrations_test_dir: Path,
+    migration: tuple[int, str],
+):
+    """
+    Test that the migrations perform as advertised.
+    """
+    env = neon_simple_env
+
+    migration_id = migration[0]
+    migration_filename = migration[1]
+
+    migration_query = (compute_migrations_dir / migration_filename).read_text(encoding="utf-8")
+    if migration_query.startswith("-- SKIP"):
+        pytest.skip("The migration is marked as SKIP")
+
+    endpoint = env.endpoints.create("main")
+    endpoint.respec(skip_pg_catalog_updates=False)
+
+    # Stop applying migrations after the one we want to test, so that we can
+    # test the state of the cluster at the given migration ID
+    endpoint.start(env={"FAILPOINTS": f"compute-migration=return({migration_id + 1})"})
+
+    endpoint.wait_for_migrations(wait_for=migration_id)
+
+    check_query = (compute_migrations_test_dir / migration_filename).read_text(encoding="utf-8")
+    endpoint.safe_psql(check_query)
diff --git a/test_runner/regress/test_migrations.py b/test_runner/regress/test_migrations.py
deleted file mode 100644
index 7211619a99..0000000000
--- a/test_runner/regress/test_migrations.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from __future__ import annotations
-
-import time
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from fixtures.neon_fixtures import NeonEnv
-
-
-def test_migrations(neon_simple_env: NeonEnv):
-    env = neon_simple_env
-
-    endpoint = env.endpoints.create("main")
-    endpoint.respec(skip_pg_catalog_updates=False)
-    endpoint.start()
-
-    num_migrations = 11
-    endpoint.wait_for_migrations(num_migrations=num_migrations)
-
-    with endpoint.cursor() as cur:
-        cur.execute("SELECT id FROM neon_migration.migration_id")
-        migration_id = cur.fetchall()
-        assert migration_id[0][0] == num_migrations
-
-    endpoint.stop()
-    endpoint.start()
-    # We don't have a good way of knowing that the migrations code path finished executing
-    # in compute_ctl in the case that no migrations are being run
-    time.sleep(1)
-    with endpoint.cursor() as cur:
-        cur.execute("SELECT id FROM neon_migration.migration_id")
-        migration_id = cur.fetchall()
-        assert migration_id[0][0] == num_migrations

From cd10c719f9c7ec4424a6c30a6419256589c60d2e Mon Sep 17 00:00:00 2001
From: Em Sharnoff <sharnoff@neon.tech>
Date: Thu, 2 Jan 2025 11:45:59 -0800
Subject: [PATCH 31/63] compute: Add spec support for disabling LFC resizing
 (#10132)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ref neondatabase/cloud#21731

## Problem

When we manually override the LFC size for particular computes,
autoscaling will typically undo that because vm-monitor will resize LFC
itself.

So, we'd like a way to make vm-monitor not set LFC size — this actually
already exists, if we just don't give vm-monitor a postgres connection
string.

## Summary of changes

Add a new field to the compute spec, `disable_lfc_resizing`. When set to
`true`, we pass in `None` for its postgres connection string. That
matches the configuration tested in `neondatabase/autoscaling` CI.
---
 compute_tools/src/bin/compute_ctl.rs | 19 +++++++++++++++----
 control_plane/src/endpoint.rs        |  1 +
 libs/compute_api/src/spec.rs         |  9 +++++++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 95ade9a87d..26ae25ec20 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -424,9 +424,13 @@ fn start_postgres(
         "running compute with features: {:?}",
         state.pspec.as_ref().unwrap().spec.features
     );
-    // before we release the mutex, fetch the swap size (if any) for later.
-    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
-    let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
+    // before we release the mutex, fetch some parameters for later.
+    let &ComputeSpec {
+        swap_size_bytes,
+        disk_quota_bytes,
+        disable_lfc_resizing,
+        ..
+    } = &state.pspec.as_ref().unwrap().spec;
     drop(state);
 
     // Launch remaining service threads
@@ -531,11 +535,18 @@ fn start_postgres(
             // This token is used internally by the monitor to clean up all threads
             let token = CancellationToken::new();
 
+            // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
+            let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
+                None
+            } else {
+                file_cache_connstr.cloned()
+            };
+
             let vm_monitor = rt.as_ref().map(|rt| {
                 rt.spawn(vm_monitor::start(
                     Box::leak(Box::new(vm_monitor::Args {
                         cgroup: cgroup.cloned(),
-                        pgconnstr: file_cache_connstr.cloned(),
+                        pgconnstr,
                         addr: vm_monitor_addr.clone(),
                     })),
                     token.clone(),
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 5ebf842813..5e47ec4811 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -585,6 +585,7 @@ impl Endpoint {
             features: self.features.clone(),
             swap_size_bytes: None,
             disk_quota_bytes: None,
+            disable_lfc_resizing: None,
             cluster: Cluster {
                 cluster_id: None, // project ID: not used
                 name: None,       // project name: not used
diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs
index 6d9c353cda..54d6a1d38f 100644
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -67,6 +67,15 @@ pub struct ComputeSpec {
     #[serde(default)]
     pub disk_quota_bytes: Option<u64>,
 
+    /// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on
+    /// the initial size of LFC.
+    ///
+    /// This is intended for use when the LFC size is being overridden from the default but
+    /// autoscaling is still enabled, and we don't want the vm-monitor to interfere with the custom
+    /// LFC sizing.
+    #[serde(default)]
+    pub disable_lfc_resizing: Option<bool>,
+
     /// Expected cluster state at the end of transition process.
     pub cluster: Cluster,
     pub delta_operations: Option<Vec<DeltaOp>>,

From eefad27538757a760532ccf4ae2da0088dafb47a Mon Sep 17 00:00:00 2001
From: Tristan Partin <tristan@neon.tech>
Date: Thu, 2 Jan 2025 16:12:56 -0600
Subject: [PATCH 32/63] Inline various migration queries (#10231)

There was no value in saving them off to temporary variables.

Signed-off-by: Tristan Partin <tristan@neon.tech>

Signed-off-by: Tristan Partin <tristan@neon.tech>
---
 compute_tools/src/migration.rs | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs
index 07d738abe9..1f3de65806 100644
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -35,8 +35,6 @@ impl<'m> MigrationRunner<'m> {
     /// used if you would like to fail the application of a series of migrations
     /// at some point.
     fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
-        let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
-
         // We use this fail point in order to check that failing in the
         // middle of applying a series of migrations fails in an expected
         // manner
@@ -58,7 +56,10 @@ impl<'m> MigrationRunner<'m> {
         }
 
         self.client
-            .simple_query(&setval)
+            .query(
+                "UPDATE neon_migration.migration_id SET id = $1",
+                &[&migration_id],
+            )
             .context("run_migrations update id")?;
 
         Ok(())
@@ -66,20 +67,16 @@ impl<'m> MigrationRunner<'m> {
 
     /// Prepare the migrations the target database for handling migrations
     fn prepare_database(&mut self) -> Result<()> {
-        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
-        self.client.simple_query(query)?;
-
-        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
-        self.client.simple_query(query)?;
-
-        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
-        self.client.simple_query(query)?;
-
-        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
-        self.client.simple_query(query)?;
-
-        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
-        self.client.simple_query(query)?;
+        self.client
+            .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?;
+        self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?;
+        self.client.simple_query(
+            "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING",
+        )?;
+        self.client
+            .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?;
+        self.client
+            .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?;
 
         Ok(())
     }

From 7a598b9842f47707d9d93dda621094494221c5a6 Mon Sep 17 00:00:00 2001
From: Ivan Efremov <ivan@neon.tech>
Date: Fri, 3 Jan 2025 12:04:58 +0200
Subject: [PATCH 33/63] [proxy/docs]imprv: Add local testing section to proxy
 README (#10230)

Add commands to run proxy locally with the mocked control plane
---
 proxy/README.md | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/proxy/README.md b/proxy/README.md
index 8d850737be..4b98342d72 100644
--- a/proxy/README.md
+++ b/proxy/README.md
@@ -102,23 +102,39 @@ User can pass several optional headers that will affect resulting json.
 2. `Neon-Array-Mode: true`. Return postgres rows as arrays instead of objects. That is more compact representation and also helps in some edge
 cases where it is hard to use rows represented as objects (e.g. when several fields have the same name).
 
+## Test proxy locally
 
-## Using SNI-based routing on localhost
-
-Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy:
+Proxy determines the project name from the subdomain: a request to `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use `*.localtest.me`, which resolves to `127.0.0.1`.
 
+Let's create a self-signed certificate by running:
 ```sh
 openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
 ```
 
-start proxy
-
+Then we need to build proxy with 'testing' feature and run, e.g.:
 ```sh
-./target/debug/proxy -c server.crt -k server.key
+RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key
 ```
 
-and connect to it
+We will also need a postgres instance. Assuming that Docker is already set up, we can start one as follows:
+```sh
+docker run \
+  --detach \
+  --name proxy-postgres \
+  --env POSTGRES_PASSWORD=proxy-postgres \
+  --publish 5432:5432 \
+  postgres:17-bookworm
+```
+
+The next step is to set up the auth schema and table and to create the role (without the JWT table):
+```sh
+docker exec -it proxy-postgres psql -U postgres -c "CREATE SCHEMA IF NOT EXISTS neon_control_plane"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
+```
+
+Now from client you can start a new session:
 
 ```sh
-PGSSLROOTCERT=./server.crt psql 'postgres://my-cluster-42.localtest.me:1234?sslmode=verify-full'
-```
+PGSSLROOTCERT=./server.crt psql  "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full"
+```
\ No newline at end of file

From 2d4f267983858c197de795074046a2b1376a8616 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 3 Jan 2025 10:20:18 +0000
Subject: [PATCH 34/63] cargo: update diesel, pq-sys (#10256)

## Problem

Versions of `diesel` and `pq-sys` were somewhat stale. I was checking on
libpq->openssl versions while investigating a segfault via
https://github.com/neondatabase/cloud/issues/21010. I don't think these
rust bindings are likely to be the source of issues, but we might as
well freshen them as a precaution.

## Summary of changes

- Update diesel to 2.2.6
- Update pq-sys to 0.6.3
---
 Cargo.lock                    | 8 ++++----
 storage_controller/Cargo.toml | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 420def152d..9e0e343996 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1733,9 +1733,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"
 
 [[package]]
 name = "diesel"
-version = "2.2.3"
+version = "2.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71"
+checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
 dependencies = [
  "bitflags 2.4.1",
  "byteorder",
@@ -4494,9 +4494,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
 [[package]]
 name = "pq-sys"
-version = "0.4.8"
+version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
+checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
 dependencies = [
  "vcpkg",
 ]
diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml
index 2f5d266567..5f3319512d 100644
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -43,13 +43,13 @@ scopeguard.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 
-diesel = { version = "2.1.4", features = [
+diesel = { version = "2.2.6", features = [
     "serde_json",
     "postgres",
     "r2d2",
     "chrono",
 ] }
-diesel_migrations = { version = "2.1.0" }
+diesel_migrations = { version = "2.2.0" }
 r2d2 = { version = "0.8.10" }
 
 utils = { path = "../libs/utils/" }

From ba9722a2fd913d30b639ec506c42761cdca44440 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 3 Jan 2025 10:55:07 +0000
Subject: [PATCH 35/63] tests: add upload wait in
 test_scrubber_physical_gc_ancestors (#10260)

## Problem

We see periodic failures in `test_scrubber_physical_gc_ancestors`, where
the logs show that the pageserver is creating image layers that should
cause child shards to no longer reference their parents' layers, but
then the scrubber runs and doesn't find any unreferenced layers.


https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/78ea06dea6ba8dd3

From inspecting the code & test, it seems like this could be as simple
as the test failing to wait for uploads before running the scrubber. It
had a 2 second delay built in to satisfy the scrubber's time threshold
checks, which on a lightly loaded machine would also have been easily
enough for uploads to complete, but our test machines are more heavily
loaded all the time.

## Summary of changes

- Wait for uploads to complete after generating image layers in
test_scrubber_physical_gc_ancestors, so that the scrubber should
reliably see the post-compaction metadata.
---
 test_runner/regress/test_storage_scrubber.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py
index 198e4f0460..220c428531 100644
--- a/test_runner/regress/test_storage_scrubber.py
+++ b/test_runner/regress/test_storage_scrubber.py
@@ -266,7 +266,9 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
     for shard in shards:
         ps = env.get_tenant_pageserver(shard)
         assert ps is not None
-        ps.http_client().timeline_compact(shard, timeline_id, force_image_layer_creation=True)
+        ps.http_client().timeline_compact(
+            shard, timeline_id, force_image_layer_creation=True, wait_until_uploaded=True
+        )
         ps.http_client().timeline_gc(shard, timeline_id, 0)
 
     # We will use a min_age_secs=1 threshold for deletion, let it pass

From c08759f367063fde2558f979b83f6f9209741c7a Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 3 Jan 2025 10:55:15 +0000
Subject: [PATCH 36/63] storcon: verbose logs in rare case of shards not
 attached yet (#10262)

## Problem

When we do a timeline CRUD operation, we check that the shards we need
to mutate are currently attached to a pageserver, by reading
`generation` and `generation_pageserver` from the database.

If any don't appear to be attached, we respond with a 503 and "One or
more shards in tenant is not yet attached".

This is happening more often than expected, and it's not obvious with
current logging what's going on: specifically which shard has a problem,
and exactly what we're seeing in these persistent generation columns.

(Aside: it's possible that we broke something with the change in #10011
which clears generation_pageserver when we detach a shard, although if
so the mechanism isn't trivial: what should happen is that if we stamp
on generation_pageserver if a reconciler is running, then it shouldn't
matter because we're about to

## Summary of changes

- When we are in Attached mode but find that
generation_pageserver/generation are unset, output details while looping
over shards.
---
 storage_controller/src/service.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index c0c5bc371a..222cb9fdd4 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -3572,6 +3572,11 @@ impl Service {
                 .iter()
                 .any(|i| i.generation.is_none() || i.generation_pageserver.is_none())
             {
+                let shard_generations = generations
+                    .into_iter()
+                    .map(|i| (i.tenant_shard_id, (i.generation, i.generation_pageserver)))
+                    .collect::<HashMap<_, _>>();
+
                 // One or more shards has not been attached to a pageserver.  Check if this is because it's configured
                 // to be detached (409: caller should give up), or because it's meant to be attached but isn't yet (503: caller should retry)
                 let locked = self.inner.read().unwrap();
@@ -3582,6 +3587,28 @@ impl Service {
                         PlacementPolicy::Attached(_) => {
                             // This shard is meant to be attached: the caller is not wrong to try and
                             // use this function, but we can't service the request right now.
+                            let Some(generation) = shard_generations.get(shard_id) else {
+                                // This can only happen if there is a split brain controller modifying the database.  This should
+                                // never happen when testing, and if it happens in production we can only log the issue.
+                                debug_assert!(false);
+                                tracing::error!("Shard {shard_id} not found in generation state!  Is another rogue controller running?");
+                                continue;
+                            };
+                            let (generation, generation_pageserver) = generation;
+                            if let Some(generation) = generation {
+                                if generation_pageserver.is_none() {
+                                    // This is legitimate only in a very narrow window where the shard was only just configured into
+                                    // Attached mode after being created in Secondary or Detached mode, and it has had its generation
+                                    // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver).
+                                    tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?");
+                                }
+                            } else {
+                                // This should never happen: a shard with no generation is only permitted when it was created in some state
+                                // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory)
+                                debug_assert!(false);
+                                tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!");
+                                continue;
+                            }
                         }
                         PlacementPolicy::Secondary | PlacementPolicy::Detached => {
                             return Err(ApiError::Conflict(format!(

From 1303cd5d05062c95660ec00f8846b4258fc62b4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arpad=20M=C3=BCller?= <arpad-m@users.noreply.github.com>
Date: Fri, 3 Jan 2025 13:36:01 +0100
Subject: [PATCH 37/63] Fix defusing race between Tenant::shutdown and
 offload_timeline (#10150)

There is a race condition between `Tenant::shutdown`'s `defuse_for_drop`
loop and `offload_timeline`, where timeline offloading can insert into a
tenant that is in the process of shutting down, in fact so far
progressed that the `defuse_for_drop` has already been called.

This prevents warn log lines of the form:

```
offloaded timeline <hash> was dropped without having cleaned it up at the ancestor
```

The solution piggybacks on the `offloaded_timelines` lock: both the
defuse loop and the offloaded timeline insertion need to acquire the
lock, and we know that the defuse loop only runs after the tenant has
set its `TenantState` to `Stopping`.

So if we hold the `offloaded_timelines` lock, and know that the
`TenantState` is not `Stopping`, then we know that the defuse loop has
not run yet, and holding the lock ensures that it doesn't start running
while we are inserting the offloaded timeline.

Fixes #10070
---
 pageserver/src/tenant/timeline/offload.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs
index 3bfbfb5061..15628a9645 100644
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -1,5 +1,7 @@
 use std::sync::Arc;
 
+use pageserver_api::models::TenantState;
+
 use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
 use super::Timeline;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -70,6 +72,15 @@ pub(crate) async fn offload_timeline(
 
     {
         let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
+        if matches!(
+            tenant.current_state(),
+            TenantState::Stopping { .. } | TenantState::Broken { .. }
+        ) {
+            // Cancel the operation if the tenant is shutting down. Do this while the
+            // timelines_offloaded lock is held to prevent a race with Tenant::shutdown
+            // for defusing the lock
+            return Err(OffloadError::Cancelled);
+        }
         offloaded_timelines.insert(
             timeline.timeline_id,
             Arc::new(

From e9d30edc7f7f5fab66f7bdffcca9e8c5bb9b8d27 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 3 Jan 2025 13:13:22 +0000
Subject: [PATCH 38/63] pageserver: fix a 500 during timeline creation +
 shutdown (#10259)

## Problem

The test_create_churn_during_restart test fails if timeline creation
calls return 500 errors (because the API shouldn't do it), and it's
sometimes failing, for example:

https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/3ce2e7045465012e

## Summary of changes

- Avoid handling UploadQueueShutDownOrStopped case as an Other (i.e.
500)
---
 pageserver/src/tenant.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 2e4c47c6e4..90017b25f2 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -2604,9 +2604,15 @@ impl Tenant {
                 WaitCompletionError::NotInitialized(
                     e, // If the queue is already stopped, it's a shutdown error.
                 ) if e.is_stopping() => CreateTimelineError::ShuttingDown,
-                e => CreateTimelineError::Other(e.into()),
-            })
-            .context("wait for timeline initial uploads to complete")?;
+                WaitCompletionError::NotInitialized(_) => {
+                    // This is a bug: we should never try to wait for uploads before initializing the timeline
+                    debug_assert!(false);
+                    CreateTimelineError::Other(anyhow::anyhow!("timeline not initialized"))
+                }
+                WaitCompletionError::UploadQueueShutDownOrStopped => {
+                    CreateTimelineError::ShuttingDown
+                }
+            })?;
 
         // The creating task is responsible for activating the timeline.
         // We do this after `wait_completion()` so that we don't spin up tasks that start

From b33299dc37d9269fe55bd3256b7a4a72c129b81c Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Fri, 3 Jan 2025 16:21:31 +0100
Subject: [PATCH 39/63] pageserver,safekeeper: disable heap profiling (#10268)

## Problem

Since enabling continuous profiling in staging, we've seen frequent seg
faults. This is suspected to be because jemalloc and pprof-rs take a
stack trace at the same time, and the handlers aren't signal safe.
jemalloc does this probabilistically on every allocation, regardless of
whether someone is taking a heap profile, which means that any CPU
profile has a chance to cause a seg fault.

Touches #10225.

## Summary of changes

For now, just disable heap profiles -- CPU profiles are more important,
and we need to be able to take them without risking a crash.
---
 pageserver/src/bin/pageserver.rs | 10 ++++++----
 safekeeper/src/bin/safekeeper.rs | 10 ++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 567a69da3b..b92ff4ebf9 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -53,10 +53,12 @@ project_build_tag!(BUILD_TAG);
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-#[allow(non_upper_case_globals)]
-#[export_name = "malloc_conf"]
-pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+// TODO: disabled because concurrent CPU profiles cause seg faults. See:
+// https://github.com/neondatabase/neon/issues/10225.
+//#[allow(non_upper_case_globals)]
+//#[export_name = "malloc_conf"]
+//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "pageserver.pid";
 
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs
index 13f6e34575..e0ba38d638 100644
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -51,10 +51,12 @@ use utils::{
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-#[allow(non_upper_case_globals)]
-#[export_name = "malloc_conf"]
-pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+// TODO: disabled because concurrent CPU profiles cause seg faults. See:
+// https://github.com/neondatabase/neon/issues/10225.
+//#[allow(non_upper_case_globals)]
+//#[export_name = "malloc_conf"]
+//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "safekeeper.pid";
 const ID_FILE_NAME: &str = "safekeeper.id";

From 1393cc668bce904cf5300f8829addce86437e755 Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Fri, 3 Jan 2025 16:38:51 +0100
Subject: [PATCH 40/63] Revert "pageserver: revert flush backpressure (#8550)
 (#10135)" (#10270)

This reverts commit f3ecd5d76ad8b858b2bfaaabba5018046aca46ac.

It is
[suspected](https://neondb.slack.com/archives/C033RQ5SPDH/p1735907405716759)
to have caused significant read amplification in the [ingest
benchmark](https://neonprod.grafana.net/d/de3mupf4g68e8e/perf-test3a-ingest-benchmark?orgId=1&from=now-30d&to=now&timezone=utc&var-new_project_endpoint_id=ep-solitary-sun-w22bmut6&var-large_tenant_endpoint_id=ep-holy-bread-w203krzs)
(specifically during index creation).

We will revisit an intermediate improvement here to unblock [upload
parallelism](https://github.com/neondatabase/neon/issues/10096) before
properly addressing [compaction
backpressure](https://github.com/neondatabase/neon/issues/8390).
---
 pageserver/src/metrics.rs                  | 25 ++++++++++-
 pageserver/src/tenant/timeline.rs          | 38 +++++++++++++----
 test_runner/fixtures/metrics.py            |  1 +
 test_runner/regress/test_branching.py      | 13 ++++--
 test_runner/regress/test_remote_storage.py | 48 ++++++++++++++++++++++
 5 files changed, 112 insertions(+), 13 deletions(-)

diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index bdbabf3f75..b4e20cb8b9 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -3,7 +3,7 @@ use metrics::{
     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
     register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
     register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
-    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
+    Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
     IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
@@ -445,6 +445,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
     .expect("failed to define a metric")
 });
 
+static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
+    register_gauge_vec!(
+        "pageserver_flush_wait_upload_seconds",
+        "Time spent waiting for preceding uploads during layer flush",
+        &["tenant_id", "shard_id", "timeline_id"]
+    )
+    .expect("failed to define a metric")
+});
+
 static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
     register_int_gauge_vec!(
         "pageserver_last_record_lsn",
@@ -2577,6 +2586,7 @@ pub(crate) struct TimelineMetrics {
     shard_id: String,
     timeline_id: String,
     pub flush_time_histo: StorageTimeMetrics,
+    pub flush_wait_upload_time_gauge: Gauge,
     pub compact_time_histo: StorageTimeMetrics,
     pub create_images_time_histo: StorageTimeMetrics,
     pub logical_size_histo: StorageTimeMetrics,
@@ -2622,6 +2632,9 @@ impl TimelineMetrics {
             &shard_id,
             &timeline_id,
         );
+        let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
+            .unwrap();
         let compact_time_histo = StorageTimeMetrics::new(
             StorageTimeOperation::Compact,
             &tenant_id,
@@ -2767,6 +2780,7 @@ impl TimelineMetrics {
             shard_id,
             timeline_id,
             flush_time_histo,
+            flush_wait_upload_time_gauge,
             compact_time_histo,
             create_images_time_histo,
             logical_size_histo,
@@ -2816,6 +2830,14 @@ impl TimelineMetrics {
         self.resident_physical_size_gauge.get()
     }
 
+    pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
+        self.flush_wait_upload_time_gauge.add(duration);
+        crate::metrics::FLUSH_WAIT_UPLOAD_TIME
+            .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
+            .unwrap()
+            .add(duration);
+    }
+
     pub(crate) fn shutdown(&self) {
         let was_shutdown = self
             .shutdown
@@ -2833,6 +2855,7 @@ impl TimelineMetrics {
         let shard_id = &self.shard_id;
         let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
+        let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         {
             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index e71cb4db80..b36c2f487f 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -144,15 +144,19 @@ use self::layer_manager::LayerManager;
 use self::logical_size::LogicalSize;
 use self::walreceiver::{WalReceiver, WalReceiverConf};
 
-use super::config::TenantConf;
-use super::remote_timeline_client::index::IndexPart;
-use super::remote_timeline_client::RemoteTimelineClient;
-use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
-use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
-use super::upload_queue::NotInitialized;
-use super::GcError;
 use super::{
-    debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded,
+    config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized,
+    MaybeOffloaded,
+};
+use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
+use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
+use super::{
+    remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
+    storage_layer::ReadableLayer,
+};
+use super::{
+    secondary::heatmap::{HeatMapLayer, HeatMapTimeline},
+    GcError,
 };
 
 #[cfg(test)]
@@ -3836,6 +3840,24 @@ impl Timeline {
             // release lock on 'layers'
         };
 
+        // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files.
+        // This makes us refuse ingest until the new layers have been persisted to the remote
+        let start = Instant::now();
+        self.remote_client
+            .wait_completion()
+            .await
+            .map_err(|e| match e {
+                WaitCompletionError::UploadQueueShutDownOrStopped
+                | WaitCompletionError::NotInitialized(
+                    NotInitialized::ShuttingDown | NotInitialized::Stopped,
+                ) => FlushLayerError::Cancelled,
+                WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => {
+                    FlushLayerError::Other(anyhow!(e).into())
+                }
+            })?;
+        let duration = start.elapsed().as_secs_f64();
+        self.metrics.flush_wait_upload_time_gauge_add(duration);
+
         // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,
         // a compaction can delete the file and then it won't be available for uploads any more.
         // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this
diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py
index eb3d06b949..c5295360c3 100644
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -170,6 +170,7 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
     "pageserver_evictions_with_low_residence_duration_total",
     "pageserver_aux_file_estimated_size",
     "pageserver_valid_lsn_lease_count",
+    "pageserver_flush_wait_upload_seconds",
     counter("pageserver_tenant_throttling_count_accounted_start"),
     counter("pageserver_tenant_throttling_count_accounted_finish"),
     counter("pageserver_tenant_throttling_wait_usecs_sum"),
diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py
index a4056404f0..34e4e994cb 100644
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -19,7 +19,6 @@ from fixtures.pageserver.utils import wait_until_tenant_active
 from fixtures.utils import query_scalar
 from performance.test_perf_pgbench import get_scales_matrix
 from requests import RequestException
-from requests.exceptions import RetryError
 
 
 # Test branch creation
@@ -177,8 +176,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
 
         env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
 
-        with pytest.raises(RuntimeError, match="is not active, state: Loading"):
-            env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant)
+        with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"):
+            env.endpoints.create_start(
+                initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2
+            )
+        ps_http.configure_failpoints(("before-upload-index-pausable", "off"))
     finally:
         env.pageserver.stop(immediate=True)
 
@@ -219,7 +221,10 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder
 
         branch_id = TimelineId.generate()
 
-        with pytest.raises(RetryError, match="too many 503 error responses"):
+        with pytest.raises(
+            PageserverApiException,
+            match="Cannot branch off the timeline that's not present in pageserver",
+        ):
             ps_http.timeline_create(
                 env.pg_version,
                 env.initial_tenant,
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index 52b6b254aa..76a42ef4a2 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -784,6 +784,54 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv
         create_thread.join()
 
 
+def test_paused_upload_stalls_checkpoint(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    This test checks that checkpoints block on uploads to remote storage.
+    """
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+
+    env = neon_env_builder.init_start(
+        initial_tenant_conf={
+            # Set a small compaction threshold
+            "compaction_threshold": "3",
+            # Disable GC
+            "gc_period": "0s",
+            # disable PITR
+            "pitr_interval": "0s",
+        }
+    )
+
+    env.pageserver.allowed_errors.append(
+        f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing"
+    )
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    client = env.pageserver.http_client()
+    layers_at_creation = client.layer_map_info(tenant_id, timeline_id)
+    deltas_at_creation = len(layers_at_creation.delta_layers())
+    assert (
+        deltas_at_creation == 1
+    ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle"
+
+    # Make new layer uploads get stuck.
+    # Note that timeline creation waits for the initial layers to reach remote storage.
+    # So at this point, the `layers_at_creation` are in remote storage.
+    client.configure_failpoints(("before-upload-layer-pausable", "pause"))
+
+    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
+        # Build two tables with some data inside
+        endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)")
+        wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
+
+        with pytest.raises(ReadTimeout):
+            client.timeline_checkpoint(tenant_id, timeline_id, timeout=5)
+        client.configure_failpoints(("before-upload-layer-pausable", "off"))
+
+
 def wait_upload_queue_empty(
     client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
 ):

From a77e87a48a628abcac77afeb1f64e5a491275f1c Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Fri, 3 Jan 2025 17:03:19 +0100
Subject: [PATCH 41/63] pageserver: assert that uploads don't modify indexed
 layers (#10228)

## Problem

It's not legal to modify layers that are referenced by the current layer
index. Assert this in the upload queue, as preparation for upload queue
reordering.

Touches #10096.

## Summary of changes

Add a debug assertion that the upload queue does not modify layers
referenced by the current index.

I could be convinced that this should be a plain assertion, but will be
conservative for now.
---
 .../src/tenant/remote_timeline_client.rs      | 39 +++++++++++++++++++
 .../tenant/remote_timeline_client/index.rs    | 21 +++++++---
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index fee11bc742..b27ac3e933 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1943,6 +1943,30 @@ impl RemoteTimelineClient {
                 return;
             }
 
+            // Assert that we don't modify a layer that's referenced by the current index.
+            if cfg!(debug_assertions) {
+                let modified = match &task.op {
+                    UploadOp::UploadLayer(layer, layer_metadata, _) => {
+                        vec![(layer.layer_desc().layer_name(), layer_metadata)]
+                    }
+                    UploadOp::Delete(delete) => {
+                        delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect()
+                    }
+                    // These don't modify layers.
+                    UploadOp::UploadMetadata { .. } => Vec::new(),
+                    UploadOp::Barrier(_) => Vec::new(),
+                    UploadOp::Shutdown => Vec::new(),
+                };
+                if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() {
+                    for (ref name, metadata) in modified {
+                        debug_assert!(
+                            !queue.clean.0.references(name, metadata),
+                            "layer {name} modified while referenced by index",
+                        );
+                    }
+                }
+            }
+
             let upload_result: anyhow::Result<()> = match &task.op {
                 UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
                     if let Some(OpType::FlushDeletion) = mode {
@@ -2509,6 +2533,21 @@ pub fn remote_layer_path(
     RemotePath::from_string(&path).expect("Failed to construct path")
 }
 
+/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially
+/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.
+///
+/// TODO: there should be a variant of LayerName for the physical path that contains information
+/// about the shard and generation, such that this could be replaced by a simple comparison.
+pub fn is_same_remote_layer_path(
+    aname: &LayerName,
+    ameta: &LayerFileMetadata,
+    bname: &LayerName,
+    bmeta: &LayerFileMetadata,
+) -> bool {
+    // NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug.
+    aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation
+}
+
 pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
     RemotePath::from_string(&format!(
         "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs
index 506990fb2f..51f093cb87 100644
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -8,14 +8,14 @@ use std::collections::HashMap;
 use chrono::NaiveDateTime;
 use pageserver_api::models::AuxFilePolicy;
 use serde::{Deserialize, Serialize};
-use utils::id::TimelineId;
 
+use super::is_same_remote_layer_path;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerName;
 use crate::tenant::timeline::import_pgdata;
 use crate::tenant::Generation;
 use pageserver_api::shard::ShardIndex;
-
+use utils::id::TimelineId;
 use utils::lsn::Lsn;
 
 /// In-memory representation of an `index_part.json` file
@@ -45,10 +45,8 @@ pub struct IndexPart {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub import_pgdata: Option<import_pgdata::index_part_format::Root>,
 
-    /// Per layer file name metadata, which can be present for a present or missing layer file.
-    ///
-    /// Older versions of `IndexPart` will not have this property or have only a part of metadata
-    /// that latest version stores.
+    /// Layer filenames and metadata. For an index persisted in remote storage, all layers must
+    /// exist in remote storage.
     pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,
 
     /// Because of the trouble of eyeballing the legacy "metadata" field, we copied the
@@ -143,6 +141,17 @@ impl IndexPart {
     pub(crate) fn example() -> Self {
         Self::empty(TimelineMetadata::example())
     }
+
+    /// Returns true if the index contains a reference to the given layer (i.e. file path).
+    ///
+    /// TODO: there should be a variant of LayerName for the physical remote path that contains
+    /// information about the shard and generation, to avoid passing in metadata.
+    pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool {
+        let Some(index_metadata) = self.layer_metadata.get(name) else {
+            return false;
+        };
+        is_same_remote_layer_path(name, metadata, name, index_metadata)
+    }
 }
 
 /// Metadata gathered for each of the layer files.

From 4b2f56862dc5738407893df451348e78696abd65 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 3 Jan 2025 16:16:04 +0000
Subject: [PATCH 42/63] docker: include vanilla debian postgres client (#10269)

## Problem

We are chasing down segfaults in the storage controller
https://github.com/neondatabase/cloud/issues/21010

This is for use by the storage controller, which links dynamically with
`libpq`. We currently use the neon-built libpq, but this may be unsafe
for use from multi-threaded programs like the controller, as it uses a
statically linked openssl

Precursor to https://github.com/neondatabase/neon/pull/10258

## Summary of changes

- Include `postgresql-15` in container builds.

The reason for using version 15 is simply because that is what's
available in Debian 12 without adding any extra repositories, and we
don't have any special need for latest version in our libpq usage.
---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index e888efbae2..df9bcb3002 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -69,6 +69,8 @@ RUN set -e \
         libreadline-dev \
         libseccomp-dev \
         ca-certificates \
+	# System postgres for use with client libraries (e.g. in storage controller)
+        postgresql-15 \
     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
     && useradd -d /data neon \
     && chown -R neon:neon /data

From b368e62cfc374bd48ca656b476c5c081c4018546 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 4 Jan 2025 15:40:50 +0000
Subject: [PATCH 43/63] build(deps): bump jinja2 from 3.1.4 to 3.1.5 in the pip
 group (#10236)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 poetry.lock    | 20 +++++++++++++++-----
 pyproject.toml |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 59ae5cf1ca..072bf9a5e9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -1322,13 +1322,13 @@ files = [
 
 [[package]]
 name = "jinja2"
-version = "3.1.4"
+version = "3.1.5"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
-    {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
+    {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"},
+    {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"},
 ]
 
 [package.dependencies]
@@ -3309,6 +3309,16 @@ files = [
     {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
     {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
     {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
+    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
+    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -3524,4 +3534,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9"
+content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1"
diff --git a/pyproject.toml b/pyproject.toml
index 01d15ee6bb..ba4ab0b1f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ requests = "^2.32.3"
 pytest-xdist = "^3.3.1"
 asyncpg = "^0.29.0"
 aiopg = "^1.4.0"
-Jinja2 = "^3.1.4"
+Jinja2 = "^3.1.5"
 types-requests = "^2.31.0.0"
 types-psycopg2 = "^2.9.21.20241019"
 boto3 = "^1.34.11"

From 406cca643b9529979522b33abf3a0457681fc987 Mon Sep 17 00:00:00 2001
From: Busra Kugler <busra@neon.tech>
Date: Mon, 6 Jan 2025 11:44:23 +0100
Subject: [PATCH 44/63] Update neon_fixtures.py - remove logs (#10219)

We need to remove the environment from this log line to prevent AWS keys
from being exposed in the public S3 buckets
---
 test_runner/fixtures/neon_fixtures.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index a0c642163d..8fd9eec8ce 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -4606,7 +4606,8 @@ class StorageScrubber:
         ]
         args = base_args + args
 
-        log.info(f"Invoking scrubber command {args} with env: {env}")
+        log.info(f"Invoking scrubber command {args}")
+
         (output_path, stdout, status_code) = subprocess_capture(
             self.log_dir,
             args,

From fda52a0005ea4ca8e4e1d6a16de75724a7e619fe Mon Sep 17 00:00:00 2001
From: Conrad Ludgate <conrad@neon.tech>
Date: Mon, 6 Jan 2025 13:05:35 +0000
Subject: [PATCH 45/63] feat(proxy): dont trigger error alerts for unknown
 topics (#10266)

## Problem

Before the holidays, and just before our code freeze, a change to cplane
was made that started publishing the topics from #10197. This triggered
our alerts and put us in a sticky situation: it was not an error, but we
didn't want to silence the alert for the entire holidays, nor did we want
to release proxy 2 days in a row if it was not essential.

We fixed it eventually by rewriting the alert based on logs, but this is
not a good solution.

## Summary of changes

Introduces an intermediate parsing step to check the topic name first,
to allow us to ignore parsing errors for any topics we do not know
about.
---
 proxy/src/redis/notifications.rs | 48 +++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index 671305a300..4383d6be2c 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -30,8 +30,14 @@ async fn try_connect(client: &ConnectionWithCredentialsProvider) -> anyhow::Resu
     Ok(conn)
 }
 
+#[derive(Debug, Deserialize)]
+struct NotificationHeader<'a> {
+    topic: &'a str,
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 #[serde(tag = "topic", content = "data")]
+// Message to contributors: Make sure to align these topic names with the list below.
 pub(crate) enum Notification {
     #[serde(
         rename = "/allowed_ips_updated",
@@ -69,6 +75,22 @@ pub(crate) enum Notification {
     #[serde(rename = "/cancel_session")]
     Cancel(CancelSession),
 }
+
+/// Returns true if the topic name given is a known topic that we can deserialize and act on.
+/// Returns false otherwise.
+fn known_topic(s: &str) -> bool {
+    // Message to contributors: Make sure to align these topic names with the enum above.
+    matches!(
+        s,
+        "/allowed_ips_updated"
+            | "/block_public_or_vpc_access_updated"
+            | "/allowed_vpc_endpoints_updated_for_org"
+            | "/allowed_vpc_endpoints_updated_for_projects"
+            | "/password_updated"
+            | "/cancel_session"
+    )
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub(crate) struct AllowedIpsUpdate {
     project_id: ProjectIdInt,
@@ -96,6 +118,7 @@ pub(crate) struct PasswordUpdate {
     project_id: ProjectIdInt,
     role_name: RoleNameInt,
 }
+
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub(crate) struct CancelSession {
     pub(crate) region_id: Option<String>,
@@ -141,18 +164,23 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
             region_id,
         }
     }
+
     pub(crate) async fn increment_active_listeners(&self) {
         self.cache.increment_active_listeners().await;
     }
+
     pub(crate) async fn decrement_active_listeners(&self) {
         self.cache.decrement_active_listeners().await;
     }
+
     #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))]
     async fn handle_message(&self, msg: redis::Msg) -> anyhow::Result<()> {
         let payload: String = msg.get_payload()?;
         tracing::debug!(?payload, "received a message payload");
 
-        let msg: Notification = match serde_json::from_str(&payload) {
+        // For better error handling, we first parse the payload to extract the topic.
+        // If there's a topic we don't support, we can handle that error more gracefully.
+        let header: NotificationHeader = match serde_json::from_str(&payload) {
             Ok(msg) => msg,
             Err(e) => {
                 Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
@@ -162,6 +190,24 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                 return Ok(());
             }
         };
+
+        if !known_topic(header.topic) {
+            // don't update the metric for redis errors if it's just a topic we don't know about.
+            tracing::warn!(topic = header.topic, "unknown topic");
+            return Ok(());
+        }
+
+        let msg: Notification = match serde_json::from_str(&payload) {
+            Ok(msg) => msg,
+            Err(e) => {
+                Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
+                    channel: msg.get_channel_name(),
+                });
+                tracing::error!(topic = header.topic, "broken message: {e}");
+                return Ok(());
+            }
+        };
+
         tracing::debug!(?msg, "received a message");
         match msg {
             Notification::Cancel(cancel_session) => {

From 95f1920231465e2b898b71a9959acec9ddd63896 Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Mon, 6 Jan 2025 18:27:08 +0100
Subject: [PATCH 46/63] cargo: build with frame pointers (#10226)

## Problem

Frame pointers are typically disabled by default (depending on CPU
architecture), to improve performance. This frees up a CPU register, and
avoids a couple of instructions per function call. However, it makes
stack unwinding much more inefficient, since it has to use DWARF debug
information instead, and gives worse results with e.g. `perf` and eBPF
profiles. The `backtrace` implementation of `libunwind` is also
suspected to cause seg faults.

The performance benefit of frame pointer omission doesn't appear to
matter that much on modern 64-bit CPU architectures (which have plenty
of registers and optimized instruction execution), and benchmarks did
not show measurable overhead.

The Rust standard library and jemalloc already enable frame pointers by
default.

For more information, see
https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html.

Resolves #10224.
Touches #10225.

## Summary of changes

Enable frame pointers in all builds, and use frame pointers for pprof-rs
stack sampling.
---
 .cargo/config.toml | 8 ++++++++
 Cargo.toml         | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.cargo/config.toml b/.cargo/config.toml
index 5e452974ad..20a2a929b9 100644
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -3,6 +3,14 @@
 # by the RUSTDOCFLAGS env var in CI.
 rustdocflags = ["-Arustdoc::private_intra_doc_links"]
 
+# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
+# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is
+# likely a net win, and allows higher profiling resolution. See also:
+#
+# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
+# * <https://github.com/rust-lang/rust/pull/122646>
+rustflags = ["-Cforce-frame-pointers=yes"]
+
 [alias]
 build_testing = ["build", "--features", "testing"]
 neon = ["run", "--bin", "neon_local"]
diff --git a/Cargo.toml b/Cargo.toml
index 885f02ba81..197808d5ae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -135,7 +135,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
-pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
+pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13"
@@ -266,6 +266,8 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
+#
+# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml.
 debug = true
 
 # disable debug symbols for all packages except this one to decrease binaries size

From 4a6556e269018844a8c3413bd7414331cd968fce Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Mon, 6 Jan 2025 14:29:18 -0500
Subject: [PATCH 47/63] fix(pageserver): ensure GC computes time cutoff using
 the same start time (#10193)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

close https://github.com/neondatabase/neon/issues/10192

## Summary of changes

* `find_gc_time_cutoff` takes `now` parameter so that all branches
compute the cutoff based on the same start time, avoiding races.
* gc-compaction uses a single `get_gc_compaction_watermark` function to
get the safe LSN to compact.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
---
 pageserver/src/tenant.rs                     |  6 +++++-
 pageserver/src/tenant/timeline.rs            |  5 +++--
 pageserver/src/tenant/timeline/compaction.rs | 22 ++++++++++++++++++--
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 90017b25f2..e3dab2fc1d 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -4488,13 +4488,17 @@ impl Tenant {
         let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
             HashMap::with_capacity(timelines.len());
 
+        // Ensures all timelines use the same start time when computing the time cutoff.
+        let now_ts_for_pitr_calc = SystemTime::now();
         for timeline in timelines.iter() {
             let cutoff = timeline
                 .get_last_record_lsn()
                 .checked_sub(horizon)
                 .unwrap_or(Lsn(0));
 
-            let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?;
+            let cutoffs = timeline
+                .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx)
+                .await?;
             let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
             assert!(old.is_none());
         }
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index b36c2f487f..c1b71262e0 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4859,6 +4859,7 @@ impl Timeline {
 
     async fn find_gc_time_cutoff(
         &self,
+        now: SystemTime,
         pitr: Duration,
         cancel: &CancellationToken,
         ctx: &RequestContext,
@@ -4866,7 +4867,6 @@ impl Timeline {
         debug_assert_current_span_has_tenant_and_timeline_id();
         if self.shard_identity.is_shard_zero() {
             // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
-            let now = SystemTime::now();
             let time_range = if pitr == Duration::ZERO {
                 humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
             } else {
@@ -4952,6 +4952,7 @@ impl Timeline {
     #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
     pub(super) async fn find_gc_cutoffs(
         &self,
+        now: SystemTime,
         space_cutoff: Lsn,
         pitr: Duration,
         cancel: &CancellationToken,
@@ -4979,7 +4980,7 @@ impl Timeline {
         // - if PITR interval is set, then this is our cutoff.
         // - if PITR interval is not set, then we do a lookup
         //   based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
-        let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;
+        let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?;
 
         Ok(match (pitr, time_cutoff) {
             (Duration::ZERO, Some(time_cutoff)) => {
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 94c65631b2..55cde8603e 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1799,6 +1799,24 @@ impl Timeline {
         Ok(())
     }
 
+    /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for
+    /// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff); the standby
+    /// horizon is not included yet (see the TODO in the body). Leases and retain_lsns are considered
+    /// in the gc-compaction job itself so we don't need to account for them here.
+    pub(crate) fn get_gc_compaction_watermark(self: &Arc<Self>) -> Lsn {
+        let gc_cutoff_lsn = {
+            let gc_info = self.gc_info.read().unwrap();
+            gc_info.min_cutoff()
+        };
+
+        // TODO: standby horizon should use leases so we don't really need to consider it here.
+        // let watermark = watermark.min(self.standby_horizon.load());
+
+        // TODO: ensure the child branches will not use anything below the watermark, or consider
+        // them when computing the watermark.
+        gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn())
+    }
+
     /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.
     /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN
     /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts
@@ -1811,7 +1829,7 @@ impl Timeline {
         let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
             job.compact_lsn_range.end
         } else {
-            *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
+            self.get_gc_compaction_watermark()
         };
 
         // Split compaction job to about 4GB each
@@ -2006,7 +2024,7 @@ impl Timeline {
                 // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of
                 // cleaning everything that theoritically it could. In the future, it should use `self.gc_info`
                 // to get the truth data.
-                let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn();
+                let real_gc_cutoff = self.get_gc_compaction_watermark();
                 // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for
                 // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
                 // the real cutoff.

From b342a02b1c591642e2d52be606ccc42857af112d Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Mon, 6 Jan 2025 21:17:43 +0100
Subject: [PATCH 48/63] Dockerfile: build with `force-frame-pointers=yes`
 (#10286)

See
https://github.com/neondatabase/neon/pull/10226#issuecomment-2573725182.
---
 .cargo/config.toml | 6 ++++--
 Dockerfile         | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.cargo/config.toml b/.cargo/config.toml
index 20a2a929b9..c71d491303 100644
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -4,11 +4,13 @@
 rustdocflags = ["-Arustdoc::private_intra_doc_links"]
 
 # Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
-# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is
-# likely a net win, and allows higher profiling resolution. See also:
+# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
+# we've seen with libunwind-based profiling. See also:
 #
 # * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
 # * <https://github.com/rust-lang/rust/pull/122646>
+#
+# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.
 rustflags = ["-Cforce-frame-pointers=yes"]
 
 [alias]
diff --git a/Dockerfile b/Dockerfile
index df9bcb3002..2c157b3b2a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
 
 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
       --bin pg_sni_router  \
       --bin pageserver  \
       --bin pagectl  \

From ad7f14d526a4e7e5195ca5d8651672aae0c96d93 Mon Sep 17 00:00:00 2001
From: Alexander Bayandin <alexander@neon.tech>
Date: Mon, 6 Jan 2025 20:25:31 +0000
Subject: [PATCH 49/63] test_runner: update packages for Python 3.13 (#10285)

## Problem

It's impossible to run regression tests with Python 3.13 as some
dependencies don't support it (some of them are outdated, and `jsonnet`
doesn't support it at all yet)

## Summary of changes
- Update dependencies for Python 3.13
- Install `jsonnet` only on Python < 3.13 and skip relevant tests on
Python 3.13

Closes #10237
---
 poetry.lock                                 | 771 +++++++++++---------
 pyproject.toml                              |  11 +-
 test_runner/regress/test_compute_metrics.py |   9 +-
 3 files changed, 459 insertions(+), 332 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 072bf9a5e9..5f15223dca 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -239,60 +239,66 @@ files = [
 
 [[package]]
 name = "asyncpg"
-version = "0.29.0"
+version = "0.30.0"
 description = "An asyncio PostgreSQL driver"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"},
-    {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"},
-    {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"},
-    {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"},
-    {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"},
-    {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"},
-    {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"},
-    {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"},
-    {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"},
-    {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"},
-    {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"},
-    {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"},
-    {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"},
-    {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"},
-    {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"},
-    {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"},
-    {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"},
-    {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"},
-    {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"},
-    {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"},
-    {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"},
-    {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"},
-    {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"},
-    {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"},
-    {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"},
-    {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"},
-    {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"},
-    {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"},
-    {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"},
-    {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"},
-    {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"},
-    {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"},
-    {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"},
-    {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"},
-    {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"},
-    {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"},
-    {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"},
-    {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"},
-    {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"},
-    {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"},
-    {file = "asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"},
+    {file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
+    {file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"},
+    {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"},
+    {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"},
+    {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"},
+    {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"},
+    {file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"},
+    {file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"},
+    {file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"},
+    {file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"},
+    {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"},
+    {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"},
+    {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"},
+    {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"},
+    {file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"},
+    {file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"},
+    {file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"},
+    {file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"},
+    {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"},
+    {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"},
+    {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"},
+    {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"},
+    {file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"},
+    {file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"},
+    {file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"},
+    {file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"},
+    {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"},
+    {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"},
+    {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"},
+    {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"},
+    {file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"},
+    {file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"},
+    {file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"},
+    {file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"},
+    {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"},
+    {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"},
+    {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"},
+    {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"},
+    {file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"},
+    {file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"},
+    {file = "asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"},
+    {file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"},
+    {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"},
+    {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"},
+    {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"},
+    {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"},
+    {file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"},
+    {file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"},
+    {file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"},
 ]
 
-[package.dependencies]
-async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""}
-
 [package.extras]
-docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"]
-test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"]
+docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"]
+gssauth = ["gssapi", "sspilib"]
+test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi", "k5test", "mypy (>=1.8.0,<1.9.0)", "sspilib", "uvloop (>=0.15.3)"]
 
 [[package]]
 name = "attrs"
@@ -766,75 +772,78 @@ files = [
 
 [[package]]
 name = "cffi"
-version = "1.15.1"
+version = "1.17.1"
 description = "Foreign Function Interface for Python calling C code."
 optional = false
-python-versions = "*"
+python-versions = ">=3.8"
 files = [
-    {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"},
-    {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"},
-    {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"},
-    {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"},
-    {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"},
-    {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"},
-    {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"},
-    {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"},
-    {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"},
-    {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"},
-    {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"},
-    {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"},
-    {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"},
-    {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"},
-    {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"},
-    {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"},
-    {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"},
-    {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"},
-    {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"},
-    {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"},
-    {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"},
-    {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"},
-    {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"},
-    {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"},
-    {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"},
-    {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"},
-    {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"},
-    {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"},
-    {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"},
-    {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"},
-    {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"},
-    {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"},
-    {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"},
-    {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"},
-    {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"},
-    {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"},
-    {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"},
-    {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"},
-    {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"},
-    {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"},
-    {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"},
-    {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"},
-    {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"},
-    {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"},
-    {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"},
-    {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"},
-    {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"},
-    {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"},
-    {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"},
-    {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"},
-    {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"},
-    {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"},
-    {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"},
-    {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"},
-    {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"},
-    {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"},
-    {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"},
-    {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"},
-    {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"},
-    {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"},
-    {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"},
-    {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"},
-    {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"},
-    {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"},
+    {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
+    {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"},
+    {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"},
+    {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"},
+    {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"},
+    {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"},
+    {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"},
+    {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"},
+    {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"},
+    {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"},
+    {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"},
+    {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"},
+    {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"},
+    {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"},
+    {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
+    {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
 ]
 
 [package.dependencies]
@@ -1114,72 +1123,103 @@ Flask = ">=0.9"
 
 [[package]]
 name = "frozenlist"
-version = "1.4.0"
+version = "1.5.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"},
-    {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"},
+    {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"},
+    {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"},
+    {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"},
+    {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"},
+    {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"},
+    {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"},
+    {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"},
+    {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"},
+    {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"},
+    {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"},
+    {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"},
+    {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"},
+    {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"},
+    {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"},
 ]
 
 [[package]]
@@ -2295,109 +2335,131 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "2.7.1"
+version = "2.10.4"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"},
-    {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"},
+    {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"},
+    {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"},
 ]
 
 [package.dependencies]
-annotated-types = ">=0.4.0"
-pydantic-core = "2.18.2"
-typing-extensions = ">=4.6.1"
+annotated-types = ">=0.6.0"
+pydantic-core = "2.27.2"
+typing-extensions = ">=4.12.2"
 
 [package.extras]
 email = ["email-validator (>=2.0.0)"]
+timezone = ["tzdata"]
 
 [[package]]
 name = "pydantic-core"
-version = "2.18.2"
+version = "2.27.2"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"},
-    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"},
-    {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"},
-    {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"},
-    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"},
-    {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"},
-    {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"},
-    {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"},
-    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"},
-    {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"},
-    {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"},
-    {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"},
-    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"},
-    {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"},
-    {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"},
-    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"},
-    {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"},
-    {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"},
-    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"},
-    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"},
-    {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4"},
+    {file = "pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc"},
+    {file = "pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9"},
+    {file = "pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee"},
+    {file = "pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-win32.whl", hash = "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b"},
+    {file = "pydantic_core-2.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-win32.whl", hash = "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e"},
+    {file = "pydantic_core-2.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9"},
+    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2"},
+    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35"},
+    {file = "pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39"},
 ]
 
 [package.dependencies]
@@ -2638,6 +2700,20 @@ files = [
 [package.dependencies]
 six = ">=1.5"
 
+[[package]]
+name = "python-dotenv"
+version = "1.0.1"
+description = "Read key-value pairs from a .env file and set them as environment variables"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
+    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
+]
+
+[package.extras]
+cli = ["click (>=5.0)"]
+
 [[package]]
 name = "pytz"
 version = "2024.1"
@@ -2992,17 +3068,18 @@ mpmath = ">=0.19"
 
 [[package]]
 name = "testcontainers"
-version = "4.8.1"
+version = "4.9.0"
 description = "Python library for throwaway instances of anything that can run in a Docker container"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "testcontainers-4.8.1-py3-none-any.whl", hash = "sha256:d8ae43e8fe34060fcd5c3f494e0b7652b7774beabe94568a2283d0881e94d489"},
-    {file = "testcontainers-4.8.1.tar.gz", hash = "sha256:5ded4820b7227ad526857eb3caaafcabce1bbac05d22ad194849b136ffae3cb0"},
+    {file = "testcontainers-4.9.0-py3-none-any.whl", hash = "sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32"},
+    {file = "testcontainers-4.9.0.tar.gz", hash = "sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e"},
 ]
 
 [package.dependencies]
 docker = "*"
+python-dotenv = "*"
 typing-extensions = "*"
 urllib3 = "*"
 wrapt = "*"
@@ -3160,13 +3237,13 @@ files = [
 
 [[package]]
 name = "typing-extensions"
-version = "4.6.1"
-description = "Backported and Experimental Type Hints for Python 3.7+"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.6.1-py3-none-any.whl", hash = "sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f"},
-    {file = "typing_extensions-4.6.1.tar.gz", hash = "sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6"},
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
 [[package]]
@@ -3309,16 +3386,6 @@ files = [
     {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
     {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
     {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
-    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
-    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -3475,54 +3542,108 @@ propcache = ">=0.2.0"
 
 [[package]]
 name = "zstandard"
-version = "0.21.0"
+version = "0.23.0"
 description = "Zstandard bindings for Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"},
-    {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"},
-    {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f"},
-    {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8257752b97134477fb4e413529edaa04fc0457361d304c1319573de00ba796b1"},
-    {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c053b7c4cbf71cc26808ed67ae955836232f7638444d709bfc302d3e499364fa"},
-    {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2769730c13638e08b7a983b32cb67775650024632cd0476bf1ba0e6360f5ac7d"},
-    {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7d3bc4de588b987f3934ca79140e226785d7b5e47e31756761e48644a45a6766"},
-    {file = "zstandard-0.21.0-cp310-cp310-win32.whl", hash = "sha256:67829fdb82e7393ca68e543894cd0581a79243cc4ec74a836c305c70a5943f07"},
-    {file = "zstandard-0.21.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6048a287f8d2d6e8bc67f6b42a766c61923641dd4022b7fd3f7439e17ba5a4d"},
-    {file = "zstandard-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7f2afab2c727b6a3d466faee6974a7dad0d9991241c498e7317e5ccf53dbc766"},
-    {file = "zstandard-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff0852da2abe86326b20abae912d0367878dd0854b8931897d44cfeb18985472"},
-    {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d12fa383e315b62630bd407477d750ec96a0f438447d0e6e496ab67b8b451d39"},
-    {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1b9703fe2e6b6811886c44052647df7c37478af1b4a1a9078585806f42e5b15"},
-    {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df28aa5c241f59a7ab524f8ad8bb75d9a23f7ed9d501b0fed6d40ec3064784e8"},
-    {file = "zstandard-0.21.0-cp311-cp311-win32.whl", hash = "sha256:0aad6090ac164a9d237d096c8af241b8dcd015524ac6dbec1330092dba151657"},
-    {file = "zstandard-0.21.0-cp311-cp311-win_amd64.whl", hash = "sha256:48b6233b5c4cacb7afb0ee6b4f91820afbb6c0e3ae0fa10abbc20000acdf4f11"},
-    {file = "zstandard-0.21.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7d560ce14fd209db6adacce8908244503a009c6c39eee0c10f138996cd66d3e"},
-    {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e6e131a4df2eb6f64961cea6f979cdff22d6e0d5516feb0d09492c8fd36f3bc"},
-    {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e0c62a67ff425927898cf43da2cf6b852289ebcc2054514ea9bf121bec10a5"},
-    {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1545fb9cb93e043351d0cb2ee73fa0ab32e61298968667bb924aac166278c3fc"},
-    {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe6c821eb6870f81d73bf10e5deed80edcac1e63fbc40610e61f340723fd5f7c"},
-    {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ddb086ea3b915e50f6604be93f4f64f168d3fc3cef3585bb9a375d5834392d4f"},
-    {file = "zstandard-0.21.0-cp37-cp37m-win32.whl", hash = "sha256:57ac078ad7333c9db7a74804684099c4c77f98971c151cee18d17a12649bc25c"},
-    {file = "zstandard-0.21.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1243b01fb7926a5a0417120c57d4c28b25a0200284af0525fddba812d575f605"},
-    {file = "zstandard-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea68b1ba4f9678ac3d3e370d96442a6332d431e5050223626bdce748692226ea"},
-    {file = "zstandard-0.21.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8070c1cdb4587a8aa038638acda3bd97c43c59e1e31705f2766d5576b329e97c"},
-    {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af612c96599b17e4930fe58bffd6514e6c25509d120f4eae6031b7595912f85"},
-    {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff891e37b167bc477f35562cda1248acc115dbafbea4f3af54ec70821090965"},
-    {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9fec02ce2b38e8b2e86079ff0b912445495e8ab0b137f9c0505f88ad0d61296"},
-    {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdbe350691dec3078b187b8304e6a9c4d9db3eb2d50ab5b1d748533e746d099"},
-    {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b69cccd06a4a0a1d9fb3ec9a97600055cf03030ed7048d4bcb88c574f7895773"},
-    {file = "zstandard-0.21.0-cp38-cp38-win32.whl", hash = "sha256:9980489f066a391c5572bc7dc471e903fb134e0b0001ea9b1d3eff85af0a6f1b"},
-    {file = "zstandard-0.21.0-cp38-cp38-win_amd64.whl", hash = "sha256:0e1e94a9d9e35dc04bf90055e914077c80b1e0c15454cc5419e82529d3e70728"},
-    {file = "zstandard-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2d61675b2a73edcef5e327e38eb62bdfc89009960f0e3991eae5cc3d54718de"},
-    {file = "zstandard-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25fbfef672ad798afab12e8fd204d122fca3bc8e2dcb0a2ba73bf0a0ac0f5f07"},
-    {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62957069a7c2626ae80023998757e27bd28d933b165c487ab6f83ad3337f773d"},
-    {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e10ed461e4807471075d4b7a2af51f5234c8f1e2a0c1d37d5ca49aaaad49e8"},
-    {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cff89a036c639a6a9299bf19e16bfb9ac7def9a7634c52c257166db09d950e7"},
-    {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52b2b5e3e7670bd25835e0e0730a236f2b0df87672d99d3bf4bf87248aa659fb"},
-    {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b1367da0dde8ae5040ef0413fb57b5baeac39d8931c70536d5f013b11d3fc3a5"},
-    {file = "zstandard-0.21.0-cp39-cp39-win32.whl", hash = "sha256:db62cbe7a965e68ad2217a056107cc43d41764c66c895be05cf9c8b19578ce9c"},
-    {file = "zstandard-0.21.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a"},
-    {file = "zstandard-0.21.0.tar.gz", hash = "sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546"},
+    {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"},
+    {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"},
+    {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc"},
+    {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573"},
+    {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391"},
+    {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e"},
+    {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0"},
+    {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c"},
+    {file = "zstandard-0.23.0-cp310-cp310-win32.whl", hash = "sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813"},
+    {file = "zstandard-0.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4"},
+    {file = "zstandard-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e"},
+    {file = "zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23"},
+    {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a"},
+    {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db"},
+    {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2"},
+    {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca"},
+    {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78"},
+    {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473"},
+    {file = "zstandard-0.23.0-cp311-cp311-win32.whl", hash = "sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160"},
+    {file = "zstandard-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0"},
+    {file = "zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094"},
+    {file = "zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8"},
+    {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1"},
+    {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072"},
+    {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20"},
+    {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373"},
+    {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90"},
+    {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35"},
+    {file = "zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d"},
+    {file = "zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b"},
+    {file = "zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9"},
+    {file = "zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a"},
+    {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2"},
+    {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5"},
+    {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f"},
+    {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed"},
+    {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057"},
+    {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33"},
+    {file = "zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd"},
+    {file = "zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b"},
+    {file = "zstandard-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc"},
+    {file = "zstandard-0.23.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740"},
+    {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54"},
+    {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8"},
+    {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045"},
+    {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152"},
+    {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b"},
+    {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e"},
+    {file = "zstandard-0.23.0-cp38-cp38-win32.whl", hash = "sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9"},
+    {file = "zstandard-0.23.0-cp38-cp38-win_amd64.whl", hash = "sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f"},
+    {file = "zstandard-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb"},
+    {file = "zstandard-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916"},
+    {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a"},
+    {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259"},
+    {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4"},
+    {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58"},
+    {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2"},
+    {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5"},
+    {file = "zstandard-0.23.0-cp39-cp39-win32.whl", hash = "sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274"},
+    {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"},
+    {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"},
 ]
 
 [package.dependencies]
@@ -3534,4 +3655,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1"
+content-hash = "e6904aca09abc6c805604b21a5702a97e0056406f9ec7469b091d35ee10a6b16"
diff --git a/pyproject.toml b/pyproject.toml
index ba4ab0b1f7..735d12d756 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,11 +7,11 @@ package-mode = false
 python = "^3.11"
 pytest = "^7.4.4"
 psycopg2-binary = "^2.9.10"
-typing-extensions = "^4.6.1"
+typing-extensions = "^4.12.2"
 PyJWT = {version = "^2.1.0", extras = ["crypto"]}
 requests = "^2.32.3"
 pytest-xdist = "^3.3.1"
-asyncpg = "^0.29.0"
+asyncpg = "^0.30.0"
 aiopg = "^1.4.0"
 Jinja2 = "^3.1.5"
 types-requests = "^2.31.0.0"
@@ -36,7 +36,7 @@ aiohttp = "3.10.11"
 pytest-rerunfailures = "^15.0"
 types-pytest-lazy-fixture = "^0.6.3.3"
 pytest-split = "^0.8.1"
-zstandard = "^0.21.0"
+zstandard = "^0.23.0"
 httpx = {extras = ["http2"], version = "^0.26.0"}
 pytest-repeat = "^0.9.3"
 websockets = "^12.0"
@@ -47,8 +47,9 @@ h2 = "^4.1.0"
 types-jwcrypto = "^1.5.0.20240925"
 pyyaml = "^6.0.2"
 types-pyyaml = "^6.0.12.20240917"
-testcontainers = "^4.8.1"
-jsonnet = "^0.20.0"
+testcontainers = "^4.9.0"
+# Jsonnet doesn't support Python 3.13 yet
+jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" }
 
 [tool.poetry.group.dev.dependencies]
 mypy = "==1.13.0"
diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py
index 787790103f..71963355b7 100644
--- a/test_runner/regress/test_compute_metrics.py
+++ b/test_runner/regress/test_compute_metrics.py
@@ -3,12 +3,11 @@ from __future__ import annotations
 import enum
 import os
 import shutil
+import sys
 from enum import StrEnum
 from pathlib import Path
 from typing import TYPE_CHECKING, cast
 
-# Docs are available at https://jsonnet.org/ref/bindings.html#python_api
-import _jsonnet
 import pytest
 import requests
 import yaml
@@ -87,6 +86,10 @@ def jsonnet_evaluate_file(
     ext_vars: str | dict[str, str] | None = None,
     tla_vars: str | dict[str, str] | None = None,
 ) -> str:
+    # Jsonnet doesn't support Python 3.13 yet
+    # Docs are available at https://jsonnet.org/ref/bindings.html#python_api
+    import _jsonnet
+
     return cast(
         "str",
         _jsonnet.evaluate_file(
@@ -121,6 +124,7 @@ class SqlExporterProcess(StrEnum):
     AUTOSCALING = "autoscaling"
 
 
+@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet")
 @pytest.mark.parametrize(
     "collector_name",
     ["neon_collector", "neon_collector_autoscaling"],
@@ -352,6 +356,7 @@ else:
             self.__proc.wait()
 
 
+@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet")
 @pytest.mark.parametrize(
     "exporter",
     [SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING],

From 02f81b6469c88187ddda548c2dbe32c8c5a9a41d Mon Sep 17 00:00:00 2001
From: Alexander Bayandin <alexander@neon.tech>
Date: Mon, 6 Jan 2025 20:28:33 +0000
Subject: [PATCH 50/63] Fix clippy warning on macOS (#10282)

## Problem

On macOS:

```
error: unused variable: `disable_lfc_resizing`
   --> compute_tools/src/bin/compute_ctl.rs:431:9
    |
431 |         disable_lfc_resizing,
    |         ^^^^^^^^^^^^^^^^^^^^ help: try ignoring the field: `disable_lfc_resizing: _`
    |
    = note: `-D unused-variables` implied by `-D warnings`
    = help: to override `-D warnings` add `#[allow(unused_variables)]`
```

## Summary of changes
- Initialise `disable_lfc_resizing` only on Linux (because it's only used
in a Linux-only block further down)
---
 compute_tools/src/bin/compute_ctl.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 26ae25ec20..6ede5fdceb 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -428,6 +428,7 @@ fn start_postgres(
     let &ComputeSpec {
         swap_size_bytes,
         disk_quota_bytes,
+        #[cfg(target_os = "linux")]
         disable_lfc_resizing,
         ..
     } = &state.pspec.as_ref().unwrap().spec;

From 30863c010421affd737977dbfe21ea08f18a43cb Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <matthias@neon.tech>
Date: Tue, 7 Jan 2025 10:07:38 +0100
Subject: [PATCH 51/63] libpagestore: timeout = max(0, difference), not min(0,
 difference) (#10274)

Using `min(0, ...)` causes us to fail to wait in most situations, so a
lack of data would result in a hot wait loop, which is bad.

## Problem

We noticed high CPU usage in some situations
---
 pgxn/neon/libpagestore.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index 88d0a5292b..fa2a570ea8 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -680,7 +680,7 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer)
 	 * but in the cases that take exceptionally long, it's useful to log the
 	 * exact timestamps.
 	 */
-#define LOG_INTERVAL_US		UINT64CONST(10 * 1000000)
+#define LOG_INTERVAL_MS		INT64CONST(10 * 1000)
 
 	INSTR_TIME_SET_CURRENT(now);
 	start_ts = last_log_ts = now;
@@ -694,7 +694,7 @@ retry:
 		WaitEvent	event;
 		long		timeout;
 
-		timeout = Min(0, LOG_INTERVAL_US - INSTR_TIME_GET_MICROSEC(since_last_log));
+		timeout = Max(0, LOG_INTERVAL_MS - INSTR_TIME_GET_MILLISEC(since_last_log));
 
 		/* Sleep until there's something to do */
 		(void) WaitEventSetWait(shard->wes_read, timeout, &event, 1,
@@ -723,7 +723,7 @@ retry:
 		INSTR_TIME_SET_CURRENT(now);
 		since_last_log = now;
 		INSTR_TIME_SUBTRACT(since_last_log, last_log_ts);
-		if (INSTR_TIME_GET_MICROSEC(since_last_log) >= LOG_INTERVAL_US)
+		if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS)
 		{
 			since_start = now;
 			INSTR_TIME_SUBTRACT(since_start, start_ts);

From ea84ec357fa4caa5a48ec65a0aab9e37d1a9fda4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= <jc@neon.tech>
Date: Tue, 7 Jan 2025 11:36:05 +0100
Subject: [PATCH 52/63] Split promote-images into promote-images-dev and
 promote-images-prod (#10267)

## Problem
`trigger-e2e-tests` waits half an hour before starting to run. Nearly
half of that time can be saved by promoting images before tests on them
are complete, so the e2e tests can run in parallel.

On `main` and `release{,-proxy,-compute}`, `promote-images` updates
`latest` and pushes things to prod ecr, so we want to run
`promote-images` only after `test-images` is done, but on other
branches, there is no harm in promoting images that aren't tested yet.

## Summary of changes

To promote images into dev container registries sooner, `promote-images`
is split into `promote-images-dev` and `promote-images-prod`. The former
pushes to dev container registries, the latter to prod ones. The latter
also waits for `test-images`, while the former doesn't. This allows
`trigger-e2e-tests` to run sooner.
---
 .github/workflows/actionlint.yml        |  2 +-
 .github/workflows/build_and_test.yml    | 35 +++++++++++++++++++------
 .github/workflows/trigger-e2e-tests.yml |  8 +++---
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml
index 85cfe7446e..0e53830040 100644
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -33,7 +33,7 @@ jobs:
           # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086
           SHELLCHECK_OPTS: --exclude=SC2046,SC2086
         with:
-          fail_on_error: true
+          fail_level: error
           filter_mode: nofilter
           level: error
 
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 12b1ac98ac..5c2b397c82 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -538,7 +538,7 @@ jobs:
 
   trigger-e2e-tests:
     if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
-    needs: [ check-permissions, promote-images, tag ]
+    needs: [ check-permissions, promote-images-dev, tag ]
     uses: ./.github/workflows/trigger-e2e-tests.yml
     secrets: inherit
 
@@ -930,8 +930,8 @@ jobs:
           docker compose -f ./docker-compose/docker-compose.yml logs || 0
           docker compose -f ./docker-compose/docker-compose.yml down
 
-  promote-images:
-    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
+  promote-images-dev:
+    needs: [ check-permissions, tag, vm-compute-node-image ]
     runs-on: ubuntu-22.04
 
     permissions:
@@ -965,6 +965,25 @@ jobs:
                                                neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
           done
 
+  promote-images-prod:
+    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
+    runs-on: ubuntu-22.04
+    if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
+
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: read
+
+    env:
+      VERSIONS: v14 v15 v16 v17
+
+    steps:
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+
       - name: Add latest tag to images
         if: github.ref_name == 'main'
         run: |
@@ -1010,7 +1029,7 @@ jobs:
 
   push-to-acr-dev:
     if: github.ref_name == 'main'
-    needs: [ tag, promote-images ]
+    needs: [ tag, promote-images-dev ]
     uses: ./.github/workflows/_push-to-acr.yml
     with:
       client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
@@ -1022,7 +1041,7 @@ jobs:
 
   push-to-acr-prod:
     if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ tag, promote-images ]
+    needs: [ tag, promote-images-prod ]
     uses: ./.github/workflows/_push-to-acr.yml
     with:
       client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
@@ -1112,7 +1131,7 @@ jobs:
           exit 1
 
   deploy:
-    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
+    needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
     # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
     if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
     permissions:
@@ -1333,7 +1352,7 @@ jobs:
           done
 
   pin-build-tools-image:
-    needs: [ build-build-tools-image, promote-images, build-and-test-locally ]
+    needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ]
     if: github.ref_name == 'main'
     uses: ./.github/workflows/pin-build-tools-image.yml
     with:
@@ -1356,7 +1375,7 @@ jobs:
       - build-and-test-locally
       - check-codestyle-python
       - check-codestyle-rust
-      - promote-images
+      - promote-images-dev
       - test-images
       - trigger-custom-extensions-build-and-wait
     runs-on: ubuntu-22.04
diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml
index 70c2e8549f..31696248b0 100644
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -68,7 +68,7 @@ jobs:
       GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
       TAG: ${{ needs.tag.outputs.build-tag }}
     steps:
-      - name: Wait for `promote-images` job to finish
+      - name: Wait for `promote-images-dev` job to finish
         # It's important to have a timeout here, the script in the step can run infinitely
         timeout-minutes: 60
         run: |
@@ -79,17 +79,17 @@ jobs:
           # For PRs we use the run id as the tag
           BUILD_AND_TEST_RUN_ID=${TAG}
           while true; do
-            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
+            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion')
             case "$conclusion" in
               success)
                 break
                 ;;
               failure | cancelled | skipped)
-                echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
+                echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..."
                 exit 1
                 ;;
               *)
-                echo "The 'promote-images' hasn't succeed yet. Waiting..."
+                echo "The 'promote-images-dev' hasn't succeed yet. Waiting..."
                 sleep 60
                 ;;
             esac

From be38123e62b029dcd9f9cc0beb765ad2d3333906 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <matthias@neon.tech>
Date: Tue, 7 Jan 2025 11:41:52 +0100
Subject: [PATCH 53/63] Fix accounting of dropped prefetched GetPage requests
 (#10276)

Apparently, we failed to do this bookkeeping in quite a few places...

## Problem

Fixes https://github.com/neondatabase/cloud/issues/22364

## Summary of changes

Add accounting of dropped requests. Note that this includes prefetches
dropped due to things like "PS connection dropped unexpectedly" or
"prefetch queue is already full", but *not* (yet?) "dropped due to
backend shutdown".
---
 pgxn/neon/pagestore_smgr.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c
index 385905d9ce..b733807026 100644
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -716,6 +716,8 @@ prefetch_on_ps_disconnect(void)
 		MyPState->ring_receive += 1;
 
 		prefetch_set_unused(ring_index);
+		pgBufferUsage.prefetch.expired += 1;
+		MyNeonCounters->getpage_prefetch_discards_total += 1;
 	}
 
 	/*
@@ -935,7 +937,8 @@ Retry:
 					prefetch_set_unused(ring_index);
 					entry = NULL;
 					slot = NULL;
-					MyNeonCounters->getpage_prefetch_discards_total++;
+					pgBufferUsage.prefetch.expired += 1;
+					MyNeonCounters->getpage_prefetch_discards_total += 1;
 				}
 			}
 
@@ -1026,10 +1029,14 @@ Retry:
 						if (!prefetch_wait_for(cleanup_index))
 							goto Retry;
 						prefetch_set_unused(cleanup_index);
+						pgBufferUsage.prefetch.expired += 1;
+						MyNeonCounters->getpage_prefetch_discards_total += 1;
 						break;
 					case PRFS_RECEIVED:
 					case PRFS_TAG_REMAINS:
 						prefetch_set_unused(cleanup_index);
+						pgBufferUsage.prefetch.expired += 1;
+						MyNeonCounters->getpage_prefetch_discards_total += 1;
 						break;
 					default:
 						pg_unreachable();

From 4aa9786c6bffe8094cfc82947504d2044e284d7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= <jc@neon.tech>
Date: Tue, 7 Jan 2025 14:45:18 +0100
Subject: [PATCH 54/63] Fix promote-images-prod after splitting it out (#10292)

## Problem
`promote-images` was split into `promote-images-dev` and
`promote-images-prod` in
https://github.com/neondatabase/neon/pull/10267.

`dev` credentials were loaded in `promote-images-dev` and `prod`
credentials were loaded in `promote-images-prod`, but
`promote-images-prod` needs `dev` credentials as well to access the
`dev` images to replicate them from `dev` to `prod`.

## Summary of changes
Load `dev` credentials in `promote-images-prod` as well.
---
 .github/workflows/build_and_test.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 5c2b397c82..01f5c3ede9 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -979,6 +979,16 @@ jobs:
       VERSIONS: v14 v15 v16 v17
 
     steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 3600
+
+      - name: Login to Amazon Dev ECR
+        uses: aws-actions/amazon-ecr-login@v2
+
       - uses: docker/login-action@v3
         with:
           username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}

From 0a117fb1f1a9c2a062c8a60d53f2eb00637392e8 Mon Sep 17 00:00:00 2001
From: Folke Behrens <folke@neon.tech>
Date: Tue, 7 Jan 2025 16:24:54 +0100
Subject: [PATCH 55/63] proxy: Parse Notification twice only for unknown topic
 (#10296)

## Problem

We currently parse Notification twice even in the happy path.

## Summary of changes

Use `#[serde(other)]` to catch unknown topics and defer the second
parsing.
---
 proxy/src/redis/notifications.rs | 55 ++++++++++++--------------------
 1 file changed, 21 insertions(+), 34 deletions(-)

diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index 4383d6be2c..bf9d61ded3 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -37,7 +37,6 @@ struct NotificationHeader<'a> {
 
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 #[serde(tag = "topic", content = "data")]
-// Message to contributors: Make sure to align these topic names with the list below.
 pub(crate) enum Notification {
     #[serde(
         rename = "/allowed_ips_updated",
@@ -74,21 +73,9 @@ pub(crate) enum Notification {
     PasswordUpdate { password_update: PasswordUpdate },
     #[serde(rename = "/cancel_session")]
     Cancel(CancelSession),
-}
 
-/// Returns true if the topic name given is a known topic that we can deserialize and action on.
-/// Returns false otherwise.
-fn known_topic(s: &str) -> bool {
-    // Message to contributors: Make sure to align these topic names with the enum above.
-    matches!(
-        s,
-        "/allowed_ips_updated"
-            | "/block_public_or_vpc_access_updated"
-            | "/allowed_vpc_endpoints_updated_for_org"
-            | "/allowed_vpc_endpoints_updated_for_projects"
-            | "/password_updated"
-            | "/cancel_session"
-    )
+    #[serde(other, skip_serializing)]
+    UnknownTopic,
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
@@ -178,32 +165,29 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
         let payload: String = msg.get_payload()?;
         tracing::debug!(?payload, "received a message payload");
 
-        // For better error handling, we first parse the payload to extract the topic.
-        // If there's a topic we don't support, we can handle that error more gracefully.
-        let header: NotificationHeader = match serde_json::from_str(&payload) {
-            Ok(msg) => msg,
-            Err(e) => {
-                Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
-                    channel: msg.get_channel_name(),
-                });
-                tracing::error!("broken message: {e}");
+        let msg: Notification = match serde_json::from_str(&payload) {
+            Ok(Notification::UnknownTopic) => {
+                match serde_json::from_str::<NotificationHeader>(&payload) {
+                    // don't update the metric for redis errors if it's just a topic we don't know about.
+                    Ok(header) => tracing::warn!(topic = header.topic, "unknown topic"),
+                    Err(e) => {
+                        Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
+                            channel: msg.get_channel_name(),
+                        });
+                        tracing::error!("broken message: {e}");
+                    }
+                };
                 return Ok(());
             }
-        };
-
-        if !known_topic(header.topic) {
-            // don't update the metric for redis errors if it's just a topic we don't know about.
-            tracing::warn!(topic = header.topic, "unknown topic");
-            return Ok(());
-        }
-
-        let msg: Notification = match serde_json::from_str(&payload) {
             Ok(msg) => msg,
             Err(e) => {
                 Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
                     channel: msg.get_channel_name(),
                 });
-                tracing::error!(topic = header.topic, "broken message: {e}");
+                match serde_json::from_str::<NotificationHeader>(&payload) {
+                    Ok(header) => tracing::error!(topic = header.topic, "broken message: {e}"),
+                    Err(_) => tracing::error!("broken message: {e}"),
+                };
                 return Ok(());
             }
         };
@@ -278,6 +262,8 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                     invalidate_cache(cache, msg);
                 });
             }
+
+            Notification::UnknownTopic => unreachable!(),
         }
 
         Ok(())
@@ -304,6 +290,7 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
         Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
             // https://github.com/neondatabase/neon/pull/10073
         }
+        Notification::UnknownTopic => unreachable!(),
     }
 }
 

From 43a5e575d611c546c9bf5d538a57d8984560589d Mon Sep 17 00:00:00 2001
From: Fedor Dikarev <fedor@neon.tech>
Date: Tue, 7 Jan 2025 21:00:56 +0100
Subject: [PATCH 56/63] ci: use reusable workflow for MacOs build (#9889)

Closes: https://github.com/neondatabase/cloud/issues/17784

## Problem
Currently, we run the whole CI pipeline for any changes. It's slow and
expensive.

## Suggestion
Starting with MacOs builds:
- check what files were changed
- rebuild only needed parts
- reuse results from previous builds when available
- run builds in parallel when possible

---------

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
---
 .github/file-filters.yaml               |  12 ++
 .github/workflows/build-macos.yml       | 241 ++++++++++++++++++++++++
 .github/workflows/neon_extra_builds.yml | 139 ++++----------
 3 files changed, 290 insertions(+), 102 deletions(-)
 create mode 100644 .github/file-filters.yaml
 create mode 100644 .github/workflows/build-macos.yml

diff --git a/.github/file-filters.yaml b/.github/file-filters.yaml
new file mode 100644
index 0000000000..886cd3919a
--- /dev/null
+++ b/.github/file-filters.yaml
@@ -0,0 +1,12 @@
+rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
+
+v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
+v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
+v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']
+v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']
+
+rebuild_neon_extra:
+    - .github/workflows/neon_extra_builds.yml
+
+rebuild_macos:
+    - .github/workflows/build-macos.yml
diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml
new file mode 100644
index 0000000000..01d82a1ed2
--- /dev/null
+++ b/.github/workflows/build-macos.yml
@@ -0,0 +1,241 @@
+name: Check neon with MacOS builds
+
+on:
+  workflow_call:
+    inputs:
+      pg_versions:
+        description: "Array of the pg versions to build for, for example: ['v14', 'v17']"
+        type: string
+        default: '[]'
+        required: false
+      rebuild_rust_code:
+        description: "Rebuild Rust code"
+        type: boolean
+        default: false
+        required: false
+      rebuild_everything:
+        description: "If true, rebuild for all versions"
+        type: boolean
+        default: false
+        required: false
+
+env:
+  RUST_BACKTRACE: 1
+  COPT: '-Werror'
+
+# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow
+# We should care about that as Github has limitations:
+# - You can connect up to four levels of workflows
+# - You can call a maximum of 20 unique reusable workflows from a single workflow file.
+# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations
+jobs:
+  build-pgxn:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    strategy:
+      matrix:
+        postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+
+      - name: Set pg ${{ matrix.postgres-version }} for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres ${{ matrix.postgres-version }} build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/${{ matrix.postgres-version }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-${{ matrix.postgres-version }}
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build Postgres ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
+
+      - name: Get postgres headers ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+  build-walproposer-lib:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+
+      - name: Set pg v17 for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres v17 build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@v4
+        with:
+          path: pg_install/build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-v17
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-v17
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build walproposer-lib (only for v17)
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run:
+          make walproposer-lib -j$(sysctl -n hw.ncpu)
+
+  cargo-build:
+    if: |
+      (inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn, build-walproposer-lib]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Set pg v14 for caching
+        id: pg_rev_v14
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v15 for caching
+        id: pg_rev_v15
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v16 for caching
+        id: pg_rev_v16
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
+      - name: Set pg v17 for caching
+        id: pg_rev_v17
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres v14 build
+        id: cache_pg
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v14
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v15 build
+        id: cache_pg_v15
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v15
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v16 build
+        id: cache_pg_v16
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v16
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+      - name: Cache postgres v17 build
+        id: cache_pg_v17
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Cache cargo deps (only for v17)
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            !~/.cargo/registry/src
+            ~/.cargo/git
+            target
+          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@v4
+        with:
+          path: pg_install/build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Install build dependencies
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Run cargo build (only for v17)
+        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
+
+      - name: Check that no warnings are produced (only for v17)
+        run: ./run_clippy.sh
diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml
index 1f85c2e102..5b5910badf 100644
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -31,19 +31,15 @@ jobs:
     uses: ./.github/workflows/build-build-tools-image.yml
     secrets: inherit
 
-  check-macos-build:
-    needs: [ check-permissions ]
-    if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
-    timeout-minutes: 90
-    runs-on: macos-15
-
-    env:
-      # Use release build only, to have less debug info around
-      # Hence keeping target/ (and general cache size) smaller
-      BUILD_TYPE: release
+  files-changed:
+    name: Detect what files changed
+    runs-on: ubuntu-22.04
+    timeout-minutes: 3
+    outputs:
+      v17: ${{ steps.files_changed.outputs.v17 }}
+      postgres_changes: ${{ steps.postgres_changes.outputs.changes }}
+      rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }}
+      rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }}
 
     steps:
       - name: Checkout
@@ -51,106 +47,45 @@ jobs:
         with:
           submodules: true
 
-      - name: Install macOS postgres dependencies
-        run: brew install flex bison openssl protobuf icu4c
-
-      - name: Set pg 14 revision for caching
-        id: pg_v14_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
-
-      - name: Set pg 15 revision for caching
-        id: pg_v15_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
-
-      - name: Set pg 16 revision for caching
-        id: pg_v16_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-
-      - name: Set pg 17 revision for caching
-        id: pg_v17_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
-
-      - name: Cache postgres v14 build
-        id: cache_pg_14
-        uses: actions/cache@v4
+      - name: Check for Postgres changes
+        uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242  #v3
+        id: files_changed
         with:
-          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+          token: ${{ github.token }}
+          filters: .github/file-filters.yaml
+          base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}
+          ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }}
 
-      - name: Cache postgres v15 build
-        id: cache_pg_15
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v16 build
-        id: cache_pg_16
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v16
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v17 build
-        id: cache_pg_17
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Set extra env for macOS
+      - name: Filter out only v-string for build matrix
+        id: postgres_changes
         run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+          v_strings_only_as_json_array=$(echo '${{ steps.files_changed.outputs.changes }}' | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
+          echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"
 
-      - name: Cache cargo deps
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cargo/registry
-            !~/.cargo/registry/src
-            ~/.cargo/git
-            target
-          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
-
-      - name: Build postgres v14
-        if: steps.cache_pg_14.outputs.cache-hit != 'true'
-        run: make postgres-v14 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v15
-        if: steps.cache_pg_15.outputs.cache-hit != 'true'
-        run: make postgres-v15 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v16
-        if: steps.cache_pg_16.outputs.cache-hit != 'true'
-        run: make postgres-v16 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v17
-        if: steps.cache_pg_17.outputs.cache-hit != 'true'
-        run: make postgres-v17 -j$(sysctl -n hw.ncpu)
-
-      - name: Build neon extensions
-        run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
-
-      - name: Build walproposer-lib
-        run: make walproposer-lib -j$(sysctl -n hw.ncpu)
-
-      - name: Run cargo build
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release
-
-      - name: Check that no warnings are produced
-        run: ./run_clippy.sh
+  check-macos-build:
+    needs: [ check-permissions, files-changed ]
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    uses: ./.github/workflows/build-macos.yml
+    with:
+      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
+      rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
+      rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}
 
   gather-rust-build-stats:
-    needs: [ check-permissions, build-build-tools-image ]
+    needs: [ check-permissions, build-build-tools-image, files-changed ]
     permissions:
       id-token: write # aws-actions/configure-aws-credentials
       statuses: write
       contents: write
     if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
+      (needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
     runs-on: [ self-hosted, large ]
     container:
       image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm

From 237dae71a1bd00d1611fb2298b2e3cb13883155b Mon Sep 17 00:00:00 2001
From: Erik Grinaker <erik@neon.tech>
Date: Tue, 7 Jan 2025 23:49:00 +0100
Subject: [PATCH 57/63] Revert "pageserver,safekeeper: disable heap profiling
 (#10268)" (#10303)

This reverts commit b33299dc37d9269fe55bd3256b7a4a72c129b81c.

Heap profiles weren't the culprit after all.

Touches #10225.
---
 pageserver/src/bin/pageserver.rs | 10 ++++------
 safekeeper/src/bin/safekeeper.rs | 10 ++++------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index b92ff4ebf9..567a69da3b 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -53,12 +53,10 @@ project_build_tag!(BUILD_TAG);
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-// TODO: disabled because concurrent CPU profiles cause seg faults. See:
-// https://github.com/neondatabase/neon/issues/10225.
-//#[allow(non_upper_case_globals)]
-//#[export_name = "malloc_conf"]
-//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "pageserver.pid";
 
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs
index e0ba38d638..13f6e34575 100644
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -51,12 +51,10 @@ use utils::{
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-// TODO: disabled because concurrent CPU profiles cause seg faults. See:
-// https://github.com/neondatabase/neon/issues/10225.
-//#[allow(non_upper_case_globals)]
-//#[export_name = "malloc_conf"]
-//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "safekeeper.pid";
 const ID_FILE_NAME: &str = "safekeeper.id";

From 5c76e2a983295f3123631b0178309a942f584596 Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Tue, 7 Jan 2025 18:24:17 -0500
Subject: [PATCH 58/63] fix(storage-scrubber): ignore errors if index_part is
 not consistent (#10304)

## Problem

Consider the pageserver is doing the following sequence of operations:

* upload X files
* update index_part to add X and remove Y
* delete Y files

When the storage scrubber obtains its initial timeline snapshot before
the "update index_part" step (i.e. the old version, which contains Y but
not X) and then downloads the index_part file after it has been updated,
it reports all Y files as missing.

## Summary of changes

Do not report a missing layer file as an error (only as a warning) if the
index_part that was listed and the one that was downloaded are not the
same (i.e. they have different last_modified times).

Signed-off-by: Alex Chi Z <chi@neon.tech>
---
 storage_scrubber/src/checks.rs                | 24 +++++++++++++------
 storage_scrubber/src/lib.rs                   |  7 +++---
 .../src/pageserver_physical_gc.rs             |  6 ++++-
 .../src/scan_pageserver_metadata.rs           | 10 ++++++--
 storage_scrubber/src/tenant_snapshot.rs       |  2 ++
 5 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs
index 32c86052ef..b42709868b 100644
--- a/storage_scrubber/src/checks.rs
+++ b/storage_scrubber/src/checks.rs
@@ -1,4 +1,5 @@
 use std::collections::{HashMap, HashSet};
+use std::time::SystemTime;
 
 use itertools::Itertools;
 use pageserver::tenant::checks::check_valid_layermap;
@@ -88,9 +89,14 @@ pub(crate) async fn branch_cleanup_and_check_errors(
             match s3_data.blob_data {
                 BlobDataParseResult::Parsed {
                     index_part,
-                    index_part_generation: _index_part_generation,
-                    s3_layers: _s3_layers,
+                    index_part_generation: _,
+                    s3_layers: _,
+                    index_part_last_modified_time,
+                    index_part_snapshot_time,
                 } => {
+                    // Ignore missing file error if index_part downloaded is different from the one when listing the layer files.
+                    let ignore_error = index_part_snapshot_time < index_part_last_modified_time
+                        && !cfg!(debug_assertions);
                     if !IndexPart::KNOWN_VERSIONS.contains(&index_part.version()) {
                         result
                             .errors
@@ -171,7 +177,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                                     is_l0,
                                 );
 
-                                if is_l0 {
+                                if is_l0 || ignore_error {
                                     result.warnings.push(msg);
                                 } else {
                                     result.errors.push(msg);
@@ -308,6 +314,8 @@ pub(crate) enum BlobDataParseResult {
     Parsed {
         index_part: Box<IndexPart>,
         index_part_generation: Generation,
+        index_part_last_modified_time: SystemTime,
+        index_part_snapshot_time: SystemTime,
         s3_layers: HashSet<(LayerName, Generation)>,
     },
     /// The remains of an uncleanly deleted Timeline or aborted timeline creation(e.g. an initdb archive only, or some layer without an index)
@@ -484,9 +492,9 @@ async fn list_timeline_blobs_impl(
     }
 
     if let Some(index_part_object_key) = index_part_object.as_ref() {
-        let index_part_bytes =
+        let (index_part_bytes, index_part_last_modified_time) =
             match download_object_with_retries(remote_client, &index_part_object_key.key).await {
-                Ok(index_part_bytes) => index_part_bytes,
+                Ok(data) => data,
                 Err(e) => {
                     // It is possible that the branch gets deleted in-between we list the objects
                     // and we download the index part file.
@@ -500,7 +508,7 @@ async fn list_timeline_blobs_impl(
                     ));
                 }
             };
-
+        let index_part_snapshot_time = index_part_object_key.last_modified;
         match serde_json::from_slice(&index_part_bytes) {
             Ok(index_part) => {
                 return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
@@ -508,6 +516,8 @@ async fn list_timeline_blobs_impl(
                         index_part: Box::new(index_part),
                         index_part_generation,
                         s3_layers,
+                        index_part_last_modified_time,
+                        index_part_snapshot_time,
                     },
                     unused_index_keys: index_part_keys,
                     unknown_keys,
@@ -625,7 +635,7 @@ pub(crate) async fn list_tenant_manifests(
 
     let manifest_bytes =
         match download_object_with_retries(remote_client, &latest_listing_object.key).await {
-            Ok(bytes) => bytes,
+            Ok((bytes, _)) => bytes,
             Err(e) => {
                 // It is possible that the tenant gets deleted in-between we list the objects
                 // and we download the manifest file.
diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs
index be526daaf0..224235098c 100644
--- a/storage_scrubber/src/lib.rs
+++ b/storage_scrubber/src/lib.rs
@@ -13,7 +13,7 @@ pub mod tenant_snapshot;
 use std::env;
 use std::fmt::Display;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 
 use anyhow::Context;
 use aws_config::retry::{RetryConfigBuilder, RetryMode};
@@ -509,10 +509,11 @@ async fn list_objects_with_retries(
     panic!("MAX_RETRIES is not allowed to be 0");
 }
 
+/// Returns content, last modified time
 async fn download_object_with_retries(
     remote_client: &GenericRemoteStorage,
     key: &RemotePath,
-) -> anyhow::Result<Vec<u8>> {
+) -> anyhow::Result<(Vec<u8>, SystemTime)> {
     let cancel = CancellationToken::new();
     for trial in 0..MAX_RETRIES {
         let mut buf = Vec::new();
@@ -535,7 +536,7 @@ async fn download_object_with_retries(
         {
             Ok(bytes_read) => {
                 tracing::debug!("Downloaded {bytes_read} bytes for object {key}");
-                return Ok(buf);
+                return Ok((buf, download.last_modified));
             }
             Err(e) => {
                 error!("Failed to stream object body for key {key}: {e}");
diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs
index d19b8a5f91..a997373375 100644
--- a/storage_scrubber/src/pageserver_physical_gc.rs
+++ b/storage_scrubber/src/pageserver_physical_gc.rs
@@ -450,6 +450,8 @@ async fn gc_ancestor(
                 index_part: _,
                 index_part_generation: _,
                 s3_layers,
+                index_part_last_modified_time: _,
+                index_part_snapshot_time: _,
             } => s3_layers,
             BlobDataParseResult::Relic => {
                 // Post-deletion tenant location: don't try and GC it.
@@ -586,7 +588,9 @@ async fn gc_timeline(
         BlobDataParseResult::Parsed {
             index_part,
             index_part_generation,
-            s3_layers: _s3_layers,
+            s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } => (index_part, *index_part_generation, data.unused_index_keys),
         BlobDataParseResult::Relic => {
             // Post-deletion tenant location: don't try and GC it.
diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs
index c8de6e46b3..a31fb5b242 100644
--- a/storage_scrubber/src/scan_pageserver_metadata.rs
+++ b/storage_scrubber/src/scan_pageserver_metadata.rs
@@ -47,6 +47,8 @@ impl MetadataSummary {
             index_part,
             index_part_generation: _,
             s3_layers: _,
+            index_part_last_modified_time: _,
+            index_part_snapshot_time: _,
         } = &data.blob_data
         {
             *self
@@ -195,7 +197,9 @@ pub async fn scan_pageserver_metadata(
                     if let BlobDataParseResult::Parsed {
                         index_part,
                         index_part_generation,
-                        s3_layers: _s3_layers,
+                        s3_layers: _,
+                        index_part_last_modified_time: _,
+                        index_part_snapshot_time: _,
                     } = &data.blob_data
                     {
                         if index_part.deleted_at.is_some() {
@@ -318,9 +322,11 @@ pub async fn scan_pageserver_metadata(
 
         match &data.blob_data {
             BlobDataParseResult::Parsed {
-                index_part: _index_part,
+                index_part: _,
                 index_part_generation: _index_part_generation,
                 s3_layers,
+                index_part_last_modified_time: _,
+                index_part_snapshot_time: _,
             } => {
                 tenant_objects.push(ttid, s3_layers.clone());
             }
diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs
index 39e0b5c9b4..60e79fb859 100644
--- a/storage_scrubber/src/tenant_snapshot.rs
+++ b/storage_scrubber/src/tenant_snapshot.rs
@@ -268,6 +268,8 @@ impl SnapshotDownloader {
                         index_part,
                         index_part_generation,
                         s3_layers: _,
+                        index_part_last_modified_time: _,
+                        index_part_snapshot_time: _,
                     } => {
                         self.download_timeline(
                             ttid,

From dc284247a5b0d4fd442868c9dc555dd0ab50c0c3 Mon Sep 17 00:00:00 2001
From: Vlad Lazar <vlad@neon.tech>
Date: Wed, 8 Jan 2025 10:26:53 +0000
Subject: [PATCH 59/63] storage_controller: fix node flap detach race (#10298)

## Problem

The observed state removal may race with the inline updates of the
observed state done from `Service::node_activate_reconcile`.

This was intended to work as follows:
1. Detaches while the node is unavailable remove the entry from the
   observed state.
2. `Service::node_activate_reconcile` diffs the locations returned
   by the pageserver with the observed state and detaches in-line
   when required.

## Summary of changes

This PR removes step (1) and lets background reconciliations
deal with the mismatch between the intent and observed state.
A follow up will attempt to remove `Service::node_activate_reconcile`
altogether.

Closes https://github.com/neondatabase/neon/issues/10253
---
 storage_controller/src/reconciler.rs          |  16 ++-
 storage_controller/src/service.rs             |   4 +
 test_runner/fixtures/neon_fixtures.py         |   7 +-
 .../regress/test_storage_controller.py        | 122 +++++++++++++++++-
 4 files changed, 139 insertions(+), 10 deletions(-)

diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs
index 475f91eff4..e0a854fff7 100644
--- a/storage_controller/src/reconciler.rs
+++ b/storage_controller/src/reconciler.rs
@@ -14,7 +14,6 @@ use std::sync::Arc;
 use std::time::{Duration, Instant};
 use tokio_util::sync::CancellationToken;
 use utils::backoff::exponential_backoff;
-use utils::failpoint_support;
 use utils::generation::Generation;
 use utils::id::{NodeId, TimelineId};
 use utils::lsn::Lsn;
@@ -212,11 +211,12 @@ impl Reconciler {
         lazy: bool,
     ) -> Result<(), ReconcileError> {
         if !node.is_available() && config.mode == LocationConfigMode::Detached {
-            // Attempts to detach from offline nodes may be imitated without doing I/O: a node which is offline
-            // will get fully reconciled wrt the shard's intent state when it is reactivated, irrespective of
-            // what we put into `observed`, in [`crate::service::Service::node_activate_reconcile`]
-            tracing::info!("Node {node} is unavailable during detach: proceeding anyway, it will be detached on next activation");
-            self.observed.locations.remove(&node.get_id());
+            // [`crate::service::Service::node_activate_reconcile`] will update the observed state
+            // when the node comes back online. At that point, the intent and observed states will
+            // be mismatched and a background reconciliation will detach.
+            tracing::info!(
+                "Node {node} is unavailable during detach: proceeding anyway, it will be detached via background reconciliation"
+            );
             return Ok(());
         }
 
@@ -749,6 +749,8 @@ impl Reconciler {
                     };
 
                     if increment_generation {
+                        pausable_failpoint!("reconciler-pre-increment-generation");
+
                         let generation = self
                             .persistence
                             .increment_generation(self.tenant_shard_id, node.get_id())
@@ -824,7 +826,7 @@ impl Reconciler {
                 .handle_detach(self.tenant_shard_id, self.shard.stripe_size);
         }
 
-        failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
+        pausable_failpoint!("reconciler-epilogue");
 
         Ok(())
     }
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 222cb9fdd4..359fcb3288 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -83,6 +83,7 @@ use utils::{
     generation::Generation,
     http::error::ApiError,
     id::{NodeId, TenantId, TimelineId},
+    pausable_failpoint,
     sync::gate::Gate,
 };
 
@@ -1024,6 +1025,8 @@ impl Service {
                     )
                     .await;
 
+                    pausable_failpoint!("heartbeat-pre-node-state-configure");
+
                     // This is the code path for geniune availability transitions (i.e node
                     // goes unavailable and/or comes back online).
                     let res = self
@@ -2492,6 +2495,7 @@ impl Service {
                 // Persist updates
                 // Ordering: write to the database before applying changes in-memory, so that
                 // we will not appear time-travel backwards on a restart.
+
                 let mut schedule_context = ScheduleContext::default();
                 for ShardUpdate {
                     tenant_shard_id,
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 8fd9eec8ce..00fdda2998 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2521,6 +2521,7 @@ class NeonPageserver(PgProtocol, LogUtils):
         self,
         extra_env_vars: dict[str, str] | None = None,
         timeout_in_seconds: int | None = None,
+        await_active: bool = True,
     ) -> Self:
         """
         Start the page server.
@@ -2547,8 +2548,10 @@ class NeonPageserver(PgProtocol, LogUtils):
         )
         self.running = True
 
-        if self.env.storage_controller.running and self.env.storage_controller.node_registered(
-            self.id
+        if (
+            await_active
+            and self.env.storage_controller.running
+            and self.env.storage_controller.node_registered(self.id)
         ):
             self.env.storage_controller.poll_node_status(
                 self.id, PageserverAvailability.ACTIVE, None, max_attempts=200, backoff=0.1
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 7062c35e05..973d0cdf82 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -17,6 +17,7 @@ from fixtures.compute_reconfigure import ComputeReconfigure
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     DEFAULT_AZ_ID,
+    LogCursor,
     NeonEnv,
     NeonEnvBuilder,
     NeonPageserver,
@@ -2406,7 +2407,14 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
     env.storage_controller.tenant_create(tid)
 
     env.storage_controller.reconcile_until_idle()
-    env.storage_controller.configure_failpoints(("sleep-on-reconcile-epilogue", "return(10000)"))
+    env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause"))
+
+    def unpause_failpoint():
+        time.sleep(2)
+        env.storage_controller.configure_failpoints(("reconciler-epilogue", "off"))
+
+    thread = threading.Thread(target=unpause_failpoint)
+    thread.start()
 
     # Make a change to the tenant config to trigger a slow reconcile
     virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
@@ -2421,6 +2429,8 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
     observed_state = env.storage_controller.step_down()
     log.info(f"Storage controller stepped down with {observed_state=}")
 
+    thread.join()
+
     # Validate that we waited for the slow reconcile to complete
     # and updated the observed state in the storcon before stepping down.
     node_id = str(env.pageserver.id)
@@ -3294,3 +3304,113 @@ def test_storage_controller_detached_stopped(
 
     # Confirm the detach happened
     assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []
+
+
+@run_only_on_default_postgres("Postgres version makes no difference here")
+def test_storage_controller_node_flap_detach_race(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    Reproducer for https://github.com/neondatabase/neon/issues/10253.
+
+    When a node's availability flaps, the reconciliations spawned by the node
+    going offline may race with the reconciliation done when the node comes
+    back online.
+    """
+    neon_env_builder.num_pageservers = 4
+
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    tenant_id = TenantId.generate()
+    env.storage_controller.tenant_create(
+        tenant_id,
+        shard_count=2,
+    )
+    env.storage_controller.reconcile_until_idle()
+
+    stopped_nodes = [s["node_id"] for s in env.storage_controller.locate(tenant_id)]
+
+    def has_hit_failpoint(failpoint: str, offset: LogCursor | None = None) -> LogCursor:
+        res = env.storage_controller.log_contains(f"at failpoint {failpoint}", offset=offset)
+        assert res
+        return res[1]
+
+    # Stop the nodes which host attached shards.
+    # This will trigger reconciliations which pause before incrementing the generation,
+    # and, more importantly, updating the `generation_pageserver` of the shards.
+    env.storage_controller.configure_failpoints(("reconciler-pre-increment-generation", "pause"))
+    for node_id in stopped_nodes:
+        env.get_pageserver(node_id).stop(immediate=True)
+
+    def failure_handled() -> LogCursor:
+        stop_offset = None
+
+        for node_id in stopped_nodes:
+            res = env.storage_controller.log_contains(f"node {node_id} going offline")
+            assert res
+            stop_offset = res[1]
+
+        assert stop_offset
+        return stop_offset
+
+    offset = wait_until(failure_handled)
+
+    # Now restart the nodes and make them pause before marking themselves as available
+    # or running the activation reconciliation.
+    env.storage_controller.configure_failpoints(("heartbeat-pre-node-state-configure", "pause"))
+
+    for node_id in stopped_nodes:
+        env.get_pageserver(node_id).start(await_active=False)
+
+    offset = wait_until(
+        lambda: has_hit_failpoint("heartbeat-pre-node-state-configure", offset=offset)
+    )
+
+    # The nodes have restarted and are waiting to perform activation reconciliation.
+    # Unpause the initial reconciliation triggered by the nodes going offline.
+    # It will attempt to detach from the old location, but notice that the old location
+    # is not yet available, and then stop before processing the results of the reconciliation.
+    env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause"))
+    env.storage_controller.configure_failpoints(("reconciler-pre-increment-generation", "off"))
+
+    offset = wait_until(lambda: has_hit_failpoint("reconciler-epilogue", offset=offset))
+
+    # Let the nodes perform activation reconciliation while still holding up processing the result
+    # from the initial reconcile triggered by going offline.
+    env.storage_controller.configure_failpoints(("heartbeat-pre-node-state-configure", "off"))
+
+    def activate_reconciliation_done():
+        for node_id in stopped_nodes:
+            assert env.storage_controller.log_contains(
+                f"Node {node_id} transition to active", offset=offset
+            )
+
+    wait_until(activate_reconciliation_done)
+
+    # Finally, allow the initial reconcile to finish up.
+    env.storage_controller.configure_failpoints(("reconciler-epilogue", "off"))
+
+    # Give things a chance to settle and validate that no stale locations exist
+    env.storage_controller.reconcile_until_idle()
+
+    def validate_locations():
+        shard_locations = defaultdict(list)
+        for ps in env.pageservers:
+            locations = ps.http_client().tenant_list_locations()["tenant_shards"]
+            for loc in locations:
+                shard_locations[loc[0]].append(
+                    {"generation": loc[1]["generation"], "mode": loc[1]["mode"], "node": ps.id}
+                )
+
+        log.info(f"Shard locations: {shard_locations}")
+
+        attached_locations = {
+            k: list(filter(lambda loc: loc["mode"] == "AttachedSingle", v))
+            for k, v in shard_locations.items()
+        }
+
+        for shard, locs in attached_locations.items():
+            assert len(locs) == 1, f"{shard} has {len(locs)} attached locations"
+
+    wait_until(validate_locations, timeout=10)

From 68d8acfd058b7b2d0deb041d14252f17d50ad05f Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Wed, 8 Jan 2025 18:12:09 +0000
Subject: [PATCH 60/63] storage controller: don't hold detached tenants in
 memory (#10264)

## Problem

Typical deployments of neon have some tenants that stay in use
continuously, and a background churning population of tenants that are
created and then fall idle, and are configured to Detached state.
Currently, this churn of short lived tenants results in an
ever-increasing memory footprint.

Closes: https://github.com/neondatabase/neon/issues/9712

## Summary of changes

- At startup, filter to only load shards that don't have Detached policy
- In process_result, check if a tenant's shards are all Detached and
observed=={}, and if so drop them from memory
- In tenant_location_conf and other tenant mutators, load the tenants'
shards on-demand if they are not present
---
 storage_controller/src/id_lock_map.rs         |   8 +
 storage_controller/src/persistence.rs         |  34 ++-
 storage_controller/src/service.rs             | 195 +++++++++++++++---
 storage_controller/src/tenant_shard.rs        |   4 +
 .../regress/test_storage_controller.py        | 105 +++++++++-
 5 files changed, 318 insertions(+), 28 deletions(-)

diff --git a/storage_controller/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs
index fcd3eb57e2..2d8b674f86 100644
--- a/storage_controller/src/id_lock_map.rs
+++ b/storage_controller/src/id_lock_map.rs
@@ -112,6 +112,14 @@ where
         }
     }
 
+    pub(crate) fn try_exclusive(&self, key: T, operation: I) -> Option<TracingExclusiveGuard<I>> {
+        let mut locked = self.entities.lock().unwrap();
+        let entry = locked.entry(key).or_default().clone();
+        let mut guard = TracingExclusiveGuard::new(entry.try_write_owned().ok()?);
+        *guard.guard = Some(operation);
+        Some(guard)
+    }
+
     /// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
     /// periodic housekeeping to avoid the map growing indefinitely
     pub(crate) fn housekeeping(&self) {
diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs
index cc377e606e..c5eb106f24 100644
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -97,6 +97,7 @@ pub(crate) enum DatabaseOperation {
     TenantGenerations,
     ShardGenerations,
     ListTenantShards,
+    LoadTenant,
     InsertTenantShards,
     UpdateTenantShard,
     DeleteTenant,
@@ -330,11 +331,40 @@ impl Persistence {
 
     /// At startup, load the high level state for shards, such as their config + policy.  This will
     /// be enriched at runtime with state discovered on pageservers.
-    pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {
+    ///
+    /// We exclude shards configured to be detached.  During startup, if we see any attached locations
+    /// for such shards, they will automatically be detached as 'orphans'.
+    pub(crate) async fn load_active_tenant_shards(
+        &self,
+    ) -> DatabaseResult<Vec<TenantShardPersistence>> {
+        use crate::schema::tenant_shards::dsl::*;
         self.with_measured_conn(
             DatabaseOperation::ListTenantShards,
             move |conn| -> DatabaseResult<_> {
-                Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
+                let query = tenant_shards.filter(
+                    placement_policy.ne(serde_json::to_string(&PlacementPolicy::Detached).unwrap()),
+                );
+                let result = query.load::<TenantShardPersistence>(conn)?;
+
+                Ok(result)
+            },
+        )
+        .await
+    }
+
+    /// When restoring a previously detached tenant into memory, load it from the database
+    pub(crate) async fn load_tenant(
+        &self,
+        filter_tenant_id: TenantId,
+    ) -> DatabaseResult<Vec<TenantShardPersistence>> {
+        use crate::schema::tenant_shards::dsl::*;
+        self.with_measured_conn(
+            DatabaseOperation::LoadTenant,
+            move |conn| -> DatabaseResult<_> {
+                let query = tenant_shards.filter(tenant_id.eq(filter_tenant_id.to_string()));
+                let result = query.load::<TenantShardPersistence>(conn)?;
+
+                Ok(result)
             },
         )
         .await
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 359fcb3288..fd4ee7fd10 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -155,6 +155,7 @@ enum TenantOperations {
     TimelineArchivalConfig,
     TimelineDetachAncestor,
     TimelineGcBlockUnblock,
+    DropDetached,
 }
 
 #[derive(Clone, strum_macros::Display)]
@@ -416,8 +417,8 @@ pub struct Service {
     /// Queue of tenants who are waiting for concurrency limits to permit them to reconcile
     /// Send into this queue to promptly attempt to reconcile this shard next time units are available.
     ///
-    /// Note that this state logically lives inside ServiceInner, but carrying Sender here makes the code simpler
-    /// by avoiding needing a &mut ref to something inside the ServiceInner.  This could be optimized to
+    /// Note that this state logically lives inside ServiceState, but carrying Sender here makes the code simpler
+    /// by avoiding needing a &mut ref to something inside the ServiceState.  This could be optimized to
     /// use a VecDeque instead of a channel to reduce synchronization overhead, at the cost of some code complexity.
     delayed_reconcile_tx: tokio::sync::mpsc::Sender<TenantShardId>,
 
@@ -1165,6 +1166,20 @@ impl Service {
             }
         }
 
+        // If we just finished detaching all shards for a tenant, it might be time to drop it from memory.
+        if tenant.policy == PlacementPolicy::Detached {
+            // We may only drop a tenant from memory while holding the exclusive lock on the tenant ID: this protects us
+            // from concurrent execution wrt a request handler that might expect the tenant to remain in memory for the
+            // duration of the request.
+            let guard = self.tenant_op_locks.try_exclusive(
+                tenant.tenant_shard_id.tenant_id,
+                TenantOperations::DropDetached,
+            );
+            if let Some(guard) = guard {
+                self.maybe_drop_tenant(tenant.tenant_shard_id.tenant_id, &mut locked, &guard);
+            }
+        }
+
         // Maybe some other work can proceed now that this job finished.
         if self.reconciler_concurrency.available_permits() > 0 {
             while let Ok(tenant_shard_id) = locked.delayed_reconcile_rx.try_recv() {
@@ -1294,7 +1309,7 @@ impl Service {
             .set(nodes.len() as i64);
 
         tracing::info!("Loading shards from database...");
-        let mut tenant_shard_persistence = persistence.list_tenant_shards().await?;
+        let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?;
         tracing::info!(
             "Loaded {} shards from database.",
             tenant_shard_persistence.len()
@@ -1546,8 +1561,14 @@ impl Service {
         // the pageserver API (not via this service), we will auto-create any missing tenant
         // shards with default state.
         let insert = {
-            let locked = self.inner.write().unwrap();
-            !locked.tenants.contains_key(&attach_req.tenant_shard_id)
+            match self
+                .maybe_load_tenant(attach_req.tenant_shard_id.tenant_id, &_tenant_lock)
+                .await
+            {
+                Ok(_) => false,
+                Err(ApiError::NotFound(_)) => true,
+                Err(e) => return Err(e.into()),
+            }
         };
 
         if insert {
@@ -2439,6 +2460,99 @@ impl Service {
         }
     }
 
+    /// For APIs that might act on tenants with [`PlacementPolicy::Detached`], first check if
+    /// the tenant is present in memory. If not, load it from the database.  If it is found
+    /// in neither location, return a NotFound error.
+    ///
+    /// Caller must demonstrate they hold a lock guard, as otherwise two callers might try and load
+    /// it at the same time, or we might race with [`Self::maybe_drop_tenant`]
+    async fn maybe_load_tenant(
+        &self,
+        tenant_id: TenantId,
+        _guard: &TracingExclusiveGuard<TenantOperations>,
+    ) -> Result<(), ApiError> {
+        let present_in_memory = {
+            let locked = self.inner.read().unwrap();
+            locked
+                .tenants
+                .range(TenantShardId::tenant_range(tenant_id))
+                .next()
+                .is_some()
+        };
+
+        if present_in_memory {
+            return Ok(());
+        }
+
+        let tenant_shards = self.persistence.load_tenant(tenant_id).await?;
+        if tenant_shards.is_empty() {
+            return Err(ApiError::NotFound(
+                anyhow::anyhow!("Tenant {} not found", tenant_id).into(),
+            ));
+        }
+
+        // TODO: choose a fresh AZ to use for this tenant when un-detaching: there definitely isn't a running
+        // compute, so no benefit to making AZ sticky across detaches.
+
+        let mut locked = self.inner.write().unwrap();
+        tracing::info!(
+            "Loaded {} shards for tenant {}",
+            tenant_shards.len(),
+            tenant_id
+        );
+
+        locked.tenants.extend(tenant_shards.into_iter().map(|p| {
+            let intent = IntentState::new();
+            let shard =
+                TenantShard::from_persistent(p, intent).expect("Corrupt shard row in database");
+
+            // Sanity check: when loading on-demand, we should always be loading something Detached
+            debug_assert!(shard.policy == PlacementPolicy::Detached);
+            if shard.policy != PlacementPolicy::Detached {
+                tracing::error!(
+                    "Tenant shard {} loaded on-demand, but has non-Detached policy {:?}",
+                    shard.tenant_shard_id,
+                    shard.policy
+                );
+            }
+
+            (shard.tenant_shard_id, shard)
+        }));
+
+        Ok(())
+    }
+
+    /// If all shards for a tenant are detached, and in a fully quiescent state (no observed locations on pageservers),
+    /// and have no reconciler running, then we can drop the tenant from memory.  It will be reloaded on-demand
+    /// if we are asked to attach it again (see [`Self::maybe_load_tenant`]).
+    ///
+    /// Caller must demonstrate they hold a lock guard, as otherwise it is unsafe to drop a tenant from
+    /// memory while some other function might assume it continues to exist while not holding the lock on Self::inner.
+    fn maybe_drop_tenant(
+        &self,
+        tenant_id: TenantId,
+        locked: &mut std::sync::RwLockWriteGuard<ServiceState>,
+        _guard: &TracingExclusiveGuard<TenantOperations>,
+    ) {
+        let mut tenant_shards = locked.tenants.range(TenantShardId::tenant_range(tenant_id));
+        if tenant_shards.all(|(_id, shard)| {
+            shard.policy == PlacementPolicy::Detached
+                && shard.reconciler.is_none()
+                && shard.observed.is_empty()
+        }) {
+            let keys = locked
+                .tenants
+                .range(TenantShardId::tenant_range(tenant_id))
+                .map(|(id, _)| id)
+                .copied()
+                .collect::<Vec<_>>();
+            for key in keys {
+                tracing::info!("Dropping detached tenant shard {} from memory", key);
+                locked.tenants.remove(&key);
+            }
+        }
+    }
+
     /// This API is used by the cloud control plane to migrate unsharded tenants that it created
     /// directly with pageservers into this service.
     ///
@@ -2465,14 +2579,26 @@ impl Service {
         )
         .await;
 
-        if !tenant_shard_id.is_unsharded() {
+        let tenant_id = if !tenant_shard_id.is_unsharded() {
             return Err(ApiError::BadRequest(anyhow::anyhow!(
                 "This API is for importing single-sharded or unsharded tenants"
             )));
-        }
+        } else {
+            tenant_shard_id.tenant_id
+        };
+
+        // In case we are waking up a Detached tenant
+        match self.maybe_load_tenant(tenant_id, &_tenant_lock).await {
+            Ok(()) | Err(ApiError::NotFound(_)) => {
+                // This is a creation or an update
+            }
+            Err(e) => {
+                return Err(e);
+            }
+        };
 
         // First check if this is a creation or an update
-        let create_or_update = self.tenant_location_config_prepare(tenant_shard_id.tenant_id, req);
+        let create_or_update = self.tenant_location_config_prepare(tenant_id, req);
 
         let mut result = TenantLocationConfigResponse {
             shards: Vec::new(),
@@ -2600,6 +2726,8 @@ impl Service {
         let tenant_id = req.tenant_id;
         let patch = req.config;
 
+        self.maybe_load_tenant(tenant_id, &_tenant_lock).await?;
+
         let base = {
             let locked = self.inner.read().unwrap();
             let shards = locked
@@ -2644,19 +2772,7 @@ impl Service {
         )
         .await;
 
-        let tenant_exists = {
-            let locked = self.inner.read().unwrap();
-            let mut r = locked
-                .tenants
-                .range(TenantShardId::tenant_range(req.tenant_id));
-            r.next().is_some()
-        };
-
-        if !tenant_exists {
-            return Err(ApiError::NotFound(
-                anyhow::anyhow!("Tenant {} not found", req.tenant_id).into(),
-            ));
-        }
+        self.maybe_load_tenant(req.tenant_id, &_tenant_lock).await?;
 
         self.set_tenant_config_and_reconcile(req.tenant_id, req.config)
             .await
@@ -2949,6 +3065,8 @@ impl Service {
         let _tenant_lock =
             trace_exclusive_lock(&self.tenant_op_locks, tenant_id, TenantOperations::Delete).await;
 
+        self.maybe_load_tenant(tenant_id, &_tenant_lock).await?;
+
         // Detach all shards. This also deletes local pageserver shard data.
         let (detach_waiters, node) = {
             let mut detach_waiters = Vec::new();
@@ -3068,6 +3186,8 @@ impl Service {
         )
         .await;
 
+        self.maybe_load_tenant(tenant_id, &_tenant_lock).await?;
+
         failpoint_support::sleep_millis_async!("tenant-update-policy-exclusive-lock");
 
         let TenantPolicyRequest {
@@ -5150,11 +5270,13 @@ impl Service {
             )));
         }
 
-        let mut shards = self.persistence.list_tenant_shards().await?;
-        shards.sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count));
+        let mut persistent_shards = self.persistence.load_active_tenant_shards().await?;
+        persistent_shards
+            .sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count));
+
         expect_shards.sort_by_key(|tsp| (tsp.tenant_id.clone(), tsp.shard_number, tsp.shard_count));
 
-        if shards != expect_shards {
+        if persistent_shards != expect_shards {
             tracing::error!("Consistency check failed on shards.");
             tracing::error!(
                 "Shards in memory: {}",
@@ -5163,7 +5285,7 @@ impl Service {
             );
             tracing::error!(
                 "Shards in database: {}",
-                serde_json::to_string(&shards)
+                serde_json::to_string(&persistent_shards)
                     .map_err(|e| ApiError::InternalServerError(e.into()))?
             );
             return Err(ApiError::InternalServerError(anyhow::anyhow!(
@@ -6119,6 +6241,10 @@ impl Service {
         let mut pending_reconciles = 0;
         let mut az_violations = 0;
 
+        // If we find any tenants to drop from memory, stash them to offload after
+        // we're done traversing the map of tenants.
+        let mut drop_detached_tenants = Vec::new();
+
         let mut reconciles_spawned = 0;
         for shard in tenants.values_mut() {
             // Accumulate scheduling statistics
@@ -6152,6 +6278,25 @@ impl Service {
                 // Shard wanted to reconcile but for some reason couldn't.
                 pending_reconciles += 1;
             }
+
+            // If this tenant is detached, try dropping it from memory. This is usually done
+            // proactively in [`Self::process_results`], but we do it here to handle the edge
+            // case where a reconcile completes while someone else is holding an op lock for the tenant.
+            if shard.tenant_shard_id.shard_number == ShardNumber(0)
+                && shard.policy == PlacementPolicy::Detached
+            {
+                if let Some(guard) = self.tenant_op_locks.try_exclusive(
+                    shard.tenant_shard_id.tenant_id,
+                    TenantOperations::DropDetached,
+                ) {
+                    drop_detached_tenants.push((shard.tenant_shard_id.tenant_id, guard));
+                }
+            }
+        }
+
+        // Process any deferred tenant drops
+        for (tenant_id, guard) in drop_detached_tenants {
+            self.maybe_drop_tenant(tenant_id, &mut locked, &guard);
         }
 
         metrics::METRICS_REGISTRY
diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs
index cba579e8a7..c17989a316 100644
--- a/storage_controller/src/tenant_shard.rs
+++ b/storage_controller/src/tenant_shard.rs
@@ -465,6 +465,10 @@ impl ObservedState {
             locations: HashMap::new(),
         }
     }
+
+    pub(crate) fn is_empty(&self) -> bool {
+        self.locations.is_empty()
+    }
 }
 
 impl TenantShard {
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 973d0cdf82..207f55a214 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3299,13 +3299,116 @@ def test_storage_controller_detached_stopped(
             "generation": None,
         },
     )
-
+    env.storage_controller.reconcile_until_idle()
     env.storage_controller.consistency_check()
 
     # Confirm the detach happened
     assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []
 
 
+@run_only_on_default_postgres("Postgres version makes no difference here")
+def test_storage_controller_detach_lifecycle(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    Test that detached tenants are handled properly through their lifecycle: getting dropped
+    from memory when detached, then getting loaded back on-demand.
+    """
+
+    remote_storage_kind = s3_storage()
+    neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
+
+    neon_env_builder.num_pageservers = 1
+
+    env = neon_env_builder.init_configs()
+    env.start()
+    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
+
+    tenant_id = TenantId.generate()
+    timeline_id = TimelineId.generate()
+    env.storage_controller.tenant_create(
+        tenant_id,
+        shard_count=1,
+    )
+    virtual_ps_http.timeline_create(PgVersion.NOT_SET, tenant_id, timeline_id)
+
+    remote_prefix = "/".join(
+        (
+            "tenants",
+            str(tenant_id),
+        )
+    )
+    # We will later check that the data is gone after deletion, so as a control, check that it is present to begin with
+    assert_prefix_not_empty(
+        neon_env_builder.pageserver_remote_storage,
+        prefix=remote_prefix,
+    )
+
+    assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1
+    assert len(env.storage_controller.tenant_list()) == 1
+
+    # Detach the tenant
+    virtual_ps_http.tenant_location_conf(
+        tenant_id,
+        {
+            "mode": "Detached",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": None,
+        },
+    )
+    # Ensure reconciles are done (the one we do inline in location_conf is advisory and if it takes too long that API just succeeds anyway)
+    env.storage_controller.reconcile_until_idle()
+    env.storage_controller.consistency_check()
+
+    # Confirm the detach happened on pageserver
+    assert env.pageserver.http_client().tenant_list_locations()["tenant_shards"] == []
+    # Confirm the tenant is not in memory on the controller
+    assert env.storage_controller.tenant_list() == []
+
+    # The detached tenant does not get loaded into memory across a controller restart
+    env.storage_controller.stop()
+    env.storage_controller.start()
+    assert env.storage_controller.tenant_list() == []
+    env.storage_controller.consistency_check()
+
+    # The detached tenant can be re-attached
+    virtual_ps_http.tenant_location_conf(
+        tenant_id,
+        {
+            "mode": "AttachedSingle",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": None,
+        },
+    )
+    assert len(env.pageserver.http_client().tenant_list_locations()["tenant_shards"]) == 1
+    assert len(env.storage_controller.tenant_list()) == 1
+    env.storage_controller.consistency_check()
+
+    # Detach it again before doing deletion
+    virtual_ps_http.tenant_location_conf(
+        tenant_id,
+        {
+            "mode": "Detached",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": None,
+        },
+    )
+    env.storage_controller.reconcile_until_idle()
+    env.storage_controller.consistency_check()
+
+    # A detached tenant can be deleted
+    virtual_ps_http.tenant_delete(tenant_id)
+
+    # Such deletions really work (empty remote storage)
+    assert_prefix_empty(
+        neon_env_builder.pageserver_remote_storage,
+        prefix=remote_prefix,
+    )
+
+
 @run_only_on_default_postgres("Postgres version makes no difference here")
 def test_storage_controller_node_flap_detach_race(
     neon_env_builder: NeonEnvBuilder,

From 0ad0db6ff8b5b491244d251fa09c8093f725a1d3 Mon Sep 17 00:00:00 2001
From: Anastasia Lubennikova <anastasia@neon.tech>
Date: Wed, 8 Jan 2025 18:55:04 +0000
Subject: [PATCH 61/63] compute: dropdb DROP SUBSCRIPTION fix (#10066)

## Problem
A project gets stuck if a database with subscriptions is deleted via the
API / UI.

https://github.com/neondatabase/cloud/issues/18646

## Summary of changes
Before dropping the database, drop all the subscriptions in it.
Do not drop slot on publisher, because we have no guarantee that the
slot still exists or that the publisher is reachable.

Add a `DropSubscriptionsForDeletedDatabases` phase to run these operations
in all databases we're about to delete.
Ignore the error if the database does not exist.
---
 compute_tools/src/compute.rs                  |  95 ++++++++++++--
 compute_tools/src/spec_apply.rs               |  32 ++++-
 .../sql/drop_subscription_for_drop_dbs.sql    |  11 ++
 test_runner/fixtures/neon_fixtures.py         |  27 +++-
 test_runner/regress/test_compute_catalog.py   | 116 +++++++++++++++++-
 5 files changed, 262 insertions(+), 19 deletions(-)
 create mode 100644 compute_tools/src/sql/drop_subscription_for_drop_dbs.sql

diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index 78f6033429..1ac97a378b 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -15,7 +15,7 @@ use std::time::Instant;
 
 use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
-use compute_api::spec::{PgIdent, Role};
+use compute_api::spec::{Database, PgIdent, Role};
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
@@ -45,8 +45,10 @@ use crate::spec_apply::ApplySpecPhase::{
     DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
     RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
 };
+use crate::spec_apply::PerDatabasePhase;
 use crate::spec_apply::PerDatabasePhase::{
-    ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
+    HandleAnonExtension,
 };
 use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
@@ -834,7 +836,7 @@ impl ComputeNode {
         conf
     }
 
-    async fn get_maintenance_client(
+    pub async fn get_maintenance_client(
         conf: &tokio_postgres::Config,
     ) -> Result<tokio_postgres::Client> {
         let mut conf = conf.clone();
@@ -943,6 +945,78 @@ impl ComputeNode {
                 dbs: databases,
             }));
 
+            // Apply the special pre-drop-database phase.
+            // NOTE: we reuse the code of the RunInEachDatabase phase for parallelism
+            // and connection management, but we don't really run it in *each* database,
+            // only in the databases we're about to drop.
+            info!("Applying PerDatabase (pre-dropdb) phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            // Run the phase for each database that we're about to drop.
+            let db_processes = spec
+                .delta_operations
+                .iter()
+                .flatten()
+                .filter_map(move |op| {
+                    if op.action.as_str() == "delete_db" {
+                        Some(op.name.clone())
+                    } else {
+                        None
+                    }
+                })
+                .map(|dbname| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    // We only need dbname field for this phase, so set other fields to dummy values
+                    let db = DB::UserDB(Database {
+                        name: dbname.clone(),
+                        owner: "cloud_admin".to_string(),
+                        options: None,
+                        restrict_conn: false,
+                        invalid: false,
+                    });
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        [DropSubscriptionsForDeletedDatabases].to_vec(),
+                    );
+
+                    Ok(spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                if let Err(e) = handle.await? {
+                    // Handle the error case where the database does not exist.
+                    // We do not check whether the DB exists or not in the deletion phase,
+                    // so we shouldn't be strict about it in pre-deletion cleanup either.
+                    if e.to_string().contains("does not exist") {
+                        warn!("Error dropping subscription: {}", e);
+                    } else {
+                        return Err(e);
+                    }
+                };
+            }
+
             for phase in [
                 CreateSuperUser,
                 DropInvalidDatabases,
@@ -962,7 +1036,7 @@ impl ComputeNode {
                 .await?;
             }
 
-            info!("Applying RunInEachDatabase phase");
+            info!("Applying RunInEachDatabase2 phase");
             let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
 
             let db_processes = spec
@@ -997,6 +1071,12 @@ impl ComputeNode {
                         jwks_roles.clone(),
                         concurrency_token.clone(),
                         db,
+                        [
+                            DeleteDBRoleReferences,
+                            ChangeSchemaPerms,
+                            HandleAnonExtension,
+                        ]
+                        .to_vec(),
                     );
 
                     Ok(spawn(fut))
@@ -1043,16 +1123,13 @@ impl ComputeNode {
         jwks_roles: Arc<HashSet<String>>,
         concurrency_token: Arc<tokio::sync::Semaphore>,
         db: DB,
+        subphases: Vec<PerDatabasePhase>,
     ) -> Result<()> {
         let _permit = concurrency_token.acquire().await?;
 
         let mut client_conn = None;
 
-        for subphase in [
-            DeleteDBRoleReferences,
-            ChangeSchemaPerms,
-            HandleAnonExtension,
-        ] {
+        for subphase in subphases {
             apply_operations(
                 spec.clone(),
                 ctx.clone(),
diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs
index 7308d5d36e..695a722d6d 100644
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -47,6 +47,7 @@ pub enum PerDatabasePhase {
     DeleteDBRoleReferences,
     ChangeSchemaPerms,
     HandleAnonExtension,
+    DropSubscriptionsForDeletedDatabases,
 }
 
 #[derive(Clone, Debug)]
@@ -326,13 +327,12 @@ async fn get_operations<'a>(
 
                             // Use FORCE to drop database even if there are active connections.
                             // We run this from `cloud_admin`, so it should have enough privileges.
+                            //
                             // NB: there could be other db states, which prevent us from dropping
                             // the database. For example, if db is used by any active subscription
                             // or replication slot.
-                            // TODO: deal with it once we allow logical replication. Proper fix should
-                            // involve returning an error code to the control plane, so it could
-                            // figure out that this is a non-retryable error, return it to the user
-                            // and fail operation permanently.
+                            // Such cases are handled in the DropSubscriptionsForDeletedDatabases
+                            // phase. We do all the cleanup before actually dropping the database.
                             let drop_db_query: String = format!(
                                 "DROP DATABASE IF EXISTS {} WITH (FORCE)",
                                 &op.name.pg_quote()
@@ -444,6 +444,30 @@ async fn get_operations<'a>(
         }
         ApplySpecPhase::RunInEachDatabase { db, subphase } => {
             match subphase {
+                PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
+                    match &db {
+                        DB::UserDB(db) => {
+                            let drop_subscription_query: String = format!(
+                                include_str!("sql/drop_subscription_for_drop_dbs.sql"),
+                                datname_str = escape_literal(&db.name),
+                            );
+
+                            let operations = vec![Operation {
+                                query: drop_subscription_query,
+                                comment: Some(format!(
+                                    "optionally dropping subscriptions for DB {}",
+                                    db.name,
+                                )),
+                            }]
+                            .into_iter();
+
+                            Ok(Box::new(operations))
+                        }
+                        // skip this cleanup for the system databases
+                        // because users can't drop them
+                        DB::SystemDB => Ok(Box::new(empty())),
+                    }
+                }
                 PerDatabasePhase::DeleteDBRoleReferences => {
                     let ctx = ctx.read().await;
 
diff --git a/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql b/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
new file mode 100644
index 0000000000..dfb925e48e
--- /dev/null
+++ b/compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
@@ -0,0 +1,11 @@
+DO $$
+DECLARE
+    subname TEXT;
+BEGIN
+    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
+        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
+        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
+        EXECUTE format('DROP SUBSCRIPTION %I;', subname);
+    END LOOP;
+END;
+$$;
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 00fdda2998..e22e452a52 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -4933,13 +4933,30 @@ def check_restored_datadir_content(
     assert (mismatch, error) == ([], [])
 
 
-def logical_replication_sync(subscriber: PgProtocol, publisher: PgProtocol) -> Lsn:
+def logical_replication_sync(
+    subscriber: PgProtocol,
+    publisher: PgProtocol,
+    sub_dbname: str | None = None,
+    pub_dbname: str | None = None,
+) -> Lsn:
     """Wait logical replication subscriber to sync with publisher."""
-    publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
+    if pub_dbname is not None:
+        publisher_lsn = Lsn(
+            publisher.safe_psql("SELECT pg_current_wal_flush_lsn()", dbname=pub_dbname)[0][0]
+        )
+    else:
+        publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
+
     while True:
-        res = subscriber.safe_psql("select latest_end_lsn from pg_catalog.pg_stat_subscription")[0][
-            0
-        ]
+        if sub_dbname is not None:
+            res = subscriber.safe_psql(
+                "select latest_end_lsn from pg_catalog.pg_stat_subscription", dbname=sub_dbname
+            )[0][0]
+        else:
+            res = subscriber.safe_psql(
+                "select latest_end_lsn from pg_catalog.pg_stat_subscription"
+            )[0][0]
+
         if res:
             log.info(f"subscriber_lsn={res}")
             subscriber_lsn = Lsn(res)
diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py
index b3719a45ed..e411aad97d 100644
--- a/test_runner/regress/test_compute_catalog.py
+++ b/test_runner/regress/test_compute_catalog.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import logging
+
 import requests
-from fixtures.neon_fixtures import NeonEnv
+from fixtures.neon_fixtures import NeonEnv, logical_replication_sync
 
 TEST_DB_NAMES = [
     {
@@ -136,3 +138,115 @@ def test_compute_create_databases(neon_simple_env: NeonEnv):
             assert curr_db is not None
             assert len(curr_db) == 1
             assert curr_db[0] == db["name"]
+
+
+def test_dropdb_with_subscription(neon_simple_env: NeonEnv):
+    """
+    Test that compute_ctl can drop a database that has a logical replication subscription.
+    """
+    env = neon_simple_env
+
+    # Create and start the endpoint so that neon_local puts all the generated
+    # stuff into the spec.json file.
+    endpoint = env.endpoints.create_start("main")
+
+    TEST_DB_NAMES = [
+        {
+            "name": "neondb",
+            "owner": "cloud_admin",
+        },
+        {
+            "name": "subscriber_db",
+            "owner": "cloud_admin",
+        },
+        {
+            "name": "publisher_db",
+            "owner": "cloud_admin",
+        },
+    ]
+
+    # Update the spec.json file to create the databases
+    # and reconfigure the endpoint to apply the changes.
+    endpoint.respec_deep(
+        **{
+            "skip_pg_catalog_updates": False,
+            "cluster": {
+                "databases": TEST_DB_NAMES,
+            },
+        }
+    )
+    endpoint.reconfigure()
+
+    # connect to the publisher_db and create a publication
+    with endpoint.cursor(dbname="publisher_db") as cursor:
+        cursor.execute("CREATE PUBLICATION mypub FOR ALL TABLES")
+        cursor.execute("select pg_catalog.pg_create_logical_replication_slot('mysub', 'pgoutput');")
+        cursor.execute("CREATE TABLE t(a int)")
+        cursor.execute("INSERT INTO t VALUES (1)")
+
+    # connect to the subscriber_db and create a subscription
+    # Note: create_slot = false because the 'mysub' slot was already created above.
+    connstr = endpoint.connstr(dbname="publisher_db").replace("'", "''")
+    with endpoint.cursor(dbname="subscriber_db") as cursor:
+        cursor.execute("CREATE TABLE t(a int)")
+        cursor.execute(
+            f"CREATE SUBSCRIPTION mysub CONNECTION '{connstr}' PUBLICATION mypub  WITH (create_slot = false) "
+        )
+
+    # wait for the subscription to be active
+    logical_replication_sync(
+        endpoint, endpoint, sub_dbname="subscriber_db", pub_dbname="publisher_db"
+    )
+
+    # Check that replication is working
+    with endpoint.cursor(dbname="subscriber_db") as cursor:
+        cursor.execute("SELECT * FROM t")
+        rows = cursor.fetchall()
+        assert len(rows) == 1
+        assert rows[0][0] == 1
+
+    # drop the subscriber_db from the list
+    TEST_DB_NAMES_NEW = [
+        {
+            "name": "neondb",
+            "owner": "cloud_admin",
+        },
+        {
+            "name": "publisher_db",
+            "owner": "cloud_admin",
+        },
+    ]
+    # Update the spec.json file to drop the database
+    # and reconfigure the endpoint to apply the changes.
+    endpoint.respec_deep(
+        **{
+            "skip_pg_catalog_updates": False,
+            "cluster": {
+                "databases": TEST_DB_NAMES_NEW,
+            },
+            "delta_operations": [
+                {"action": "delete_db", "name": "subscriber_db"},
+                # also test the case when we try to delete a non-existent database
+                # shouldn't happen in normal operation,
+                # but can occur when failed operations are retried
+                {"action": "delete_db", "name": "nonexistent_db"},
+            ],
+        }
+    )
+
+    logging.info("Reconfiguring the endpoint to drop the subscriber_db")
+    endpoint.reconfigure()
+
+    # Check that the subscriber_db is dropped
+    with endpoint.cursor() as cursor:
+        cursor.execute("SELECT datname FROM pg_database WHERE datname = %s", ("subscriber_db",))
+        catalog_db = cursor.fetchone()
+        assert catalog_db is None
+
+    # Check that we can still connect to the publisher_db
+    with endpoint.cursor(dbname="publisher_db") as cursor:
+        cursor.execute("SELECT * FROM current_database()")
+        curr_db = cursor.fetchone()
+        assert curr_db is not None
+        assert len(curr_db) == 1
+        assert curr_db[0] == "publisher_db"

From fcfff724547cda260e884d2d680f199fdbc9471c Mon Sep 17 00:00:00 2001
From: Ivan Efremov <ivan@neon.tech>
Date: Wed, 8 Jan 2025 21:34:53 +0200
Subject: [PATCH 62/63] impr(proxy): Decouple ip_allowlist from the
 CancelClosure (#10199)

This PR removes the direct dependency of the IP allowlist from
CancelClosure, allowing for more scalable and flexible IP restrictions
and enabling the future use of Redis-based CancelMap storage.

Changes:
- Introduce a new BackendIpAllowlist async trait that retrieves the IP
allowlist through existing authentication methods;
- Improve cancellation error handling by applying instrument() to the
async cancel_session() call rather than dropping its result.
- Set and store IP allowlist for SCRAM Proxy to consistently perform IP
allowance check

 Relates to #9660
---
 proxy/src/auth/backend/console_redirect.rs    |  32 ++++-
 proxy/src/auth/backend/mod.rs                 |  47 ++++++--
 proxy/src/bin/proxy.rs                        |  54 ++++++++-
 proxy/src/cancellation.rs                     | 111 +++++++++++++++++-
 proxy/src/compute.rs                          |  28 ++++-
 proxy/src/console_redirect_proxy.rs           |  28 ++---
 .../control_plane/client/cplane_proxy_v1.rs   |  36 ++++--
 proxy/src/proxy/mod.rs                        |  31 +++--
 8 files changed, 307 insertions(+), 60 deletions(-)

diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs
index c3de77b352..dbfda588cc 100644
--- a/proxy/src/auth/backend/console_redirect.rs
+++ b/proxy/src/auth/backend/console_redirect.rs
@@ -1,16 +1,18 @@
 use async_trait::async_trait;
 use postgres_client::config::SslMode;
 use pq_proto::BeMessage as Be;
+use std::fmt;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, info_span};
 
-use super::ComputeCredentialKeys;
+use super::{ComputeCredentialKeys, ControlPlaneApi};
+use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
 use crate::auth::IpPattern;
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
-use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
+use crate::control_plane::{self, client::cplane_proxy_v1, CachedNodeInfo, NodeInfo};
 use crate::error::{ReportableError, UserFacingError};
 use crate::proxy::connect_compute::ComputeConnectBackend;
 use crate::stream::PqStream;
@@ -31,6 +33,13 @@ pub(crate) enum ConsoleRedirectError {
 #[derive(Debug)]
 pub struct ConsoleRedirectBackend {
     console_uri: reqwest::Url,
+    api: cplane_proxy_v1::NeonControlPlaneClient,
+}
+
+impl fmt::Debug for cplane_proxy_v1::NeonControlPlaneClient {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "NeonControlPlaneClient")
+    }
 }
 
 impl UserFacingError for ConsoleRedirectError {
@@ -71,9 +80,24 @@ pub(crate) fn new_psql_session_id() -> String {
     hex::encode(rand::random::<[u8; 8]>())
 }
 
+#[async_trait]
+impl BackendIpAllowlist for ConsoleRedirectBackend {
+    async fn get_allowed_ips(
+        &self,
+        ctx: &RequestContext,
+        user_info: &ComputeUserInfo,
+    ) -> auth::Result<Vec<auth::IpPattern>> {
+        self.api
+            .get_allowed_ips_and_secret(ctx, user_info)
+            .await
+            .map(|(ips, _)| ips.as_ref().clone())
+            .map_err(|e| e.into())
+    }
+}
+
 impl ConsoleRedirectBackend {
-    pub fn new(console_uri: reqwest::Url) -> Self {
-        Self { console_uri }
+    pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self {
+        Self { console_uri, api }
     }
 
     pub(crate) async fn authenticate(
diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs
index 0c9a7f7825..de48be2952 100644
--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -16,7 +16,9 @@ use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};
 
 use crate::auth::credentials::check_peer_addr_is_in_list;
-use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint};
+use crate::auth::{
+    self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern,
+};
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
@@ -131,7 +133,7 @@ pub(crate) struct ComputeUserInfoNoEndpoint {
     pub(crate) options: NeonOptions,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub(crate) struct ComputeUserInfo {
     pub(crate) endpoint: EndpointId,
     pub(crate) user: RoleName,
@@ -244,6 +246,15 @@ impl AuthenticationConfig {
     }
 }
 
+#[async_trait::async_trait]
+pub(crate) trait BackendIpAllowlist {
+    async fn get_allowed_ips(
+        &self,
+        ctx: &RequestContext,
+        user_info: &ComputeUserInfo,
+    ) -> auth::Result<Vec<auth::IpPattern>>;
+}
+
 /// True to its name, this function encapsulates our current auth trade-offs.
 /// Here, we choose the appropriate auth flow based on circumstances.
 ///
@@ -256,7 +267,7 @@ async fn auth_quirks(
     allow_cleartext: bool,
     config: &'static AuthenticationConfig,
     endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> auth::Result<ComputeCredentials> {
+) -> auth::Result<(ComputeCredentials, Option<Vec<IpPattern>>)> {
     // If there's no project so far, that entails that client doesn't
     // support SNI or other means of passing the endpoint (project) name.
     // We now expect to see a very specific payload in the place of password.
@@ -315,7 +326,7 @@ async fn auth_quirks(
     )
     .await
     {
-        Ok(keys) => Ok(keys),
+        Ok(keys) => Ok((keys, Some(allowed_ips.as_ref().clone()))),
         Err(e) => {
             if e.is_password_failed() {
                 // The password could have been changed, so we invalidate the cache.
@@ -385,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
         allow_cleartext: bool,
         config: &'static AuthenticationConfig,
         endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    ) -> auth::Result<Backend<'a, ComputeCredentials>> {
+    ) -> auth::Result<(Backend<'a, ComputeCredentials>, Option<Vec<IpPattern>>)> {
         let res = match self {
             Self::ControlPlane(api, user_info) => {
                 debug!(
@@ -394,7 +405,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
                     "performing authentication using the console"
                 );
 
-                let credentials = auth_quirks(
+                let (credentials, ip_allowlist) = auth_quirks(
                     ctx,
                     &*api,
                     user_info,
@@ -404,7 +415,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
                     endpoint_rate_limiter,
                 )
                 .await?;
-                Backend::ControlPlane(api, credentials)
+                Ok((Backend::ControlPlane(api, credentials), ip_allowlist))
             }
             Self::Local(_) => {
                 return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
@@ -413,7 +424,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
 
         // TODO: replace with some metric
         info!("user successfully authenticated");
-        Ok(res)
+        res
     }
 }
 
@@ -441,6 +452,24 @@ impl Backend<'_, ComputeUserInfo> {
     }
 }
 
+#[async_trait::async_trait]
+impl BackendIpAllowlist for Backend<'_, ()> {
+    async fn get_allowed_ips(
+        &self,
+        ctx: &RequestContext,
+        user_info: &ComputeUserInfo,
+    ) -> auth::Result<Vec<auth::IpPattern>> {
+        let auth_data = match self {
+            Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await,
+            Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
+        };
+
+        auth_data
+            .map(|(ips, _)| ips.as_ref().clone())
+            .map_err(|e| e.into())
+    }
+}
+
 #[async_trait::async_trait]
 impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
     async fn wake_compute(
@@ -786,7 +815,7 @@ mod tests {
         .await
         .unwrap();
 
-        assert_eq!(creds.info.endpoint, "my-endpoint");
+        assert_eq!(creds.0.info.endpoint, "my-endpoint");
 
         handle.await.unwrap();
     }
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index 3b122d771c..70b50436bf 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -744,9 +744,59 @@ fn build_auth_backend(
         }
 
         AuthBackendType::ConsoleRedirect => {
-            let url = args.uri.parse()?;
-            let backend = ConsoleRedirectBackend::new(url);
+            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
+            let project_info_cache_config: ProjectInfoCacheOptions =
+                args.project_info_cache.parse()?;
+            let endpoint_cache_config: config::EndpointCacheConfig =
+                args.endpoint_cache_config.parse()?;
 
+            info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
+            info!(
+                "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
+            );
+            info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
+            let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
+                wake_compute_cache_config,
+                project_info_cache_config,
+                endpoint_cache_config,
+            )));
+
+            let config::ConcurrencyLockOptions {
+                shards,
+                limiter,
+                epoch,
+                timeout,
+            } = args.wake_compute_lock.parse()?;
+            info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
+            let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
+                "wake_compute_lock",
+                limiter,
+                shards,
+                timeout,
+                epoch,
+                &Metrics::get().wake_compute_lock,
+            )?));
+
+            let url = args.uri.clone().parse()?;
+            let ep_url: proxy::url::ApiUrl = args.auth_endpoint.parse()?;
+            let endpoint = http::Endpoint::new(ep_url, http::new_client());
+            let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
+            RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
+            let wake_compute_endpoint_rate_limiter =
+                Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
+
+            // ConsoleRedirectBackend only calls get_allowed_ips_and_secret(), so
+            // wake_compute_endpoint_rate_limiter and locks are not actually used here,
+            // but they are required to construct the NeonControlPlaneClient.
+            let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
+                endpoint,
+                args.control_plane_token.clone(),
+                caches,
+                locks,
+                wake_compute_endpoint_rate_limiter,
+            );
+
+            let backend = ConsoleRedirectBackend::new(url, api);
             let config = Box::leak(Box::new(backend));
 
             Ok(Either::Right(config))
diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs
index df618cf242..a96c43f2ce 100644
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -12,8 +12,10 @@ use tokio::sync::Mutex;
 use tracing::{debug, info};
 use uuid::Uuid;
 
-use crate::auth::{check_peer_addr_is_in_list, IpPattern};
+use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
+use crate::auth::{check_peer_addr_is_in_list, AuthError, IpPattern};
 use crate::config::ComputeConfig;
+use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::ext::LockExt;
 use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
@@ -56,6 +58,9 @@ pub(crate) enum CancelError {
 
     #[error("IP is not allowed")]
     IpNotAllowed,
+
+    #[error("Authentication backend error")]
+    AuthError(#[from] AuthError),
 }
 
 impl ReportableError for CancelError {
@@ -68,6 +73,7 @@ impl ReportableError for CancelError {
             CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
             CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
             CancelError::IpNotAllowed => crate::error::ErrorKind::User,
+            CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
         }
     }
 }
@@ -102,10 +108,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
         }
     }
 
-    /// Try to cancel a running query for the corresponding connection.
-    /// If the cancellation key is not found, it will be published to Redis.
-    /// check_allowed - if true, check if the IP is allowed to cancel the query
-    /// return Result primarily for tests
+    /// Used only for cancellation notifications; will be removed.
     pub(crate) async fn cancel_session(
         &self,
         key: CancelKeyData,
@@ -134,7 +137,8 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
         }
 
         // NB: we should immediately release the lock after cloning the token.
-        let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
+        let cancel_state = self.map.get(&key).and_then(|x| x.clone());
+        let Some(cancel_closure) = cancel_state else {
             tracing::warn!("query cancellation key not found: {key}");
             Metrics::get()
                 .proxy
@@ -185,6 +189,96 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
         cancel_closure.try_cancel_query(self.compute_config).await
     }
 
+    /// Try to cancel a running query for the corresponding connection.
+    /// If the cancellation key is not found, it will be published to Redis.
+    /// check_allowed - if true, check if the IP is allowed to cancel the query.
+    /// Will fetch IP allowlist internally.
+    ///
+    /// return Result primarily for tests
+    pub(crate) async fn cancel_session_auth<T: BackendIpAllowlist>(
+        &self,
+        key: CancelKeyData,
+        ctx: RequestContext,
+        check_allowed: bool,
+        auth_backend: &T,
+    ) -> Result<(), CancelError> {
+        // TODO: check for unspecified address is only for backward compatibility, should be removed
+        if !ctx.peer_addr().is_unspecified() {
+            let subnet_key = match ctx.peer_addr() {
+                IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use default mask here
+                IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
+            };
+            if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
+                // log only the subnet part of the IP address to know which subnet is rate limited
+                tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
+                Metrics::get()
+                    .proxy
+                    .cancellation_requests_total
+                    .inc(CancellationRequest {
+                        source: self.from,
+                        kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
+                    });
+                return Err(CancelError::RateLimit);
+            }
+        }
+
+        // NB: we should immediately release the lock after cloning the token.
+        let cancel_state = self.map.get(&key).and_then(|x| x.clone());
+        let Some(cancel_closure) = cancel_state else {
+            tracing::warn!("query cancellation key not found: {key}");
+            Metrics::get()
+                .proxy
+                .cancellation_requests_total
+                .inc(CancellationRequest {
+                    source: self.from,
+                    kind: crate::metrics::CancellationOutcome::NotFound,
+                });
+
+            if ctx.session_id() == Uuid::nil() {
+                // was already published, do not publish it again
+                return Ok(());
+            }
+
+            match self
+                .client
+                .try_publish(key, ctx.session_id(), ctx.peer_addr())
+                .await
+            {
+                Ok(()) => {} // do nothing
+                Err(e) => {
+                    // log it here since cancel_session could be spawned in a task
+                    tracing::error!("failed to publish cancellation key: {key}, error: {e}");
+                    return Err(CancelError::IO(std::io::Error::new(
+                        std::io::ErrorKind::Other,
+                        e.to_string(),
+                    )));
+                }
+            }
+            return Ok(());
+        };
+
+        let ip_allowlist = auth_backend
+            .get_allowed_ips(&ctx, &cancel_closure.user_info)
+            .await
+            .map_err(CancelError::AuthError)?;
+
+        if check_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
+            // log it here since cancel_session could be spawned in a task
+            tracing::warn!("IP is not allowed to cancel the query: {key}");
+            return Err(CancelError::IpNotAllowed);
+        }
+
+        Metrics::get()
+            .proxy
+            .cancellation_requests_total
+            .inc(CancellationRequest {
+                source: self.from,
+                kind: crate::metrics::CancellationOutcome::Found,
+            });
+        info!("cancelling query per user's request using key {key}");
+        cancel_closure.try_cancel_query(self.compute_config).await
+    }
+
     #[cfg(test)]
     fn contains(&self, session: &Session<P>) -> bool {
         self.map.contains_key(&session.key)
@@ -248,6 +342,7 @@ pub struct CancelClosure {
     cancel_token: CancelToken,
     ip_allowlist: Vec<IpPattern>,
     hostname: String, // for pg_sni router
+    user_info: ComputeUserInfo,
 }
 
 impl CancelClosure {
@@ -256,12 +351,14 @@ impl CancelClosure {
         cancel_token: CancelToken,
         ip_allowlist: Vec<IpPattern>,
         hostname: String,
+        user_info: ComputeUserInfo,
     ) -> Self {
         Self {
             socket_addr,
             cancel_token,
             ip_allowlist,
             hostname,
+            user_info,
         }
     }
     /// Cancels the query running on user's compute node.
@@ -288,6 +385,8 @@ impl CancelClosure {
         debug!("query was cancelled");
         Ok(())
     }
+
+    /// Obsolete (will be removed after moving CancelMap to Redis), only for notifications
     pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec<IpPattern>) {
         self.ip_allowlist = ip_allowlist;
     }
diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index 89de6692ad..788bd63fee 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -13,6 +13,7 @@ use thiserror::Error;
 use tokio::net::TcpStream;
 use tracing::{debug, error, info, warn};
 
+use crate::auth::backend::ComputeUserInfo;
 use crate::auth::parse_endpoint_param;
 use crate::cancellation::CancelClosure;
 use crate::config::ComputeConfig;
@@ -23,8 +24,10 @@ use crate::control_plane::messages::MetricsAuxInfo;
 use crate::error::{ReportableError, UserFacingError};
 use crate::metrics::{Metrics, NumDbConnectionsGuard};
 use crate::proxy::neon_option;
+use crate::proxy::NeonOptions;
 use crate::tls::postgres_rustls::MakeRustlsConnect;
 use crate::types::Host;
+use crate::types::{EndpointId, RoleName};
 
 pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
 
@@ -284,6 +287,28 @@ impl ConnCfg {
             self.0.get_ssl_mode()
         );
 
+        let compute_info = match parameters.get("user") {
+            Some(user) => {
+                match parameters.get("database") {
+                    Some(database) => {
+                        ComputeUserInfo {
+                            user: RoleName::from(user),
+                            options: NeonOptions::default(), // just a shim, we don't need options
+                            endpoint: EndpointId::from(database),
+                        }
+                    }
+                    None => {
+                        warn!("compute node didn't return database name");
+                        ComputeUserInfo::default()
+                    }
+                }
+            }
+            None => {
+                warn!("compute node didn't return user name");
+                ComputeUserInfo::default()
+            }
+        };
+
         // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw.
         // Yet another reason to rework the connection establishing code.
         let cancel_closure = CancelClosure::new(
@@ -294,8 +319,9 @@ impl ConnCfg {
                 process_id,
                 secret_key,
             },
-            vec![],
+            vec![], // TODO: deprecated, will be removed
             host.to_string(),
+            compute_info,
         );
 
         let connection = PostgresConnection {
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs
index 25a549039c..846f55f9e1 100644
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -159,6 +159,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     let request_gauge = metrics.connection_requests.guard(proto);
 
     let tls = config.tls_config.as_ref();
+
     let record_handshake_error = !ctx.has_private_peer_addr();
     let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
     let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
@@ -171,23 +172,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
             // spawn a task to cancel the session, but don't wait for it
             cancellations.spawn({
                 let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-                let session_id = ctx.session_id();
-                let peer_ip = ctx.peer_addr();
-                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id);
+                let ctx = ctx.clone();
+                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
                 cancel_span.follows_from(tracing::Span::current());
                 async move {
-                    drop(
-                        cancellation_handler_clone
-                            .cancel_session(
-                                cancel_key_data,
-                                session_id,
-                                peer_ip,
-                                config.authentication_config.ip_allowlist_check_enabled,
-                            )
-                            .instrument(cancel_span)
-                            .await,
-                    );
-                }
+                    cancellation_handler_clone
+                        .cancel_session_auth(
+                            cancel_key_data,
+                            ctx,
+                            config.authentication_config.ip_allowlist_check_enabled,
+                            backend,
+                        )
+                        .await
+                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
+                }.instrument(cancel_span)
             });
 
             return Ok(None);
diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs
index 00038a6ac6..ece03156d1 100644
--- a/proxy/src/control_plane/client/cplane_proxy_v1.rs
+++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs
@@ -29,7 +29,7 @@ use crate::rate_limiter::WakeComputeRateLimiter;
 use crate::types::{EndpointCacheKey, EndpointId};
 use crate::{compute, http, scram};
 
-const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
+pub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
 
 #[derive(Clone)]
 pub struct NeonControlPlaneClient {
@@ -78,15 +78,30 @@ impl NeonControlPlaneClient {
             info!("endpoint is not valid, skipping the request");
             return Ok(AuthInfo::default());
         }
-        let request_id = ctx.session_id().to_string();
-        let application_name = ctx.console_application_name();
+        self.do_get_auth_req(user_info, &ctx.session_id(), Some(ctx))
+            .await
+    }
+
+    async fn do_get_auth_req(
+        &self,
+        user_info: &ComputeUserInfo,
+        session_id: &uuid::Uuid,
+        ctx: Option<&RequestContext>,
+    ) -> Result<AuthInfo, GetAuthInfoError> {
+        let request_id: String = session_id.to_string();
+        let application_name = if let Some(ctx) = ctx {
+            ctx.console_application_name()
+        } else {
+            "auth_cancellation".to_string()
+        };
+
         async {
             let request = self
                 .endpoint
                 .get_path("get_endpoint_access_control")
                 .header(X_REQUEST_ID, &request_id)
                 .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
-                .query(&[("session_id", ctx.session_id())])
+                .query(&[("session_id", session_id)])
                 .query(&[
                     ("application_name", application_name.as_str()),
                     ("endpointish", user_info.endpoint.as_str()),
@@ -96,9 +111,16 @@ impl NeonControlPlaneClient {
 
             debug!(url = request.url().as_str(), "sending http request");
             let start = Instant::now();
-            let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
-            let response = self.endpoint.execute(request).await?;
-            drop(pause);
+            let response = match ctx {
+                Some(ctx) => {
+                    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
+                    let rsp = self.endpoint.execute(request).await;
+                    drop(pause);
+                    rsp?
+                }
+                None => self.endpoint.execute(request).await?,
+            };
+
             info!(duration = ?start.elapsed(), "received http response");
             let body = match parse_body::<GetEndpointAccessControl>(response).await {
                 Ok(body) => body,
diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs
index 3926c56fec..1f7dba2f9a 100644
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -273,23 +273,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
             // spawn a task to cancel the session, but don't wait for it
             cancellations.spawn({
                 let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-                let session_id = ctx.session_id();
-                let peer_ip = ctx.peer_addr();
-                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id);
+                let ctx = ctx.clone();
+                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
                 cancel_span.follows_from(tracing::Span::current());
                 async move {
-                    drop(
-                        cancellation_handler_clone
-                            .cancel_session(
-                                cancel_key_data,
-                                session_id,
-                                peer_ip,
-                                config.authentication_config.ip_allowlist_check_enabled,
-                            )
-                            .instrument(cancel_span)
-                            .await,
-                    );
-                }
+                    cancellation_handler_clone
+                        .cancel_session_auth(
+                            cancel_key_data,
+                            ctx,
+                            config.authentication_config.ip_allowlist_check_enabled,
+                            auth_backend,
+                        )
+                        .await
+                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
+                }.instrument(cancel_span)
             });
 
             return Ok(None);
@@ -315,7 +312,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     };
 
     let user = user_info.get_user().to_owned();
-    let user_info = match user_info
+    let (user_info, ip_allowlist) = match user_info
         .authenticate(
             ctx,
             &mut stream,
@@ -356,6 +353,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     .or_else(|e| stream.throw_error(e))
     .await?;
 
+    node.cancel_closure
+        .set_ip_allowlist(ip_allowlist.unwrap_or_default());
     let session = cancellation_handler.get_session();
     prepare_client_connection(&node, &session, &mut stream).await?;
 

From 9c92242ca002d6b35196ce1f2a6819e64da442f9 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 9 Jan 2025 06:02:06 +0000
Subject: [PATCH 63/63] Proxy release 2025-01-09