Merge branch 'main' into elizabeth/connection-pool-with-tests

Refactor Makefile rules for building the extensions under pgxn/ (#12305 )
Avoid recompiling postgres_ffi when there has been no changes (#12292 )
2026-07-04 04:30:38 +00:00 · 2025-06-22 14:45:44 -07:00 · 2025-06-22 19:43:14 +00:00 · 2025-06-21 21:07:38 +00:00 · 2025-06-21 15:01:29 +00:00 · 2025-06-20 16:32:26 -07:00
52 changed files with 950 additions and 697 deletions
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -313,10 +313,10 @@ jobs:
          # Use tar to copy files matching the pattern, preserving the paths in the destionation
          tar c \
            pg_install/v* \
-            pg_install/build/*/src/test/regress/*.so \
-            pg_install/build/*/src/test/regress/pg_regress \
-            pg_install/build/*/src/test/isolation/isolationtester \
-            pg_install/build/*/src/test/isolation/pg_isolation_regress \
+            build/*/src/test/regress/*.so \
+            build/*/src/test/regress/pg_regress \
+            build/*/src/test/isolation/isolationtester \
+            build/*/src/test/isolation/pg_isolation_regress \
            | tar  x -C /tmp/neon

      - name: Upload Neon artifact
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -110,7 +110,7 @@ jobs:

  build-walproposer-lib:
    if: |
-      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
+      contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
@@ -144,7 +144,7 @@ jobs:
        id: cache_walproposer_lib
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
-          path: pg_install/build/walproposer-lib
+          path: build/walproposer-lib
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Checkout submodule vendor/postgres-v17
@@ -169,11 +169,11 @@ jobs:
        run:
          make walproposer-lib -j$(sysctl -n hw.ncpu)

-      - name: Upload "pg_install/build/walproposer-lib" artifact
+      - name: Upload "build/walproposer-lib" artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
-          name: pg_install--build--walproposer-lib
-          path: pg_install/build/walproposer-lib
+          name: build--walproposer-lib
+          path: build/walproposer-lib
          # The artifact is supposed to be used by the next job in the same workflow,
          # so there’s no need to store it for too long.
          retention-days: 1
@@ -226,11 +226,11 @@ jobs:
          name: pg_install--v17
          path: pg_install/v17

-      - name: Download "pg_install/build/walproposer-lib" artifact
+      - name: Download "build/walproposer-lib" artifact
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
-          name: pg_install--build--walproposer-lib
-          path: pg_install/build/walproposer-lib
+          name: build--walproposer-lib
+          path: build/walproposer-lib

      # `actions/download-artifact` doesn't preserve permissions:
      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /artifact_cache
+/build
 /pg_install
 /target
 /tmp_check
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1235,6 +1235,25 @@ dependencies = [
 "replace_with",
 ]

+[[package]]
+name = "client_cache"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures",
+ "http 1.1.0",
+ "hyper-util",
+ "priority-queue",
+ "rand 0.8.5",
+ "tokio",
+ "tokio-util",
+ "tonic 0.13.1",
+ "tower 0.5.2",
+ "uuid",
+ "workspace_hack",
+]
+
 [[package]]
 name = "colorchoice"
 version = "1.0.0"
@@ -4255,6 +4274,7 @@ dependencies = [
 "tokio-util",
 "tonic 0.13.1",
 "tracing",
+ "url",
 "utils",
 "workspace_hack",
 ]
@@ -4472,6 +4492,8 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "prost 0.13.5",
+ "strum",
+ "strum_macros",
 "thiserror 1.0.69",
 "tokio",
 "tonic 0.13.1",
@@ -5026,6 +5048,17 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]

+[[package]]
+name = "priority-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970"
+dependencies = [
+ "autocfg",
+ "equivalent",
+ "indexmap 2.9.0",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.94"
@@ -5644,9 +5677,16 @@ dependencies = [

 [[package]]
 name = "replace_with"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690"
+checksum = "51743d3e274e2b18df81c4dc6caf8a5b8e15dbe799e0dca05c7617380094e884"
+
+[[package]]
+name = "request_tracker"
+version = "0.1.0"
+dependencies = [
+ "workspace_hack",
+]

 [[package]]
 name = "reqwest"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,8 +8,10 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
+    "pageserver/communicator_pools/client_cache",
    "pageserver/pagebench",
    "pageserver/page_api",
+    "pageserver/communicator_pools/request_tracker",
    "proxy",
    "safekeeper",
    "safekeeper/client",
@@ -257,6 +259,8 @@ pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
+client_cache = { path = "./pageserver/communicator_pools/client_cache" }
+request_tracker = { path = "./pageserver/communicator_pools/request_tracker" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
--- a/1
+++ b/1
@@ -45,7 +45,6 @@ COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
 ENV BUILD_TYPE=release
 RUN set -e \
    && mold -run make -j $(nproc) -s neon-pg-ext \
-    && rm -rf pg_install/build \
    && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

 # Prepare cargo-chef recipe
--- a/97
+++ b/97
@@ -1,8 +1,12 @@
 ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

-# Where to install Postgres, default is ./pg_install, maybe useful for package managers
+# Where to install Postgres, default is ./pg_install, maybe useful for package
+# managers.
 POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

+# All intermediate build artifacts are stored here.
+BUILD_DIR := build
+
 ICU_PREFIX_DIR := /usr/local/icu

 #
@@ -104,21 +108,20 @@ cargo-target-dir:
 # Some rules are duplicated for Postgres v14 and 15. We may want to refactor
 # to avoid the duplication in the future, but it's tolerable for now.
 #
-$(POSTGRES_INSTALL_DIR)/build/%/config.status:
-
-	mkdir -p $(POSTGRES_INSTALL_DIR)
-	test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+$(BUILD_DIR)/%/config.status:
+	mkdir -p $(BUILD_DIR)
+	test -e $(BUILD_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(BUILD_DIR)/CACHEDIR.TAG

 	+@echo "Configuring Postgres $* build"
 	@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
 		echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
 		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
 		exit 1; }
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
+	mkdir -p $(BUILD_DIR)/$*

 	VERSION=$*; \
 	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
-	(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
+	(cd $(BUILD_DIR)/$$VERSION && \
 	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
 		CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
 		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
@@ -130,74 +133,54 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 # the "build-all-versions" entry points) where direct mention of PostgreSQL
 # versions is used.
 .PHONY: postgres-configure-v17
-postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
+postgres-configure-v17: $(BUILD_DIR)/v17/config.status
 .PHONY: postgres-configure-v16
-postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
+postgres-configure-v16: $(BUILD_DIR)/v16/config.status
 .PHONY: postgres-configure-v15
-postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
+postgres-configure-v15: $(BUILD_DIR)/v15/config.status
 .PHONY: postgres-configure-v14
-postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
+postgres-configure-v14: $(BUILD_DIR)/v14/config.status

 # Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
 .PHONY: postgres-headers-%
 postgres-headers-%: postgres-configure-%
 	+@echo "Installing PostgreSQL $* headers"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
+	$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install

 # Compile and install PostgreSQL
 .PHONY: postgres-%
 postgres-%: postgres-configure-% \
 		  postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
 	+@echo "Compiling PostgreSQL $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
+	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install
 	+@echo "Compiling libpq $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
+	$(MAKE) -C $(BUILD_DIR)/$*/src/interfaces/libpq install
 	+@echo "Compiling pg_prewarm $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install
 	+@echo "Compiling pg_visibility $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install
 	+@echo "Compiling pageinspect $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install
 	+@echo "Compiling pg_trgm $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install
 	+@echo "Compiling amcheck $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install
 	+@echo "Compiling test_decoding $*"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install

 .PHONY: postgres-check-%
 postgres-check-%: postgres-%
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
+	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check

 .PHONY: neon-pg-ext-%
 neon-pg-ext-%: postgres-%
-	+@echo "Compiling neon $*"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
+	+@echo "Compiling neon-specific Postgres extensions for $*"
+	mkdir -p $(BUILD_DIR)/pgxn-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
-	+@echo "Compiling neon_walredo $*"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
-	+@echo "Compiling neon_rmgr $*"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
-	+@echo "Compiling neon_test_utils $*"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
-	+@echo "Compiling neon_utils $*"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
+		-C $(BUILD_DIR)/pgxn-$*\
+		-f $(ROOT_PROJECT_DIR)/pgxn/Makefile  install

 # Build walproposer as a static library. walproposer source code is located
 # in the pgxn/neon directory.
@@ -211,15 +194,15 @@ neon-pg-ext-%: postgres-%
 .PHONY: walproposer-lib
 walproposer-lib: neon-pg-ext-v17
 	+@echo "Compiling walproposer-lib"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
+	mkdir -p $(BUILD_DIR)/walproposer-lib
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
+		-C $(BUILD_DIR)/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
+	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib
+	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib
+	$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \
 		pg_strong_random.o
-	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
+	$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
 		checksum_helper.o \
 		cryptohash_openssl.o \
 		hmac_openssl.o \
@@ -227,7 +210,7 @@ walproposer-lib: neon-pg-ext-v17
 		parse_manifest.o \
 		scram-common.o
 ifeq ($(UNAME_S),Linux)
-	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
+	$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
 		pg_crc32c.o
 endif

@@ -272,7 +255,7 @@ fmt:

 postgres-%-pg-bsd-indent: postgres-%
 	+@echo "Compiling pg_bsd_indent"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
+	$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/

 # Create typedef list for the core. Note that generally it should be combined with
 # buildfarm one to cover platform specific stuff.
@@ -291,7 +274,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
 	cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
 		cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
 	+@echo note: you might want to run it on selected files/dirs instead.
-	INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
+	INDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
 		$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
 		$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
 		--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
@@ -302,9 +285,9 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
 neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
 		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
-		INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
+		INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
 		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
+		-C $(BUILD_DIR)/neon-v17 \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1634,18 +1634,7 @@ FROM pg-build AS neon-ext-build
 ARG PG_VERSION

 COPY pgxn/ pgxn/
-RUN make -j $(getconf _NPROCESSORS_ONLN) \
-        -C pgxn/neon \
-        -s install && \
-    make -j $(getconf _NPROCESSORS_ONLN) \
-        -C pgxn/neon_utils \
-        -s install && \
-    make -j $(getconf _NPROCESSORS_ONLN) \
-        -C pgxn/neon_test_utils \
-        -s install && \
-    make -j $(getconf _NPROCESSORS_ONLN) \
-        -C pgxn/neon_rmgr \
-        -s install
+RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute

 #########################################################################################
 #
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -408,7 +408,9 @@ impl ComputeNode {
        // N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
        const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
        let options = match conn_conf.get_options() {
-            Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
+            // Allow the control plane to override any options set by the
+            // compute
+            Some(options) => format!("{} {}", EXTRA_OPTIONS, options),
            None => EXTRA_OPTIONS.to_string(),
        };
        conn_conf.options(&options);
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -209,6 +209,8 @@ pub struct NeonStorageControllerConf {
    pub use_https_safekeeper_api: bool,

    pub use_local_compute_notifications: bool,
+
+    pub timeline_safekeeper_count: Option<i64>,
 }

 impl NeonStorageControllerConf {
@@ -239,6 +241,7 @@ impl Default for NeonStorageControllerConf {
            timelines_onto_safekeepers: true,
            use_https_safekeeper_api: false,
            use_local_compute_notifications: true,
+            timeline_safekeeper_count: None,
        }
    }
 }
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -628,6 +628,10 @@ impl StorageController {
            args.push("--timelines-onto-safekeepers".to_string());
        }

+        if let Some(sk_cnt) = self.config.timeline_safekeeper_count {
+            args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
+        }
+
        println!("Starting storage controller");

        background_process::start_process(
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -16,6 +16,7 @@ pub static COMPUTE_AUDIENCE: &str = "compute";
 pub enum ComputeClaimsScope {
    /// An admin-scoped token allows access to all of `compute_ctl`'s authorized
    /// facilities.
+    #[serde(rename = "compute_ctl:admin")]
    Admin,
 }

@@ -24,7 +25,7 @@ impl FromStr for ComputeClaimsScope {

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
-            "admin" => Ok(ComputeClaimsScope::Admin),
+            "compute_ctl:admin" => Ok(ComputeClaimsScope::Admin),
            _ => Err(anyhow::anyhow!("invalid compute claims scope \"{s}\"")),
        }
    }
@@ -80,3 +81,23 @@ pub struct SetRoleGrantsRequest {
    pub privileges: Vec<Privilege>,
    pub role: PgIdent,
 }
+
+#[cfg(test)]
+mod test {
+    use std::str::FromStr;
+
+    use crate::requests::ComputeClaimsScope;
+
+    /// Confirm that whether we parse the scope by string or through serde, the
+    /// same values parse to the same enum variant.
+    #[test]
+    fn compute_request_scopes() {
+        const ADMIN_SCOPE: &str = "compute_ctl:admin";
+
+        let from_serde: ComputeClaimsScope =
+            serde_json::from_str(&format!("\"{ADMIN_SCOPE}\"")).unwrap();
+        let from_str = ComputeClaimsScope::from_str(ADMIN_SCOPE).unwrap();
+
+        assert_eq!(from_serde, from_str);
+    }
+}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -816,7 +816,7 @@ pub mod tenant_conf_defaults {
    // By default ingest enough WAL for two new L0 layers before checking if new image
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
-    pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
+    pub const DEFAULT_GC_COMPACTION_ENABLED: bool = true;
    pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
    pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
    pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
--- a/libs/pageserver_api/src/upcall_api.rs
+++ b/libs/pageserver_api/src/upcall_api.rs
@@ -23,22 +23,12 @@ pub struct ReAttachRequest {
    pub register: Option<NodeRegisterRequest>,
 }

-fn default_mode() -> LocationConfigMode {
-    LocationConfigMode::AttachedSingle
-}
-
 #[derive(Serialize, Deserialize, Debug)]
 pub struct ReAttachResponseTenant {
    pub id: TenantShardId,
    /// Mandatory if LocationConfigMode is None or set to an Attached* mode
    pub r#gen: Option<u32>,
-
-    /// Default value only for backward compat: this field should be set
-    #[serde(default = "default_mode")]
    pub mode: LocationConfigMode,
-
-    // Default value only for backward compat: this field should be set
-    #[serde(default = "ShardStripeSize::default")]
    pub stripe_size: ShardStripeSize,
 }
 #[derive(Serialize, Deserialize)]
--- a/libs/proxy/tokio-postgres2/src/cancel_query.rs
+++ b/libs/proxy/tokio-postgres2/src/cancel_query.rs
@@ -1,5 +1,3 @@
-use std::io;
-
 use tokio::net::TcpStream;

 use crate::client::SocketConfig;
@@ -8,7 +6,7 @@ use crate::tls::MakeTlsConnect;
 use crate::{Error, cancel_query_raw, connect_socket};

 pub(crate) async fn cancel_query<T>(
-    config: Option<SocketConfig>,
+    config: SocketConfig,
    ssl_mode: SslMode,
    tls: T,
    process_id: i32,
@@ -17,16 +15,6 @@ pub(crate) async fn cancel_query<T>(
 where
    T: MakeTlsConnect<TcpStream>,
 {
-    let config = match config {
-        Some(config) => config,
-        None => {
-            return Err(Error::connect(io::Error::new(
-                io::ErrorKind::InvalidInput,
-                "unknown host",
-            )));
-        }
-    };
-
    let hostname = match &config.host {
        Host::Tcp(host) => &**host,
    };
--- a/libs/proxy/tokio-postgres2/src/cancel_token.rs
+++ b/libs/proxy/tokio-postgres2/src/cancel_token.rs
@@ -7,11 +7,16 @@ use crate::config::SslMode;
 use crate::tls::{MakeTlsConnect, TlsConnect};
 use crate::{Error, cancel_query, cancel_query_raw};

-/// The capability to request cancellation of in-progress queries on a
-/// connection.
-#[derive(Clone, Serialize, Deserialize)]
+/// A cancellation token that allows easy cancellation of a query.
+#[derive(Clone)]
 pub struct CancelToken {
-    pub socket_config: Option<SocketConfig>,
+    pub socket_config: SocketConfig,
+    pub raw: RawCancelToken,
+}
+
+/// A raw cancellation token that allows cancellation of a query, given a fresh connection to postgres.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RawCancelToken {
    pub ssl_mode: SslMode,
    pub process_id: i32,
    pub secret_key: i32,
@@ -36,14 +41,16 @@ impl CancelToken {
    {
        cancel_query::cancel_query(
            self.socket_config.clone(),
-            self.ssl_mode,
+            self.raw.ssl_mode,
            tls,
-            self.process_id,
-            self.secret_key,
+            self.raw.process_id,
+            self.raw.secret_key,
        )
        .await
    }
+}

+impl RawCancelToken {
    /// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new
    /// connection itself.
    pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>
--- a/libs/proxy/tokio-postgres2/src/client.rs
+++ b/libs/proxy/tokio-postgres2/src/client.rs
@@ -12,6 +12,7 @@ use postgres_protocol2::message::frontend;
 use serde::{Deserialize, Serialize};
 use tokio::sync::mpsc;

+use crate::cancel_token::RawCancelToken;
 use crate::codec::{BackendMessages, FrontendMessage};
 use crate::config::{Host, SslMode};
 use crate::query::RowStream;
@@ -331,10 +332,12 @@ impl Client {
    /// connection associated with this client.
    pub fn cancel_token(&self) -> CancelToken {
        CancelToken {
-            socket_config: Some(self.socket_config.clone()),
-            ssl_mode: self.ssl_mode,
-            process_id: self.process_id,
-            secret_key: self.secret_key,
+            socket_config: self.socket_config.clone(),
+            raw: RawCancelToken {
+                ssl_mode: self.ssl_mode,
+                process_id: self.process_id,
+                secret_key: self.secret_key,
+            },
        }
    }

--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -3,7 +3,7 @@

 use postgres_protocol2::message::backend::ReadyForQueryBody;

-pub use crate::cancel_token::CancelToken;
+pub use crate::cancel_token::{CancelToken, RawCancelToken};
 pub use crate::client::{Client, SocketConfig};
 pub use crate::config::Config;
 pub use crate::connect_raw::RawConnection;
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -13,22 +13,24 @@ fn main() -> anyhow::Result<()> {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
    println!("cargo:rerun-if-changed=bindgen_deps.h");

+    let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
+
    // Finding the location of built libraries and Postgres C headers:
    // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
    // - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
    let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
        postgres_install_dir.into()
    } else {
-        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pg_install")
+        root_path.join("pg_install")
    };

    let pg_install_abs = std::fs::canonicalize(pg_install_dir)?;
-    let walproposer_lib_dir = pg_install_abs.join("build/walproposer-lib");
+    let walproposer_lib_dir = root_path.join("build/walproposer-lib");
    let walproposer_lib_search_str = walproposer_lib_dir
        .to_str()
        .ok_or(anyhow!("Bad non-UTF path"))?;

-    let pgxn_neon = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pgxn/neon");
+    let pgxn_neon = root_path.join("pgxn/neon");
    let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;
    let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?;

--- a/pageserver/communicator_pools/client_cache/Cargo.toml
+++ b/pageserver/communicator_pools/client_cache/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "client_cache"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+async-trait.workspace = true
+bytes.workspace = true
+futures.workspace = true
+hyper-util.workspace = true
+http.workspace = true
+priority-queue = "2.3.1"
+rand.workspace = true
+tonic.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+tower.workspace = true
+uuid.workspace = true
+workspace_hack.workspace = true
+
--- a/pageserver/communicator_pools/client_cache/src/lib.rs
+++ b/pageserver/communicator_pools/client_cache/src/lib.rs
@@ -0,0 +1,105 @@
+use async_trait::async_trait;
+use priority_queue::PriorityQueue;
+use std::{
+    collections::HashMap,
+    sync::Arc,
+    time::{Duration, Instant},
+};
+use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
+
+#[async_trait]
+pub trait PooledClientFactory<T>: Send + Sync + 'static {
+    /// Create a new pooled item.
+    async fn create(
+        &self,
+        connect_timeout: Duration,
+    ) -> Result<Result<T, tonic::Status>, tokio::time::error::Elapsed>;
+}
+
+/// A pooled gRPC client with capacity tracking and error handling.
+#[allow(dead_code)]
+pub struct ClientCache<T> {
+    inner: Mutex<Inner<T>>,
+
+    fact: Arc<dyn PooledClientFactory<T> + Send + Sync>,
+
+    connect_timeout: Duration,
+    connect_backoff: Duration,
+
+    /// The maximum number of consumers that can use a single connection.
+    max_consumers: usize,
+
+    /// The number of consecutive errors before a connection is removed from the pool.
+    error_threshold: usize,
+
+    /// The maximum duration a connection can be idle before being removed.
+    max_idle_duration: Duration,
+    max_total_connections: usize,
+
+    client_semaphore: Arc<Semaphore>,
+}
+
+#[allow(dead_code)]
+struct Inner<T> {
+    entries: HashMap<uuid::Uuid, CacheEntry<T>>,
+    pq: PriorityQueue<uuid::Uuid, usize>,
+    // This is updated when a connection is dropped, or we fail
+    // to create a new connection.
+    last_connect_failure: Option<Instant>,
+    waiters: usize,
+    in_progress: usize,
+}
+
+#[allow(dead_code)]
+struct CacheEntry<T> {
+    client: T,
+    active_consumers: usize,
+    consecutive_errors: usize,
+    last_used: Instant,
+}
+
+/// A client borrowed from the pool.
+#[allow(dead_code)]
+pub struct PooledClient<T> {
+    pub client: T,
+    pool: Arc<ClientCache<T>>,
+    is_ok: bool,
+    id: uuid::Uuid,
+    permit: OwnedSemaphorePermit,
+}
+
+impl<T: Clone + Send + 'static> ClientCache<T> {
+    pub fn new(
+        fact: Arc<dyn PooledClientFactory<T> + Send + Sync>,
+        connect_timeout: Duration,
+        connect_backoff: Duration,
+        max_consumers: usize,
+        error_threshold: usize,
+        max_idle_duration: Duration,
+        max_total_connections: usize,
+    ) -> Arc<Self> {
+        Arc::new(Self {
+            inner: Mutex::new(Inner::<T> {
+                entries: HashMap::new(),
+                pq: PriorityQueue::new(),
+                last_connect_failure: None,
+                waiters: 0,
+                in_progress: 0,
+            }),
+            fact: Arc::clone(&fact),
+            connect_timeout,
+            connect_backoff,
+            max_consumers,
+            error_threshold,
+            max_idle_duration,
+            max_total_connections,
+            client_semaphore: Arc::new(Semaphore::new(0)),
+        })
+    }
+}
+
+impl<T: Clone + Send + 'static> PooledClient<T> {
+    pub fn client(&self) -> T {
+        self.client.clone()
+    }
+}
--- a/pageserver/communicator_pools/request_tracker/Cargo.toml
+++ b/pageserver/communicator_pools/request_tracker/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "request_tracker"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+workspace_hack.workspace = true
--- a/pageserver/communicator_pools/request_tracker/src/lib.rs
+++ b/pageserver/communicator_pools/request_tracker/src/lib.rs
@@ -0,0 +1,15 @@
+// Temporary placeholder until the request tracker is implemented
+pub fn add(left: u64, right: u64) -> u64 {
+    left + right
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_works() {
+        let result = add(2, 2);
+        assert_eq!(result, 4);
+    }
+}
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -11,6 +11,8 @@ futures.workspace = true
 pageserver_api.workspace = true
 postgres_ffi.workspace = true
 prost.workspace = true
+strum.workspace = true
+strum_macros.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
 tonic.workspace = true
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -459,7 +459,7 @@ impl GetPageResponse {
 /// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
 /// (potentially shared by many backends), and a gRPC status response would terminate the stream so
 /// we send GetPageResponse messages with these codes instead.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, PartialEq, strum_macros::Display)]
 pub enum GetPageStatusCode {
    /// Unknown status. For forwards compatibility: used when an older client version receives a new
    /// status code from a newer server version.
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -25,6 +25,7 @@ tokio.workspace = true
 tokio-stream.workspace = true
 tokio-util.workspace = true
 tonic.workspace = true
+url.workspace = true

 pageserver_client.workspace = true
 pageserver_api.workspace = true
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -13,7 +13,6 @@ use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
 use pageserver_client::page_service::BasebackupRequest;
 use pageserver_page_api as page_api;
 use rand::prelude::*;
-use reqwest::Url;
 use tokio::io::AsyncRead;
 use tokio::sync::Barrier;
 use tokio::task::JoinSet;
@@ -21,6 +20,7 @@ use tokio_util::compat::{TokioAsyncReadCompatExt as _, TokioAsyncWriteCompatExt
 use tokio_util::io::StreamReader;
 use tonic::async_trait;
 use tracing::{info, instrument};
+use url::Url;
 use utils::id::TenantTimelineId;
 use utils::lsn::Lsn;
 use utils::shard::ShardIndex;
@@ -156,12 +156,16 @@ async fn main_impl(

    let mut work_senders = HashMap::new();
    let mut tasks = Vec::new();
-    let connurl = Url::parse(&args.page_service_connstring)?;
+    let scheme = match Url::parse(&args.page_service_connstring) {
+        Ok(url) => url.scheme().to_lowercase().to_string(),
+        Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
+        Err(err) => return Err(anyhow!("invalid connstring: {err}")),
+    };
    for &tl in &timelines {
        let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
        work_senders.insert(tl, sender);

-        let client: Box<dyn Client> = match connurl.scheme() {
+        let client: Box<dyn Client> = match scheme.as_str() {
            "postgresql" | "postgres" => Box::new(
                LibpqClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,
            ),
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -10,33 +10,31 @@ use anyhow::Context;
 use async_trait::async_trait;
 use bytes::Bytes;
 use camino::Utf8PathBuf;
+use futures::{Stream, StreamExt as _};
 use pageserver_api::key::Key;
 use pageserver_api::keyspace::KeySpaceAccum;
 use pageserver_api::pagestream_api::{PagestreamGetPageRequest, PagestreamRequest};
 use pageserver_api::reltag::RelTag;
 use pageserver_api::shard::TenantShardId;
-use pageserver_page_api::proto;
+use pageserver_page_api as page_api;
 use rand::prelude::*;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
+use url::Url;
 use utils::id::TenantTimelineId;
 use utils::lsn::Lsn;
+use utils::shard::ShardIndex;

 use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
 use crate::util::{request_stats, tokio_thread_local_stats};

-#[derive(clap::ValueEnum, Clone, Debug)]
-enum Protocol {
-    Libpq,
-    Grpc,
-}
-
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
+    /// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
    page_service_connstring: String,
    #[clap(long)]
@@ -45,8 +43,9 @@ pub(crate) struct Args {
    num_clients: NonZeroUsize,
    #[clap(long)]
    runtime: Option<humantime::Duration>,
-    #[clap(long, value_enum, default_value = "libpq")]
-    protocol: Protocol,
+    /// If true, enable compression (only for gRPC).
+    #[clap(long)]
+    compression: bool,
    /// Each client sends requests at the given rate.
    ///
    /// If a request takes too long and we should be issuing a new request already,
@@ -325,18 +324,32 @@ async fn main_impl(
                .unwrap();

        Box::pin(async move {
-            let client: Box<dyn Client> = match args.protocol {
-                Protocol::Libpq => Box::new(
-                    LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
-                        .await
-                        .unwrap(),
+            let scheme = match Url::parse(&args.page_service_connstring) {
+                Ok(url) => url.scheme().to_lowercase().to_string(),
+                Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
+                Err(err) => panic!("invalid connstring: {err}"),
+            };
+            let client: Box<dyn Client> = match scheme.as_str() {
+                "postgresql" | "postgres" => {
+                    assert!(!args.compression, "libpq does not support compression");
+                    Box::new(
+                        LibpqClient::new(&args.page_service_connstring, worker_id.timeline)
+                            .await
+                            .unwrap(),
+                    )
+                }
+
+                "grpc" => Box::new(
+                    GrpcClient::new(
+                        &args.page_service_connstring,
+                        worker_id.timeline,
+                        args.compression,
+                    )
+                    .await
+                    .unwrap(),
                ),

-                Protocol::Grpc => Box::new(
-                    GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
-                        .await
-                        .unwrap(),
-                ),
+                scheme => panic!("unsupported scheme {scheme}"),
            };
            run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
        })
@@ -543,8 +556,8 @@ struct LibpqClient {
 }

 impl LibpqClient {
-    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
-        let inner = pageserver_client::page_service::Client::new(connstring)
+    async fn new(connstring: &str, ttid: TenantTimelineId) -> anyhow::Result<Self> {
+        let inner = pageserver_client::page_service::Client::new(connstring.to_string())
            .await?
            .pagestream(ttid.tenant_id, ttid.timeline_id)
            .await?;
@@ -600,34 +613,36 @@ impl Client for LibpqClient {
    }
 }

-/// A gRPC client using the raw, no-frills gRPC client.
+/// A gRPC Pageserver client.
 struct GrpcClient {
-    req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
-    resp_rx: tonic::Streaming<proto::GetPageResponse>,
+    req_tx: tokio::sync::mpsc::Sender<page_api::GetPageRequest>,
+    resp_rx: Pin<Box<dyn Stream<Item = Result<page_api::GetPageResponse, tonic::Status>> + Send>>,
 }

 impl GrpcClient {
-    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
-        let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;
+    async fn new(
+        connstring: &str,
+        ttid: TenantTimelineId,
+        compression: bool,
+    ) -> anyhow::Result<Self> {
+        let mut client = page_api::Client::new(
+            connstring.to_string(),
+            ttid.tenant_id,
+            ttid.timeline_id,
+            ShardIndex::unsharded(),
+            None,
+            compression.then_some(tonic::codec::CompressionEncoding::Zstd),
+        )
+        .await?;

        // The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
        // benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
        // buffered by Tonic and the OS too.
        let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
-        let mut req = tonic::Request::new(req_stream);
-        let metadata = req.metadata_mut();
-        metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
-        metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
-        metadata.insert("neon-shard-id", "0000".try_into()?);
+        let resp_rx = Box::pin(client.get_pages(req_stream).await?);

-        let resp = client.get_pages(req).await?;
-        let resp_stream = resp.into_inner();
-
-        Ok(Self {
-            req_tx,
-            resp_rx: resp_stream,
-        })
+        Ok(Self { req_tx, resp_rx })
    }
 }

@@ -641,27 +656,27 @@ impl Client for GrpcClient {
        rel: RelTag,
        blks: Vec<u32>,
    ) -> anyhow::Result<()> {
-        let req = proto::GetPageRequest {
+        let req = page_api::GetPageRequest {
            request_id: req_id,
-            request_class: proto::GetPageClass::Normal as i32,
-            read_lsn: Some(proto::ReadLsn {
-                request_lsn: req_lsn.0,
-                not_modified_since_lsn: mod_lsn.0,
-            }),
-            rel: Some(rel.into()),
-            block_number: blks,
+            request_class: page_api::GetPageClass::Normal,
+            read_lsn: page_api::ReadLsn {
+                request_lsn: req_lsn,
+                not_modified_since_lsn: Some(mod_lsn),
+            },
+            rel,
+            block_numbers: blks,
        };
        self.req_tx.send(req).await?;
        Ok(())
    }

    async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {
-        let resp = self.resp_rx.message().await?.unwrap();
+        let resp = self.resp_rx.next().await.unwrap().unwrap();
        anyhow::ensure!(
-            resp.status_code == proto::GetPageStatusCode::Ok as i32,
+            resp.status_code == page_api::GetPageStatusCode::Ok,
            "unexpected status code: {}",
-            resp.status_code
+            resp.status_code,
        );
-        Ok((resp.request_id, resp.page_image))
+        Ok((resp.request_id, resp.page_images))
    }
 }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -61,8 +61,10 @@ pub(crate) struct LocationConf {
    /// The detailed shard identity.  This structure is already scoped within
    /// a TenantShardId, but we need the full ShardIdentity to enable calculating
    /// key->shard mappings.
-    // TODO(vlad): Remove this default once all configs have a shard identity on disk.
-    #[serde(default = "ShardIdentity::unsharded")]
+    ///
+    /// NB: we store this even for unsharded tenants, so that we agree with storcon on the intended
+    /// stripe size. Otherwise, a split request that does not specify a stripe size may use a
+    /// different default than storcon, which can lead to incorrect stripe sizes and corruption.
    pub(crate) shard: ShardIdentity,

    /// The pan-cluster tenant configuration, the same on all locations
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -6543,7 +6543,7 @@ impl Timeline {

        debug!("retain_lsns: {:?}", retain_lsns);

-        let mut layers_to_remove = Vec::new();
+        let max_retain_lsn = retain_lsns.iter().max();

        // Scan all layers in the timeline (remote or on-disk).
        //
@@ -6553,108 +6553,110 @@ impl Timeline {
        // 3. it doesn't need to be retained for 'retain_lsns';
        // 4. it does not need to be kept for LSNs holding valid leases.
        // 5. newer on-disk image layers cover the layer's whole key range
-        //
-        // TODO holding a write lock is too agressive and avoidable
-        let mut guard = self
-            .layers
-            .write(LayerManagerLockHolder::GarbageCollection)
-            .await;
-        let layers = guard.layer_map()?;
-        'outer: for l in layers.iter_historic_layers() {
-            result.layers_total += 1;
+        let layers_to_remove = {
+            let mut layers_to_remove = Vec::new();

-            // 1. Is it newer than GC horizon cutoff point?
-            if l.get_lsn_range().end > space_cutoff {
-                info!(
-                    "keeping {} because it's newer than space_cutoff {}",
-                    l.layer_name(),
-                    space_cutoff,
-                );
-                result.layers_needed_by_cutoff += 1;
-                continue 'outer;
-            }
+            let guard = self
+                .layers
+                .read(LayerManagerLockHolder::GarbageCollection)
+                .await;
+            let layers = guard.layer_map()?;
+            'outer: for l in layers.iter_historic_layers() {
+                result.layers_total += 1;

-            // 2. It is newer than PiTR cutoff point?
-            if l.get_lsn_range().end > time_cutoff {
-                info!(
-                    "keeping {} because it's newer than time_cutoff {}",
-                    l.layer_name(),
-                    time_cutoff,
-                );
-                result.layers_needed_by_pitr += 1;
-                continue 'outer;
-            }
-
-            // 3. Is it needed by a child branch?
-            // NOTE With that we would keep data that
-            // might be referenced by child branches forever.
-            // We can track this in child timeline GC and delete parent layers when
-            // they are no longer needed. This might be complicated with long inheritance chains.
-            //
-            // TODO Vec is not a great choice for `retain_lsns`
-            for retain_lsn in &retain_lsns {
-                // start_lsn is inclusive
-                if &l.get_lsn_range().start <= retain_lsn {
-                    info!(
-                        "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
+                // 1. Is it newer than GC horizon cutoff point?
+                if l.get_lsn_range().end > space_cutoff {
+                    debug!(
+                        "keeping {} because it's newer than space_cutoff {}",
                        l.layer_name(),
-                        retain_lsn,
-                        l.is_incremental(),
+                        space_cutoff,
                    );
-                    result.layers_needed_by_branches += 1;
+                    result.layers_needed_by_cutoff += 1;
                    continue 'outer;
                }
-            }

-            // 4. Is there a valid lease that requires us to keep this layer?
-            if let Some(lsn) = &max_lsn_with_valid_lease {
-                // keep if layer start <= any of the lease
-                if &l.get_lsn_range().start <= lsn {
-                    info!(
-                        "keeping {} because there is a valid lease preventing GC at {}",
+                // 2. It is newer than PiTR cutoff point?
+                if l.get_lsn_range().end > time_cutoff {
+                    debug!(
+                        "keeping {} because it's newer than time_cutoff {}",
                        l.layer_name(),
-                        lsn,
+                        time_cutoff,
                    );
-                    result.layers_needed_by_leases += 1;
+                    result.layers_needed_by_pitr += 1;
                    continue 'outer;
                }
+
+                // 3. Is it needed by a child branch?
+                // NOTE With that we would keep data that
+                // might be referenced by child branches forever.
+                // We can track this in child timeline GC and delete parent layers when
+                // they are no longer needed. This might be complicated with long inheritance chains.
+                if let Some(retain_lsn) = max_retain_lsn {
+                    // start_lsn is inclusive
+                    if &l.get_lsn_range().start <= retain_lsn {
+                        debug!(
+                            "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
+                            l.layer_name(),
+                            retain_lsn,
+                            l.is_incremental(),
+                        );
+                        result.layers_needed_by_branches += 1;
+                        continue 'outer;
+                    }
+                }
+
+                // 4. Is there a valid lease that requires us to keep this layer?
+                if let Some(lsn) = &max_lsn_with_valid_lease {
+                    // keep if layer start <= any of the lease
+                    if &l.get_lsn_range().start <= lsn {
+                        debug!(
+                            "keeping {} because there is a valid lease preventing GC at {}",
+                            l.layer_name(),
+                            lsn,
+                        );
+                        result.layers_needed_by_leases += 1;
+                        continue 'outer;
+                    }
+                }
+
+                // 5. Is there a later on-disk layer for this relation?
+                //
+                // The end-LSN is exclusive, while disk_consistent_lsn is
+                // inclusive. For example, if disk_consistent_lsn is 100, it is
+                // OK for a delta layer to have end LSN 101, but if the end LSN
+                // is 102, then it might not have been fully flushed to disk
+                // before crash.
+                //
+                // For example, imagine that the following layers exist:
+                //
+                // 1000      - image (A)
+                // 1000-2000 - delta (B)
+                // 2000      - image (C)
+                // 2000-3000 - delta (D)
+                // 3000      - image (E)
+                //
+                // If GC horizon is at 2500, we can remove layers A and B, but
+                // we cannot remove C, even though it's older than 2500, because
+                // the delta layer 2000-3000 depends on it.
+                if !layers
+                    .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
+                {
+                    debug!("keeping {} because it is the latest layer", l.layer_name());
+                    result.layers_not_updated += 1;
+                    continue 'outer;
+                }
+
+                // We didn't find any reason to keep this file, so remove it.
+                info!(
+                    "garbage collecting {} is_dropped: xx is_incremental: {}",
+                    l.layer_name(),
+                    l.is_incremental(),
+                );
+                layers_to_remove.push(l);
            }

-            // 5. Is there a later on-disk layer for this relation?
-            //
-            // The end-LSN is exclusive, while disk_consistent_lsn is
-            // inclusive. For example, if disk_consistent_lsn is 100, it is
-            // OK for a delta layer to have end LSN 101, but if the end LSN
-            // is 102, then it might not have been fully flushed to disk
-            // before crash.
-            //
-            // For example, imagine that the following layers exist:
-            //
-            // 1000      - image (A)
-            // 1000-2000 - delta (B)
-            // 2000      - image (C)
-            // 2000-3000 - delta (D)
-            // 3000      - image (E)
-            //
-            // If GC horizon is at 2500, we can remove layers A and B, but
-            // we cannot remove C, even though it's older than 2500, because
-            // the delta layer 2000-3000 depends on it.
-            if !layers
-                .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
-            {
-                info!("keeping {} because it is the latest layer", l.layer_name());
-                result.layers_not_updated += 1;
-                continue 'outer;
-            }
-
-            // We didn't find any reason to keep this file, so remove it.
-            info!(
-                "garbage collecting {} is_dropped: xx is_incremental: {}",
-                l.layer_name(),
-                l.is_incremental(),
-            );
-            layers_to_remove.push(l);
-        }
+            layers_to_remove
+        };

        if !layers_to_remove.is_empty() {
            // Persist the new GC cutoff value before we actually remove anything.
@@ -6670,15 +6672,19 @@ impl Timeline {
                    }
                })?;

+            let mut guard = self
+                .layers
+                .write(LayerManagerLockHolder::GarbageCollection)
+                .await;
+
            let gc_layers = layers_to_remove
                .iter()
-                .map(|x| guard.get_from_desc(x))
+                .flat_map(|desc| guard.try_get_from_key(&desc.key()).cloned())
                .collect::<Vec<Layer>>();

            result.layers_removed = gc_layers.len() as u64;

            self.remote_client.schedule_gc_update(&gc_layers)?;
-
            guard.open_mut()?.finish_gc_timeline(&gc_layers);

            #[cfg(feature = "testing")]
--- a/pgxn/Makefile
+++ b/pgxn/Makefile
@@ -0,0 +1,28 @@
+# This makefile assumes that 'pg_config' is in the path, or is passed in the
+# PG_CONFIG variable.
+#
+# This is used in two different ways:
+#
+# 1. The main makefile calls this, when you invoke the `make neon-pg-ext-%`
+#    target. It passes PG_CONFIG pointing to pg_install/%/bin/pg_config.
+#    This is a VPATH build; the current directory is build/pgxn-%, and
+#    the path to the Makefile is passed with the -f argument.
+#
+# 2. compute-node.Dockerfile invokes this to build the compute extensions
+#    for the specific Postgres version. It relies on pg_config already
+#    being in $(PATH).
+
+srcdir = $(dir $(firstword $(MAKEFILE_LIST)))
+
+PG_CONFIG = pg_config
+
+subdirs = neon neon_rmgr neon_walredo neon_utils neon_test_utils
+
+.PHONY: install install-compute install-storage $(subdirs)
+install: $(subdirs)
+install-compute: neon neon_utils neon_test_utils neon_rmgr
+install-storage: neon_rmgr neon_walredo
+
+$(subdirs): %:
+	mkdir -p $*
+	$(MAKE) PG_CONFIG=$(PG_CONFIG) -C $* -f $(abspath $(srcdir)/$@/Makefile) install
--- a/proxy/src/batch.rs
+++ b/proxy/src/batch.rs
@@ -0,0 +1,146 @@
+//! Batch processing system based on intrusive linked lists.
+//!
+//! Enqueuing a batch job requires no allocations, with
+//! direct support for cancelling jobs early.
+use std::collections::BTreeMap;
+use std::pin::pin;
+use std::sync::Mutex;
+
+use futures::future::Either;
+use scopeguard::ScopeGuard;
+use tokio::sync::oneshot::error::TryRecvError;
+
+use crate::ext::LockExt;
+
+pub trait QueueProcessing: Send + 'static {
+    type Req: Send + 'static;
+    type Res: Send;
+
+    /// Get the desired batch size.
+    fn batch_size(&self, queue_size: usize) -> usize;
+
+    /// This applies a full batch of events.
+    /// Must respond with a full batch of replies.
+    ///
+    /// If this apply can error, it's expected that errors be forwarded to each Self::Res.
+    ///
+    /// Batching does not need to happen atomically.
+    fn apply(&mut self, req: Vec<Self::Req>) -> impl Future<Output = Vec<Self::Res>> + Send;
+}
+
+pub struct BatchQueue<P: QueueProcessing> {
+    processor: tokio::sync::Mutex<P>,
+    inner: Mutex<BatchQueueInner<P>>,
+}
+
+struct BatchJob<P: QueueProcessing> {
+    req: P::Req,
+    res: tokio::sync::oneshot::Sender<P::Res>,
+}
+
+impl<P: QueueProcessing> BatchQueue<P> {
+    pub fn new(p: P) -> Self {
+        Self {
+            processor: tokio::sync::Mutex::new(p),
+            inner: Mutex::new(BatchQueueInner {
+                version: 0,
+                queue: BTreeMap::new(),
+            }),
+        }
+    }
+
+    pub async fn call(&self, req: P::Req) -> P::Res {
+        let (id, mut rx) = self.inner.lock_propagate_poison().register_job(req);
+        let guard = scopeguard::guard(id, move |id| {
+            let mut inner = self.inner.lock_propagate_poison();
+            if inner.queue.remove(&id).is_some() {
+                tracing::debug!("batched task cancelled before completion");
+            }
+        });
+
+        let resp = loop {
+            // try become the leader, or try wait for success.
+            let mut processor = match futures::future::select(rx, pin!(self.processor.lock())).await
+            {
+                // we got the resp.
+                Either::Left((resp, _)) => break resp.ok(),
+                // we are the leader.
+                Either::Right((p, rx_)) => {
+                    rx = rx_;
+                    p
+                }
+            };
+
+            let (reqs, resps) = self.inner.lock_propagate_poison().get_batch(&processor);
+
+            // apply a batch.
+            let values = processor.apply(reqs).await;
+
+            // send response values.
+            for (tx, value) in std::iter::zip(resps, values) {
+                // sender hung up but that's fine.
+                drop(tx.send(value));
+            }
+
+            match rx.try_recv() {
+                Ok(resp) => break Some(resp),
+                Err(TryRecvError::Closed) => break None,
+                // edge case - there was a race condition where
+                // we became the leader but were not in the batch.
+                //
+                // Example:
+                // thread 1: register job id=1
+                // thread 2: register job id=2
+                // thread 2: processor.lock().await
+                // thread 1: processor.lock().await
+                // thread 2: becomes leader, batch_size=1, jobs=[1].
+                Err(TryRecvError::Empty) => {}
+            }
+        };
+
+        // already removed.
+        ScopeGuard::into_inner(guard);
+
+        resp.expect("no response found. batch processer should not panic")
+    }
+}
+
+struct BatchQueueInner<P: QueueProcessing> {
+    version: u64,
+    queue: BTreeMap<u64, BatchJob<P>>,
+}
+
+impl<P: QueueProcessing> BatchQueueInner<P> {
+    fn register_job(&mut self, req: P::Req) -> (u64, tokio::sync::oneshot::Receiver<P::Res>) {
+        let (tx, rx) = tokio::sync::oneshot::channel();
+
+        let id = self.version;
+
+        // Overflow concern:
+        // This is a u64, and we might enqueue 2^16 tasks per second.
+        // This gives us 2^48 seconds (9 million years).
+        // Even if this does overflow, it will not break, but some
+        // jobs with the higher version might never get prioritised.
+        self.version += 1;
+
+        self.queue.insert(id, BatchJob { req, res: tx });
+
+        (id, rx)
+    }
+
+    fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<tokio::sync::oneshot::Sender<P::Res>>) {
+        let batch_size = p.batch_size(self.queue.len());
+        let mut reqs = Vec::with_capacity(batch_size);
+        let mut resps = Vec::with_capacity(batch_size);
+
+        while reqs.len() < batch_size {
+            let Some((_, job)) = self.queue.pop_first() else {
+                break;
+            };
+            reqs.push(job.req);
+            resps.push(job.res);
+        }
+
+        (reqs, resps)
+    }
+}
--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -201,7 +201,7 @@ pub async fn run() -> anyhow::Result<()> {
        auth_backend,
        http_listener,
        shutdown.clone(),
-        Arc::new(CancellationHandler::new(&config.connect_to_compute, None)),
+        Arc::new(CancellationHandler::new(&config.connect_to_compute)),
        endpoint_rate_limiter,
    );

--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -23,7 +23,8 @@ use utils::{project_build_tag, project_git_version};

 use crate::auth::backend::jwt::JwkCache;
 use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
-use crate::cancellation::{CancellationHandler, handle_cancel_messages};
+use crate::batch::BatchQueue;
+use crate::cancellation::{CancellationHandler, CancellationProcessor};
 use crate::config::{
    self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
    ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,
@@ -392,13 +393,7 @@ pub async fn run() -> anyhow::Result<()> {
        .as_ref()
        .map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));

-    // channel size should be higher than redis client limit to avoid blocking
-    let cancel_ch_size = args.cancellation_ch_size;
-    let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size);
-    let cancellation_handler = Arc::new(CancellationHandler::new(
-        &config.connect_to_compute,
-        Some(tx_cancel),
-    ));
+    let cancellation_handler = Arc::new(CancellationHandler::new(&config.connect_to_compute));

    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
        RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
@@ -530,21 +525,11 @@ pub async fn run() -> anyhow::Result<()> {
                    match redis_kv_client.try_connect().await {
                        Ok(()) => {
                            info!("Connected to Redis KV client");
-                            maintenance_tasks.spawn(async move {
-                                handle_cancel_messages(
-                                    &mut redis_kv_client,
-                                    rx_cancel,
-                                    args.cancellation_batch_size,
-                                )
-                                .await?;
+                            cancellation_handler.init_tx(BatchQueue::new(CancellationProcessor {
+                                client: redis_kv_client,
+                                batch_size: args.cancellation_batch_size,
+                            }));

-                                drop(redis_kv_client);
-
-                                // `handle_cancel_messages` was terminated due to the tx_cancel
-                                // being dropped. this is not worthy of an error, and this task can only return `Err`,
-                                // so let's wait forever instead.
-                                std::future::pending().await
-                            });
                            break;
                        }
                        Err(e) => {
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -1,19 +1,23 @@
+use std::convert::Infallible;
 use std::net::{IpAddr, SocketAddr};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
+use std::time::Duration;

-use anyhow::{Context, anyhow};
+use anyhow::anyhow;
+use futures::FutureExt;
 use ipnet::{IpNet, Ipv4Net, Ipv6Net};
-use postgres_client::CancelToken;
+use postgres_client::RawCancelToken;
 use postgres_client::tls::MakeTlsConnect;
 use redis::{Cmd, FromRedisValue, Value};
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 use tokio::net::TcpStream;
-use tokio::sync::{mpsc, oneshot};
-use tracing::{debug, error, info, warn};
+use tokio::time::timeout;
+use tracing::{debug, error, info};

 use crate::auth::AuthError;
 use crate::auth::backend::ComputeUserInfo;
+use crate::batch::{BatchQueue, QueueProcessing};
 use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::ControlPlaneApi;
@@ -27,46 +31,36 @@ use crate::redis::kv_ops::RedisKVClient;

 type IpSubnetKey = IpNet;

-const CANCEL_KEY_TTL: i64 = 1_209_600; // 2 weeks cancellation key expire time
+const CANCEL_KEY_TTL: std::time::Duration = std::time::Duration::from_secs(600);
+const CANCEL_KEY_REFRESH: std::time::Duration = std::time::Duration::from_secs(570);

 // Message types for sending through mpsc channel
 pub enum CancelKeyOp {
    StoreCancelKey {
-        key: String,
-        field: String,
-        value: String,
-        resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
-        _guard: CancelChannelSizeGuard<'static>,
-        expire: i64, // TTL for key
+        key: CancelKeyData,
+        value: Box<str>,
+        expire: std::time::Duration,
    },
    GetCancelData {
-        key: String,
-        resp_tx: oneshot::Sender<anyhow::Result<Vec<(String, String)>>>,
-        _guard: CancelChannelSizeGuard<'static>,
-    },
-    RemoveCancelKey {
-        key: String,
-        field: String,
-        resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
-        _guard: CancelChannelSizeGuard<'static>,
+        key: CancelKeyData,
    },
 }

 pub struct Pipeline {
    inner: redis::Pipeline,
-    replies: Vec<CancelReplyOp>,
+    replies: usize,
 }

 impl Pipeline {
    fn with_capacity(n: usize) -> Self {
        Self {
            inner: redis::Pipeline::with_capacity(n),
-            replies: Vec::with_capacity(n),
+            replies: 0,
        }
    }

-    async fn execute(&mut self, client: &mut RedisKVClient) {
-        let responses = self.replies.len();
+    async fn execute(self, client: &mut RedisKVClient) -> Vec<anyhow::Result<Value>> {
+        let responses = self.replies;
        let batch_size = self.inner.len();

        match client.query(&self.inner).await {
@@ -76,176 +70,73 @@ impl Pipeline {
                    batch_size,
                    responses, "successfully completed cancellation jobs",
                );
-                for (value, reply) in std::iter::zip(values, self.replies.drain(..)) {
-                    reply.send_value(value);
-                }
+                values.into_iter().map(Ok).collect()
            }
            Ok(value) => {
                error!(batch_size, ?value, "unexpected redis return value");
-                for reply in self.replies.drain(..) {
-                    reply.send_err(anyhow!("incorrect response type from redis"));
-                }
+                std::iter::repeat_with(|| Err(anyhow!("incorrect response type from redis")))
+                    .take(responses)
+                    .collect()
            }
            Err(err) => {
-                for reply in self.replies.drain(..) {
-                    reply.send_err(anyhow!("could not send cmd to redis: {err}"));
-                }
+                std::iter::repeat_with(|| Err(anyhow!("could not send cmd to redis: {err}")))
+                    .take(responses)
+                    .collect()
            }
        }
-
-        self.inner.clear();
-        self.replies.clear();
    }

-    fn add_command_with_reply(&mut self, cmd: Cmd, reply: CancelReplyOp) {
+    fn add_command_with_reply(&mut self, cmd: Cmd) {
        self.inner.add_command(cmd);
-        self.replies.push(reply);
+        self.replies += 1;
    }

    fn add_command_no_reply(&mut self, cmd: Cmd) {
        self.inner.add_command(cmd).ignore();
    }
-
-    fn add_command(&mut self, cmd: Cmd, reply: Option<CancelReplyOp>) {
-        match reply {
-            Some(reply) => self.add_command_with_reply(cmd, reply),
-            None => self.add_command_no_reply(cmd),
-        }
-    }
 }

 impl CancelKeyOp {
-    fn register(self, pipe: &mut Pipeline) {
+    fn register(&self, pipe: &mut Pipeline) {
        #[allow(clippy::used_underscore_binding)]
        match self {
-            CancelKeyOp::StoreCancelKey {
-                key,
-                field,
-                value,
-                resp_tx,
-                _guard,
-                expire,
-            } => {
-                let reply =
-                    resp_tx.map(|resp_tx| CancelReplyOp::StoreCancelKey { resp_tx, _guard });
-                pipe.add_command(Cmd::hset(&key, field, value), reply);
-                pipe.add_command_no_reply(Cmd::expire(key, expire));
+            CancelKeyOp::StoreCancelKey { key, value, expire } => {
+                let key = KeyPrefix::Cancel(*key).build_redis_key();
+                pipe.add_command_with_reply(Cmd::hset(&key, "data", &**value));
+                pipe.add_command_no_reply(Cmd::expire(&key, expire.as_secs() as i64));
            }
-            CancelKeyOp::GetCancelData {
-                key,
-                resp_tx,
-                _guard,
-            } => {
-                let reply = CancelReplyOp::GetCancelData { resp_tx, _guard };
-                pipe.add_command_with_reply(Cmd::hgetall(key), reply);
-            }
-            CancelKeyOp::RemoveCancelKey {
-                key,
-                field,
-                resp_tx,
-                _guard,
-            } => {
-                let reply =
-                    resp_tx.map(|resp_tx| CancelReplyOp::RemoveCancelKey { resp_tx, _guard });
-                pipe.add_command(Cmd::hdel(key, field), reply);
+            CancelKeyOp::GetCancelData { key } => {
+                let key = KeyPrefix::Cancel(*key).build_redis_key();
+                pipe.add_command_with_reply(Cmd::hget(key, "data"));
            }
        }
    }
 }

-// Message types for sending through mpsc channel
-pub enum CancelReplyOp {
-    StoreCancelKey {
-        resp_tx: oneshot::Sender<anyhow::Result<()>>,
-        _guard: CancelChannelSizeGuard<'static>,
-    },
-    GetCancelData {
-        resp_tx: oneshot::Sender<anyhow::Result<Vec<(String, String)>>>,
-        _guard: CancelChannelSizeGuard<'static>,
-    },
-    RemoveCancelKey {
-        resp_tx: oneshot::Sender<anyhow::Result<()>>,
-        _guard: CancelChannelSizeGuard<'static>,
-    },
+pub struct CancellationProcessor {
+    pub client: RedisKVClient,
+    pub batch_size: usize,
 }

-impl CancelReplyOp {
-    fn send_err(self, e: anyhow::Error) {
-        match self {
-            CancelReplyOp::StoreCancelKey { resp_tx, _guard } => {
-                resp_tx
-                    .send(Err(e))
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-            CancelReplyOp::GetCancelData { resp_tx, _guard } => {
-                resp_tx
-                    .send(Err(e))
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-            CancelReplyOp::RemoveCancelKey { resp_tx, _guard } => {
-                resp_tx
-                    .send(Err(e))
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-        }
+impl QueueProcessing for CancellationProcessor {
+    type Req = (CancelChannelSizeGuard<'static>, CancelKeyOp);
+    type Res = anyhow::Result<redis::Value>;
+
+    fn batch_size(&self, _queue_size: usize) -> usize {
+        self.batch_size
    }

-    fn send_value(self, v: redis::Value) {
-        match self {
-            CancelReplyOp::StoreCancelKey { resp_tx, _guard } => {
-                let send =
-                    FromRedisValue::from_owned_redis_value(v).context("could not parse value");
-                resp_tx
-                    .send(send)
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-            CancelReplyOp::GetCancelData { resp_tx, _guard } => {
-                let send =
-                    FromRedisValue::from_owned_redis_value(v).context("could not parse value");
-                resp_tx
-                    .send(send)
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-            CancelReplyOp::RemoveCancelKey { resp_tx, _guard } => {
-                let send =
-                    FromRedisValue::from_owned_redis_value(v).context("could not parse value");
-                resp_tx
-                    .send(send)
-                    .inspect_err(|_| tracing::debug!("could not send reply"))
-                    .ok();
-            }
-        }
-    }
-}
-
-// Running as a separate task to accept messages through the rx channel
-pub async fn handle_cancel_messages(
-    client: &mut RedisKVClient,
-    mut rx: mpsc::Receiver<CancelKeyOp>,
-    batch_size: usize,
-) -> anyhow::Result<()> {
-    let mut batch = Vec::with_capacity(batch_size);
-    let mut pipeline = Pipeline::with_capacity(batch_size);
-
-    loop {
-        if rx.recv_many(&mut batch, batch_size).await == 0 {
-            warn!("shutting down cancellation queue");
-            break Ok(());
-        }
+    async fn apply(&mut self, batch: Vec<Self::Req>) -> Vec<Self::Res> {
+        let mut pipeline = Pipeline::with_capacity(batch.len());

        let batch_size = batch.len();
        debug!(batch_size, "running cancellation jobs");

-        for msg in batch.drain(..) {
-            msg.register(&mut pipeline);
+        for (_, op) in &batch {
+            op.register(&mut pipeline);
        }

-        pipeline.execute(client).await;
+        pipeline.execute(&mut self.client).await
    }
 }

@@ -256,7 +147,7 @@ pub struct CancellationHandler {
    compute_config: &'static ComputeConfig,
    // rate limiter of cancellation requests
    limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
-    tx: Option<mpsc::Sender<CancelKeyOp>>, // send messages to the redis KV client task
+    tx: OnceLock<BatchQueue<CancellationProcessor>>, // send messages to the redis KV client task
 }

 #[derive(Debug, Error)]
@@ -296,13 +187,10 @@ impl ReportableError for CancelError {
 }

 impl CancellationHandler {
-    pub fn new(
-        compute_config: &'static ComputeConfig,
-        tx: Option<mpsc::Sender<CancelKeyOp>>,
-    ) -> Self {
+    pub fn new(compute_config: &'static ComputeConfig) -> Self {
        Self {
            compute_config,
-            tx,
+            tx: OnceLock::new(),
            limiter: Arc::new(std::sync::Mutex::new(
                LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
                    LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
@@ -312,7 +200,14 @@ impl CancellationHandler {
        }
    }

-    pub(crate) fn get_key(self: &Arc<Self>) -> Session {
+    pub fn init_tx(&self, queue: BatchQueue<CancellationProcessor>) {
+        self.tx
+            .set(queue)
+            .map_err(|_| {})
+            .expect("cancellation queue should be registered once");
+    }
+
+    pub(crate) fn get_key(self: Arc<Self>) -> Session {
        // we intentionally generate a random "backend pid" and "secret key" here.
        // we use the corresponding u64 as an identifier for the
        // actual endpoint+pid+secret for postgres/pgbouncer.
@@ -322,14 +217,10 @@ impl CancellationHandler {

        let key: CancelKeyData = rand::random();

-        let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
-        let redis_key = prefix_key.build_redis_key();
-
        debug!("registered new query cancellation key {key}");
        Session {
            key,
-            redis_key,
-            cancellation_handler: Arc::clone(self),
+            cancellation_handler: self,
        }
    }

@@ -337,62 +228,43 @@ impl CancellationHandler {
        &self,
        key: CancelKeyData,
    ) -> Result<Option<CancelClosure>, CancelError> {
-        let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
-        let redis_key = prefix_key.build_redis_key();
+        let guard = Metrics::get()
+            .proxy
+            .cancel_channel_size
+            .guard(RedisMsgKind::HGet);
+        let op = CancelKeyOp::GetCancelData { key };

-        let (resp_tx, resp_rx) = tokio::sync::oneshot::channel();
-        let op = CancelKeyOp::GetCancelData {
-            key: redis_key,
-            resp_tx,
-            _guard: Metrics::get()
-                .proxy
-                .cancel_channel_size
-                .guard(RedisMsgKind::HGetAll),
-        };
-
-        let Some(tx) = &self.tx else {
+        let Some(tx) = self.tx.get() else {
            tracing::warn!("cancellation handler is not available");
            return Err(CancelError::InternalError);
        };

-        tx.try_send(op)
+        const TIMEOUT: Duration = Duration::from_secs(5);
+        let result = timeout(TIMEOUT, tx.call((guard, op)))
+            .await
+            .map_err(|_| {
+                tracing::warn!("timed out waiting to receive GetCancelData response");
+                CancelError::RateLimit
+            })?
            .map_err(|e| {
-                tracing::warn!("failed to send GetCancelData for {key}: {e}");
-            })
-            .map_err(|()| CancelError::InternalError)?;
+                tracing::warn!("failed to receive GetCancelData response: {e}");
+                CancelError::InternalError
+            })?;

-        let result = resp_rx.await.map_err(|e| {
+        let cancel_state_str = String::from_owned_redis_value(result).map_err(|e| {
            tracing::warn!("failed to receive GetCancelData response: {e}");
            CancelError::InternalError
        })?;

-        let cancel_state_str: Option<String> = match result {
-            Ok(mut state) => {
-                if state.len() == 1 {
-                    Some(state.remove(0).1)
-                } else {
-                    tracing::warn!("unexpected number of entries in cancel state: {state:?}");
-                    return Err(CancelError::InternalError);
-                }
-            }
-            Err(e) => {
-                tracing::warn!("failed to receive cancel state from redis: {e}");
-                return Err(CancelError::InternalError);
-            }
-        };
+        let cancel_closure: CancelClosure =
+            serde_json::from_str(&cancel_state_str).map_err(|e| {
+                tracing::warn!("failed to deserialize cancel state: {e}");
+                CancelError::InternalError
+            })?;

-        let cancel_state: Option<CancelClosure> = match cancel_state_str {
-            Some(state) => {
-                let cancel_closure: CancelClosure = serde_json::from_str(&state).map_err(|e| {
-                    tracing::warn!("failed to deserialize cancel state: {e}");
-                    CancelError::InternalError
-                })?;
-                Some(cancel_closure)
-            }
-            None => None,
-        };
-        Ok(cancel_state)
+        Ok(Some(cancel_closure))
    }
+
    /// Try to cancel a running query for the corresponding connection.
    /// If the cancellation key is not found, it will be published to Redis.
    /// check_allowed - if true, check if the IP is allowed to cancel the query.
@@ -467,10 +339,10 @@ impl CancellationHandler {
 /// This should've been a [`std::future::Future`], but
 /// it's impossible to name a type of an unboxed future
 /// (we'd need something like `#![feature(type_alias_impl_trait)]`).
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CancelClosure {
    socket_addr: SocketAddr,
-    cancel_token: CancelToken,
+    cancel_token: RawCancelToken,
    hostname: String, // for pg_sni router
    user_info: ComputeUserInfo,
 }
@@ -478,7 +350,7 @@ pub struct CancelClosure {
 impl CancelClosure {
    pub(crate) fn new(
        socket_addr: SocketAddr,
-        cancel_token: CancelToken,
+        cancel_token: RawCancelToken,
        hostname: String,
        user_info: ComputeUserInfo,
    ) -> Self {
@@ -491,7 +363,7 @@ impl CancelClosure {
    }
    /// Cancels the query running on user's compute node.
    pub(crate) async fn try_cancel_query(
-        self,
+        &self,
        compute_config: &ComputeConfig,
    ) -> Result<(), CancelError> {
        let socket = TcpStream::connect(self.socket_addr).await?;
@@ -512,7 +384,6 @@ impl CancelClosure {
 pub(crate) struct Session {
    /// The user-facing key identifying this session.
    key: CancelKeyData,
-    redis_key: String,
    cancellation_handler: Arc<CancellationHandler>,
 }

@@ -521,60 +392,66 @@ impl Session {
        &self.key
    }

-    // Send the store key op to the cancellation handler and set TTL for the key
-    pub(crate) fn write_cancel_key(
+    /// Ensure the cancel key is continously refreshed,
+    /// but stop when the channel is dropped.
+    pub(crate) async fn maintain_cancel_key(
        &self,
-        cancel_closure: CancelClosure,
-    ) -> Result<(), CancelError> {
-        let Some(tx) = &self.cancellation_handler.tx else {
-            tracing::warn!("cancellation handler is not available");
-            return Err(CancelError::InternalError);
-        };
+        session_id: uuid::Uuid,
+        cancel: tokio::sync::oneshot::Receiver<Infallible>,
+        cancel_closure: &CancelClosure,
+        compute_config: &ComputeConfig,
+    ) {
+        futures::future::select(
+            std::pin::pin!(self.maintain_redis_cancel_key(cancel_closure)),
+            cancel,
+        )
+        .await;

-        let closure_json = serde_json::to_string(&cancel_closure).map_err(|e| {
-            tracing::warn!("failed to serialize cancel closure: {e}");
-            CancelError::InternalError
-        })?;
-
-        let op = CancelKeyOp::StoreCancelKey {
-            key: self.redis_key.clone(),
-            field: "data".to_string(),
-            value: closure_json,
-            resp_tx: None,
-            _guard: Metrics::get()
-                .proxy
-                .cancel_channel_size
-                .guard(RedisMsgKind::HSet),
-            expire: CANCEL_KEY_TTL,
-        };
-
-        let _ = tx.try_send(op).map_err(|e| {
-            let key = self.key;
-            tracing::warn!("failed to send StoreCancelKey for {key}: {e}");
-        });
-        Ok(())
+        if let Err(err) = cancel_closure
+            .try_cancel_query(compute_config)
+            .boxed()
+            .await
+        {
+            tracing::warn!(
+                ?session_id,
+                ?err,
+                "could not cancel the query in the database"
+            );
+        }
    }

-    pub(crate) fn remove_cancel_key(&self) -> Result<(), CancelError> {
-        let Some(tx) = &self.cancellation_handler.tx else {
+    // Ensure the cancel key is continously refreshed.
+    async fn maintain_redis_cancel_key(&self, cancel_closure: &CancelClosure) -> ! {
+        let Some(tx) = self.cancellation_handler.tx.get() else {
            tracing::warn!("cancellation handler is not available");
-            return Err(CancelError::InternalError);
+            // don't exit, as we only want to exit if cancelled externally.
+            std::future::pending().await
        };

-        let op = CancelKeyOp::RemoveCancelKey {
-            key: self.redis_key.clone(),
-            field: "data".to_string(),
-            resp_tx: None,
-            _guard: Metrics::get()
+        let closure_json = serde_json::to_string(&cancel_closure)
+            .expect("serialising to json string should not fail")
+            .into_boxed_str();
+
+        loop {
+            let guard = Metrics::get()
                .proxy
                .cancel_channel_size
-                .guard(RedisMsgKind::HDel),
-        };
+                .guard(RedisMsgKind::HSet);
+            let op = CancelKeyOp::StoreCancelKey {
+                key: self.key,
+                value: closure_json.clone(),
+                expire: CANCEL_KEY_TTL,
+            };

-        let _ = tx.try_send(op).map_err(|e| {
-            let key = self.key;
-            tracing::warn!("failed to send RemoveCancelKey for {key}: {e}");
-        });
-        Ok(())
+            tracing::debug!(
+                src=%self.key,
+                dest=?cancel_closure.cancel_token,
+                "registering cancellation key"
+            );
+
+            if tx.call((guard, op)).await.is_ok() {
+                tokio::time::sleep(CANCEL_KEY_REFRESH).await;
+            }
+        }
    }
 }
--- a/proxy/src/compute/mod.rs
+++ b/proxy/src/compute/mod.rs
@@ -9,7 +9,7 @@ use itertools::Itertools;
 use postgres_client::config::{AuthKeys, SslMode};
 use postgres_client::maybe_tls_stream::MaybeTlsStream;
 use postgres_client::tls::MakeTlsConnect;
-use postgres_client::{CancelToken, NoTls, RawConnection};
+use postgres_client::{NoTls, RawCancelToken, RawConnection};
 use postgres_protocol::message::backend::NoticeResponseBody;
 use thiserror::Error;
 use tokio::net::{TcpStream, lookup_host};
@@ -265,7 +265,8 @@ impl ConnectInfo {
    }
 }

-type RustlsStream = <ComputeConfig as MakeTlsConnect<tokio::net::TcpStream>>::Stream;
+pub type RustlsStream = <ComputeConfig as MakeTlsConnect<tokio::net::TcpStream>>::Stream;
+pub type MaybeRustlsStream = MaybeTlsStream<tokio::net::TcpStream, RustlsStream>;

 pub(crate) struct PostgresConnection {
    /// Socket connected to a compute node.
@@ -279,7 +280,7 @@ pub(crate) struct PostgresConnection {
    /// Notices received from compute after authenticating
    pub(crate) delayed_notice: Vec<NoticeResponseBody>,

-    _guage: NumDbConnectionsGuard<'static>,
+    pub(crate) guage: NumDbConnectionsGuard<'static>,
 }

 impl ConnectInfo {
@@ -327,8 +328,7 @@ impl ConnectInfo {
        // Yet another reason to rework the connection establishing code.
        let cancel_closure = CancelClosure::new(
            socket_addr,
-            CancelToken {
-                socket_config: None,
+            RawCancelToken {
                ssl_mode: self.ssl_mode,
                process_id,
                secret_key,
@@ -343,7 +343,7 @@ impl ConnectInfo {
            delayed_notice,
            cancel_closure,
            aux,
-            _guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),
+            guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),
        };

        Ok(connection)
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -120,7 +120,7 @@ pub async fn task_main(
                Ok(Some(p)) => {
                    ctx.set_success();
                    let _disconnect = ctx.log_connect();
-                    match p.proxy_pass(&config.connect_to_compute).await {
+                    match p.proxy_pass().await {
                        Ok(()) => {}
                        Err(ErrorSource::Client(e)) => {
                            error!(
@@ -232,22 +232,35 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    .or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
    .await?;

-    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-    let session = cancellation_handler_clone.get_key();
-
-    session.write_cancel_key(node.cancel_closure.clone())?;
+    let session = cancellation_handler.get_key();

    prepare_client_connection(&node, *session.key(), &mut stream);
    let stream = stream.flush_and_into_inner().await?;

+    let session_id = ctx.session_id();
+    let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();
+    tokio::spawn(async move {
+        session
+            .maintain_cancel_key(
+                session_id,
+                cancel,
+                &node.cancel_closure,
+                &config.connect_to_compute,
+            )
+            .await;
+    });
+
    Ok(Some(ProxyPassthrough {
        client: stream,
-        aux: node.aux.clone(),
+        compute: node.stream,
+
+        aux: node.aux,
        private_link_id: None,
-        compute: node,
-        session_id: ctx.session_id(),
-        cancel: session,
+
+        _cancel_on_shutdown: cancel_on_shutdown,
+
        _req: request_gauge,
        _conn: conn_gauge,
+        _db_conn: node.guage,
    }))
 }
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -75,6 +75,7 @@
 pub mod binary;

 mod auth;
+mod batch;
 mod cache;
 mod cancellation;
 mod compute;
--- a/proxy/src/pglb/passthrough.rs
+++ b/proxy/src/pglb/passthrough.rs
@@ -1,15 +1,17 @@
-use futures::FutureExt;
+use std::convert::Infallible;
+
 use smol_str::SmolStr;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::debug;
 use utils::measured_stream::MeasuredStream;

 use super::copy_bidirectional::ErrorSource;
-use crate::cancellation;
-use crate::compute::PostgresConnection;
-use crate::config::ComputeConfig;
+use crate::compute::MaybeRustlsStream;
 use crate::control_plane::messages::MetricsAuxInfo;
-use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
+use crate::metrics::{
+    Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard,
+    NumDbConnectionsGuard,
+};
 use crate::stream::Stream;
 use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};

@@ -64,40 +66,20 @@ pub(crate) async fn proxy_pass(

 pub(crate) struct ProxyPassthrough<S> {
    pub(crate) client: Stream<S>,
-    pub(crate) compute: PostgresConnection,
+    pub(crate) compute: MaybeRustlsStream,
+
    pub(crate) aux: MetricsAuxInfo,
-    pub(crate) session_id: uuid::Uuid,
    pub(crate) private_link_id: Option<SmolStr>,
-    pub(crate) cancel: cancellation::Session,
+
+    pub(crate) _cancel_on_shutdown: tokio::sync::oneshot::Sender<Infallible>,

    pub(crate) _req: NumConnectionRequestsGuard<'static>,
    pub(crate) _conn: NumClientConnectionsGuard<'static>,
+    pub(crate) _db_conn: NumDbConnectionsGuard<'static>,
 }

 impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
-    pub(crate) async fn proxy_pass(
-        self,
-        compute_config: &ComputeConfig,
-    ) -> Result<(), ErrorSource> {
-        let res = proxy_pass(
-            self.client,
-            self.compute.stream,
-            self.aux,
-            self.private_link_id,
-        )
-        .await;
-        if let Err(err) = self
-            .compute
-            .cancel_closure
-            .try_cancel_query(compute_config)
-            .boxed()
-            .await
-        {
-            tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
-        }
-
-        drop(self.cancel.remove_cancel_key()); // we don't need a result. If the queue is full, we just log the error
-
-        res
+    pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {
+        proxy_pass(self.client, self.compute, self.aux, self.private_link_id).await
    }
 }
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -155,7 +155,7 @@ pub async fn task_main(
                Ok(Some(p)) => {
                    ctx.set_success();
                    let _disconnect = ctx.log_connect();
-                    match p.proxy_pass(&config.connect_to_compute).await {
+                    match p.proxy_pass().await {
                        Ok(()) => {}
                        Err(ErrorSource::Client(e)) => {
                            warn!(
@@ -372,13 +372,24 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

-    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-    let session = cancellation_handler_clone.get_key();
+    let session = cancellation_handler.get_key();

-    session.write_cancel_key(node.cancel_closure.clone())?;
    prepare_client_connection(&node, *session.key(), &mut stream);
    let stream = stream.flush_and_into_inner().await?;

+    let session_id = ctx.session_id();
+    let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();
+    tokio::spawn(async move {
+        session
+            .maintain_cancel_key(
+                session_id,
+                cancel,
+                &node.cancel_closure,
+                &config.connect_to_compute,
+            )
+            .await;
+    });
+
    let private_link_id = match ctx.extra() {
        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
@@ -387,13 +398,16 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(

    Ok(Some(ProxyPassthrough {
        client: stream,
-        aux: node.aux.clone(),
+        compute: node.stream,
+
+        aux: node.aux,
        private_link_id,
-        compute: node,
-        session_id: ctx.session_id(),
-        cancel: session,
+
+        _cancel_on_shutdown: cancel_on_shutdown,
+
        _req: request_gauge,
        _conn: conn_gauge,
+        _db_conn: node.guage,
    }))
 }

--- a/proxy/src/redis/keys.rs
+++ b/proxy/src/redis/keys.rs
@@ -1,8 +1,4 @@
-use std::io::ErrorKind;
-
-use anyhow::Ok;
-
-use crate::pqproto::{CancelKeyData, id_to_cancel_key};
+use crate::pqproto::CancelKeyData;

 pub mod keyspace {
    pub const CANCEL_PREFIX: &str = "cancel";
@@ -23,39 +19,12 @@ impl KeyPrefix {
            }
        }
    }
-
-    #[allow(dead_code)]
-    pub(crate) fn as_str(&self) -> &'static str {
-        match self {
-            KeyPrefix::Cancel(_) => keyspace::CANCEL_PREFIX,
-        }
-    }
-}
-
-#[allow(dead_code)]
-pub(crate) fn parse_redis_key(key: &str) -> anyhow::Result<KeyPrefix> {
-    let (prefix, key_str) = key.split_once(':').ok_or_else(|| {
-        anyhow::anyhow!(std::io::Error::new(
-            ErrorKind::InvalidData,
-            "missing prefix"
-        ))
-    })?;
-
-    match prefix {
-        keyspace::CANCEL_PREFIX => {
-            let id = u64::from_str_radix(key_str, 16)?;
-
-            Ok(KeyPrefix::Cancel(id_to_cancel_key(id)))
-        }
-        _ => Err(anyhow::anyhow!(std::io::Error::new(
-            ErrorKind::InvalidData,
-            "unknown prefix"
-        ))),
-    }
 }

 #[cfg(test)]
 mod tests {
+    use crate::pqproto::id_to_cancel_key;
+
    use super::*;

    #[test]
@@ -65,16 +34,4 @@ mod tests {
        let redis_key = cancel_key.build_redis_key();
        assert_eq!(redis_key, "cancel:30390000d431");
    }
-
-    #[test]
-    fn test_parse_redis_key() {
-        let redis_key = "cancel:30390000d431";
-        let key: KeyPrefix = parse_redis_key(redis_key).expect("Failed to parse key");
-
-        let ref_key = id_to_cancel_key(12345 << 32 | 54321);
-
-        assert_eq!(key.as_str(), KeyPrefix::Cancel(ref_key).as_str());
-        let KeyPrefix::Cancel(cancel_key) = key;
-        assert_eq!(ref_key, cancel_key);
-    }
 }
--- a/proxy/src/redis/kv_ops.rs
+++ b/proxy/src/redis/kv_ops.rs
@@ -1,3 +1,6 @@
+use std::time::Duration;
+
+use futures::FutureExt;
 use redis::aio::ConnectionLike;
 use redis::{Cmd, FromRedisValue, Pipeline, RedisResult};

@@ -35,14 +38,11 @@ impl RedisKVClient {
    }

    pub async fn try_connect(&mut self) -> anyhow::Result<()> {
-        match self.client.connect().await {
-            Ok(()) => {}
-            Err(e) => {
-                tracing::error!("failed to connect to redis: {e}");
-                return Err(e);
-            }
-        }
-        Ok(())
+        self.client
+            .connect()
+            .boxed()
+            .await
+            .inspect_err(|e| tracing::error!("failed to connect to redis: {e}"))
    }

    pub(crate) async fn query<T: FromRedisValue>(
@@ -54,15 +54,25 @@ impl RedisKVClient {
            return Err(anyhow::anyhow!("Rate limit exceeded"));
        }

-        match q.query(&mut self.client).await {
+        let e = match q.query(&mut self.client).await {
            Ok(t) => return Ok(t),
-            Err(e) => {
-                tracing::error!("failed to run query: {e}");
+            Err(e) => e,
+        };
+
+        tracing::error!("failed to run query: {e}");
+        match e.retry_method() {
+            redis::RetryMethod::Reconnect => {
+                tracing::info!("Redis client is disconnected. Reconnecting...");
+                self.try_connect().await?;
            }
+            redis::RetryMethod::RetryImmediately => {}
+            redis::RetryMethod::WaitAndRetry => {
+                // somewhat arbitrary.
+                tokio::time::sleep(Duration::from_millis(100)).await;
+            }
+            _ => Err(e)?,
        }

-        tracing::info!("Redis client is disconnected. Reconnecting...");
-        self.try_connect().await?;
        Ok(q.query(&mut self.client).await?)
    }
 }
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -167,7 +167,7 @@ pub(crate) async fn serve_websocket(
        Ok(Some(p)) => {
            ctx.set_success();
            ctx.log_connect();
-            match p.proxy_pass(&config.connect_to_compute).await {
+            match p.proxy_pass().await {
                Ok(()) => Ok(()),
                Err(ErrorSource::Client(err)) => Err(err).context("client"),
                Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
--- a/storage_controller/src/main.rs
+++ b/storage_controller/src/main.rs
@@ -207,6 +207,12 @@ struct Cli {
    /// the compute notification directly (instead of via control plane).
    #[arg(long, default_value = "false")]
    use_local_compute_notifications: bool,
+
+    /// Number of safekeepers to choose for a timeline when creating it.
+    /// Safekeepers will be choosen from different availability zones.
+    /// This option exists primarily for testing purposes.
+    #[arg(long, default_value = "3", value_parser = clap::value_parser!(i64).range(1..))]
+    timeline_safekeeper_count: i64,
 }

 enum StrictMode {
@@ -371,6 +377,11 @@ async fn async_main() -> anyhow::Result<()> {
        StrictMode::Strict if args.use_local_compute_notifications => {
            anyhow::bail!("`--use-local-compute-notifications` is only permitted in `--dev` mode");
        }
+        StrictMode::Strict if args.timeline_safekeeper_count < 3 => {
+            anyhow::bail!(
+                "Running with less than 3 safekeepers per timeline is only permitted in `--dev` mode"
+            );
+        }
        StrictMode::Strict => {
            tracing::info!("Starting in strict mode: configuration is OK.")
        }
@@ -433,6 +444,7 @@ async fn async_main() -> anyhow::Result<()> {
        ssl_ca_certs,
        timelines_onto_safekeepers: args.timelines_onto_safekeepers,
        use_local_compute_notifications: args.use_local_compute_notifications,
+        timeline_safekeeper_count: args.timeline_safekeeper_count,
    };

    // Validate that we can connect to the database
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -466,6 +466,10 @@ pub struct Config {
    pub timelines_onto_safekeepers: bool,

    pub use_local_compute_notifications: bool,
+
+    /// Number of safekeepers to choose for a timeline when creating it.
+    /// Safekeepers will be choosen from different availability zones.
+    pub timeline_safekeeper_count: i64,
 }

 impl From<DatabaseError> for ApiError {
--- a/storage_controller/src/service/safekeeper_service.rs
+++ b/storage_controller/src/service/safekeeper_service.rs
@@ -1,3 +1,4 @@
+use std::cmp::max;
 use std::collections::HashSet;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -608,7 +609,8 @@ impl Service {
        Ok(())
    }

-    /// Choose safekeepers for the new timeline: 3 in different azs.
+    /// Choose safekeepers for the new timeline in different azs.
+    /// 3 are choosen by default, but may be configured via config (for testing).
    pub(crate) async fn safekeepers_for_new_timeline(
        &self,
    ) -> Result<Vec<SafekeeperInfo>, ApiError> {
@@ -651,18 +653,14 @@ impl Service {
            )
        });
        // Number of safekeepers in different AZs we are looking for
-        let wanted_count = match all_safekeepers.len() {
-            0 => {
-                return Err(ApiError::InternalServerError(anyhow::anyhow!(
-                    "couldn't find any active safekeeper for new timeline",
-                )));
-            }
-            // Have laxer requirements on testig mode as we don't want to
-            // spin up three safekeepers for every single test
-            #[cfg(feature = "testing")]
-            1 | 2 => all_safekeepers.len(),
-            _ => 3,
-        };
+        let mut wanted_count = self.config.timeline_safekeeper_count as usize;
+        // TODO(diko): remove this when `timeline_safekeeper_count` option is in the release
+        // branch and is specified in tests/neon_local config.
+        if cfg!(feature = "testing") && all_safekeepers.len() < wanted_count {
+            // In testing mode, we can have less safekeepers than the config says
+            wanted_count = max(all_safekeepers.len(), 1);
+        }
+
        let mut sks = Vec::new();
        let mut azs = HashSet::new();
        for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -24,7 +24,7 @@ The value to place in the `aud` claim.

@final
 class ComputeClaimsScope(StrEnum):
-    ADMIN = "admin"
+    ADMIN = "compute_ctl:admin"


@final
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -184,7 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
        "timeline_offloading": False,
        "rel_size_v2_enabled": True,
        "relsize_snapshot_cache_capacity": 10000,
-        "gc_compaction_enabled": True,
+        "gc_compaction_enabled": False,
        "gc_compaction_verification": False,
        "gc_compaction_initial_threshold_kb": 1024000,
        "gc_compaction_ratio_percent": 200,
--- a/test_runner/regress/test_pg_regress.py
+++ b/test_runner/regress/test_pg_regress.py
@@ -173,7 +173,11 @@ def test_pg_regress(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
-    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
@@ -250,7 +254,11 @@ def test_isolation(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_isolation_regress will need.
-    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/isolation"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "isolation_schedule"
@@ -314,8 +322,11 @@ def test_sql_regress(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
-    # This test runs neon specific tests
-    build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / "test_runner/sql_regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
--- a/test_runner/regress/test_s3_restore.py
+++ b/test_runner/regress/test_s3_restore.py
@@ -74,7 +74,7 @@ def test_tenant_s3_restore(
            last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
            last_flush_lsns.append(last_flush_lsn)
        ps_http.timeline_checkpoint(tenant_id, timeline_id)
-        wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
+        wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn, timeout=60)
        log.info(f"{timeline} timeline {timeline_id} {last_flush_lsn=}")
        parent = timeline

--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -13,6 +13,6 @@
  ],
  "v14": [
    "14.18",
-    "6770bc251301ef40c66f7ecb731741dc435b5051"
+    "9085654ee8022d5cc4ca719380a1dc53e5e3246f"
  ]
 }
Author	SHA1	Message	Date
Elizabeth Murray	2ff2eb6a9e	Merge branch 'main' into elizabeth/connection-pool-with-tests	2025-06-22 14:45:44 -07:00
Heikki Linnakangas	3d822dbbde	Refactor Makefile rules for building the extensions under pgxn/ (#12305 )	2025-06-22 19:43:14 +00:00
Heikki Linnakangas	af46b5286f	Avoid recompiling `postgres_ffi` when there has been no changes (#12292 ) Every time you run `make`, it runs `make install` on all the PostgreSQL sources, which copies the header files. That in turn triggers a rebuild of the `postgres_ffi` crate, and everything that depends on it. We had worked around this earlier (see #2458), by passing a custom INSTALL script to the Postgres makefiles, which refrains from updating the modification timestamp on headers when they have not been changed, but the v14 makefile didn't obey INSTALL for the header files. Backporting c0a1d7621b to v14 fixes that. This backports upstream PostgreSQL commit c0a1d7621b to v14. Corresponding PR in the 'postgres' repo: https://github.com/neondatabase/postgres/pull/660	2025-06-21 21:07:38 +00:00
Erik Grinaker	47f7efee06	pageserver: require stripe size (#12257 ) ## Problem In #12217, we began passing the stripe size in reattach responses, and persisting it in the on-disk state. This is necessary to ensure the storage controller and Pageserver have a consistent view of the intended stripe size of unsharded tenants, which will be used for splits that do not specify a stripe size. However, for backwards compatibility, these stripe sizes were optional. ## Summary of changes Make the stripe sizes required for reattach responses and on-disk location configs. These will always be provided by the previous (current) release.	2025-06-21 15:01:29 +00:00
Elizabeth Murray	bdfc6d3ef9	Merge branch 'main' into elizabeth/connection-pool-with-tests	2025-06-20 16:32:26 -07:00
Elizabeth Murray	f47e90fd42	Add version strings to new modules.	2025-06-20 16:15:13 -07:00
Tristan Partin	868c38f522	Rename the compute_ctl admin scope to compute_ctl:admin (#12263 ) Signed-off-by: Tristan Partin <tristan@neon.tech>	2025-06-20 22:49:05 +00:00
Elizabeth Murray	9cc79672f3	Fix cargo deny check error, to see if this fixes Cargo.lock CI issue.	2025-06-20 15:00:09 -07:00
Elizabeth Murray	4a9b1ad5cb	Merge branch 'main' into elizabeth/connection-pool-with-tests	2025-06-20 11:59:58 -07:00
Tristan Partin	c8b2ac93cf	Allow the control plane to override any Postgres connection options (#12262 ) The previous behavior was for the compute to override control plane options if there was a conflict. We want to change the behavior so that the control plane has the absolute power on what is right. In the event that we need a new option passed to the compute as soon as possible, we can initially roll it out in the control plane, and then migrate the option to EXTRA_OPTIONS within the compute later, for instance. Signed-off-by: Tristan Partin <tristan@neon.tech>	2025-06-20 18:46:30 +00:00
Elizabeth Murray	dc4238896a	Regenerate Cargo.lock as it is producing Dockerfile errors.	2025-06-20 11:45:41 -07:00
Elizabeth Murray	e1fa844da4	Clippy updates, add Cargo.lock.	2025-06-20 10:18:17 -07:00
Elizabeth Murray	c8a2612207	Add workspace license and version.	2025-06-20 09:48:10 -07:00
Elizabeth Murray	b6e89a3af8	Run clippy.	2025-06-20 09:42:11 -07:00
Dmitrii Kovalkov	b2954d16ff	storcon, neon_local: add timeline_safekeeper_count (#12303 ) ## Problem We need to specify the number of safekeepers for neon_local without `testing` feature. Also we need this option for testing different configurations of safekeeper migration code. We cannot set it in `neon_fixtures.py` and in the default config of `neon_local` yet, because it will fail compatibility tests. I'll make a separate PR with removing `cfg!("testing")` completely and specifying this option in the config when this option reaches the release branch. - Part of https://github.com/neondatabase/neon/issues/12298 ## Summary of changes - Add `timeline_safekeeper_count` config option to storcon and neon_local	2025-06-20 16:03:17 +00:00
Alex Chi Z.	79485e7c3a	feat(pageserver): enable gc-compaction by default everywhere (#12105 ) Enable it across tests and set it as default. Marks the first milestone of https://github.com/neondatabase/neon/issues/9114. We already enabled it in all AWS regions and planning to enable it in all Azure regions next week. will merge after we roll out in all regions. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-06-20 15:35:11 +00:00
Elizabeth Murray	261a9ae093	Run cargo fmt.	2025-06-20 08:28:40 -07:00
Elizabeth Murray	cac4ee8ea3	Merge branch 'main' into elizabeth/connection-pool-with-tests	2025-06-20 08:27:57 -07:00
Heikki Linnakangas	eaf1ab21c4	Store intermediate build files in `build/` rather than `pg_install/build/` (#12295 ) This way, `pg_install` contains only the final build artifacts, not intermediate files like *.o files. Seems cleaner.	2025-06-20 14:50:03 +00:00
Vlad Lazar	6508f4e5c1	pageserver: revise gc layer map lock handling (#12290 ) ## Problem Timeline GC is very aggressive with regards to layer map locking. We've seen timelines with loads of layers in production that hold the write lock for the layer map for 30 minutes at a time. This blocks reads and the write path to some extent. ## Summary of changes Determining the set of layers to GC is done under the read lock. Applying the updates is done under the write lock. Previously, everything was done under write lock.	2025-06-20 11:57:30 +00:00
Conrad Ludgate	a298d2c29b	[proxy] replace the batch cancellation queue, shorten the TTL for cancel keys (#11943 ) See #11942 Idea: * if connections are short lived, they can get enqueued and then also remove themselves later if they never made it to redis. This reduces the load on the queue. * short lived connections (<10m, most?) will only issue 1 command, we remove the delete command and rely on ttl. * we can enqueue as many commands as we want, as we can always cancel the enqueue, thanks to the ~~intrusive linked lists~~ `BTreeMap`.	2025-06-20 11:48:01 +00:00
Arpad Müller	8b197de7ff	Increase upload timeout for test_tenant_s3_restore (#12297 ) Increase the upload timeout of the test to avoid hitting timeouts (which we sometimes do). Fixes https://github.com/neondatabase/neon/issues/12212	2025-06-20 10:33:11 +00:00
Erik Grinaker	15d079cd41	pagebench: improve `getpage-latest-lsn` gRPC support (#12293 ) This improves `pagebench getpage-latest-lsn` gRPC support by: * Using `page_api::Client`. * Removing `--protocol`, and using the `page-server-connstring` scheme instead. * Adding `--compression` to enable zstd compression.	2025-06-20 08:31:40 +00:00
Elizabeth Murray	7636c4085a	Add initial skeleton for client cache code, and request tracker.	2025-06-19 14:50:23 -07:00