refactor: lift inspection of CompactionError::Other(x) => x.root_cause() into CompactionError::is_cancel

There are a couple of places that call CompactionError::is_cancel but don't check the Other variant for its root cause. They should, because some cancellations are observed by code that results in ::Other errors. I don't think there's a _serious_ case where this causes problems. The worst case is the circuit breaker, which we currently trip on ::Other errors that are due to cancellation. A tripped circuit breaker on a timeline that is shutting down doesn't really matter in practice, but it's unaesthetic and might cause noise down the line, so this PR fixes at least that. In any case, this PR forces future callers of is_cancel() to explicitly recognize the suboptimal state of affairs wrt error handling in compaction, thereby hopefully preventing errors of this kind from creeping in. (The _right_ solution for the compaction code is probably the approach I took in #11853: keep using anyhow but have a unified way / pattern of bubbling up cancellation, so that we don't need to perform the downcast trickery.)
refactor: force explicit mapping to CreateImageLayersError::Other
2026-05-17 13:10:38 +00:00 · 2025-06-27 14:10:24 +02:00 · 2025-06-27 13:31:49 +02:00 · 2025-06-27 13:17:19 +02:00 · 2025-06-04 10:44:23 +00:00 · 2025-06-04 09:57:31 +00:00
57 changed files with 2156 additions and 543 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4236,6 +4236,7 @@ name = "pagebench"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "async-trait",
 "camino",
 "clap",
 "futures",
@@ -4244,12 +4245,15 @@ dependencies = [
 "humantime-serde",
 "pageserver_api",
 "pageserver_client",
+ "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
 "serde",
 "serde_json",
 "tokio",
+ "tokio-stream",
 "tokio-util",
+ "tonic 0.13.1",
 "tracing",
 "utils",
 "workspace_hack",
@@ -4305,6 +4309,7 @@ dependencies = [
 "hashlink",
 "hex",
 "hex-literal",
+ "http 1.1.0",
 "http-utils",
 "humantime",
 "humantime-serde",
@@ -4367,6 +4372,7 @@ dependencies = [
 "toml_edit",
 "tonic 0.13.1",
 "tonic-reflection",
+ "tower 0.5.2",
 "tracing",
 "tracing-utils",
 "twox-hash",
@@ -4463,7 +4469,6 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "prost 0.13.5",
- "smallvec",
 "thiserror 1.0.69",
 "tonic 0.13.1",
 "tonic-build",
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -297,6 +297,7 @@ RUN ./autogen.sh && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    make staged-install && \
    cd extensions/postgis && \
    make clean && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -1180,14 +1181,14 @@ RUN cd exts/rag && \
 RUN cd exts/rag_bge_small_en_v15 && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
-        REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
+        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/bge_small_en_v15.onnx \
        cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control

 RUN cd exts/rag_jina_reranker_v1_tiny_en && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
-        REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
+        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/jina_reranker_v1_tiny_en.onnx \
        cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control

@@ -1842,10 +1843,25 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute

 FROM pg-build AS extension-tests
 ARG PG_VERSION
+# This is required for the PostGIS test
+RUN apt-get update && case $DEBIAN_VERSION in \
+      bullseye) \
+        apt-get install -y libproj19 libgdal28 time; \
+      ;; \
+      bookworm) \
+        apt-get install -y libgdal32 libproj25 time; \
+      ;; \
+      *) \
+        echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
+      ;; \
+    esac
+
 COPY docker-compose/ext-src/ /ext-src/

 COPY --from=pg-build /postgres /postgres
-#COPY --from=postgis-src /ext-src/ /ext-src/
+COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=postgis-build /ext-src/postgis-src /ext-src/postgis-src
+COPY --from=postgis-build /sfcgal/* /usr
 COPY --from=plv8-src /ext-src/ /ext-src/
 COPY --from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src
 COPY --from=postgresql-unit-src /ext-src/ /ext-src/
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -396,7 +396,7 @@ impl ComputeNode {
        // because QEMU will already have its memory allocated from the host, and
        // the necessary binaries will already be cached.
        if cli_spec.is_none() {
-            this.prewarm_postgres()?;
+            this.prewarm_postgres_vm_memory()?;
        }

        // Set the up metric with Empty status before starting the HTTP server.
@@ -779,7 +779,7 @@ impl ComputeNode {
        // Spawn the extension stats background task
        self.spawn_extension_stats_task();

-        if pspec.spec.prewarm_lfc_on_startup {
+        if pspec.spec.autoprewarm {
            self.prewarm_lfc();
        }
        Ok(())
@@ -1307,8 +1307,8 @@ impl ComputeNode {
    }

    /// Start and stop a postgres process to warm up the VM for startup.
-    pub fn prewarm_postgres(&self) -> Result<()> {
-        info!("prewarming");
+    pub fn prewarm_postgres_vm_memory(&self) -> Result<()> {
+        info!("prewarming VM memory");

        // Create pgdata
        let pgdata = &format!("{}.warmup", self.params.pgdata);
@@ -1350,7 +1350,7 @@ impl ComputeNode {
        kill(pm_pid, Signal::SIGQUIT)?;
        info!("sent SIGQUIT signal");
        pg.wait()?;
-        info!("done prewarming");
+        info!("done prewarming vm memory");

        // clean up
        let _ok = fs::remove_dir_all(pgdata);
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -48,11 +48,9 @@ impl JsonResponse {

    /// Create an error response related to the compute being in an invalid state
    pub(self) fn invalid_status(status: ComputeStatus) -> Response {
-        Self::create_response(
+        Self::error(
            StatusCode::PRECONDITION_FAILED,
-            &GenericAPIError {
-                error: format!("invalid compute status: {status}"),
-            },
+            format!("invalid compute status: {status}"),
        )
    }
 }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -30,7 +30,7 @@ mod pg_helpers_tests {
            r#"fsync = off
 wal_level = logical
 hot_standby = on
-prewarm_lfc_on_startup = off
+autoprewarm = off
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -747,7 +747,7 @@ impl Endpoint {
                logs_export_host: None::<String>,
                endpoint_storage_addr: Some(endpoint_storage_addr),
                endpoint_storage_token: Some(endpoint_storage_token),
-                prewarm_lfc_on_startup: false,
+                autoprewarm: false,
            };

            // this strange code is needed to support respec() in tests
--- a/docker-compose/compute_wrapper/Dockerfile
+++ b/docker-compose/compute_wrapper/Dockerfile
@@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
                       jq   \
                       netcat-openbsd
 #This is required for the pg_hintplan test
-RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
+RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src/ && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src

 USER postgres
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -186,13 +186,14 @@ services:

  neon-test-extensions:
    profiles: ["test-extensions"]
-    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
+    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-${PG_VERSION:-16}}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
    environment:
-      - PGPASSWORD=cloud_admin
+      - PGUSER=${PGUSER:-cloud_admin}
+      - PGPASSWORD=${PGPASSWORD:-cloud_admin}
    entrypoint:
      - "/bin/bash"
      - "-c"
    command:
-      - sleep 1800
+      - sleep 3600
    depends_on:
      - compute
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -54,6 +54,15 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
        echo Adding dummy config
        docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
+        # Prepare for the PostGIS test
+        docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
+        TMPDIR=$(mktemp -d)
+        docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
+        docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
+        docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
+        docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
+        docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
+        rm -rf "${TMPDIR}"
        # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
        TMPDIR=$(mktemp -d)
        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
@@ -68,7 +77,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
        # We are running tests now
        rm -f testout.txt testout_contrib.txt
-        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
        neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
        docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
        neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
--- a/docker-compose/ext-src/postgis-src/README-Neon.md
+++ b/docker-compose/ext-src/postgis-src/README-Neon.md
@@ -0,0 +1,70 @@
+# PostGIS Testing in Neon
+
+This directory contains configuration files and patches for running PostGIS tests in the Neon database environment.
+
+## Overview
+
+PostGIS is a spatial database extension for PostgreSQL that adds support for geographic objects. Testing PostGIS compatibility ensures that Neon's modifications to PostgreSQL don't break compatibility with this critical extension.
+
+## PostGIS Versions
+
+- PostgreSQL v17: PostGIS 3.5.0
+- PostgreSQL v14/v15/v16: PostGIS 3.3.3
+
+## Test Configuration
+
+The test setup includes:
+
+- `postgis-no-upgrade-test.patch`: Disables upgrade tests by removing the upgrade test section from regress/runtest.mk
+- `postgis-regular-v16.patch`: Version-specific patch for PostgreSQL v16
+- `postgis-regular-v17.patch`: Version-specific patch for PostgreSQL v17
+- `regular-test.sh`: Script to run PostGIS tests as a regular user
+- `neon-test.sh`: Script to handle version-specific test configurations
+- `raster_outdb_template.sql`: Template for raster tests with explicit file paths
+
+## Excluded Tests
+
+**Important Note:** The test exclusions listed below are specifically for regular-user tests against staging instances. These exclusions are necessary because staging instances run with limited privileges and cannot perform operations requiring superuser access. Docker-compose based tests are not affected by these exclusions.
+
+### Tests Requiring Superuser Permissions
+
+These tests cannot be run as a regular user:
+- `regress/core/estimatedextent`
+- `regress/core/legacy`
+- `regress/core/typmod`
+- `regress/loader/TestSkipANALYZE`
+- `regress/loader/TestANALYZE`
+
+### Tests Requiring Filesystem Access
+
+These tests need direct filesystem access that is only possible for superusers:
+- `raster/test/regress/loader/load_outdb`
+
+### Tests with Flaky Results
+
+These tests have assumptions that don't always hold true:
+- `regress/core/computed_columns` - Assumes computed columns always outperform alternatives, which is not consistently true
+
+### Tests Requiring Tunable Parameter Modifications
+
+These tests attempt to modify the `postgis.gdal_enabled_drivers` parameter, which is only accessible to superusers:
+- `raster/test/regress/rt_wkb`
+- `raster/test/regress/rt_addband`
+- `raster/test/regress/rt_setbandpath`
+- `raster/test/regress/rt_fromgdalraster`
+- `raster/test/regress/rt_asgdalraster`
+- `raster/test/regress/rt_astiff`
+- `raster/test/regress/rt_asjpeg`
+- `raster/test/regress/rt_aspng`
+- `raster/test/regress/permitted_gdal_drivers`
+- Loader tests: `BasicOutDB`, `Tiled10x10`, `Tiled10x10Copy`, `Tiled8x8`, `TiledAuto`, `TiledAutoSkipNoData`, `TiledAutoCopyn`
+
+### Topology Tests (v17 only)
+- `topology/test/regress/populate_topology_layer`
+- `topology/test/regress/renametopogeometrycolumn`
+
+## Other Modifications
+
+- `regress/core/binary.sql` is modified to use an explicit temporary file path (`/tmp/postgis_binary_test.dat`)
+- Server-side SQL COPY commands (which require superuser privileges) are converted to client-side `\copy` commands
+- Upgrade tests are disabled
--- a/docker-compose/ext-src/postgis-src/neon-test.sh
+++ b/docker-compose/ext-src/postgis-src/neon-test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -ex
+cd "$(dirname "$0")"
+if [[ ${PG_VERSION} = v17 ]]; then
+  sed -i '/computed_columns/d' regress/core/tests.mk
+fi
+patch -p1 <postgis-no-upgrade-test.patch
+trap 'echo Cleaning up; patch -R -p1 <postgis-no-upgrade-test.patch' EXIT
+make installcheck-base
--- a/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
@@ -0,0 +1,21 @@
+diff --git a/regress/runtest.mk b/regress/runtest.mk
+index c051f03..010e493 100644
+--- a/regress/runtest.mk
+++ b/regress/runtest.mk
+@@ -24,16 +24,6 @@ check-regress:
+ 
+ 	POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
+ 
+-	@if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
+-		echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
+-		POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
+-      --upgrade \
+-      $(RUNTESTFLAGS) \
+-      $(RUNTESTFLAGS_INTERNAL) \
+-      $(TESTS); \
+-	else \
+-		echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
+-	fi
+ 
+ check-long:
+ 	$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
@@ -0,0 +1,198 @@
+diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
+index 00918e1..7e2b6cd 100644
+--- a/raster/test/regress/tests.mk
+++ b/raster/test/regress/tests.mk
+@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
+   $(RUNTESTFLAGS_INTERNAL) \
+   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
+ 
+-RASTER_TEST_FIRST = \
+-	$(top_srcdir)/raster/test/regress/check_gdal \
+-	$(top_srcdir)/raster/test/regress/loader/load_outdb
+RASTER_TEST_FIRST =
+ 
+ RASTER_TEST_LAST = \
+ 	$(top_srcdir)/raster/test/regress/clean
+@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
+ 
+ RASTER_TEST_BASIC_FUNC = \
+ 	$(top_srcdir)/raster/test/regress/rt_bytea \
+-	$(top_srcdir)/raster/test/regress/rt_wkb \
+ 	$(top_srcdir)/raster/test/regress/box3d \
+-	$(top_srcdir)/raster/test/regress/rt_addband \
+ 	$(top_srcdir)/raster/test/regress/rt_band \
+ 	$(top_srcdir)/raster/test/regress/rt_tile
+ 
+@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
+ 	$(top_srcdir)/raster/test/regress/rt_neighborhood \
+ 	$(top_srcdir)/raster/test/regress/rt_nearestvalue \
+ 	$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
+-	$(top_srcdir)/raster/test/regress/rt_polygon \
+-	$(top_srcdir)/raster/test/regress/rt_setbandpath
+	$(top_srcdir)/raster/test/regress/rt_polygon
+ 
+ RASTER_TEST_UTILITY = \
+ 	$(top_srcdir)/raster/test/regress/rt_utility \
+-	$(top_srcdir)/raster/test/regress/rt_fromgdalraster \
+-	$(top_srcdir)/raster/test/regress/rt_asgdalraster \
+-	$(top_srcdir)/raster/test/regress/rt_astiff \
+-	$(top_srcdir)/raster/test/regress/rt_asjpeg \
+-	$(top_srcdir)/raster/test/regress/rt_aspng \
+ 	$(top_srcdir)/raster/test/regress/rt_reclass \
+ 	$(top_srcdir)/raster/test/regress/rt_gdalwarp \
+ 	$(top_srcdir)/raster/test/regress/rt_gdalcontour \
+@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
+ 
+ RASTER_TEST_BUGS = \
+ 	$(top_srcdir)/raster/test/regress/bug_test_car5 \
+-	$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
+ 	$(top_srcdir)/raster/test/regress/tickets
+ 
+ RASTER_TEST_LOADER = \
+ 	$(top_srcdir)/raster/test/regress/loader/Basic \
+ 	$(top_srcdir)/raster/test/regress/loader/Projected \
+ 	$(top_srcdir)/raster/test/regress/loader/BasicCopy \
+-	$(top_srcdir)/raster/test/regress/loader/BasicFilename \
+-	$(top_srcdir)/raster/test/regress/loader/BasicOutDB \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAuto \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
+	$(top_srcdir)/raster/test/regress/loader/BasicFilename
+ 
+ RASTER_TESTS := $(RASTER_TEST_FIRST) \
+ 	$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
+diff --git a/regress/core/binary.sql b/regress/core/binary.sql
+index 7a36b65..ad78fc7 100644
+--- a/regress/core/binary.sql
+++ b/regress/core/binary.sql
+@@ -1,4 +1,5 @@
+ SET client_min_messages TO warning;
+
+ CREATE SCHEMA tm;
+ 
+ CREATE TABLE tm.geoms (id serial, g geometry);
+@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
+ INSERT INTO tm.geoms(g)
+ SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
+ 
+-COPY tm.geoms TO :tmpfile WITH BINARY;
+-- define temp file path
+\set tmpfile '/tmp/postgis_binary_test.dat'
+
+-- export
+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
+ CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
+-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
+-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
+- AND ST_OrderingEquals(i.g, o.g);
+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
+ 
+ CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
+ WHERE geometrytype(g) NOT LIKE '%CURVE%'
+   AND geometrytype(g) NOT LIKE '%CIRCULAR%'
+   AND geometrytype(g) NOT LIKE '%SURFACE%'
+   AND geometrytype(g) NOT LIKE 'TRIANGLE%'
+-  AND geometrytype(g) NOT LIKE 'TIN%'
+-;
+  AND geometrytype(g) NOT LIKE 'TIN%';
+ 
+-COPY tm.geogs TO :tmpfile WITH BINARY;
+-- export
+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
+ CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
+-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
+-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
+- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+ 
+ DROP SCHEMA tm CASCADE;
+
+diff --git a/regress/core/tests.mk b/regress/core/tests.mk
+index 3abd7bc..94903c3 100644
+--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
+@@ -23,7 +23,6 @@ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+ RUNTESTFLAGS_INTERNAL += \
+   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
+   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
+-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \
+   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
+ 
+ TESTS += \
+@@ -40,7 +39,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/dumppoints \
+ 	$(top_srcdir)/regress/core/dumpsegments \
+ 	$(top_srcdir)/regress/core/empty \
+-	$(top_srcdir)/regress/core/estimatedextent \
+ 	$(top_srcdir)/regress/core/forcecurve \
+ 	$(top_srcdir)/regress/core/flatgeobuf \
+ 	$(top_srcdir)/regress/core/geography \
+@@ -55,7 +53,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/out_marc21 \
+ 	$(top_srcdir)/regress/core/in_encodedpolyline \
+ 	$(top_srcdir)/regress/core/iscollection \
+-	$(top_srcdir)/regress/core/legacy \
+ 	$(top_srcdir)/regress/core/letters \
+ 	$(top_srcdir)/regress/core/long_xact \
+ 	$(top_srcdir)/regress/core/lwgeom_regress \
+@@ -112,7 +109,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/temporal_knn \
+ 	$(top_srcdir)/regress/core/tickets \
+ 	$(top_srcdir)/regress/core/twkb \
+-	$(top_srcdir)/regress/core/typmod \
+ 	$(top_srcdir)/regress/core/wkb \
+ 	$(top_srcdir)/regress/core/wkt \
+ 	$(top_srcdir)/regress/core/wmsservers \
+@@ -144,11 +140,6 @@ TESTS_SLOW = \
+ 	$(top_srcdir)/regress/core/concave_hull_hard \
+ 	$(top_srcdir)/regress/core/knn_recheck
+ 
+-ifeq ($(shell expr "$(POSTGIS_PGSQL_VERSION)" ">=" 120),1)
+-	TESTS += \
+-		$(top_srcdir)/regress/core/computed_columns
+-endif
+-
+ ifeq ($(shell expr "$(POSTGIS_GEOS_VERSION)" ">=" 30700),1)
+ 	# GEOS-3.7 adds:
+ 	# ST_FrechetDistance
+diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
+index 1fc77ac..c3cb9de 100644
+--- a/regress/loader/tests.mk
+++ b/regress/loader/tests.mk
+@@ -38,7 +38,5 @@ TESTS += \
+ 	$(top_srcdir)/regress/loader/Latin1 \
+ 	$(top_srcdir)/regress/loader/Latin1-implicit \
+ 	$(top_srcdir)/regress/loader/mfile \
+-	$(top_srcdir)/regress/loader/TestSkipANALYZE \
+-	$(top_srcdir)/regress/loader/TestANALYZE \
+ 	$(top_srcdir)/regress/loader/CharNoWidth
+ 
+diff --git a/regress/run_test.pl b/regress/run_test.pl
+index 0ec5b2d..1c331f4 100755
+--- a/regress/run_test.pl
+++ b/regress/run_test.pl
+@@ -147,7 +147,6 @@ $ENV{"LANG"} = "C";
+ # Add locale info to the psql options
+ # Add pg12 precision suppression
+ my $PGOPTIONS = $ENV{"PGOPTIONS"};
+-$PGOPTIONS .= " -c lc_messages=C";
+ $PGOPTIONS .= " -c client_min_messages=NOTICE";
+ $PGOPTIONS .= " -c extra_float_digits=0";
+ $ENV{"PGOPTIONS"} = $PGOPTIONS;
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
@@ -0,0 +1,218 @@
+diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
+index 00918e1..7e2b6cd 100644
+--- a/raster/test/regress/tests.mk
+++ b/raster/test/regress/tests.mk
+@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
+   $(RUNTESTFLAGS_INTERNAL) \
+   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
+ 
+-RASTER_TEST_FIRST = \
+-	$(top_srcdir)/raster/test/regress/check_gdal \
+-	$(top_srcdir)/raster/test/regress/loader/load_outdb
+RASTER_TEST_FIRST =
+ 
+ RASTER_TEST_LAST = \
+ 	$(top_srcdir)/raster/test/regress/clean
+@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
+ 
+ RASTER_TEST_BASIC_FUNC = \
+ 	$(top_srcdir)/raster/test/regress/rt_bytea \
+-	$(top_srcdir)/raster/test/regress/rt_wkb \
+ 	$(top_srcdir)/raster/test/regress/box3d \
+-	$(top_srcdir)/raster/test/regress/rt_addband \
+ 	$(top_srcdir)/raster/test/regress/rt_band \
+ 	$(top_srcdir)/raster/test/regress/rt_tile
+ 
+@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
+ 	$(top_srcdir)/raster/test/regress/rt_neighborhood \
+ 	$(top_srcdir)/raster/test/regress/rt_nearestvalue \
+ 	$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
+-	$(top_srcdir)/raster/test/regress/rt_polygon \
+-	$(top_srcdir)/raster/test/regress/rt_setbandpath
+	$(top_srcdir)/raster/test/regress/rt_polygon
+ 
+ RASTER_TEST_UTILITY = \
+ 	$(top_srcdir)/raster/test/regress/rt_utility \
+-	$(top_srcdir)/raster/test/regress/rt_fromgdalraster \
+-	$(top_srcdir)/raster/test/regress/rt_asgdalraster \
+-	$(top_srcdir)/raster/test/regress/rt_astiff \
+-	$(top_srcdir)/raster/test/regress/rt_asjpeg \
+-	$(top_srcdir)/raster/test/regress/rt_aspng \
+ 	$(top_srcdir)/raster/test/regress/rt_reclass \
+ 	$(top_srcdir)/raster/test/regress/rt_gdalwarp \
+ 	$(top_srcdir)/raster/test/regress/rt_gdalcontour \
+@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
+ 
+ RASTER_TEST_BUGS = \
+ 	$(top_srcdir)/raster/test/regress/bug_test_car5 \
+-	$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
+ 	$(top_srcdir)/raster/test/regress/tickets
+ 
+ RASTER_TEST_LOADER = \
+ 	$(top_srcdir)/raster/test/regress/loader/Basic \
+ 	$(top_srcdir)/raster/test/regress/loader/Projected \
+ 	$(top_srcdir)/raster/test/regress/loader/BasicCopy \
+-	$(top_srcdir)/raster/test/regress/loader/BasicFilename \
+-	$(top_srcdir)/raster/test/regress/loader/BasicOutDB \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
+-	$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAuto \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
+-	$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
+	$(top_srcdir)/raster/test/regress/loader/BasicFilename
+ 
+ RASTER_TESTS := $(RASTER_TEST_FIRST) \
+ 	$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
+diff --git a/regress/core/binary.sql b/regress/core/binary.sql
+index 7a36b65..ad78fc7 100644
+--- a/regress/core/binary.sql
+++ b/regress/core/binary.sql
+@@ -1,4 +1,5 @@
+ SET client_min_messages TO warning;
+
+ CREATE SCHEMA tm;
+ 
+ CREATE TABLE tm.geoms (id serial, g geometry);
+@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
+ INSERT INTO tm.geoms(g)
+ SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
+ 
+-COPY tm.geoms TO :tmpfile WITH BINARY;
+-- define temp file path
+\set tmpfile '/tmp/postgis_binary_test.dat'
+
+-- export
+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
+ CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
+-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
+-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
+- AND ST_OrderingEquals(i.g, o.g);
+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
+ 
+ CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
+ WHERE geometrytype(g) NOT LIKE '%CURVE%'
+   AND geometrytype(g) NOT LIKE '%CIRCULAR%'
+   AND geometrytype(g) NOT LIKE '%SURFACE%'
+   AND geometrytype(g) NOT LIKE 'TRIANGLE%'
+-  AND geometrytype(g) NOT LIKE 'TIN%'
+-;
+  AND geometrytype(g) NOT LIKE 'TIN%';
+ 
+-COPY tm.geogs TO :tmpfile WITH BINARY;
+-- export
+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
+ CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
+-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
+-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
+- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+ 
+ DROP SCHEMA tm CASCADE;
+
+diff --git a/regress/core/tests.mk b/regress/core/tests.mk
+index 9e05244..a63a3e1 100644
+--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
+@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170
+ POSTGIS_GEOS_VERSION=31101
+ HAVE_JSON=yes
+ HAVE_SPGIST=yes
+-INTERRUPTTESTS=yes
+INTERRUPTTESTS=no
+ 
+ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+ 
+ RUNTESTFLAGS_INTERNAL += \
+   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
+   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
+-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \
+   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
+ 
+ TESTS += \
+@@ -40,7 +39,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/dumppoints \
+ 	$(top_srcdir)/regress/core/dumpsegments \
+ 	$(top_srcdir)/regress/core/empty \
+-	$(top_srcdir)/regress/core/estimatedextent \
+ 	$(top_srcdir)/regress/core/forcecurve \
+ 	$(top_srcdir)/regress/core/flatgeobuf \
+ 	$(top_srcdir)/regress/core/frechet \
+@@ -60,7 +58,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/out_marc21 \
+ 	$(top_srcdir)/regress/core/in_encodedpolyline \
+ 	$(top_srcdir)/regress/core/iscollection \
+-	$(top_srcdir)/regress/core/legacy \
+ 	$(top_srcdir)/regress/core/letters \
+ 	$(top_srcdir)/regress/core/lwgeom_regress \
+ 	$(top_srcdir)/regress/core/measures \
+@@ -119,7 +116,6 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/temporal_knn \
+ 	$(top_srcdir)/regress/core/tickets \
+ 	$(top_srcdir)/regress/core/twkb \
+-	$(top_srcdir)/regress/core/typmod \
+ 	$(top_srcdir)/regress/core/wkb \
+ 	$(top_srcdir)/regress/core/wkt \
+ 	$(top_srcdir)/regress/core/wmsservers \
+@@ -143,8 +139,7 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/oriented_envelope \
+ 	$(top_srcdir)/regress/core/point_coordinates \
+ 	$(top_srcdir)/regress/core/out_geojson \
+-  $(top_srcdir)/regress/core/wrapx \
+-	$(top_srcdir)/regress/core/computed_columns
+  $(top_srcdir)/regress/core/wrapx 
+ 
+ # Slow slow tests
+ TESTS_SLOW = \
+diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
+index ac4f8ad..4bad4fc 100644
+--- a/regress/loader/tests.mk
+++ b/regress/loader/tests.mk
+@@ -38,7 +38,5 @@ TESTS += \
+ 	$(top_srcdir)/regress/loader/Latin1 \
+ 	$(top_srcdir)/regress/loader/Latin1-implicit \
+ 	$(top_srcdir)/regress/loader/mfile \
+-	$(top_srcdir)/regress/loader/TestSkipANALYZE \
+-	$(top_srcdir)/regress/loader/TestANALYZE \
+ 	$(top_srcdir)/regress/loader/CharNoWidth \
+ 
+diff --git a/regress/run_test.pl b/regress/run_test.pl
+index cac4b2e..4c7c82b 100755
+--- a/regress/run_test.pl
+++ b/regress/run_test.pl
+@@ -238,7 +238,6 @@ $ENV{"LANG"} = "C";
+ # Add locale info to the psql options
+ # Add pg12 precision suppression
+ my $PGOPTIONS = $ENV{"PGOPTIONS"};
+-$PGOPTIONS .= " -c lc_messages=C";
+ $PGOPTIONS .= " -c client_min_messages=NOTICE";
+ $PGOPTIONS .= " -c extra_float_digits=0";
+ $ENV{"PGOPTIONS"} = $PGOPTIONS;
+diff --git a/topology/test/tests.mk b/topology/test/tests.mk
+index cbe2633..2c7c18f 100644
+--- a/topology/test/tests.mk
+++ b/topology/test/tests.mk
+@@ -46,9 +46,7 @@ TESTS += \
+ 	$(top_srcdir)/topology/test/regress/legacy_query.sql \
+ 	$(top_srcdir)/topology/test/regress/legacy_validate.sql \
+ 	$(top_srcdir)/topology/test/regress/polygonize.sql \
+-	$(top_srcdir)/topology/test/regress/populate_topology_layer.sql \
+ 	$(top_srcdir)/topology/test/regress/removeunusedprimitives.sql \
+-	$(top_srcdir)/topology/test/regress/renametopogeometrycolumn.sql \
+ 	$(top_srcdir)/topology/test/regress/renametopology.sql \
+ 	$(top_srcdir)/topology/test/regress/share_sequences.sql \
+ 	$(top_srcdir)/topology/test/regress/sqlmm.sql \
--- a/docker-compose/ext-src/postgis-src/raster_outdb_template.sql
+++ b/docker-compose/ext-src/postgis-src/raster_outdb_template.sql
--- a/docker-compose/ext-src/postgis-src/regular-test.sh
+++ b/docker-compose/ext-src/postgis-src/regular-test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exist contrib_regression
+createdb contrib_regression
+psql -d contrib_regression -c "ALTER DATABASE contrib_regression SET TimeZone='UTC'" \
+     -c "ALTER DATABASE contrib_regression SET DateStyle='ISO, MDY'" \
+     -c "CREATE EXTENSION postgis SCHEMA public" \
+     -c "CREATE EXTENSION postgis_topology" \
+     -c "CREATE EXTENSION postgis_tiger_geocoder CASCADE" \
+     -c "CREATE EXTENSION postgis_raster SCHEMA public" \
+     -c "CREATE EXTENSION postgis_sfcgal SCHEMA public"
+patch -p1 <postgis-no-upgrade-test.patch
+patch -p1 <"postgis-regular-${PG_VERSION}.patch"
+psql -d contrib_regression -f raster_outdb_template.sql
+trap 'patch -R -p1 <postgis-no-upgrade-test.patch && patch -R -p1 <"postgis-regular-${PG_VERSION}.patch"' EXIT
+POSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base
--- a/docker-compose/run-tests.sh
+++ b/docker-compose/run-tests.sh
@@ -63,5 +63,9 @@ done
 for d in ${FAILED}; do
  cat "$(find $d -name regression.diffs)"
 done
+for postgis_diff in /tmp/pgis_reg/*_diff; do
+  echo "${postgis_diff}:"
+  cat "${postgis_diff}"
+done
 echo "${FAILED}"
 exit 1
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -178,9 +178,9 @@ pub struct ComputeSpec {
    /// JWT for authorizing requests to endpoint storage service
    pub endpoint_storage_token: Option<String>,

-    /// If true, download LFC state from endpoint_storage and pass it to Postgres on startup
+    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
    #[serde(default)]
-    pub prewarm_lfc_on_startup: bool,
+    pub autoprewarm: bool,
 }

 /// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -85,7 +85,7 @@
                "vartype": "bool"
            },
            {
-                "name": "prewarm_lfc_on_startup",
+                "name": "autoprewarm",
                "value": "off",
                "vartype": "bool"
            },
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -181,6 +181,7 @@ pub struct ConfigToml {
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
    pub ingest_batch_size: u64,
    pub max_vectored_read_bytes: MaxVectoredReadBytes,
+    pub max_get_vectored_keys: MaxGetVectoredKeys,
    pub image_compression: ImageCompressionAlgorithm,
    pub timeline_offloading: bool,
    pub ephemeral_bytes_per_memory_kb: usize,
@@ -229,7 +230,7 @@ pub enum PageServicePipeliningConfig {
 }
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct PageServicePipeliningConfigPipelined {
-    /// Causes runtime errors if larger than max get_vectored batch size.
+    /// Config parsing and validation fail if this is larger than `max_get_vectored_keys`.
    pub max_batch_size: NonZeroUsize,
    pub execution: PageServiceProtocolPipelinedExecutionStrategy,
    // The default below is such that new versions of the software can start
@@ -329,6 +330,8 @@ pub struct TimelineImportConfig {
    pub import_job_concurrency: NonZeroUsize,
    pub import_job_soft_size_limit: NonZeroUsize,
    pub import_job_checkpoint_threshold: NonZeroUsize,
+    /// Max size of the remote storage partial read done by any job
+    pub import_job_max_byte_range_size: NonZeroUsize,
 }

 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -403,6 +406,16 @@ impl Default for EvictionOrder {
 #[serde(transparent)]
 pub struct MaxVectoredReadBytes(pub NonZeroUsize);

+#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(transparent)]
+pub struct MaxGetVectoredKeys(NonZeroUsize);
+
+impl MaxGetVectoredKeys {
+    pub fn get(&self) -> usize {
+        self.0.get()
+    }
+}
+
 /// Tenant-level configuration values, used for various purposes.
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(default)]
@@ -587,6 +600,8 @@ pub mod defaults {
    /// That is, slightly above 128 kB.
    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB

+    pub const DEFAULT_MAX_GET_VECTORED_KEYS: usize = 32;
+
    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };

@@ -595,7 +610,10 @@ pub mod defaults {
    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

    pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
-        utils::postgres_client::PostgresClientProtocol::Vanilla;
+        utils::postgres_client::PostgresClientProtocol::Interpreted {
+            format: utils::postgres_client::InterpretedFormat::Protobuf,
+            compression: Some(utils::postgres_client::Compression::Zstd { level: 1 }),
+        };

    pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
    pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
@@ -685,6 +703,9 @@ impl Default for ConfigToml {
            max_vectored_read_bytes: (MaxVectoredReadBytes(
                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
            )),
+            max_get_vectored_keys: (MaxGetVectoredKeys(
+                NonZeroUsize::new(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap(),
+            )),
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            timeline_offloading: true,
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
@@ -716,6 +737,7 @@ impl Default for ConfigToml {
                import_job_concurrency: NonZeroUsize::new(32).unwrap(),
                import_job_soft_size_limit: NonZeroUsize::new(256 * 1024 * 1024).unwrap(),
                import_job_checkpoint_threshold: NonZeroUsize::new(32).unwrap(),
+                import_job_max_byte_range_size: NonZeroUsize::new(4 * 1024 * 1024).unwrap(),
            },
            basebackup_cache_config: None,
            posthog_config: None,
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1934,7 +1934,7 @@ pub enum PagestreamFeMessage {
 }

 // Wrapped in libpq CopyData
-#[derive(strum_macros::EnumProperty)]
+#[derive(Debug, strum_macros::EnumProperty)]
 pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
@@ -2045,7 +2045,7 @@ pub enum PagestreamProtocolVersion {

 pub type RequestId = u64;

-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamRequest {
    pub reqid: RequestId,
    pub request_lsn: Lsn,
@@ -2064,7 +2064,7 @@ pub struct PagestreamNblocksRequest {
    pub rel: RelTag,
 }

-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamGetPageRequest {
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -24,7 +24,7 @@ use serde::{Deserialize, Serialize};
 // FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
 // Then we could replace the custom Ord and PartialOrd implementations below with
 // deriving them. This will require changes in walredoproc.c.
-#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Default, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
 pub struct RelTag {
    pub forknum: u8,
    pub spcnode: Oid,
@@ -184,12 +184,12 @@ pub enum SlruKind {
    MultiXactOffsets,
 }

-impl SlruKind {
-    pub fn to_str(&self) -> &'static str {
+impl fmt::Display for SlruKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
-            Self::Clog => "pg_xact",
-            Self::MultiXactMembers => "pg_multixact/members",
-            Self::MultiXactOffsets => "pg_multixact/offsets",
+            Self::Clog => write!(f, "pg_xact"),
+            Self::MultiXactMembers => write!(f, "pg_multixact/members"),
+            Self::MultiXactOffsets => write!(f, "pg_multixact/offsets"),
        }
    }
 }
--- a/libs/posthog_client_lite/src/lib.rs
+++ b/libs/posthog_client_lite/src/lib.rs
@@ -22,6 +22,16 @@ pub enum PostHogEvaluationError {
    Internal(String),
 }

+impl PostHogEvaluationError {
+    pub fn as_variant_str(&self) -> &'static str {
+        match self {
+            PostHogEvaluationError::NotAvailable(_) => "not_available",
+            PostHogEvaluationError::NoConditionGroupMatched => "no_condition_group_matched",
+            PostHogEvaluationError::Internal(_) => "internal",
+        }
+    }
+}
+
 #[derive(Deserialize)]
 pub struct LocalEvaluationResponse {
    pub flags: Vec<LocalEvaluationFlag>,
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -73,6 +73,7 @@ pub mod error;
 /// async timeout helper
 pub mod timeout;

+pub mod span;
 pub mod sync;

 pub mod failpoint_support;
--- a/libs/utils/src/span.rs
+++ b/libs/utils/src/span.rs
@@ -0,0 +1,19 @@
+//! Tracing span helpers.
+
+/// Records the given fields in the current span, as a single call. The fields must already have
+/// been declared for the span (typically with empty values).
+#[macro_export]
+macro_rules! span_record {
+    ($($tokens:tt)*) => {$crate::span_record_in!(::tracing::Span::current(), $($tokens)*)};
+}
+
+/// Records the given fields in the given span, as a single call. The fields must already have been
+/// declared for the span (typically with empty values).
+#[macro_export]
+macro_rules! span_record_in {
+    ($span:expr, $($tokens:tt)*) => {
+        if let Some(meta) = $span.metadata() {
+            $span.record_all(&tracing::valueset!(meta.fields(), $($tokens)*));
+        }
+    };
+}
--- a/libs/utils/src/sync/gate.rs
+++ b/libs/utils/src/sync/gate.rs
@@ -86,6 +86,14 @@ pub enum GateError {
    GateClosed,
 }

+impl GateError {
+    pub fn is_cancel(&self) -> bool {
+        match self {
+            GateError::GateClosed => true,
+        }
+    }
+}
+
 impl Default for Gate {
    fn default() -> Self {
        Self {
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -34,6 +34,7 @@ fail.workspace = true
 futures.workspace = true
 hashlink.workspace = true
 hex.workspace = true
+http.workspace = true
 http-utils.workspace = true
 humantime-serde.workspace = true
 humantime.workspace = true
@@ -93,6 +94,7 @@ tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tonic.workspace = true
 tonic-reflection.workspace = true
+tower.workspace = true
 tracing.workspace = true
 tracing-utils.workspace = true
 url.workspace = true
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -9,7 +9,6 @@ bytes.workspace = true
 pageserver_api.workspace = true
 postgres_ffi.workspace = true
 prost.workspace = true
-smallvec.workspace = true
 thiserror.workspace = true
 tonic.workspace = true
 utils.workspace = true
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -9,10 +9,16 @@
 //! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.
 //!
 //! - Validate protocol invariants, via try_from() and try_into().
+//!
+//! Validation only happens on the receiver side, i.e. when converting from Protobuf to domain
+//! types. This is where it matters -- the Protobuf types are less strict than the domain types, and
+//! receivers should expect all sorts of junk from senders. This also allows the sender to use e.g.
+//! stream combinators without dealing with errors, and avoids validating the same message twice.
+
+use std::fmt::Display;

 use bytes::Bytes;
 use postgres_ffi::Oid;
-use smallvec::SmallVec;
 // TODO: split out Lsn, RelTag, SlruKind, Oid and other basic types to a separate crate, to avoid
 // pulling in all of their other crate dependencies when building the client.
 use utils::lsn::Lsn;
@@ -48,7 +54,8 @@ pub struct ReadLsn {
    pub request_lsn: Lsn,
    /// If given, the caller guarantees that the page has not been modified since this LSN. Must be
    /// smaller than or equal to request_lsn. This allows the Pageserver to serve an old page
-    /// without waiting for the request LSN to arrive. Valid for all request types.
+    /// without waiting for the request LSN to arrive. If not given, the request will read at the
+    /// request_lsn and wait for it to arrive if necessary. Valid for all request types.
    ///
    /// It is undefined behaviour to make a request such that the page was, in fact, modified
    /// between request_lsn and not_modified_since_lsn. The Pageserver might detect it and return an
@@ -58,19 +65,14 @@ pub struct ReadLsn {
    pub not_modified_since_lsn: Option<Lsn>,
 }

-impl ReadLsn {
-    /// Validates the ReadLsn.
-    pub fn validate(&self) -> Result<(), ProtocolError> {
-        if self.request_lsn == Lsn::INVALID {
-            return Err(ProtocolError::invalid("request_lsn", self.request_lsn));
+impl Display for ReadLsn {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let req_lsn = self.request_lsn;
+        if let Some(mod_lsn) = self.not_modified_since_lsn {
+            write!(f, "{req_lsn}>={mod_lsn}")
+        } else {
+            req_lsn.fmt(f)
        }
-        if self.not_modified_since_lsn > Some(self.request_lsn) {
-            return Err(ProtocolError::invalid(
-                "not_modified_since_lsn",
-                self.not_modified_since_lsn,
-            ));
-        }
-        Ok(())
    }
 }

@@ -78,27 +80,31 @@ impl TryFrom<proto::ReadLsn> for ReadLsn {
    type Error = ProtocolError;

    fn try_from(pb: proto::ReadLsn) -> Result<Self, Self::Error> {
-        let read_lsn = Self {
+        if pb.request_lsn == 0 {
+            return Err(ProtocolError::invalid("request_lsn", pb.request_lsn));
+        }
+        if pb.not_modified_since_lsn > pb.request_lsn {
+            return Err(ProtocolError::invalid(
+                "not_modified_since_lsn",
+                pb.not_modified_since_lsn,
+            ));
+        }
+        Ok(Self {
            request_lsn: Lsn(pb.request_lsn),
            not_modified_since_lsn: match pb.not_modified_since_lsn {
                0 => None,
                lsn => Some(Lsn(lsn)),
            },
-        };
-        read_lsn.validate()?;
-        Ok(read_lsn)
+        })
    }
 }

-impl TryFrom<ReadLsn> for proto::ReadLsn {
-    type Error = ProtocolError;
-
-    fn try_from(read_lsn: ReadLsn) -> Result<Self, Self::Error> {
-        read_lsn.validate()?;
-        Ok(Self {
+impl From<ReadLsn> for proto::ReadLsn {
+    fn from(read_lsn: ReadLsn) -> Self {
+        Self {
            request_lsn: read_lsn.request_lsn.0,
            not_modified_since_lsn: read_lsn.not_modified_since_lsn.unwrap_or_default().0,
-        })
+        }
    }
 }

@@ -153,6 +159,15 @@ impl TryFrom<proto::CheckRelExistsRequest> for CheckRelExistsRequest {
    }
 }

+impl From<CheckRelExistsRequest> for proto::CheckRelExistsRequest {
+    fn from(request: CheckRelExistsRequest) -> Self {
+        Self {
+            read_lsn: Some(request.read_lsn.into()),
+            rel: Some(request.rel.into()),
+        }
+    }
+}
+
 pub type CheckRelExistsResponse = bool;

 impl From<proto::CheckRelExistsResponse> for CheckRelExistsResponse {
@@ -190,14 +205,12 @@ impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
    }
 }

-impl TryFrom<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
-    type Error = ProtocolError;
-
-    fn try_from(request: GetBaseBackupRequest) -> Result<Self, Self::Error> {
-        Ok(Self {
-            read_lsn: Some(request.read_lsn.try_into()?),
+impl From<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
+    fn from(request: GetBaseBackupRequest) -> Self {
+        Self {
+            read_lsn: Some(request.read_lsn.into()),
            replica: request.replica,
-        })
+        }
    }
 }

@@ -214,14 +227,9 @@ impl TryFrom<proto::GetBaseBackupResponseChunk> for GetBaseBackupResponseChunk {
    }
 }

-impl TryFrom<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {
-    type Error = ProtocolError;
-
-    fn try_from(chunk: GetBaseBackupResponseChunk) -> Result<Self, Self::Error> {
-        if chunk.is_empty() {
-            return Err(ProtocolError::Missing("chunk"));
-        }
-        Ok(Self { chunk })
+impl From<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {
+    fn from(chunk: GetBaseBackupResponseChunk) -> Self {
+        Self { chunk }
    }
 }

@@ -246,14 +254,12 @@ impl TryFrom<proto::GetDbSizeRequest> for GetDbSizeRequest {
    }
 }

-impl TryFrom<GetDbSizeRequest> for proto::GetDbSizeRequest {
-    type Error = ProtocolError;
-
-    fn try_from(request: GetDbSizeRequest) -> Result<Self, Self::Error> {
-        Ok(Self {
-            read_lsn: Some(request.read_lsn.try_into()?),
+impl From<GetDbSizeRequest> for proto::GetDbSizeRequest {
+    fn from(request: GetDbSizeRequest) -> Self {
+        Self {
+            read_lsn: Some(request.read_lsn.into()),
            db_oid: request.db_oid,
-        })
+        }
    }
 }

@@ -288,7 +294,7 @@ pub struct GetPageRequest {
    /// Multiple pages will be executed as a single batch by the Pageserver, amortizing layer access
    /// costs and parallelizing them. This may increase the latency of any individual request, but
    /// improves the overall latency and throughput of the batch as a whole.
-    pub block_numbers: SmallVec<[u32; 1]>,
+    pub block_numbers: Vec<u32>,
 }

 impl TryFrom<proto::GetPageRequest> for GetPageRequest {
@@ -306,25 +312,20 @@ impl TryFrom<proto::GetPageRequest> for GetPageRequest {
                .ok_or(ProtocolError::Missing("read_lsn"))?
                .try_into()?,
            rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
-            block_numbers: pb.block_number.into(),
+            block_numbers: pb.block_number,
        })
    }
 }

-impl TryFrom<GetPageRequest> for proto::GetPageRequest {
-    type Error = ProtocolError;
-
-    fn try_from(request: GetPageRequest) -> Result<Self, Self::Error> {
-        if request.block_numbers.is_empty() {
-            return Err(ProtocolError::Missing("block_number"));
-        }
-        Ok(Self {
+impl From<GetPageRequest> for proto::GetPageRequest {
+    fn from(request: GetPageRequest) -> Self {
+        Self {
            request_id: request.request_id,
            request_class: request.request_class.into(),
-            read_lsn: Some(request.read_lsn.try_into()?),
+            read_lsn: Some(request.read_lsn.into()),
            rel: Some(request.rel.into()),
-            block_number: request.block_numbers.into_vec(),
-        })
+            block_number: request.block_numbers,
+        }
    }
 }

@@ -396,7 +397,7 @@ pub struct GetPageResponse {
    /// A string describing the status, if any.
    pub reason: Option<String>,
    /// The 8KB page images, in the same order as the request. Empty if status != OK.
-    pub page_images: SmallVec<[Bytes; 1]>,
+    pub page_images: Vec<Bytes>,
 }

 impl From<proto::GetPageResponse> for GetPageResponse {
@@ -405,7 +406,7 @@ impl From<proto::GetPageResponse> for GetPageResponse {
            request_id: pb.request_id,
            status_code: pb.status_code.into(),
            reason: Some(pb.reason).filter(|r| !r.is_empty()),
-            page_images: pb.page_image.into(),
+            page_images: pb.page_image,
        }
    }
 }
@@ -416,7 +417,7 @@ impl From<GetPageResponse> for proto::GetPageResponse {
            request_id: response.request_id,
            status_code: response.status_code.into(),
            reason: response.reason.unwrap_or_default(),
-            page_image: response.page_images.into_vec(),
+            page_image: response.page_images,
        }
    }
 }
@@ -505,14 +506,12 @@ impl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {
    }
 }

-impl TryFrom<GetRelSizeRequest> for proto::GetRelSizeRequest {
-    type Error = ProtocolError;
-
-    fn try_from(request: GetRelSizeRequest) -> Result<Self, Self::Error> {
-        Ok(Self {
-            read_lsn: Some(request.read_lsn.try_into()?),
+impl From<GetRelSizeRequest> for proto::GetRelSizeRequest {
+    fn from(request: GetRelSizeRequest) -> Self {
+        Self {
+            read_lsn: Some(request.read_lsn.into()),
            rel: Some(request.rel.into()),
-        })
+        }
    }
 }

@@ -555,15 +554,13 @@ impl TryFrom<proto::GetSlruSegmentRequest> for GetSlruSegmentRequest {
    }
 }

-impl TryFrom<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {
-    type Error = ProtocolError;
-
-    fn try_from(request: GetSlruSegmentRequest) -> Result<Self, Self::Error> {
-        Ok(Self {
-            read_lsn: Some(request.read_lsn.try_into()?),
+impl From<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {
+    fn from(request: GetSlruSegmentRequest) -> Self {
+        Self {
+            read_lsn: Some(request.read_lsn.into()),
            kind: request.kind as u32,
            segno: request.segno,
-        })
+        }
    }
 }

@@ -580,14 +577,9 @@ impl TryFrom<proto::GetSlruSegmentResponse> for GetSlruSegmentResponse {
    }
 }

-impl TryFrom<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
-    type Error = ProtocolError;
-
-    fn try_from(segment: GetSlruSegmentResponse) -> Result<Self, Self::Error> {
-        if segment.is_empty() {
-            return Err(ProtocolError::Missing("segment"));
-        }
-        Ok(Self { segment })
+impl From<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
+    fn from(segment: GetSlruSegmentResponse) -> Self {
+        Self { segment }
    }
 }

--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -8,6 +8,7 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+async-trait.workspace = true
 camino.workspace = true
 clap.workspace = true
 futures.workspace = true
@@ -15,14 +16,17 @@ hdrhistogram.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
 rand.workspace = true
-reqwest.workspace=true
+reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tracing.workspace = true
 tokio.workspace = true
+tokio-stream.workspace = true
 tokio-util.workspace = true
+tonic.workspace = true

 pageserver_client.workspace = true
 pageserver_api.workspace = true
+pageserver_page_api.workspace = true
 utils = { path = "../../libs/utils/" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -7,11 +7,15 @@ use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};

 use anyhow::Context;
+use async_trait::async_trait;
 use camino::Utf8PathBuf;
 use pageserver_api::key::Key;
 use pageserver_api::keyspace::KeySpaceAccum;
-use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
+use pageserver_api::models::{
+    PagestreamGetPageRequest, PagestreamGetPageResponse, PagestreamRequest,
+};
 use pageserver_api::shard::TenantShardId;
+use pageserver_page_api::proto;
 use rand::prelude::*;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
@@ -22,6 +26,12 @@ use utils::lsn::Lsn;
 use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
 use crate::util::{request_stats, tokio_thread_local_stats};

+#[derive(clap::ValueEnum, Clone, Debug)]
+enum Protocol {
+    Libpq,
+    Grpc,
+}
+
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
@@ -35,6 +45,8 @@ pub(crate) struct Args {
    num_clients: NonZeroUsize,
    #[clap(long)]
    runtime: Option<humantime::Duration>,
+    #[clap(long, value_enum, default_value = "libpq")]
+    protocol: Protocol,
    /// Each client sends requests at the given rate.
    ///
    /// If a request takes too long and we should be issuing a new request already,
@@ -303,7 +315,20 @@ async fn main_impl(
                .unwrap();

        Box::pin(async move {
-            client_libpq(args, worker_id, ss, cancel, rps_period, ranges, weights).await
+            let client: Box<dyn Client> = match args.protocol {
+                Protocol::Libpq => Box::new(
+                    LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
+                        .await
+                        .unwrap(),
+                ),
+
+                Protocol::Grpc => Box::new(
+                    GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
+                        .await
+                        .unwrap(),
+                ),
+            };
+            run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
        })
    };

@@ -355,23 +380,15 @@ async fn main_impl(
    anyhow::Ok(())
 }

-async fn client_libpq(
+async fn run_worker(
    args: &Args,
-    worker_id: WorkerId,
+    mut client: Box<dyn Client>,
    shared_state: Arc<SharedState>,
    cancel: CancellationToken,
    rps_period: Option<Duration>,
    ranges: Vec<KeyRange>,
    weights: rand::distributions::weighted::WeightedIndex<i128>,
 ) {
-    let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
-        .await
-        .unwrap();
-    let mut client = client
-        .pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
-        .await
-        .unwrap();
-
    shared_state.start_work_barrier.wait().await;
    let client_start = Instant::now();
    let mut ticks_processed = 0;
@@ -415,12 +432,12 @@ async fn client_libpq(
                    blkno: block_no,
                }
            };
-            client.getpage_send(req).await.unwrap();
+            client.send_get_page(req).await.unwrap();
            inflight.push_back(start);
        }

        let start = inflight.pop_front().unwrap();
-        client.getpage_recv().await.unwrap();
+        client.recv_get_page().await.unwrap();
        let end = Instant::now();
        shared_state.live_stats.request_done();
        ticks_processed += 1;
@@ -442,3 +459,104 @@ async fn client_libpq(
        }
    }
 }
+
+/// A benchmark client, to allow switching out the transport protocol.
+///
+/// For simplicity, this just uses separate asynchronous send/recv methods. The send method could
+/// return a future that resolves when the response is received, but we don't really need it.
+#[async_trait]
+trait Client: Send {
+    /// Sends an asynchronous GetPage request to the pageserver.
+    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()>;
+
+    /// Receives the next GetPage response from the pageserver.
+    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse>;
+}
+
+/// A libpq-based Pageserver client.
+struct LibpqClient {
+    inner: pageserver_client::page_service::PagestreamClient,
+}
+
+impl LibpqClient {
+    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
+        let inner = pageserver_client::page_service::Client::new(connstring)
+            .await?
+            .pagestream(ttid.tenant_id, ttid.timeline_id)
+            .await?;
+        Ok(Self { inner })
+    }
+}
+
+#[async_trait]
+impl Client for LibpqClient {
+    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
+        self.inner.getpage_send(req).await
+    }
+
+    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
+        self.inner.getpage_recv().await
+    }
+}
+
+/// A gRPC client using the raw, no-frills gRPC client.
+struct GrpcClient {
+    req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
+    resp_rx: tonic::Streaming<proto::GetPageResponse>,
+}
+
+impl GrpcClient {
+    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
+        let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;
+
+        // The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
+        // benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
+        // buffered by Tonic and the OS too.
+        let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
+        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
+        let mut req = tonic::Request::new(req_stream);
+        let metadata = req.metadata_mut();
+        metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
+        metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
+        metadata.insert("neon-shard-id", "0000".try_into()?);
+
+        let resp = client.get_pages(req).await?;
+        let resp_stream = resp.into_inner();
+
+        Ok(Self {
+            req_tx,
+            resp_rx: resp_stream,
+        })
+    }
+}
+
+#[async_trait]
+impl Client for GrpcClient {
+    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
+        let req = proto::GetPageRequest {
+            request_id: 0,
+            request_class: proto::GetPageClass::Normal as i32,
+            read_lsn: Some(proto::ReadLsn {
+                request_lsn: req.hdr.request_lsn.0,
+                not_modified_since_lsn: req.hdr.not_modified_since.0,
+            }),
+            rel: Some(req.rel.into()),
+            block_number: vec![req.blkno],
+        };
+        self.req_tx.send(req).await?;
+        Ok(())
+    }
+
+    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
+        let resp = self.resp_rx.message().await?.unwrap();
+        anyhow::ensure!(
+            resp.status_code == proto::GetPageStatusCode::Ok as i32,
+            "unexpected status code: {}",
+            resp.status_code
+        );
+        Ok(PagestreamGetPageResponse {
+            page: resp.page_image[0].clone(),
+            req: PagestreamGetPageRequest::default(), // dummy
+        })
+    }
+}
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -65,6 +65,30 @@ impl From<GetVectoredError> for BasebackupError {
    }
 }

+impl From<BasebackupError> for postgres_backend::QueryError {
+    fn from(err: BasebackupError) -> Self {
+        use postgres_backend::QueryError;
+        use pq_proto::framed::ConnectionError;
+        match err {
+            BasebackupError::Client(err, _) => QueryError::Disconnected(ConnectionError::Io(err)),
+            BasebackupError::Server(err) => QueryError::Other(err),
+            BasebackupError::Shutdown => QueryError::Shutdown,
+        }
+    }
+}
+
+impl From<BasebackupError> for tonic::Status {
+    fn from(err: BasebackupError) -> Self {
+        use tonic::Code;
+        let code = match &err {
+            BasebackupError::Client(_, _) => Code::Cancelled,
+            BasebackupError::Server(_) => Code::Internal,
+            BasebackupError::Shutdown => Code::Unavailable,
+        };
+        tonic::Status::new(code, err.to_string())
+    }
+}
+
 /// Create basebackup with non-rel data in it.
 /// Only include relational data if 'full_backup' is true.
 ///
@@ -248,7 +272,7 @@ where
    async fn flush(&mut self) -> Result<(), BasebackupError> {
        let nblocks = self.buf.len() / BLCKSZ as usize;
        let (kind, segno) = self.current_segment.take().unwrap();
-        let segname = format!("{}/{:>04X}", kind.to_str(), segno);
+        let segname = format!("{kind}/{segno:>04X}");
        let header = new_tar_header(&segname, self.buf.len() as u64)?;
        self.ar
            .append(&header, self.buf.as_slice())
@@ -347,7 +371,7 @@ where
                .await?
                .partition(
                    self.timeline.get_shard_identity(),
-                    Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
+                    self.timeline.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
                );

            let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -804,7 +804,7 @@ fn start_pageserver(
        } else {
            None
        },
-        basebackup_cache.clone(),
+        basebackup_cache,
    );

    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for
@@ -816,12 +816,10 @@ fn start_pageserver(
    let mut page_service_grpc = None;
    if let Some(grpc_listener) = grpc_listener {
        page_service_grpc = Some(page_service::spawn_grpc(
-            conf,
            tenant_manager.clone(),
            grpc_auth,
            otel_guard.as_ref().map(|g| g.dispatch.clone()),
            grpc_listener,
-            basebackup_cache,
        )?);
    }

--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -14,7 +14,10 @@ use std::time::Duration;
 use anyhow::{Context, bail, ensure};
 use camino::{Utf8Path, Utf8PathBuf};
 use once_cell::sync::OnceCell;
-use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes, PostHogConfig};
+use pageserver_api::config::{
+    DiskUsageEvictionTaskConfig, MaxGetVectoredKeys, MaxVectoredReadBytes,
+    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PostHogConfig,
+};
 use pageserver_api::models::ImageCompressionAlgorithm;
 use pageserver_api::shard::TenantShardId;
 use pem::Pem;
@@ -185,6 +188,9 @@ pub struct PageServerConf {

    pub max_vectored_read_bytes: MaxVectoredReadBytes,

+    /// Maximum number of keys to be read in a single get_vectored call.
+    pub max_get_vectored_keys: MaxGetVectoredKeys,
+
    pub image_compression: ImageCompressionAlgorithm,

    /// Whether to offload archived timelines automatically
@@ -404,6 +410,7 @@ impl PageServerConf {
            secondary_download_concurrency,
            ingest_batch_size,
            max_vectored_read_bytes,
+            max_get_vectored_keys,
            image_compression,
            timeline_offloading,
            ephemeral_bytes_per_memory_kb,
@@ -470,6 +477,7 @@ impl PageServerConf {
            secondary_download_concurrency,
            ingest_batch_size,
            max_vectored_read_bytes,
+            max_get_vectored_keys,
            image_compression,
            timeline_offloading,
            ephemeral_bytes_per_memory_kb,
@@ -598,6 +606,19 @@ impl PageServerConf {
                )
            })?;

+        if let PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
+            max_batch_size,
+            ..
+        }) = conf.page_service_pipelining
+        {
+            if max_batch_size.get() > conf.max_get_vectored_keys.get() {
+                return Err(anyhow::anyhow!(
+                    "`max_batch_size` ({max_batch_size}) must be less than or equal to `max_get_vectored_keys` ({})",
+                    conf.max_get_vectored_keys.get()
+                ));
+            }
+        };
+
        Ok(conf)
    }

@@ -685,6 +706,7 @@ impl ConfigurableSemaphore {
 mod tests {

    use camino::Utf8PathBuf;
+    use rstest::rstest;
    use utils::id::NodeId;

    use super::PageServerConf;
@@ -724,4 +746,28 @@ mod tests {
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect_err("parse_and_validate should fail for endpoint without scheme");
    }
+
+    #[rstest]
+    #[case(32, 32, true)]
+    #[case(64, 32, false)]
+    #[case(64, 64, true)]
+    #[case(128, 128, true)]
+    fn test_config_max_batch_size_is_valid(
+        #[case] max_batch_size: usize,
+        #[case] max_get_vectored_keys: usize,
+        #[case] is_valid: bool,
+    ) {
+        let input = format!(
+            r#"
+            control_plane_api = "http://localhost:6666"
+            max_get_vectored_keys = {max_get_vectored_keys}
+            page_service_pipelining = {{ mode="pipelined", execution="concurrent-futures", max_batch_size={max_batch_size}, batching="uniform-lsn" }}
+        "#,
+        );
+        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(&input)
+            .expect("config has valid fields");
+        let workdir = Utf8PathBuf::from("/nonexistent");
+        let result = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir);
+        assert_eq!(result.is_ok(), is_valid);
+    }
 }
--- a/pageserver/src/feature_resolver.rs
+++ b/pageserver/src/feature_resolver.rs
@@ -6,7 +6,7 @@ use posthog_client_lite::{
 use tokio_util::sync::CancellationToken;
 use utils::id::TenantId;

-use crate::config::PageServerConf;
+use crate::{config::PageServerConf, metrics::FEATURE_FLAG_EVALUATION};

 #[derive(Clone)]
 pub struct FeatureResolver {
@@ -55,11 +55,24 @@ impl FeatureResolver {
        tenant_id: TenantId,
    ) -> Result<String, PostHogEvaluationError> {
        if let Some(inner) = &self.inner {
-            inner.feature_store().evaluate_multivariate(
+            let res = inner.feature_store().evaluate_multivariate(
                flag_key,
                &tenant_id.to_string(),
                &HashMap::new(),
-            )
+            );
+            match &res {
+                Ok(value) => {
+                    FEATURE_FLAG_EVALUATION
+                        .with_label_values(&[flag_key, "ok", value])
+                        .inc();
+                }
+                Err(e) => {
+                    FEATURE_FLAG_EVALUATION
+                        .with_label_values(&[flag_key, "error", e.as_variant_str()])
+                        .inc();
+                }
+            }
+            res
        } else {
            Err(PostHogEvaluationError::NotAvailable(
                "PostHog integration is not enabled".to_string(),
@@ -80,11 +93,24 @@ impl FeatureResolver {
        tenant_id: TenantId,
    ) -> Result<(), PostHogEvaluationError> {
        if let Some(inner) = &self.inner {
-            inner.feature_store().evaluate_boolean(
+            let res = inner.feature_store().evaluate_boolean(
                flag_key,
                &tenant_id.to_string(),
                &HashMap::new(),
-            )
+            );
+            match &res {
+                Ok(()) => {
+                    FEATURE_FLAG_EVALUATION
+                        .with_label_values(&[flag_key, "ok", "true"])
+                        .inc();
+                }
+                Err(e) => {
+                    FEATURE_FLAG_EVALUATION
+                        .with_label_values(&[flag_key, "error", e.as_variant_str()])
+                        .inc();
+                }
+            }
+            res
        } else {
            Err(PostHogEvaluationError::NotAvailable(
                "PostHog integration is not enabled".to_string(),
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -15,6 +15,7 @@ use metrics::{
    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
 };
 use once_cell::sync::Lazy;
+use pageserver_api::config::defaults::DEFAULT_MAX_GET_VECTORED_KEYS;
 use pageserver_api::config::{
    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
    PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
@@ -32,7 +33,6 @@ use crate::config::PageServerConf;
 use crate::context::{PageContentKind, RequestContext};
 use crate::pgdatadir_mapping::DatadirModificationStats;
 use crate::task_mgr::TaskKind;
-use crate::tenant::Timeline;
 use crate::tenant::layer_map::LayerMap;
 use crate::tenant::mgr::TenantSlot;
 use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
@@ -446,6 +446,15 @@ static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub(crate) static FEATURE_FLAG_EVALUATION: Lazy<CounterVec> = Lazy::new(|| {
+    register_counter_vec!(
+        "pageserver_feature_flag_evaluation",
+        "Number of times a feature flag is evaluated",
+        &["flag_key", "status", "value"],
+    )
+    .unwrap()
+});
+
 #[derive(IntoStaticStr)]
 #[strum(serialize_all = "kebab_case")]
 pub(crate) enum PageCacheErrorKind {
@@ -1939,7 +1948,7 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
 });

 static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
-    (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
+    (1..=u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap())
        .map(|v| v.into())
        .collect()
 });
@@ -1957,7 +1966,7 @@ static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(
    let mut buckets = Vec::new();
    for i in 0.. {
        let bucket = 1 << i;
-        if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
+        if bucket > u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap() {
            break;
        }
        buckets.push(bucket.into());
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -431,10 +431,10 @@ impl Timeline {
                        GetVectoredError::InvalidLsn(e) => {
                            Err(anyhow::anyhow!("invalid LSN: {e:?}").into())
                        }
-                        // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
+                        // NB: this should never happen in practice because config validation limits the batch size to at most max_get_vectored_keys
                        // TODO: we can prevent this error class by moving this check into the type system
-                        GetVectoredError::Oversized(err) => {
-                            Err(anyhow::anyhow!("batching oversized: {err:?}").into())
+                        GetVectoredError::Oversized(err, max) => {
+                            Err(anyhow::anyhow!("batching oversized: {err} > {max}").into())
                        }
                    };

@@ -471,8 +471,19 @@ impl Timeline {

        let rels = self.list_rels(spcnode, dbnode, version, ctx).await?;

+        if rels.is_empty() {
+            return Ok(0);
+        }
+
+        // Pre-deserialize the rel directory once so that `get_rel_exists_in_reldir` does not re-read and re-deserialize it for every relation.
+        let reldir_key = rel_dir_to_key(spcnode, dbnode);
+        let buf = version.get(self, reldir_key, ctx).await?;
+        let reldir = RelDirectory::des(&buf)?;
+
        for rel in rels {
-            let n_blocks = self.get_rel_size(rel, version, ctx).await?;
+            let n_blocks = self
+                .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), ctx)
+                .await?;
            total_blocks += n_blocks as usize;
        }
        Ok(total_blocks)
@@ -487,6 +498,19 @@ impl Timeline {
        tag: RelTag,
        version: Version<'_>,
        ctx: &RequestContext,
+    ) -> Result<BlockNumber, PageReconstructError> {
+        self.get_rel_size_in_reldir(tag, version, None, ctx).await
+    }
+
+    /// Get size of a relation file. The relation must exist, otherwise an error is returned.
+    ///
+    /// See [`Self::get_rel_exists_in_reldir`] on why we need `deserialized_reldir_v1`.
+    pub(crate) async fn get_rel_size_in_reldir(
+        &self,
+        tag: RelTag,
+        version: Version<'_>,
+        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,
+        ctx: &RequestContext,
    ) -> Result<BlockNumber, PageReconstructError> {
        if tag.relnode == 0 {
            return Err(PageReconstructError::Other(
@@ -499,7 +523,9 @@ impl Timeline {
        }

        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)
-            && !self.get_rel_exists(tag, version, ctx).await?
+            && !self
+                .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx)
+                .await?
        {
            // FIXME: Postgres sometimes calls smgrcreate() to create
            // FSM, and smgrnblocks() on it immediately afterwards,
@@ -521,11 +547,28 @@ impl Timeline {
    ///
    /// Only shard 0 has a full view of the relations. Other shards only know about relations that
    /// the shard stores pages for.
+    ///
    pub(crate) async fn get_rel_exists(
        &self,
        tag: RelTag,
        version: Version<'_>,
        ctx: &RequestContext,
+    ) -> Result<bool, PageReconstructError> {
+        self.get_rel_exists_in_reldir(tag, version, None, ctx).await
+    }
+
+    /// Does the relation exist? With a cached deserialized `RelDirectory`.
+    ///
+    /// There are some cases where the caller loops across all relations. In that specific case,
+    /// the caller should obtain the deserialized `RelDirectory` first and then call this function
+    /// to avoid duplicated work of deserialization. This is a hack and should be removed by introducing
+    /// a new API (e.g., `get_rel_exists_batched`).
+    pub(crate) async fn get_rel_exists_in_reldir(
+        &self,
+        tag: RelTag,
+        version: Version<'_>,
+        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,
+        ctx: &RequestContext,
    ) -> Result<bool, PageReconstructError> {
        if tag.relnode == 0 {
            return Err(PageReconstructError::Other(
@@ -568,6 +611,17 @@ impl Timeline {
        // fetch directory listing (old)

        let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
+
+        if let Some((cached_key, dir)) = deserialized_reldir_v1 {
+            if cached_key == key {
+                return Ok(dir.rels.contains(&(tag.relnode, tag.forknum)));
+            } else if cfg!(test) || cfg!(feature = "testing") {
+                panic!("cached reldir key mismatch: {cached_key} != {key}");
+            } else {
+                warn!("cached reldir key mismatch: {cached_key} != {key}");
+            }
+            // Fall back to reading the directory from the datadir.
+        }
        let buf = version.get(self, key, ctx).await?;

        let dir = RelDirectory::des(&buf)?;
@@ -665,7 +719,7 @@ impl Timeline {

        let batches = keyspace.partition(
            self.get_shard_identity(),
-            Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
+            self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
        );

        let io_concurrency = IoConcurrency::spawn_from_conf(
@@ -905,7 +959,7 @@ impl Timeline {

            let batches = keyspace.partition(
                self.get_shard_identity(),
-                Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
+                self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
            );

            let io_concurrency = IoConcurrency::spawn_from_conf(
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -99,6 +99,7 @@ use crate::tenant::remote_timeline_client::{
    INITDB_PATH, MaybeDeletedIndexPart, remote_initdb_archive_path,
 };
 use crate::tenant::storage_layer::{DeltaLayer, ImageLayer};
+use crate::tenant::timeline::CheckOtherForCancel;
 use crate::tenant::timeline::delete::DeleteTimelineFlow;
 use crate::tenant::timeline::uninit::cleanup_timeline_directory;
 use crate::virtual_file::VirtualFile;
@@ -3261,11 +3262,11 @@ impl TenantShard {

    /// Trips the compaction circuit breaker if appropriate.
    pub(crate) fn maybe_trip_compaction_breaker(&self, err: &CompactionError) {
+        if err.is_cancel(CheckOtherForCancel::No /* XXX flip this to Yes so that all the Other() errors that are cancel don't trip the circuit breaker? */) {
+            return;
+        }
        match err {
-            err if err.is_cancel() => {}
-            CompactionError::ShuttingDown => (),
-            // Offload failures don't trip the circuit breaker, since they're cheap to retry and
-            // shouldn't block compaction.
+            CompactionError::ShuttingDown => unreachable!("is_cancel"),
            CompactionError::Offload(_) => {}
            CompactionError::CollectKeySpaceError(err) => {
                // CollectKeySpaceError::Cancelled and PageRead::Cancelled are handled in `err.is_cancel` branch.
@@ -3280,7 +3281,7 @@ impl TenantShard {
                    .unwrap()
                    .fail(&CIRCUIT_BREAKERS_BROKEN, err);
            }
-            CompactionError::AlreadyRunning(_) => {}
+            CompactionError::AlreadyRunning(_) => unreachable!("is_cancel, but XXX why?"),
        }
    }

@@ -7197,7 +7198,7 @@ mod tests {
            let end = desc
                .key_range
                .start
-                .add(Timeline::MAX_GET_VECTORED_KEYS.try_into().unwrap());
+                .add(tenant.conf.max_get_vectored_keys.get() as u32);
            reads.push(KeySpace {
                ranges: vec![start..end],
            });
@@ -11260,11 +11261,11 @@ mod tests {
                let mut keyspaces_at_lsn: HashMap<Lsn, KeySpaceRandomAccum> = HashMap::default();
                let mut used_keys: HashSet<Key> = HashSet::default();

-                while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
+                while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {
                    let selected_lsn = interesting_lsns.choose(&mut random).expect("not empty");
                    let mut selected_key = start_key.add(random.gen_range(0..KEY_DIMENSION_SIZE));

-                    while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
+                    while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {
                        if used_keys.contains(&selected_key)
                            || selected_key >= start_key.add(KEY_DIMENSION_SIZE)
                        {
--- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs
@@ -55,11 +55,11 @@ pub struct BatchLayerWriter {
 }

 impl BatchLayerWriter {
-    pub async fn new(conf: &'static PageServerConf) -> anyhow::Result<Self> {
-        Ok(Self {
+    pub fn new(conf: &'static PageServerConf) -> Self {
+        Self {
            generated_layer_writers: Vec::new(),
            conf,
-        })
+        }
    }

    pub fn add_unfinished_image_writer(
@@ -209,6 +209,7 @@ impl<'a> SplitImageLayerWriter<'a> {
    ) -> anyhow::Result<Self> {
        Ok(Self {
            target_layer_size,
+            // XXX make this lazy like in SplitDeltaLayerWriter?
            inner: ImageLayerWriter::new(
                conf,
                timeline_id,
@@ -223,7 +224,7 @@ impl<'a> SplitImageLayerWriter<'a> {
            conf,
            timeline_id,
            tenant_shard_id,
-            batches: BatchLayerWriter::new(conf).await?,
+            batches: BatchLayerWriter::new(conf),
            lsn,
            start_key,
            gate,
@@ -319,7 +320,7 @@ pub struct SplitDeltaLayerWriter<'a> {
 }

 impl<'a> SplitDeltaLayerWriter<'a> {
-    pub async fn new(
+    pub fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
@@ -327,8 +328,8 @@ impl<'a> SplitDeltaLayerWriter<'a> {
        target_layer_size: u64,
        gate: &'a utils::sync::gate::Gate,
        cancel: CancellationToken,
-    ) -> anyhow::Result<Self> {
-        Ok(Self {
+    ) -> Self {
+        Self {
            target_layer_size,
            inner: None,
            conf,
@@ -336,10 +337,10 @@ impl<'a> SplitDeltaLayerWriter<'a> {
            tenant_shard_id,
            lsn_range,
            last_key_written: Key::MIN,
-            batches: BatchLayerWriter::new(conf).await?,
+            batches: BatchLayerWriter::new(conf),
            gate,
            cancel,
-        })
+        }
    }

    pub async fn put_value(
@@ -510,9 +511,7 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        )
-        .await
-        .unwrap();
+        );

        image_writer
            .put_image(get_key(0), get_img(0), &ctx)
@@ -590,9 +589,7 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        )
-        .await
-        .unwrap();
+        );
        const N: usize = 2000;
        for i in 0..N {
            let i = i as u32;
@@ -692,9 +689,7 @@ mod tests {
            4 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        )
-        .await
-        .unwrap();
+        );

        image_writer
            .put_image(get_key(0), get_img(0), &ctx)
@@ -770,9 +765,7 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        )
-        .await
-        .unwrap();
+        );

        for i in 0..N {
            let i = i as u32;
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -22,8 +22,8 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
 use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
 use crate::tenant::throttle::Stats;
-use crate::tenant::timeline::CompactionError;
 use crate::tenant::timeline::compaction::CompactionOutcome;
+use crate::tenant::timeline::{CheckOtherForCancel, CompactionError};
 use crate::tenant::{TenantShard, TenantState};

 /// Semaphore limiting concurrent background tasks (across all tenants).
@@ -292,35 +292,12 @@ pub(crate) fn log_compaction_error(
    task_cancelled: bool,
    degrade_to_warning: bool,
 ) {
-    use CompactionError::*;
+    let is_cancel = err.is_cancel(CheckOtherForCancel::Yes);

-    use crate::tenant::PageReconstructError;
-    use crate::tenant::upload_queue::NotInitialized;
-
-    let level = match err {
-        e if e.is_cancel() => return,
-        ShuttingDown => return,
-        Offload(_) => Level::ERROR,
-        AlreadyRunning(_) => Level::ERROR,
-        CollectKeySpaceError(_) => Level::ERROR,
-        _ if task_cancelled => Level::INFO,
-        Other(err) => {
-            let root_cause = err.root_cause();
-
-            let upload_queue = root_cause
-                .downcast_ref::<NotInitialized>()
-                .is_some_and(|e| e.is_stopping());
-            let timeline = root_cause
-                .downcast_ref::<PageReconstructError>()
-                .is_some_and(|e| e.is_stopping());
-            let is_stopping = upload_queue || timeline;
-
-            if is_stopping {
-                Level::INFO
-            } else {
-                Level::ERROR
-            }
-        }
+    let level = if is_cancel || task_cancelled {
+        Level::INFO
+    } else {
+        Level::ERROR
    };

    if let Some((error_count, sleep_duration)) = retry_info {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -75,7 +75,7 @@ use utils::postgres_client::PostgresClientProtocol;
 use utils::rate_limit::RateLimit;
 use utils::seqwait::SeqWait;
 use utils::simple_rcu::{Rcu, RcuReadGuard};
-use utils::sync::gate::{Gate, GateGuard};
+use utils::sync::gate::{Gate, GateError, GateGuard};
 use utils::{completion, critical, fs_ext, pausable_failpoint};
 use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};

@@ -116,6 +116,7 @@ use crate::pgdatadir_mapping::{
    MAX_AUX_FILE_V2_DELTAS, MetricsUpdate,
 };
 use crate::task_mgr::TaskKind;
+use crate::tenant::blob_io::WriteBlobError;
 use crate::tenant::config::AttachmentMode;
 use crate::tenant::gc_result::GcResult;
 use crate::tenant::layer_map::LayerMap;
@@ -130,6 +131,7 @@ use crate::tenant::storage_layer::{
 };
 use crate::tenant::tasks::BackgroundLoopKind;
 use crate::tenant::timeline::logical_size::CurrentLogicalSize;
+use crate::virtual_file::owned_buffers_io::write::FlushTaskError;
 use crate::virtual_file::{MaybeFatalIo, VirtualFile};
 use crate::walingest::WalLagCooldown;
 use crate::walredo::RedoAttemptType;
@@ -760,7 +762,7 @@ pub(crate) enum CreateImageLayersError {
    PageReconstructError(#[source] PageReconstructError),

    #[error(transparent)]
-    Other(#[from] anyhow::Error),
+    Other(anyhow::Error),
 }

 impl From<layer_manager::Shutdown> for CreateImageLayersError {
@@ -817,8 +819,8 @@ pub(crate) enum GetVectoredError {
    #[error("timeline shutting down")]
    Cancelled,

-    #[error("requested too many keys: {0} > {}", Timeline::MAX_GET_VECTORED_KEYS)]
-    Oversized(u64),
+    #[error("requested too many keys: {0} > {1}")]
+    Oversized(u64, u64),

    #[error("requested at invalid LSN: {0}")]
    InvalidLsn(Lsn),
@@ -950,6 +952,18 @@ pub(crate) enum WaitLsnError {
    Timeout(String),
 }

+impl From<WaitLsnError> for tonic::Status {
+    fn from(err: WaitLsnError) -> Self {
+        use tonic::Code;
+        let code = match &err {
+            WaitLsnError::Timeout(_) => Code::Internal,
+            WaitLsnError::BadState(_) => Code::Internal,
+            WaitLsnError::Shutdown => Code::Unavailable,
+        };
+        tonic::Status::new(code, err.to_string())
+    }
+}
+
 // The impls below achieve cancellation mapping for errors.
 // Perhaps there's a way of achieving this with less cruft.

@@ -1007,7 +1021,7 @@ impl From<GetVectoredError> for PageReconstructError {
        match e {
            GetVectoredError::Cancelled => PageReconstructError::Cancelled,
            GetVectoredError::InvalidLsn(_) => PageReconstructError::Other(anyhow!("Invalid LSN")),
-            err @ GetVectoredError::Oversized(_) => PageReconstructError::Other(err.into()),
+            err @ GetVectoredError::Oversized(_, _) => PageReconstructError::Other(err.into()),
            GetVectoredError::MissingKey(err) => PageReconstructError::MissingKey(err),
            GetVectoredError::GetReadyAncestorError(err) => PageReconstructError::from(err),
            GetVectoredError::Other(err) => PageReconstructError::Other(err),
@@ -1187,7 +1201,6 @@ impl Timeline {
        }
    }

-    pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32;
    pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;

    /// Look up multiple page versions at a given LSN
@@ -1202,9 +1215,12 @@ impl Timeline {
    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
        let total_keyspace = query.total_keyspace();

-        let key_count = total_keyspace.total_raw_size().try_into().unwrap();
-        if key_count > Timeline::MAX_GET_VECTORED_KEYS {
-            return Err(GetVectoredError::Oversized(key_count));
+        let key_count = total_keyspace.total_raw_size();
+        if key_count > self.conf.max_get_vectored_keys.get() {
+            return Err(GetVectoredError::Oversized(
+                key_count as u64,
+                self.conf.max_get_vectored_keys.get() as u64,
+            ));
        }

        for range in &total_keyspace.ranges {
@@ -2047,9 +2063,10 @@ impl Timeline {
        };

        // Signal compaction failure to avoid L0 flush stalls when it's broken.
+        // XXX this looks an awful lot like the circuit breaker code? Can we dedupe classification?
        match &result {
            Ok(_) => self.compaction_failed.store(false, AtomicOrdering::Relaxed),
-            Err(e) if e.is_cancel() => {}
+            Err(e) if e.is_cancel(CheckOtherForCancel::No /* XXX flip this to Yes so that all the Other() errors that are cancel don't trip the circuit breaker? */) => {}
            Err(CompactionError::ShuttingDown) => {
                // Covered by the `Err(e) if e.is_cancel()` branch.
            }
@@ -5258,7 +5275,7 @@ impl Timeline {
                key = key.next();

                // Maybe flush `key_rest_accum`
-                if key_request_accum.raw_size() >= Timeline::MAX_GET_VECTORED_KEYS
+                if key_request_accum.raw_size() >= self.conf.max_get_vectored_keys.get() as u64
                    || (last_key_in_range && key_request_accum.raw_size() > 0)
                {
                    let query =
@@ -5516,7 +5533,7 @@ impl Timeline {
                self.should_check_if_image_layers_required(lsn)
            };

-        let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
+        let mut batch_image_writer = BatchLayerWriter::new(self.conf);

        let mut all_generated = true;

@@ -5620,7 +5637,8 @@ impl Timeline {
                self.cancel.clone(),
                ctx,
            )
-            .await?;
+            .await
+            .map_err(CreateImageLayersError::Other)?;

            fail_point!("image-layer-writer-fail-before-finish", |_| {
                Err(CreateImageLayersError::Other(anyhow::anyhow!(
@@ -5715,7 +5733,10 @@ impl Timeline {
            }
        }

-        let image_layers = batch_image_writer.finish(self, ctx).await?;
+        let image_layers = batch_image_writer
+            .finish(self, ctx)
+            .await
+            .map_err(CreateImageLayersError::Other)?;

        let mut guard = self.layers.write().await;

@@ -5917,19 +5938,61 @@ pub(crate) enum CompactionError {
    AlreadyRunning(&'static str),
 }

+/// Whether [`CompactionError::is_cancel`] should inspect the
+/// [`CompactionError::Other`] anyhow Error's root cause for
+/// typical causes of cancellation.
+pub(crate) enum CheckOtherForCancel {
+    No,
+    Yes,
+}
+
 impl CompactionError {
    /// Errors that can be ignored, i.e., cancel and shutdown.
-    pub fn is_cancel(&self) -> bool {
-        matches!(
+    pub fn is_cancel(&self, check_other: CheckOtherForCancel) -> bool {
+        if matches!(
            self,
            Self::ShuttingDown
-                | Self::AlreadyRunning(_)
+                | Self::AlreadyRunning(_) // XXX why do we treat AlreadyRunning as cancel?
                | Self::CollectKeySpaceError(CollectKeySpaceError::Cancelled)
                | Self::CollectKeySpaceError(CollectKeySpaceError::PageRead(
                    PageReconstructError::Cancelled
                ))
                | Self::Offload(OffloadError::Cancelled)
-        )
+        ) {
+            return true;
+        }
+
+        let root_cause = match &check_other {
+            CheckOtherForCancel::No => return false,
+            CheckOtherForCancel::Yes => {
+                if let Self::Other(other) = self {
+                    other.root_cause()
+                } else {
+                    return false;
+                }
+            }
+        };
+
+        let upload_queue = root_cause
+            .downcast_ref::<NotInitialized>()
+            .is_some_and(|e| e.is_stopping());
+        let timeline = root_cause
+            .downcast_ref::<PageReconstructError>()
+            .is_some_and(|e| e.is_stopping());
+        let buffered_writer_flush_task_canelled = root_cause
+            .downcast_ref::<FlushTaskError>()
+            .is_some_and(|e| e.is_cancel());
+        let write_blob_cancelled = root_cause
+            .downcast_ref::<WriteBlobError>()
+            .is_some_and(|e| e.is_cancel());
+        let gate_closed = root_cause
+            .downcast_ref::<GateError>()
+            .is_some_and(|e| e.is_cancel());
+        upload_queue
+            || timeline
+            || buffered_writer_flush_task_canelled
+            || write_blob_cancelled
+            || gate_closed
    }

    /// Critical errors that indicate data corruption.
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -11,9 +11,9 @@ use std::time::{Duration, Instant};

 use super::layer_manager::LayerManager;
 use super::{
-    CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder,
-    GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration,
-    Timeline,
+    CheckOtherForCancel, CompactFlags, CompactOptions, CompactionError, CreateImageLayersError,
+    DurationRecorder, GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus,
+    RecordedDuration, Timeline,
 };

 use crate::tenant::timeline::DeltaEntry;
@@ -1396,7 +1396,7 @@ impl Timeline {

            // Suppress errors when cancelled.
            Err(_) if self.cancel.is_cancelled() => {}
-            Err(err) if err.is_cancel() => {}
+            Err(err) if err.is_cancel(CheckOtherForCancel::No) => {}

            // Alert on critical errors that indicate data corruption.
            Err(err) if err.is_critical() => {
@@ -3516,10 +3516,7 @@ impl Timeline {
            self.get_compaction_target_size(),
            &self.gate,
            self.cancel.clone(),
-        )
-        .await
-        .context("failed to create delta layer writer")
-        .map_err(CompactionError::Other)?;
+        );

        #[derive(Default)]
        struct RewritingLayers {
@@ -4297,7 +4294,8 @@ impl TimelineAdaptor {
            self.timeline.cancel.clone(),
            ctx,
        )
-        .await?;
+        .await
+        .map_err(CreateImageLayersError::Other)?;

        fail_point!("image-layer-writer-fail-before-finish", |_| {
            Err(CreateImageLayersError::Other(anyhow::anyhow!(
@@ -4306,7 +4304,10 @@ impl TimelineAdaptor {
        });

        let keyspace = KeySpace {
-            ranges: self.get_keyspace(key_range, lsn, ctx).await?,
+            ranges: self
+                .get_keyspace(key_range, lsn, ctx)
+                .await
+                .map_err(CreateImageLayersError::Other)?,
        };
        // TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
        let outcome = self
@@ -4325,9 +4326,13 @@ impl TimelineAdaptor {
            unfinished_image_layer,
        } = outcome
        {
-            let (desc, path) = unfinished_image_layer.finish(ctx).await?;
+            let (desc, path) = unfinished_image_layer
+                .finish(ctx)
+                .await
+                .map_err(CreateImageLayersError::Other)?;
            let image_layer =
-                Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
+                Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)
+                    .map_err(CreateImageLayersError::Other)?;
            self.new_images.push(image_layer);
        }

--- a/pageserver/src/tenant/timeline/import_pgdata.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata.rs
@@ -201,8 +201,8 @@ async fn prepare_import(
        .await;
        match res {
            Ok(_) => break,
-            Err(err) => {
-                info!(?err, "indefinitely waiting for pgdata to finish");
+            Err(_err) => {
+                info!("indefinitely waiting for pgdata to finish");
                if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
                    .await
                    .is_ok()
--- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs
@@ -100,6 +100,7 @@ async fn run_v1(
                        .unwrap(),
                    import_job_concurrency: base.import_job_concurrency,
                    import_job_checkpoint_threshold: base.import_job_checkpoint_threshold,
+                    import_job_max_byte_range_size: base.import_job_max_byte_range_size,
                }
            }
            None => timeline.conf.timeline_import_config.clone(),
@@ -441,6 +442,7 @@ impl Plan {

        let mut last_completed_job_idx = start_after_job_idx.unwrap_or(0);
        let checkpoint_every: usize = import_config.import_job_checkpoint_threshold.into();
+        let max_byte_range_size: usize = import_config.import_job_max_byte_range_size.into();

        // Run import jobs concurrently up to the limit specified by the pageserver configuration.
        // Note that we process completed futures in the oreder of insertion. This will be the
@@ -456,7 +458,7 @@ impl Plan {

                    work.push_back(tokio::task::spawn(async move {
                        let _permit = permit;
-                        let res = job.run(job_timeline, &ctx).await;
+                        let res = job.run(job_timeline, max_byte_range_size, &ctx).await;
                        (job_idx, res)
                    }));
                },
@@ -471,6 +473,8 @@ impl Plan {
                            last_completed_job_idx = job_idx;

                            if last_completed_job_idx % checkpoint_every == 0 {
+                                tracing::info!(last_completed_job_idx, jobs=%jobs_in_plan, "Checkpointing import status");
+
                                let progress = ShardImportProgressV1 {
                                    jobs: jobs_in_plan,
                                    completed: last_completed_job_idx,
@@ -492,8 +496,6 @@ impl Plan {
                                    anyhow::anyhow!("Shut down while putting timeline import status")
                                })?;
                            }
-
-                            tracing::info!(last_completed_job_idx, jobs=%jobs_in_plan, "Checkpointing import status");
                        },
                        Some(Err(_)) => {
                            anyhow::bail!(
@@ -679,6 +681,7 @@ trait ImportTask {
    async fn doit(
        self,
        layer_writer: &mut ImageLayerWriter,
+        max_byte_range_size: usize,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize>;
 }
@@ -715,6 +718,7 @@ impl ImportTask for ImportSingleKeyTask {
    async fn doit(
        self,
        layer_writer: &mut ImageLayerWriter,
+        _max_byte_range_size: usize,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
        layer_writer.put_image(self.key, self.buf, ctx).await?;
@@ -768,10 +772,9 @@ impl ImportTask for ImportRelBlocksTask {
    async fn doit(
        self,
        layer_writer: &mut ImageLayerWriter,
+        max_byte_range_size: usize,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
-        const MAX_BYTE_RANGE_SIZE: usize = 4 * 1024 * 1024;
-
        debug!("Importing relation file");

        let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
@@ -796,7 +799,7 @@ impl ImportTask for ImportRelBlocksTask {
                assert_eq!(key.len(), 1);
                assert!(!acc.is_empty());
                assert!(acc_end > acc_start);
-                if acc_end == start && end - acc_start <= MAX_BYTE_RANGE_SIZE {
+                if acc_end == start && end - acc_start <= max_byte_range_size {
                    acc.push(key.pop().unwrap());
                    Ok((acc, acc_start, end))
                } else {
@@ -860,6 +863,7 @@ impl ImportTask for ImportSlruBlocksTask {
    async fn doit(
        self,
        layer_writer: &mut ImageLayerWriter,
+        _max_byte_range_size: usize,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
        debug!("Importing SLRU segment file {}", self.path);
@@ -906,12 +910,13 @@ impl ImportTask for AnyImportTask {
    async fn doit(
        self,
        layer_writer: &mut ImageLayerWriter,
+        max_byte_range_size: usize,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
        match self {
-            Self::SingleKey(t) => t.doit(layer_writer, ctx).await,
-            Self::RelBlocks(t) => t.doit(layer_writer, ctx).await,
-            Self::SlruBlocks(t) => t.doit(layer_writer, ctx).await,
+            Self::SingleKey(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,
+            Self::RelBlocks(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,
+            Self::SlruBlocks(t) => t.doit(layer_writer, max_byte_range_size, ctx).await,
        }
    }
 }
@@ -952,7 +957,12 @@ impl ChunkProcessingJob {
        }
    }

-    async fn run(self, timeline: Arc<Timeline>, ctx: &RequestContext) -> anyhow::Result<()> {
+    async fn run(
+        self,
+        timeline: Arc<Timeline>,
+        max_byte_range_size: usize,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<()> {
        let mut writer = ImageLayerWriter::new(
            timeline.conf,
            timeline.timeline_id,
@@ -967,7 +977,7 @@ impl ChunkProcessingJob {

        let mut nimages = 0;
        for task in self.tasks {
-            nimages += task.doit(&mut writer, ctx).await?;
+            nimages += task.doit(&mut writer, max_byte_range_size, ctx).await?;
        }

        let resident_layer = if nimages > 0 {
--- a/proxy/src/auth/backend/classic.rs
+++ b/proxy/src/auth/backend/classic.rs
@@ -25,19 +25,15 @@ pub(super) async fn authenticate(
        }
        AuthSecret::Scram(secret) => {
            debug!("auth endpoint chooses SCRAM");
-            let scram = auth::Scram(&secret, ctx);

-            let auth_outcome = tokio::time::timeout(config.scram_protocol_timeout, async {
-                AuthFlow::new(client, scram)
-                    .authenticate()
-                    .await
-                    .inspect_err(|error| {
-                        warn!(?error, "error processing scram messages");
-                    })
-            })
+            let auth_outcome = tokio::time::timeout(
+                config.scram_protocol_timeout,
+                AuthFlow::new(client, auth::Scram(&secret, ctx)).authenticate(),
+            )
            .await
            .inspect_err(|_| warn!("error processing scram messages error = authentication timed out, execution time exceeded {} seconds", config.scram_protocol_timeout.as_secs()))
-            .map_err(auth::AuthError::user_timeout)??;
+            .map_err(auth::AuthError::user_timeout)?
+            .inspect_err(|error| warn!(?error, "error processing scram messages"))?;

            let client_key = match auth_outcome {
                sasl::Outcome::Success(key) => key,
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -159,7 +159,7 @@ pub async fn task_main(
 }

 #[allow(clippy::too_many_arguments)]
-pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
+pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    config: &'static ProxyConfig,
    backend: &'static ConsoleRedirectBackend,
    ctx: &RequestContext,
--- a/proxy/src/control_plane/client/cplane_proxy_v1.rs
+++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs
@@ -7,7 +7,9 @@ use std::time::Duration;

 use ::http::HeaderName;
 use ::http::header::AUTHORIZATION;
+use bytes::Bytes;
 use futures::TryFutureExt;
+use hyper::StatusCode;
 use postgres_client::config::SslMode;
 use tokio::time::Instant;
 use tracing::{Instrument, debug, info, info_span, warn};
@@ -72,28 +74,34 @@ impl NeonControlPlaneClient {
        role: &RoleName,
    ) -> Result<AuthInfo, GetAuthInfoError> {
        async {
-            let request = self
-                .endpoint
-                .get_path("get_endpoint_access_control")
-                .header(X_REQUEST_ID, ctx.session_id().to_string())
-                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
-                .query(&[("session_id", ctx.session_id())])
-                .query(&[
-                    ("application_name", ctx.console_application_name().as_str()),
-                    ("endpointish", endpoint.as_str()),
-                    ("role", role.as_str()),
-                ])
-                .build()?;
-
-            debug!(url = request.url().as_str(), "sending http request");
-            let start = Instant::now();
            let response = {
-                let _pause = ctx.latency_timer_pause_at(start, crate::metrics::Waiting::Cplane);
-                self.endpoint.execute(request).await?
-            };
-            info!(duration = ?start.elapsed(), "received http response");
+                let request = self
+                    .endpoint
+                    .get_path("get_endpoint_access_control")
+                    .header(X_REQUEST_ID, ctx.session_id().to_string())
+                    .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
+                    .query(&[("session_id", ctx.session_id())])
+                    .query(&[
+                        ("application_name", ctx.console_application_name().as_str()),
+                        ("endpointish", endpoint.as_str()),
+                        ("role", role.as_str()),
+                    ])
+                    .build()?;

-            let body = match parse_body::<GetEndpointAccessControl>(response).await {
+                debug!(url = request.url().as_str(), "sending http request");
+                let start = Instant::now();
+                let _pause = ctx.latency_timer_pause_at(start, crate::metrics::Waiting::Cplane);
+                let response = self.endpoint.execute(request).await?;
+
+                info!(duration = ?start.elapsed(), "received http response");
+
+                response
+            };
+
+            let body = match parse_body::<GetEndpointAccessControl>(
+                response.status(),
+                response.bytes().await?,
+            ) {
                Ok(body) => body,
                // Error 404 is special: it's ok not to have a secret.
                // TODO(anna): retry
@@ -184,7 +192,10 @@ impl NeonControlPlaneClient {
            drop(pause);
            info!(duration = ?start.elapsed(), "received http response");

-            let body = parse_body::<EndpointJwksResponse>(response).await?;
+            let body = parse_body::<EndpointJwksResponse>(
+                response.status(),
+                response.bytes().await.map_err(ControlPlaneError::from)?,
+            )?;

            let rules = body
                .jwks
@@ -236,7 +247,7 @@ impl NeonControlPlaneClient {
            let response = self.endpoint.execute(request).await?;
            drop(pause);
            info!(duration = ?start.elapsed(), "received http response");
-            let body = parse_body::<WakeCompute>(response).await?;
+            let body = parse_body::<WakeCompute>(response.status(), response.bytes().await?)?;

            // Unfortunately, ownership won't let us use `Option::ok_or` here.
            let (host, port) = match parse_host_port(&body.address) {
@@ -487,33 +498,33 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
 }

 /// Parse http response body, taking status code into account.
-async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
-    response: http::Response,
+fn parse_body<T: for<'a> serde::Deserialize<'a>>(
+    status: StatusCode,
+    body: Bytes,
 ) -> Result<T, ControlPlaneError> {
-    let status = response.status();
    if status.is_success() {
        // We shouldn't log raw body because it may contain secrets.
        info!("request succeeded, processing the body");
-        return Ok(response.json().await?);
+        return Ok(serde_json::from_slice(&body).map_err(std::io::Error::other)?);
    }
-    let s = response.bytes().await?;
+
    // Log plaintext to be able to detect, whether there are some cases not covered by the error struct.
-    info!("response_error plaintext: {:?}", s);
+    info!("response_error plaintext: {:?}", body);

    // Don't throw an error here because it's not as important
    // as the fact that the request itself has failed.
-    let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| {
+    let mut body = serde_json::from_slice(&body).unwrap_or_else(|e| {
        warn!("failed to parse error body: {e}");
-        ControlPlaneErrorMessage {
+        Box::new(ControlPlaneErrorMessage {
            error: "reason unclear (malformed error message)".into(),
            http_status_code: status,
            status: None,
-        }
+        })
    });
    body.http_status_code = status;

    warn!("console responded with an error ({status}): {body:?}");
-    Err(ControlPlaneError::Message(Box::new(body)))
+    Err(ControlPlaneError::Message(body))
 }

 fn parse_host_port(input: &str) -> Option<(&str, u16)> {
--- a/proxy/src/http/mod.rs
+++ b/proxy/src/http/mod.rs
@@ -4,9 +4,10 @@

 pub mod health_server;

-use std::time::Duration;
+use std::time::{Duration, Instant};

 use bytes::Bytes;
+use futures::FutureExt;
 use http::Method;
 use http_body_util::BodyExt;
 use hyper::body::Body;
@@ -109,15 +110,31 @@ impl Endpoint {
    }

    /// Execute a [request](reqwest::Request).
-    pub(crate) async fn execute(&self, request: Request) -> Result<Response, Error> {
-        let _timer = Metrics::get()
+    pub(crate) fn execute(
+        &self,
+        request: Request,
+    ) -> impl Future<Output = Result<Response, Error>> {
+        let metric = Metrics::get()
            .proxy
            .console_request_latency
-            .start_timer(ConsoleRequest {
+            .with_labels(ConsoleRequest {
                request: request.url().path(),
            });

-        self.client.execute(request).await
+        let req = self.client.execute(request).boxed();
+
+        async move {
+            let start = Instant::now();
+            scopeguard::defer!({
+                Metrics::get()
+                    .proxy
+                    .console_request_latency
+                    .get_metric(metric)
+                    .observe_duration_since(start);
+            });
+
+            req.await
+        }
    }
 }

--- a/proxy/src/pqproto.rs
+++ b/proxy/src/pqproto.rs
@@ -186,7 +186,7 @@ where
 pub async fn read_message<'a, S>(
    stream: &mut S,
    buf: &'a mut Vec<u8>,
-    max: usize,
+    max: u32,
 ) -> io::Result<(u8, &'a mut [u8])>
 where
    S: AsyncRead + Unpin,
@@ -206,7 +206,7 @@ where
    let header = read!(stream => Header);

    // as described above, the length must be at least 4.
-    let Some(len) = (header.len.get() as usize).checked_sub(4) else {
+    let Some(len) = header.len.get().checked_sub(4) else {
        return Err(io::Error::other(format!(
            "invalid startup message length {}, must be at least 4.",
            header.len,
@@ -222,7 +222,7 @@ where
    }

    // read in our entire message.
-    buf.resize(len, 0);
+    buf.resize(len as usize, 0);
    stream.read_exact(buf).await?;

    Ok((header.tag, buf))
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -1,3 +1,4 @@
+use futures::{FutureExt, TryFutureExt};
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};
@@ -57,7 +58,7 @@ pub(crate) enum HandshakeData<S> {
 /// It's easier to work with owned `stream` here as we need to upgrade it to TLS;
 /// we also take an extra care of propagating only the select handshake errors to client.
 #[tracing::instrument(skip_all)]
-pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
+pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
    ctx: &RequestContext,
    stream: S,
    mut tls: Option<&TlsConfig>,
@@ -108,7 +109,9 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                                        }
                                    }
                                }
-                            });
+                            })
+                            .map_ok(Box::new)
+                            .boxed();

                        res?;

@@ -146,7 +149,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                            tls.cert_resolver.resolve(conn_info.server_name());

                        let tls = Stream::Tls {
-                            tls: Box::new(tls_stream),
+                            tls: tls_stream,
                            tls_server_end_point,
                        };
                        (stream, msg) = PqStream::parse_startup(tls).await?;
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -270,7 +270,7 @@ impl ReportableError for ClientRequestError {
 }

 #[allow(clippy::too_many_arguments)]
-pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
+pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    config: &'static ProxyConfig,
    auth_backend: &'static auth::Backend<'static, ()>,
    ctx: &RequestContext,
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -1,3 +1,4 @@
+use futures::FutureExt;
 use smol_str::SmolStr;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::debug;
@@ -89,6 +90,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
            .compute
            .cancel_closure
            .try_cancel_query(compute_config)
+            .boxed()
            .await
        {
            tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
--- a/proxy/src/sasl/stream.rs
+++ b/proxy/src/sasl/stream.rs
@@ -30,52 +30,53 @@ where
    F: FnOnce(&str) -> super::Result<M>,
    M: Mechanism,
 {
-    let sasl = {
+    let (mut mechanism, mut input) = {
        // pause the timer while we communicate with the client
        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

        // Initial client message contains the chosen auth method's name.
        let msg = stream.read_password_message().await?;
-        super::FirstMessage::parse(msg).ok_or(super::Error::BadClientMessage("bad sasl message"))?
+
+        let sasl = super::FirstMessage::parse(msg)
+            .ok_or(super::Error::BadClientMessage("bad sasl message"))?;
+
+        (mechanism(sasl.method)?, sasl.message)
    };

-    let mut mechanism = mechanism(sasl.method)?;
-    let mut input = sasl.message;
    loop {
-        let step = mechanism
-            .exchange(input)
-            .inspect_err(|error| tracing::info!(?error, "error during SASL exchange"))?;
-
-        match step {
-            Step::Continue(moved_mechanism, reply) => {
+        match mechanism.exchange(input) {
+            Ok(Step::Continue(moved_mechanism, reply)) => {
                mechanism = moved_mechanism;

-                // pause the timer while we communicate with the client
-                let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-
                // write reply
                let sasl_msg = BeAuthenticationSaslMessage::Continue(reply.as_bytes());
                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
-
-                // get next input
-                stream.flush().await?;
-                let msg = stream.read_password_message().await?;
-                input = std::str::from_utf8(msg)
-                    .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;
+                drop(reply);
            }
-            Step::Success(result, reply) => {
-                // pause the timer while we communicate with the client
-                let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-
+            Ok(Step::Success(result, reply)) => {
                // write reply
                let sasl_msg = BeAuthenticationSaslMessage::Final(reply.as_bytes());
                stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
                stream.write_message(BeMessage::AuthenticationOk);
+
                // exit with success
                break Ok(Outcome::Success(result));
            }
            // exit with failure
-            Step::Failure(reason) => break Ok(Outcome::Failure(reason)),
+            Ok(Step::Failure(reason)) => break Ok(Outcome::Failure(reason)),
+            Err(error) => {
+                tracing::info!(?error, "error during SASL exchange");
+                return Err(error);
+            }
        }
+
+        // pause the timer while we communicate with the client
+        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+
+        // get next input
+        stream.flush().await?;
+        let msg = stream.read_password_message().await?;
+        input = std::str::from_utf8(msg)
+            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;
    }
 }
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -72,7 +72,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> PqStream<S> {
 impl<S: AsyncRead + Unpin> PqStream<S> {
    /// Read a raw postgres packet, which will respect the max length requested.
    /// This is not cancel safe.
-    async fn read_raw_expect(&mut self, tag: u8, max: usize) -> io::Result<&mut [u8]> {
+    async fn read_raw_expect(&mut self, tag: u8, max: u32) -> io::Result<&mut [u8]> {
        let (actual_tag, msg) = read_message(&mut self.stream, &mut self.read, max).await?;
        if actual_tag != tag {
            return Err(io::Error::other(format!(
@@ -89,7 +89,7 @@ impl<S: AsyncRead + Unpin> PqStream<S> {
        // passwords are usually pretty short
        // and SASL SCRAM messages are no longer than 256 bytes in my testing
        // (a few hashes and random bytes, encoded into base64).
-        const MAX_PASSWORD_LENGTH: usize = 512;
+        const MAX_PASSWORD_LENGTH: u32 = 512;
        self.read_raw_expect(FE_PASSWORD_MESSAGE, MAX_PASSWORD_LENGTH)
            .await
    }
--- a/proxy/src/tls/postgres_rustls.rs
+++ b/proxy/src/tls/postgres_rustls.rs
@@ -31,7 +31,9 @@ mod private {
        type Output = io::Result<RustlsStream<S>>;

        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-            Pin::new(&mut self.inner).poll(cx).map_ok(RustlsStream)
+            Pin::new(&mut self.inner)
+                .poll(cx)
+                .map_ok(|s| RustlsStream(Box::new(s)))
        }
    }

@@ -57,7 +59,7 @@ mod private {
        }
    }

-    pub struct RustlsStream<S>(TlsStream<S>);
+    pub struct RustlsStream<S>(Box<TlsStream<S>>);

    impl<S> postgres_client::tls::TlsStream for RustlsStream<S>
    where
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -423,6 +423,7 @@ class PageserverImportConfig:
            "import_job_concurrency": self.import_job_concurrency,
            "import_job_soft_size_limit": self.import_job_soft_size_limit,
            "import_job_checkpoint_threshold": self.import_job_checkpoint_threshold,
+            "import_job_max_byte_range_size": 4 * 1024 * 1024,  # Pageserver default
        }
        return ("timeline_import_config", value)
Author	SHA1	Message	Date
Christian Schwarz	318700600d	refactor: lift inspection of CompactionError::Other(x) => x.root_cause() into CompactionError::is_cancel There are a couple of places that call CompactionError::is_cancel but don't check the Other variant for root cause. But they should, because some cancellations are observed by code that results in ::Other errors. I don't think there's a _serious_ case where this causes problems. The worst case is the circuit breaker, which we currently trip on ::Other errors that are due to cancellation. A tripped circuit breaker on timelines that are shutting down doesn't really matter practically, but it's unaesthetic and might cause noise down the line, so this PR fixes that at least. In any case, this PR forces future callers of is_cancel() to explicitly recognize the suboptimal state of affairs wrt error handling in compaction, thereby hopefully preventing errors of this kind from creeping in. (The _right_ solution for the compaction code probably is the approach I took in #11853: keep using anyhow but have a unified way / pattern of bubbling up cancellation, so that we don't need to perform the downcast trickery).	2025-06-27 14:10:24 +02:00
Christian Schwarz	75ef17e2f7	refactor: force explicit mapping to CreateImageLayersError::Other	2025-06-27 13:31:49 +02:00
Christian Schwarz	93026b7899	address most types of error that happen during creation of delta layers	2025-06-27 13:17:19 +02:00
Vlad Lazar	b69d103b90	pageserver: make import job max byte range size configurable (#12117 ) ## Problem We want to repro an OOM situation, but large partial reads are required. ## Summary of Changes Make the max partial read size configurable for import jobs.	2025-06-04 10:44:23 +00:00
a-masterov	208cbd52d4	Add postgis to the test image (#11672 ) ## Problem We don't currently run tests for PostGIS in our test environment. ## Summary of Changes - Added PostGIS test support for PostgreSQL v16 and v17 - Configured different PostGIS versions based on PostgreSQL version: - PostgreSQL v17: PostGIS 3.5.0 - PostgreSQL v14/v15/v16: PostGIS 3.3.3 - Added necessary test scripts and configurations This ensures our PostgreSQL implementation remains compatible with this widely-used extension. --------- Co-authored-by: Alexander Bayandin <alexander@neon.tech> Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>	2025-06-04 09:57:31 +00:00
Alex Chi Z.	c567ed0de0	feat(pageserver): feature flag counter metrics (#12112 ) ## Problem Part of https://github.com/neondatabase/neon/issues/11813 ## Summary of changes Add a counter on the feature evaluation outcome and we will set up alerts for too many failed evaluations in the future. Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-06-04 06:41:42 +00:00
Mikhail	c698cee19a	ComputeSpec: prewarm_lfc_on_startup -> autoprewarm (#12120 ) https://github.com/neondatabase/cloud/pull/29472 https://github.com/neondatabase/cloud/issues/26346	2025-06-04 05:38:03 +00:00
Tristan Partin	4a3f32bf4a	Clean up compute_tools::http::JsonResponse::invalid_status() (#12110 ) JsonResponse::error() properly logs an error message which can be read in the compute logs. invalid_status() was not going through that helper function, thus not logging anything. Signed-off-by: Tristan Partin <tristan@neon.tech>	2025-06-03 16:00:56 +00:00
Vlad Lazar	a963aab14b	pageserver: set default wal receiver proto to interpreted (#12100 ) ## Problem This is already the default in production and in our test suite. ## Summary of changes Set the default proto to interpreted to reduce friction when spinning up new regions or cells.	2025-06-03 14:57:36 +00:00
Erik Grinaker	5bdba70f7d	page_api: only validate Protobuf → domain type conversion (#12115 ) ## Problem Currently, `page_api` domain types validate message invariants both when converting Protobuf → domain and domain → Protobuf. This is annoying for clients, because they can't use stream combinators to convert streamed requests (needed for hot path performance), and also performs the validation twice in the common case. Blocks #12099. ## Summary of changes Only validate the Protobuf → domain type conversion, i.e. on the receiver side, and make domain → Protobuf infallible. This is where it matters -- the Protobuf types are less strict than the domain types, and receivers should expect all sorts of junk from senders (they're not required to validate anyway, and can just construct an invalid message manually). Also adds a missing `impl From<CheckRelExistsRequest> for proto::CheckRelExistsRequest`.	2025-06-03 13:50:41 +00:00
Trung Dinh	25fffd3a55	Validate max_batch_size against max_get_vectored_keys (#12052 ) ## Problem Setting `max_batch_size` to anything higher than `Timeline::MAX_GET_VECTORED_KEYS` will cause runtime error. We should rather fail fast at startup if this is the case. ## Summary of changes * Create `max_get_vectored_keys` as a new configuration (default to 32); * Validate `max_batch_size` against `max_get_vectored_keys` right at config parsing and validation. Closes https://github.com/neondatabase/neon/issues/11994	2025-06-03 13:37:11 +00:00
Erik Grinaker	e00fd45bba	page_api: remove smallvec (#12095 ) ## Problem The gRPC `page_api` domain types used smallvecs to avoid heap allocations in the common case where a single page is requested. However, this is pointless: the Protobuf types use a normal vec, and converting a smallvec into a vec always causes a heap allocation anyway. ## Summary of changes Use a normal `Vec` instead of a `SmallVec` in `page_api` domain types.	2025-06-03 12:20:34 +00:00
Vlad Lazar	3b8be98b67	pageserver: remove backtrace in info level log (#12108 ) ## Problem We print a backtrace in an info level log every 10 seconds while waiting for the import data to land in the bucket. ## Summary of changes The backtrace is not useful. Remove it.	2025-06-03 09:07:07 +00:00
a-masterov	3e72edede5	Use full hostname for ONNX URL (#12064 ) ## Problem We should use the full host name for computes, according to https://github.com/neondatabase/cloud/issues/26005 , but now a truncated host name is used. ## Summary of changes The URL for REMOTE_ONNX is rewritten using the FQDN.	2025-06-03 07:23:17 +00:00
Alex Chi Z.	a650f7f5af	fix(pageserver): only deserialize reldir key once during get_db_size (#12102 ) ## Problem fix https://github.com/neondatabase/neon/issues/12101; this is a quick hack and we need a better API in the future. In `get_db_size`, we call `get_reldir_size` for every relation. However, we deserialize the same reldir directory again for every relation. This creates huge CPU overhead. ## Summary of changes Get and deserialize the reldir v1 key once and use it across all get_rel_size requests. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-06-03 05:00:34 +00:00
Erik Grinaker	fc3994eb71	pageserver: initial gRPC page service implementation (#12094 ) ## Problem We should expose the page service over gRPC. Requires #12093. Touches #11728. ## Summary of changes This patch adds an initial page service implementation over gRPC. It ties in with the existing `PageServerHandler` request logic, to avoid the implementations drifting apart for the core read path. This is just a bare-bones functional implementation. Several important aspects have been omitted, and will be addressed in follow-up PRs: * Limited observability: minimal tracing, no logging, limited metrics and timing, etc. * Rate limiting will currently block. * No performance optimization. * No cancellation handling. * No tests. I've only done rudimentary testing of this, but Pagebench passes at least.	2025-06-02 17:15:18 +00:00
Conrad Ludgate	781bf4945d	proxy: optimise future layout allocations (#12104 ) A smaller version of #12066 that is somewhat easier to review. Now that I've been using https://crates.io/crates/top-type-sizes I've found a lot more of the low hanging fruit that can be tweaked to reduce the memory usage. Some context for the optimisations: Rust's stack allocation in futures is quite naive. Stack variables, even if moved, often still end up taking space in the future. Rearranging the order in which variables are defined, and properly scoping them can go a long way. `async fn` and `async move {}` have a consequence that they always duplicate the "upvars" (aka captures). All captures are permanently allocated in the future, even if moved. We can be mindful when writing futures to only capture as little as possible. TlsStream is massive. Needs boxing so it doesn't contribute to the above issue. ## Measurements from `top-type-sizes`: ### Before ``` 10328 {async block@proxy::proxy::task_main::{closure#0}::{closure#0}} align=8 6120 {async fn body of proxy::proxy::handle_client<proxy::protocol2::ChainRW<tokio::net::TcpStream>>()} align=8 ``` ### After ``` 4040 {async block@proxy::proxy::task_main::{closure#0}::{closure#0}} 4704 {async fn body of proxy::proxy::handle_client<proxy::protocol2::ChainRW<tokio::net::TcpStream>>()} align=8 ```	2025-06-02 16:13:30 +00:00
Erik Grinaker	a21c1174ed	pagebench: add gRPC support for `get-page-latest-lsn` (#12077 ) ## Problem We need gRPC support in Pagebench to benchmark the new gRPC Pageserver implementation. Touches #11728. ## Summary of changes Adds a `Client` trait to make the client transport swappable, and a gRPC client via a `--protocol grpc` parameter. This must also specify the connstring with the gRPC port: ``` pagebench get-page-latest-lsn --protocol grpc --page-service-connstring grpc://localhost:51051 ``` The client is implemented using the raw Tonic-generated gRPC client, to minimize client overhead.	2025-06-02 14:50:49 +00:00
Erik Grinaker	8d7ed2a4ee	pageserver: add gRPC observability middleware (#12093 ) ## Problem The page service logic asserts that a tracing span is present with tenant/timeline/shard IDs. An initial gRPC page service implementation thus requires a tracing span. Touches https://github.com/neondatabase/neon/issues/11728. ## Summary of changes Adds an `ObservabilityLayer` middleware that generates a tracing span and decorates it with IDs from the gRPC metadata. This is a minimal implementation to address the tracing span assertion. It will be extended with additional observability in later PRs.	2025-06-02 11:46:50 +00:00