Compare commits

..

16 Commits

Author SHA1 Message Date
Anastasia Lubennikova
dfe815a032 Fix sfcgal_version for v17 2024-10-08 11:56:36 +01:00
Anastasia Lubennikova
8c6a83659a fix typos 2024-10-07 13:47:55 +01:00
Anastasia Lubennikova
0151deb76a Build PostGIS 3.5.0 only for v17
because of sfcgal dependency issue.
See comments in compute/Dockerfile.compute-node
2024-10-07 13:38:16 +01:00
Anastasia Lubennikova
461a680d64 fix unit and plpgsql_check 2024-10-04 17:01:06 +01:00
Anastasia Lubennikova
3de2add040 enable ip4r and pg_ivm for v17 2024-10-04 16:28:06 +01:00
Anastasia Lubennikova
aa6500a64e fix test tar for plv8 2024-10-04 11:07:25 +01:00
Anastasia Lubennikova
9968aec2c5 Fix build dependency on debian bookworm for plv8 2024-10-03 16:05:54 +01:00
Anastasia Lubennikova
ad9800aa18 Fix plv8 build 2024-10-03 15:40:21 +01:00
Anastasia Lubennikova
29e032d326 Fix plv8 build 2024-10-03 12:53:14 +01:00
Anastasia Lubennikova
d754a70802 Merge branch 'main' into enable_v17_extensions 2024-10-03 11:31:08 +01:00
Anastasia Lubennikova
278fe5e736 fix postgis build 2024-10-03 11:23:44 +01:00
Anastasia Lubennikova
a72919e0ec fix typo in postgis build 2024-10-02 16:37:48 +01:00
Anastasia Lubennikova
5a195605ad Fix rdkit build 2024-10-02 15:45:24 +01:00
Anastasia Lubennikova
8d64d9cca4 Merge branch 'main' into enable_v17_extensions 2024-10-02 14:38:14 +01:00
Anastasia Lubennikova
321014d37a fix cgal version 2024-10-02 14:31:35 +01:00
Anastasia Lubennikova
e410e794d6 Enable support of extensions for v17
except for Rust extensions and
ones that do not have a new release yet.

Add comments for each extension.
2024-10-02 13:53:55 +01:00
356 changed files with 3441 additions and 6400 deletions

View File

@@ -5,7 +5,9 @@
!Cargo.toml
!Makefile
!rust-toolchain.toml
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh
# Directories
@@ -15,12 +17,15 @@
!compute_tools/
!control_plane/
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!storage_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/
!trace/
!vendor/postgres-*/
!workspace_hack/

View File

@@ -1,41 +0,0 @@
name: Report Workflow Stats
on:
workflow_run:
workflows:
- Add `external` label to issues and PRs created by external users
- Benchmarking
- Build and Test
- Build and Test Locally
- Build build-tools image
- Check Permissions
- Check build-tools image
- Check neon with extra platform builds
- Cloud Regression Test
- Create Release Branch
- Handle `approved-for-ci-run` label
- Lint GitHub Workflows
- Notify Slack channel about upcoming release
- Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
- Pin build-tools image
- Prepare benchmarking databases by restoring dumps
- Push images to ACR
- Test Postgres client libraries
- Trigger E2E Tests
- cleanup caches by a branch
types: [completed]
jobs:
gh-workflow-stats:
name: Github Workflow Stats
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export GH Workflow Stats
uses: fedordikarev/gh-workflow-stats-action@v0.1.2
with:
DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
DB_TABLE: "gh_workflow_stats_neon"
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_RUN_ID: ${{ github.event.workflow_run.id }}

View File

@@ -1,6 +1,5 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
/libs/remote_storage/ @neondatabase/storage

Cargo.lock (generated), 106 changed lines
View File

@@ -668,20 +668,19 @@ dependencies = [
[[package]]
name = "axum"
version = "0.7.5"
version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf"
dependencies = [
"async-trait",
"axum-core",
"axum-core 0.3.4",
"base64 0.21.1",
"bitflags 1.3.2",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.4.1",
"hyper-util",
"http 0.2.9",
"http-body 0.4.5",
"hyper 0.14.30",
"itoa",
"matchit 0.7.0",
"memchr",
@@ -694,13 +693,56 @@ dependencies = [
"serde_path_to_error",
"serde_urlencoded",
"sha1",
"sync_wrapper 1.0.1",
"sync_wrapper 0.1.2",
"tokio",
"tokio-tungstenite",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"itoa",
"matchit 0.7.0",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 1.0.1",
"tower",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"mime",
"rustversion",
"tower-layer",
"tower-service",
]
[[package]]
@@ -721,7 +763,6 @@ dependencies = [
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -1220,7 +1261,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"cfg-if",
"chrono",
"clap",
@@ -1820,7 +1860,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct 0.2.0",
"base64ct",
"crypto-bigint 0.5.5",
"digest",
"ff 0.13.0",
@@ -1830,8 +1869,6 @@ dependencies = [
"pkcs8 0.10.2",
"rand_core 0.6.4",
"sec1 0.7.3",
"serde_json",
"serdect",
"subtle",
"zeroize",
]
@@ -4040,8 +4077,6 @@ dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol",
"serde",
"serde_json",
]
[[package]]
@@ -5261,7 +5296,6 @@ dependencies = [
"der 0.7.8",
"generic-array",
"pkcs8 0.10.2",
"serdect",
"subtle",
"zeroize",
]
@@ -5516,16 +5550,6 @@ dependencies = [
"syn 2.0.52",
]
[[package]]
name = "serdect"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
dependencies = [
"base16ct 0.2.0",
"serde",
]
[[package]]
name = "sha1"
version = "0.10.5"
@@ -6306,9 +6330,9 @@ dependencies = [
[[package]]
name = "tokio-tungstenite"
version = "0.21.0"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38"
checksum = "2b2dbec703c26b00d74844519606ef15d09a7d6857860f84ad223dec002ddea2"
dependencies = [
"futures-util",
"log",
@@ -6375,7 +6399,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum",
"axum 0.7.5",
"base64 0.22.1",
"bytes",
"h2 0.4.4",
@@ -6582,14 +6606,14 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tungstenite"
version = "0.21.0"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1"
checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9"
dependencies = [
"byteorder",
"bytes",
"data-encoding",
"http 1.1.0",
"http 0.2.9",
"httparse",
"log",
"rand 0.8.5",
@@ -6817,7 +6841,7 @@ name = "vm_monitor"
version = "0.1.0"
dependencies = [
"anyhow",
"axum",
"axum 0.6.20",
"cgroups-rs",
"clap",
"futures",
@@ -7276,8 +7300,12 @@ version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"axum",
"axum-core",
"aws-config",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1",
"base64ct",
"bytes",
@@ -7318,7 +7346,6 @@ dependencies = [
"num-traits",
"once_cell",
"parquet",
"postgres-types",
"prettyplease",
"proc-macro2",
"prost",
@@ -7328,6 +7355,7 @@ dependencies = [
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"reqwest 0.12.4",
"rustls 0.21.11",
"scopeguard",
"serde",
"serde_json",
@@ -7343,7 +7371,6 @@ dependencies = [
"time",
"time-macros",
"tokio",
"tokio-postgres",
"tokio-stream",
"tokio-util",
"toml_edit",
@@ -7352,6 +7379,7 @@ dependencies = [
"tracing",
"tracing-core",
"url",
"uuid",
"zeroize",
"zstd",
"zstd-safe",

View File

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
@@ -61,7 +61,7 @@ aws-smithy-types = "1.2"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
aws-types = "1.3"
axum = { version = "0.7.5", features = ["ws"] }
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.70"
@@ -99,10 +99,10 @@ http-types = { version = "2", default-features = false }
http-body-util = "0.1.2"
humantime = "2.1"
humantime-serde = "1.1.1"
hyper0 = { package = "hyper", version = "0.14" }
hyper = "1.4"
hyper = "0.14"
hyper_1 = { package = "hyper", version = "1.4" }
hyper-util = "0.1"
tokio-tungstenite = "0.21.0"
tokio-tungstenite = "0.20.0"
indexmap = "2"
indoc = "2"
ipnet = "2.9.0"

View File

@@ -168,27 +168,27 @@ postgres-check-%: postgres-%
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
@@ -220,7 +220,7 @@ neon-pg-clean-ext-%:
walproposer-lib: neon-pg-ext-v17
+@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
@@ -333,7 +333,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
# Indent pgxn/neon.
.PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \

View File

@@ -58,7 +58,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison icu4c pkg-config m4
brew install protobuf openssl flex bison icu4c pkg-config
# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc

View File

@@ -17,13 +17,14 @@ RUN case $DEBIAN_FLAVOR in \
# Version-specific installs for Bullseye (PG14-PG16):
# The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.
# Install newer version (3.25) from backports.
# libstdc++-10-dev is required for plv8
bullseye*) \
echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/bullseye-backports.list; \
VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports"; \
VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports libstdc++-10-dev"; \
;; \
# Version-specific installs for Bookworm (PG17):
bookworm*) \
VERSION_INSTALLS="cmake"; \
VERSION_INSTALLS="cmake libstdc++-12-dev"; \
;; \
esac && \
apt update && \
@@ -116,7 +117,7 @@ RUN apt update && \
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
# and also we must check backward compatibility with older versions of PostGIS.
#
# Use new version only for v17
# To move faster, use newer versions only for v17
RUN case "${PG_VERSION}" in \
"v17") \
export SFCGAL_VERSION=1.4.1 \
@@ -164,6 +165,7 @@ RUN case "${PG_VERSION}" in \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
@@ -181,27 +183,10 @@ RUN case "${PG_VERSION}" in \
cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis
# Uses versioned libraries, i.e. libpgrouting-3.4
# and may introduce function signature changes between releases
# i.e. release 3.5.0 has new signature for pg_dijkstra function
#
# Use new version only for v17
# last release v3.6.2 - Mar 30, 2024
RUN case "${PG_VERSION}" in \
"v17") \
export PGROUTING_VERSION=3.6.2 \
export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \
;; \
"v14" | "v15" | "v16") \
export PGROUTING_VERSION=3.4.2 \
export PGROUTING_CHECKSUM=cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/pgRouting/pgrouting/archive/v${PGROUTING_VERSION}.tar.gz -O pgrouting.tar.gz && \
echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \
# not version-specific
# last release v3.6.2 - Mar 30, 2024
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.6.2.tar.gz -O pgrouting.tar.gz && \
echo "f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
@@ -223,18 +208,18 @@ FROM build-deps AS plv8-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt update && \
RUN apt update && \
apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
# plv8 3.2.3 supports v17
# last release v3.2.3 - Sep 7, 2024
# clone the repo instead of downloading the release tarball because plv8 has submodule dependencies
# and the release tarball doesn't include them
ENV PLV8_TAG=v3.2.3
RUN set -e \
&& git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \
tar -czf plv8.tar.gz --exclude .git plv8-src && \
cd plv8-src && \
# generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -244,8 +229,10 @@ RUN case "${PG_VERSION}" in "v17") \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
# don't break computes with installed old version of plv8
cd /usr/local/pgsql/lib/ && \
ln -s plv8-3.1.10.so plv8-3.1.5.so && \
ln -s plv8-3.1.10.so plv8-3.1.8.so && \
# TODO test this case !!
ln -s plv8-3.2.3.so plv8-3.1.8.so && \
ln -s plv8-3.2.3.so plv8-3.1.5.so && \
ln -s plv8-3.2.3.so plv8-3.1.10.so && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
@@ -323,6 +310,8 @@ COPY compute/patches/pgvector.patch /pgvector.patch
# By default, pgvector Makefile uses `-march=native`. We don't want that,
# because we build the images on different machines than where we run them.
# Pass OPTFLAGS="" to remove it.
#
# v17 is not supported yet because of upstream issue https://github.com/pgvector/pgvector/issues/669
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -362,11 +351,10 @@ FROM build-deps AS hypopg-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
# HypoPG 1.4.1 supports v17
# last release 1.4.1 - Apr 28, 2024
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \
echo "9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -402,7 +390,8 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY compute/patches/rum.patch /rum.patch
# maybe version-specific
# last release 1.3.13 - Sep 19, 2022
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -424,11 +413,10 @@ FROM build-deps AS pgtap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
# pgtap 1.3.3 supports v17
# last release v1.3.3 - Apr 8, 2024
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \
echo "325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -483,7 +471,7 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# not version-specific
# last release v2.18 - Aug 29, 2023
# last release v2.18 - Sep 15, 2023
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
@@ -501,11 +489,10 @@ FROM build-deps AS plpgsql-check-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
# plpgsql_check v2.7.11 supports v17
# last release v2.7.11 - Sep 16, 2024
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \
echo "208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -523,6 +510,8 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"
# v17 is not supported yet. TimescaleDB 2.17.0 will support it
# https://github.com/timescale/timescaledb/issues/6949
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -557,10 +546,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
# version-specific, has separate releases for each version
# TODO check minor upgrades for v14-16
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -574,7 +562,8 @@ RUN case "${PG_VERSION}" in "v17") \
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
;; \
"v17") \
echo "TODO: PG17 pg_hint_plan support" && exit 0 \
export PG_HINT_PLAN_VERSION=17_1_7_0 \
export PG_HINT_PLAN_CHECKSUM=06dd306328c67a4248f48403c50444f30959fb61ebe963248dbc2afb396fe600 \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
@@ -598,6 +587,9 @@ FROM build-deps AS pg-cron-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 1.6.4 available, supports v17
# FIXME: Why is it here? We don't support pg_cron...
# !Do not remove! We set it in shared_preload_libraries and computes will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -619,23 +611,21 @@ FROM build-deps AS rdkit-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && \
RUN apt-get update && \
apt-get install --no-install-recommends -y \
libboost-iostreams1.74-dev \
libboost-regex1.74-dev \
libboost-serialization1.74-dev \
libboost-system1.74-dev \
#TODO check what exactly is needed
libboost-all-dev \
libeigen3-dev
# rdkit Release_2024_09_1 supports v17
# last release Release_2024_09_1 - Sep 27, 2024
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2024_09_1.tar.gz -O rdkit.tar.gz && \
echo "034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
@@ -674,12 +664,11 @@ FROM build-deps AS pg-uuidv7-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# not version-specific
# last release v1.5.0 - Mar 21, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.5.0.tar.gz -O pg_uuidv7.tar.gz && \
echo "5f53e5ce0fa4e01c1d489f736478224571d4b2b88eaf63186265e38f0b8d3d78 pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -715,27 +704,11 @@ FROM build-deps AS pg-semver-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# Release 0.40.0 breaks backward compatibility with previous versions
# see release note https://github.com/theory/pg-semver/releases/tag/v0.40.0
# Use new version only for v17
#
# not version-specific
# last release v0.40.0 - Jul 22, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
"v17") \
export SEMVER_VERSION=0.40.0 \
export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \
;; \
"v14" | "v15" | "v16") \
export SEMVER_VERSION=0.32.1 \
export SEMVER_CHECKSUM=fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/theory/pg-semver/archive/refs/tags/v${SEMVER_VERSION}.tar.gz -O pg_semver.tar.gz && \
echo "${SEMVER_CHECKSUM} pg_semver.tar.gz" | sha256sum --check && \
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.40.0.tar.gz -O pg_semver.tar.gz && \
echo "3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -750,6 +723,8 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# This is our extension, support stopped in favor of pgvector
# TODO: deprecate it
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
@@ -776,6 +751,9 @@ FROM build-deps AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# This was experimental extension, that never got to real production.
# TODO deprecate it
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
@@ -930,6 +908,9 @@ RUN case "${PG_VERSION}" in "v17") \
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release
# it's needed to enable extension because it uses untrusted C language
# sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_session_jwt.control && \
# echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_session_jwt.control
#########################################################################################
#
@@ -942,13 +923,12 @@ FROM build-deps AS wal2json-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# wal2json wal2json_2_6 supports v17
# last release wal2json_2_6 - Apr 25, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \
echo "18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
@@ -962,12 +942,11 @@ FROM build-deps AS pg-ivm-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# pg_ivm v1.9 supports v17
# last release v1.9 - Jul 31
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \
echo "59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -983,12 +962,11 @@ FROM build-deps AS pg-partman-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# should support v17 https://github.com/pgpartman/pg_partman/discussions/693
# last release 5.1.0 Apr 2, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "pg_partman doesn't support PG17 yet" && exit 0;; \
esac && \
wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \
echo "3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -1125,20 +1103,6 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=
#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
@@ -1277,10 +1241,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

View File

@@ -19,10 +19,6 @@ commands:
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -11,13 +11,12 @@ testing = []
[dependencies]
anyhow.workspace = true
camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true
flate2.workspace = true
futures.workspace = true
hyper0 = { workspace = true, features = ["full"] }
hyper = { workspace = true, features = ["full"] }
nix.workspace = true
notify.workspace = true
num_cpus.workspace = true

View File

@@ -402,7 +402,8 @@ fn start_postgres(
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
state.set_status(ComputeStatus::Init, &compute.state_changed);
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
info!(
"running compute with features: {:?}",

View File

@@ -34,7 +34,6 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -109,18 +108,6 @@ impl ComputeState {
metrics: ComputeMetrics::default(),
}
}
pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
let prev = self.status;
info!("Changing compute status from {} to {}", prev, status);
self.status = status;
state_changed.notify_all();
}
pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
self.error = Some(format!("{err:?}"));
self.set_status(ComputeStatus::Failed, state_changed);
}
}
impl Default for ComputeState {
@@ -315,12 +302,15 @@ impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap();
state.set_status(status, &self.state_changed);
state.status = status;
self.state_changed.notify_all();
}
pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap();
state.set_failed_status(err, &self.state_changed);
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
}
pub fn get_status(&self) -> ComputeStatus {
@@ -896,11 +886,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
@@ -951,19 +936,6 @@ impl ComputeNode {
});
}
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}
// Write new config
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -1051,19 +1023,6 @@ impl ComputeNode {
});
}
if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}
info!(
"start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions
@@ -1484,28 +1443,6 @@ LIMIT 100",
info!("Pageserver config changed");
}
}
// Gather info about installed extensions
pub fn get_installed_extensions(&self) -> Result<()> {
let connstr = self.connstr.clone();
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to create runtime");
let result = rt
.block_on(crate::installed_extensions::get_installed_extensions(
connstr,
))
.expect("failed to get installed extensions");
info!(
"{}",
serde_json::to_string(&result).expect("failed to serialize extensions list")
);
Ok(())
}
}
pub fn forward_termination_signal() {
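
The status changes in this file all follow the same shape: mutate the status while holding the state mutex, then call notify_all() on the state_changed condvar so waiters (such as the termination handler further down) wake up and re-check. A minimal self-contained sketch of that pattern, using stand-in types rather than the real ComputeNode/ComputeStatus:

use std::sync::{Arc, Condvar, Mutex};
use std::thread;

// Stand-in for ComputeStatus; the real enum lives in compute_api.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Status { Init, Running, Terminated }

struct Node {
    status: Mutex<Status>,
    state_changed: Condvar,
}

impl Node {
    // What both the removed set_status() helper and the inlined replacements do:
    // mutate under the lock, then wake every waiter.
    fn set_status(&self, new: Status) {
        let mut st = self.status.lock().unwrap();
        *st = new;
        self.state_changed.notify_all();
    }

    // What the waiting side (e.g. the terminate handler) does: re-check the
    // predicate after every wakeup, still holding the lock.
    fn wait_for(&self, target: Status) {
        let mut st = self.status.lock().unwrap();
        while *st != target {
            st = self.state_changed.wait(st).unwrap();
        }
    }
}

fn main() {
    let node = Arc::new(Node { status: Mutex::new(Status::Init), state_changed: Condvar::new() });
    let waiter = {
        let n = Arc::clone(&node);
        thread::spawn(move || n.wait_for(Status::Terminated))
    };
    node.set_status(Status::Running);
    node.set_status(Status::Terminated);
    waiter.join().unwrap();
}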

View File

@@ -24,7 +24,8 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
// Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.set_status(ComputeStatus::Configuration, &compute.state_changed);
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state);
let mut new_status = ComputeStatus::Failed;

View File

@@ -165,32 +165,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
}
// get the list of installed extensions
// currently only used in python tests
// TODO: call it from cplane
(&Method::GET, "/installed_extensions") => {
info!("serving /installed_extensions GET request");
let status = compute.get_status();
if status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for extensions request: {:?}",
status
);
error!(msg);
return Response::new(Body::from(msg));
}
let connstr = compute.connstr.clone();
let res = crate::installed_extensions::get_installed_extensions(connstr).await;
match res {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(e) => render_json_error(
&format!("could not get list of installed extensions: {}", e),
StatusCode::INTERNAL_SERVER_ERROR,
),
}
}
// download extension files from remote extension storage on demand
(&Method::POST, route) if route.starts_with("/extension_server/") => {
info!("serving {:?} POST request", route);
@@ -314,7 +288,8 @@ async fn handle_configure_request(
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
@@ -387,15 +362,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
}
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for termination request: {}",
state.status
"invalid compute status for termination request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
state.status = ComputeStatus::TerminationPending;
compute.state_changed.notify_all();
drop(state);
}
forward_termination_signal();
info!("sent signal and notified waiters");
@@ -409,8 +384,7 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become {}, current status: {:?}",
ComputeStatus::Terminated,
"waiting for compute to become Terminated, current status: {:?}",
state.status
);
}

View File

@@ -53,20 +53,6 @@ paths:
schema:
$ref: "#/components/schemas/ComputeInsights"
/installed_extensions:
get:
tags:
- Info
summary: Get installed extensions.
description: ""
operationId: getInstalledExtensions
responses:
200:
description: List of installed extensions
content:
application/json:
schema:
$ref: "#/components/schemas/InstalledExtensions"
/info:
get:
tags:
@@ -409,24 +395,6 @@ components:
- configuration
example: running
InstalledExtensions:
type: object
properties:
extensions:
description: Contains list of installed extensions.
type: array
items:
type: object
properties:
extname:
type: string
versions:
type: array
items:
type: string
n_databases:
type: integer
#
# Errors
#

View File

@@ -1,80 +0,0 @@
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use std::collections::HashMap;
use std::collections::HashSet;
use url::Url;
use anyhow::Result;
use postgres::{Client, NoTls};
use tokio::task;
/// We don't reuse get_existing_dbs() just for code clarity
/// and to make database listing query here more explicit.
///
/// Limit the number of databases to 500 to avoid excessive load.
fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state
let databases = client
.query(
"SELECT datname FROM pg_catalog.pg_database
WHERE datallowconn
AND datconnlimit <> - 2
LIMIT 500",
&[],
)?
.iter()
.map(|row| {
let db: String = row.get("datname");
db
})
.collect();
Ok(databases)
}
/// Connect to every database (see list_dbs above) and get the list of installed extensions.
/// Same extension can be installed in multiple databases with different versions,
/// we only keep the highest and lowest version across all databases.
pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions> {
let mut connstr = connstr.clone();
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let databases: Vec<String> = list_dbs(&mut client)?;
let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
for db in databases.iter() {
connstr.set_path(db);
let mut db_client = Client::connect(connstr.as_str(), NoTls)?;
let extensions: Vec<(String, String)> = db_client
.query(
"SELECT extname, extversion FROM pg_catalog.pg_extension;",
&[],
)?
.iter()
.map(|row| (row.get("extname"), row.get("extversion")))
.collect();
for (extname, v) in extensions.iter() {
let version = v.to_string();
extensions_map
.entry(extname.to_string())
.and_modify(|e| {
e.versions.insert(version.clone());
// count the number of databases where the extension is installed
e.n_databases += 1;
})
.or_insert(InstalledExtension {
extname: extname.to_string(),
versions: HashSet::from([version.clone()]),
n_databases: 1,
});
}
}
Ok(InstalledExtensions {
extensions: extensions_map.values().cloned().collect(),
})
})
.await?
}
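
The removed get_installed_extensions above folds per-database rows into one entry per extension, recording the set of versions seen and how many databases have it installed. A standalone sketch of just that HashMap aggregation step, with hard-coded rows in place of live Postgres connections:

use std::collections::{HashMap, HashSet};

#[derive(Debug)]
struct Agg {
    versions: HashSet<String>,
    n_databases: u32,
}

fn main() {
    // (extname, extversion) rows as pg_extension would return them, one pair per database.
    let rows = [("plv8", "3.1.8"), ("plv8", "3.2.3"), ("pg_ivm", "1.9")];
    let mut map: HashMap<String, Agg> = HashMap::new();
    for (name, ver) in rows {
        map.entry(name.to_string())
            .and_modify(|e| {
                e.versions.insert(ver.to_string());
                e.n_databases += 1; // one more database has this extension installed
            })
            .or_insert(Agg {
                versions: HashSet::from([ver.to_string()]),
                n_databases: 1,
            });
    }
    // plv8 ends up with two versions and n_databases = 2; pg_ivm with one of each.
    println!("{map:?}");
}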

View File

@@ -2,9 +2,6 @@
//! configuration.
#![deny(unsafe_code)]
#![deny(clippy::undocumented_unsafe_blocks)]
extern crate hyper0 as hyper;
pub mod checker;
pub mod config;
pub mod configurator;
@@ -15,8 +12,6 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
pub mod installed_extensions;
pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;

View File

@@ -1,56 +0,0 @@
//! Local Proxy is a feature of our BaaS Neon Authorize project.
//!
//! Local Proxy validates JWTs and manages the pg_session_jwt extension.
//! It also maintains a connection pool to postgres.
use anyhow::{Context, Result};
use camino::Utf8Path;
use compute_api::spec::LocalProxySpec;
use nix::sys::signal::Signal;
use utils::pid_file::{self, PidFileRead};
pub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {
write_local_proxy_conf("/etc/local_proxy/config.json".as_ref(), local_proxy)?;
notify_local_proxy("/etc/local_proxy/pid".as_ref())?;
Ok(())
}
/// Create or completely rewrite configuration file specified by `path`
fn write_local_proxy_conf(path: &Utf8Path, local_proxy: &LocalProxySpec) -> Result<()> {
let config =
serde_json::to_string_pretty(local_proxy).context("serializing LocalProxySpec to json")?;
std::fs::write(path, config).with_context(|| format!("writing {path}"))?;
Ok(())
}
/// Notify local proxy about a new config file.
fn notify_local_proxy(path: &Utf8Path) -> Result<()> {
match pid_file::read(path)? {
// if the file doesn't exist, or isn't locked, local_proxy isn't running
// and will naturally pick up our config later
PidFileRead::NotExist | PidFileRead::NotHeldByAnyProcess(_) => {}
PidFileRead::LockedByOtherProcess(pid) => {
// From the pid_file docs:
//
// > 1. The other process might exit at any time, turning the given PID stale.
// > 2. There is a small window in which `claim_for_current_process` has already
// > locked the file but not yet updates its contents. [`read`] will return
// > this variant here, but with the old file contents, i.e., a stale PID.
// >
// > The kernel is free to recycle PID once it has been `wait(2)`ed upon by
// > its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
// > system call on it, bears the risk of killing an unrelated process.
// > This is an inherent limitation of using pidfiles.
// > The only race-free solution is to have a supervisor-process with a lifetime
// > that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
//
// This is an ok risk as we only send a SIGHUP which likely won't actually
// kill the process, only reload config.
nix::sys::signal::kill(pid, Signal::SIGHUP).context("sending signal to local_proxy")?;
}
}
Ok(())
}

View File

@@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::fs::File;
use std::path::Path;
use std::str::FromStr;
@@ -190,15 +189,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
let mut jwks_roles = HashSet::new();
if let Some(local_proxy) = &spec.local_proxy_config {
for jwks_setting in local_proxy.jwks.iter().flatten() {
for role_name in &jwks_setting.role_names {
jwks_roles.insert(role_name.clone());
}
}
}
// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
@@ -318,9 +308,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote()
);
if jwks_roles.contains(name.as_str()) {
query = format!("CREATE ROLE {}", name.pg_quote());
}
info!("running role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;

View File

@@ -14,7 +14,7 @@ humantime.workspace = true
nix.workspace = true
once_cell.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
hyper.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] }
scopeguard.workspace = true

View File

@@ -599,7 +599,6 @@ impl Endpoint {
remote_extensions,
pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -3,7 +3,7 @@ use crate::{
local_env::{LocalEnv, NeonStorageControllerConf},
};
use camino::{Utf8Path, Utf8PathBuf};
use hyper0::Uri;
use hyper::Uri;
use nix::unistd::Pid;
use pageserver_api::{
controller_api::{

View File

@@ -5,7 +5,7 @@
Currently we build two main images:
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/Dockerfile.compute-node).
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14.
And additional intermediate image:

View File

@@ -1,8 +1,5 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use std::collections::HashSet;
use std::fmt::Display;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize, Serializer};
@@ -61,21 +58,6 @@ pub enum ComputeStatus {
Terminated,
}
impl Display for ComputeStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ComputeStatus::Empty => f.write_str("empty"),
ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
ComputeStatus::Init => f.write_str("init"),
ComputeStatus::Running => f.write_str("running"),
ComputeStatus::Configuration => f.write_str("configuration"),
ComputeStatus::Failed => f.write_str("failed"),
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
ComputeStatus::Terminated => f.write_str("terminated"),
}
}
}
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
@@ -156,15 +138,3 @@ pub enum ControlPlaneComputeStatus {
// should be able to start with provided spec.
Attached,
}
#[derive(Clone, Debug, Default, Serialize)]
pub struct InstalledExtension {
pub extname: String,
pub versions: HashSet<String>,
pub n_databases: u32, // Number of databases using this extension
}
#[derive(Clone, Debug, Default, Serialize)]
pub struct InstalledExtensions {
pub extensions: Vec<InstalledExtension>,
}

View File

@@ -106,10 +106,6 @@ pub struct ComputeSpec {
// Stripe size for pageserver sharding, in pages
#[serde(default)]
pub shard_stripe_size: Option<usize>,
/// Local Proxy configuration used for JWT authentication
#[serde(default)]
pub local_proxy_config: Option<LocalProxySpec>,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -282,13 +278,11 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
/// Configured the local_proxy application with the relevant JWKS and roles it should
/// Configured the local-proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec {
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub jwks: Option<Vec<JwksSettings>>,
pub jwks: Vec<JwksSettings>,
}
#[derive(Clone, Debug, Deserialize, Serialize)]
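
The jwks change above toggles between a required vector and an optional, omit-when-absent field. A small sketch of what those serde attributes buy on the wire, using an illustrative struct rather than the real LocalProxySpec (assumes the serde and serde_json crates already used across this workspace):

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug)]
struct Spec {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    jwks: Option<Vec<String>>, // stand-in for Option<Vec<JwksSettings>>
}

fn main() {
    // The optional form lets `{}` deserialize cleanly, yielding jwks = None ...
    let s: Spec = serde_json::from_str("{}").unwrap();
    assert!(s.jwks.is_none());
    // ... and skip_serializing_if drops the field instead of emitting "jwks": null.
    assert_eq!(serde_json::to_string(&s).unwrap(), "{}");
    // With a plain required `jwks: Vec<JwksSettings>`, the same `{}` input is a
    // "missing field" deserialization error.
    println!("{s:?}");
}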

View File

@@ -104,7 +104,8 @@ pub struct ConfigToml {
pub image_compression: ImageCompressionAlgorithm,
pub ephemeral_bytes_per_memory_kb: usize,
pub l0_flush: Option<crate::models::L0FlushConfig>,
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
pub io_buffer_alignment: usize,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -295,14 +296,7 @@ pub mod defaults {
pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
/// Soft limit for the maximum size of a vectored read.
///
/// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
/// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
/// with 32k xids. That's the max number of XIDS on a single CLOG page. The size of such a record
/// is `sizeof(Transactionid) * 32768 + (some fixed overhead from 'timestamp`, the Vec length and whatever extra serde serialization adds)`.
/// That is, slightly above 128 kB.
pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 128 * 1024; // 128 KiB
pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
ImageCompressionAlgorithm::Zstd { level: Some(1) };
@@ -387,7 +381,10 @@ impl Default for ConfigToml {
image_compression: (DEFAULT_IMAGE_COMPRESSION),
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
l0_flush: None,
virtual_file_io_mode: None,
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
tenant_config: TenantConfigToml::default(),
}
}
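
For context on the two DEFAULT_MAX_VECTORED_READ_BYTES values above, the arithmetic from the longer comment is easy to reproduce. A sketch, assuming the usual 4-byte Postgres TransactionId:

fn main() {
    let xid_size: usize = 4;                // sizeof(TransactionId): Postgres XIDs are 32-bit
    let xids_per_clog_page: usize = 32768;  // max XIDs tracked by a single CLOG page
    let payload = xid_size * xids_per_clog_page;
    assert_eq!(payload, 128 * 1024);        // exactly 128 KiB of xids
    // 130 * 1024 leaves ~2 KiB of headroom for the timestamp, the Vec length and
    // serde framing on top of that payload; 128 * 1024 is the bare payload size.
    assert_eq!(130 * 1024 - payload, 2 * 1024);
    println!("payload = {} KiB", payload / 1024);
}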

View File

@@ -748,16 +748,6 @@ impl Key {
self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
}
#[inline(always)]
pub fn is_rel_dir_key(&self) -> bool {
self.field1 == 0x00
&& self.field2 != 0
&& self.field3 != 0
&& self.field4 == 0
&& self.field5 == 0
&& self.field6 == 1
}
/// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
#[inline(always)]
pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {

View File

@@ -972,6 +972,8 @@ pub struct TopTenantShardsResponse {
}
pub mod virtual_file {
use std::path::PathBuf;
#[derive(
Copy,
Clone,
@@ -992,45 +994,50 @@ pub mod virtual_file {
}
/// Direct IO modes for a pageserver.
#[derive(
Copy,
Clone,
PartialEq,
Eq,
Hash,
strum_macros::EnumString,
strum_macros::Display,
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
Debug,
)]
#[strum(serialize_all = "kebab-case")]
#[repr(u8)]
pub enum IoMode {
/// Uses buffered IO.
Buffered,
/// Uses direct IO, error out if the operation fails.
#[cfg(target_os = "linux")]
Direct,
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
pub enum DirectIoMode {
/// Direct IO disabled (uses usual buffered IO).
#[default]
Disabled,
/// Direct IO disabled (performs checks and perf simulations).
Evaluate {
/// Alignment check level
alignment_check: DirectIoAlignmentCheckLevel,
/// Latency padded for performance simulation.
latency_padding: DirectIoLatencyPadding,
},
/// Direct IO enabled.
Enabled {
/// Actions to perform on alignment error.
on_alignment_error: DirectIoOnAlignmentErrorAction,
},
}
impl IoMode {
pub const fn preferred() -> Self {
Self::Buffered
}
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum DirectIoAlignmentCheckLevel {
#[default]
Error,
Log,
None,
}
impl TryFrom<u8> for IoMode {
type Error = u8;
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum DirectIoOnAlignmentErrorAction {
Error,
#[default]
FallbackToBuffered,
}
fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
#[cfg(target_os = "linux")]
v if v == (IoMode::Direct as u8) => IoMode::Direct,
x => return Err(x),
})
}
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(tag = "type", rename_all = "kebab-case")]
pub enum DirectIoLatencyPadding {
/// Pad virtual file operations with IO to a fake file.
FakeFileRW { path: PathBuf },
#[default]
None,
}
}

View File

@@ -16,7 +16,7 @@ aws-sdk-s3.workspace = true
bytes.workspace = true
camino = { workspace = true, features = ["serde1"] }
humantime-serde.workspace = true
hyper0 = { workspace = true, features = ["stream"] }
hyper = { workspace = true, features = ["stream"] }
futures.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -14,7 +14,7 @@ use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
};
pub struct AzureBlobStorage {
@@ -259,7 +259,6 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() {
match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)),
}
@@ -485,23 +484,32 @@ impl RemoteStorage for AzureBlobStorage {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let builder = blob_client.get();
self.download_for_builder(builder, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let mut builder = blob_client.get();
if let Some(ref etag) = opts.etag {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
}
if let Some((start, end)) = opts.byte_range() {
builder = builder.range(match end {
Some(end) => Range::Range(start..end),
None => Range::RangeFrom(start..),
});
}
let range: Range = if let Some(end_exclusive) = end_exclusive {
(start_inclusive..end_exclusive).into()
} else {
(start_inclusive..).into()
};
builder = builder.range(range);
self.download_for_builder(builder, cancel).await
}

View File

@@ -5,8 +5,6 @@ pub enum DownloadError {
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
@@ -26,7 +24,6 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -41,7 +38,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) | NotFound | Unmodified | Cancelled => true,
BadInput(_) | NotFound | Cancelled => true,
Timeout | Other(_) => false,
}
}

View File

@@ -19,8 +19,7 @@ mod simulate_failures;
mod support;
use std::{
collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc,
time::SystemTime,
collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
};
use anyhow::Context;
@@ -162,63 +161,6 @@ pub struct Listing {
pub keys: Vec<ListingObject>,
}
/// Options for downloads. The default value is a plain GET.
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
/// The start of the byte range to download, or unbounded.
pub byte_start: Bound<u64>,
/// The end of the byte range to download, or unbounded. Must be after the
/// start bound.
pub byte_end: Bound<u64>,
}
impl Default for DownloadOpts {
fn default() -> Self {
Self {
etag: Default::default(),
byte_start: Bound::Unbounded,
byte_end: Bound::Unbounded,
}
}
}
impl DownloadOpts {
/// Returns the byte range with inclusive start and exclusive end, or None
/// if unbounded.
pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {
return None;
}
let start = match self.byte_start {
Bound::Excluded(i) => i + 1,
Bound::Included(i) => i,
Bound::Unbounded => 0,
};
let end = match self.byte_end {
Bound::Excluded(i) => Some(i),
Bound::Included(i) => Some(i + 1),
Bound::Unbounded => None,
};
if let Some(end) = end {
assert!(start < end, "range end {end} at or before start {start}");
}
Some((start, end))
}
/// Returns the byte range as an RFC 2616 Range header value with inclusive
/// bounds, or None if unbounded.
pub fn byte_range_header(&self) -> Option<String> {
self.byte_range()
.map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive
.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{end}"),
None => format!("bytes={start}-"),
})
}
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
@@ -303,7 +245,21 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents.
///
/// The returned download stream will obey the initial timeout and the cancellation signal,
/// erroring out on whichever happens first. Only one of the two reasons will fail the stream,
/// which is usually enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
///
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
@@ -445,18 +401,43 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}
/// See [`RemoteStorage::download`]
pub async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => s.download(from, opts, cancel).await,
Self::AwsS3(s) => s.download(from, opts, cancel).await,
Self::AzureBlob(s) => s.download(from, opts, cancel).await,
Self::Unreliable(s) => s.download(from, opts, cancel).await,
Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await,
}
}
pub async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AwsS3(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AzureBlob(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::Unreliable(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
}
}
@@ -581,6 +562,20 @@ impl GenericRemoteStorage {
})
}
/// Downloads the storage object into the `to_path` provided.
/// `byte_range` could be specified to download only a part of the file, if needed.
pub async fn download_storage_object(
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, cancel).await,
}
}
/// The name of the bucket/container/etc.
pub fn bucket_name(&self) -> Option<&str> {
match self {
@@ -654,76 +649,6 @@ impl ConcurrencyLimiter {
mod tests {
use super::*;
/// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges
/// with optional end bound, or None when unbounded.
#[test]
fn download_opts_byte_range() {
// Consider using test_case or a similar table-driven test framework.
let cases = [
// (byte_start, byte_end, expected)
(Bound::Unbounded, Bound::Unbounded, None),
(Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
(Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
(Bound::Included(3), Bound::Unbounded, Some((3, None))),
(Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
(Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
(Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
(Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
(Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
// 1-sized ranges are fine, 0 aren't and will panic (separate test).
(Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
(Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
];
for (byte_start, byte_end, expect) in cases {
let opts = DownloadOpts {
byte_start,
byte_end,
..Default::default()
};
let result = opts.byte_range();
assert_eq!(
result, expect,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);
// Check generated HTTP header, which uses an inclusive range.
let expect_header = expect.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{}", end - 1), // inclusive end
None => format!("bytes={start}-"),
});
assert_eq!(
opts.byte_range_header(),
expect_header,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);
}
}
/// DownloadOpts::byte_range() zero-sized byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_zero() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Excluded(3),
..Default::default()
}
.byte_range();
}
/// DownloadOpts::byte_range() negative byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_negative() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Included(2),
..Default::default()
}
.byte_range();
}
#[test]
fn test_object_name() {
let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();

View File

@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension;
use crate::{
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};
use super::{RemoteStorage, StorageMetadata};
@@ -494,41 +494,22 @@ impl RemoteStorage for LocalFs {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?;
let etag = mock_etag(&file_metadata);
if opts.etag.as_ref() == Some(&etag) {
return Err(DownloadError::Unmodified);
}
let mut file = fs::OpenOptions::new()
.read(true)
.open(&target_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
})
.map_err(DownloadError::Other)?;
let mut take = file_metadata.len();
if let Some((start, end)) = opts.byte_range() {
if start > 0 {
file.seek(io::SeekFrom::Start(start))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
}
if let Some(end) = end {
take = end - start;
}
}
let source = ReaderStream::new(file.take(take));
let source = ReaderStream::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&target_path)
@@ -538,6 +519,69 @@ impl RemoteStorage for LocalFs {
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
.modified()
.map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
etag,
download_stream: Box::pin(source),
})
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?;
let mut source = tokio::fs::OpenOptions::new()
.read(true)
.open(&target_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
})
.map_err(DownloadError::Other)?;
let len = source
.metadata()
.await
.context("query file length")
.map_err(DownloadError::Other)?
.len();
source
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&target_path)
.await
.map_err(DownloadError::Other)?;
let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
let source = ReaderStream::new(source);
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
@@ -639,7 +683,7 @@ mod fs_tests {
use super::*;
use camino_tempfile::tempdir;
use std::{collections::HashMap, io::Write, ops::Bound};
use std::{collections::HashMap, io::Write};
async fn read_and_check_metadata(
storage: &LocalFs,
@@ -648,7 +692,7 @@ mod fs_tests {
) -> anyhow::Result<String> {
let cancel = CancellationToken::new();
let download = storage
.download(remote_storage_path, &DownloadOpts::default(), &cancel)
.download(remote_storage_path, &cancel)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
@@ -729,8 +773,8 @@ mod fs_tests {
"We should upload and download the same contents"
);
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -755,12 +799,10 @@ mod fs_tests {
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let first_part_download = storage
.download(
.download_byte_range(
&upload_target,
&DownloadOpts {
byte_end: Bound::Excluded(first_part_local.len() as u64),
..Default::default()
},
0,
Some(first_part_local.len() as u64),
&cancel,
)
.await?;
@@ -776,15 +818,10 @@ mod fs_tests {
);
let second_part_download = storage
.download(
.download_byte_range(
&upload_target,
&DownloadOpts {
byte_start: Bound::Included(first_part_local.len() as u64),
byte_end: Bound::Excluded(
(first_part_local.len() + second_part_local.len()) as u64,
),
..Default::default()
},
first_part_local.len() as u64,
Some((first_part_local.len() + second_part_local.len()) as u64),
&cancel,
)
.await?;
@@ -800,14 +837,7 @@ mod fs_tests {
);
let suffix_bytes = storage
.download(
&upload_target,
&DownloadOpts {
byte_start: Bound::Included(13),
..Default::default()
},
&cancel,
)
.download_byte_range(&upload_target, 13, None, &cancel)
.await?
.download_stream;
let suffix_bytes = aggregate(suffix_bytes).await?;
@@ -815,7 +845,7 @@ mod fs_tests {
assert_eq!(upload_name, suffix);
let all_bytes = storage
.download(&upload_target, &DownloadOpts::default(), &cancel)
.download_byte_range(&upload_target, 0, None, &cancel)
.await?
.download_stream;
let all_bytes = aggregate(all_bytes).await?;
@@ -826,26 +856,48 @@ mod fs_tests {
}
#[tokio::test]
#[should_panic(expected = "at or before start")]
async fn download_file_range_negative() {
let (storage, cancel) = create_storage().unwrap();
async fn download_file_range_negative() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel)
.await
.unwrap();
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
storage
.download(
let start = 1_000_000_000;
let end = start + 1;
match storage
.download_byte_range(
&upload_target,
&DownloadOpts {
byte_start: Bound::Included(10),
byte_end: Bound::Excluded(10),
..Default::default()
},
start,
Some(end), // exclusive end
&cancel,
)
.await
.unwrap();
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end), &cancel)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("Invalid range"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
Ok(())
}
#[tokio::test]
@@ -888,12 +940,10 @@ mod fs_tests {
let (first_part_local, _) = uploaded_bytes.split_at(3);
let partial_download_with_metadata = storage
.download(
.download_byte_range(
&upload_target,
&DownloadOpts {
byte_end: Bound::Excluded(first_part_local.len() as u64),
..Default::default()
},
0,
Some(first_part_local.len() as u64),
&cancel,
)
.await?;
@@ -1051,13 +1101,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body");
@@ -1068,13 +1112,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(shorter, read);
Ok(())
}
@@ -1107,13 +1145,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
Ok(())

View File

@@ -28,13 +28,12 @@ use aws_sdk_s3::{
Client,
};
use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
use bytes::Bytes;
use futures::stream::Stream;
use hyper0::Body;
use hyper::Body;
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
use utils::backoff;
@@ -45,8 +44,8 @@ use crate::{
error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
@@ -68,7 +67,6 @@ pub struct S3Bucket {
struct GetObjectRequest {
bucket: String,
key: String,
etag: Option<String>,
range: Option<String>,
}
impl S3Bucket {
@@ -250,18 +248,13 @@ impl S3Bucket {
let started_at = start_measuring_requests(kind);
let mut builder = self
let get_object = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range);
if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}
let get_object = builder.send();
.set_range(request.range)
.send();
let get_object = tokio::select! {
res = get_object => res,
@@ -284,20 +277,6 @@ impl S3Bucket {
);
return Err(DownloadError::NotFound);
}
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
@@ -794,7 +773,6 @@ impl RemoteStorage for S3Bucket {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from`
@@ -803,8 +781,33 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: opts.etag.as_ref().map(|e| e.to_string()),
range: opts.byte_range_header(),
range: None,
},
cancel,
)
.await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be inclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
});
self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
},
cancel,
)
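For reference, a standalone sketch of the exclusive-to-inclusive conversion above (assumed helper name, not code from this diff) and the headers it produces:
fn s3_range_header(start_inclusive: u64, end_exclusive: Option<u64>) -> String {
    // S3 range headers use inclusive bounds on both ends.
    match end_exclusive.map(|end| end.saturating_sub(1)) {
        Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
        None => format!("bytes={start_inclusive}-"),
    }
}
// s3_range_header(4, Some(10)) == "bytes=4-9"
// s3_range_header(13, None)    == "bytes=13-"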

View File

@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken;
use crate::{
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
RemoteStorage, StorageMetadata, TimeTravelError,
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata, TimeTravelError,
};
pub struct UnreliableWrapper {
@@ -167,14 +167,28 @@ impl RemoteStorage for UnreliableWrapper {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// Note: We treat any byte range as an "attempt" of the same operation.
// We don't pay attention to the ranges. That's good enough for now.
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner.download(from, opts, cancel).await
self.inner.download(from, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// Note: We treat any download_byte_range as an "attempt" of the same
// operation. We don't pay attention to the ranges. That's good enough
// for now.
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner
.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {

View File

@@ -1,8 +1,8 @@
use anyhow::Context;
use camino::Utf8Path;
use futures::StreamExt;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use std::ops::Bound;
use remote_storage::ListingMode;
use remote_storage::RemotePath;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
@@ -284,25 +284,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?;
// Normal download request
let dl = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// Full range (end specified)
let dl = ctx
.client
.download(
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
byte_end: Bound::Excluded(len as u64),
..Default::default()
},
&cancel,
)
.download_byte_range(&path, 0, Some(len as u64), &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -310,15 +299,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end specified)
let dl = ctx
.client
.download(
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
byte_end: Bound::Excluded(10),
..Default::default()
},
&cancel,
)
.download_byte_range(&path, 4, Some(10), &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]);
@@ -326,15 +307,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end beyond real end)
let dl = ctx
.client
.download(
&path,
&DownloadOpts {
byte_start: Bound::Included(8),
byte_end: Bound::Excluded(len as u64 * 100),
..Default::default()
},
&cancel,
)
.download_byte_range(&path, 8, Some(len as u64 * 100), &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]);
@@ -342,14 +315,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Partial range (end unspecified)
let dl = ctx
.client
.download(
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
..Default::default()
},
&cancel,
)
.download_byte_range(&path, 4, None, &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]);
@@ -357,14 +323,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Full range (end unspecified)
let dl = ctx
.client
.download(
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
..Default::default()
},
&cancel,
)
.download_byte_range(&path, 0, None, &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -378,54 +337,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(())
}
/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;
// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -453,10 +364,7 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?;
let dl = ctx
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path_dest, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -468,56 +376,3 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
Ok(())
}
/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);
// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);
// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;
assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);
Ok(())
}

View File

@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
use remote_storage::{
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageConfig, RemoteStorageKind, S3Config,
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
@@ -121,8 +121,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
// A little check to ensure that our clock is not too far off from the S3 clock
{
let opts = DownloadOpts::default();
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
@@ -160,12 +159,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content
@@ -176,12 +170,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1
@@ -427,7 +416,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now();
let mut stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
@@ -502,7 +491,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{
let stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;

View File

@@ -5,7 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
hyper0.workspace = true
hyper.workspace = true
opentelemetry = { workspace = true, features = ["trace"] }
opentelemetry_sdk = { workspace = true, features = ["rt-tokio"] }
opentelemetry-otlp = { workspace = true, default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }

View File

@@ -1,7 +1,7 @@
//! Tracing wrapper for Hyper HTTP server
use hyper0::HeaderMap;
use hyper0::{Body, Request, Response};
use hyper::HeaderMap;
use hyper::{Body, Request, Response};
use std::future::Future;
use tracing::Instrument;
use tracing_opentelemetry::OpenTelemetrySpanExt;

View File

@@ -22,7 +22,7 @@ chrono.workspace = true
git-version.workspace = true
hex = { workspace = true, features = ["serde"] }
humantime.workspace = true
hyper0 = { workspace = true, features = ["full"] }
hyper = { workspace = true, features = ["full"] }
fail.workspace = true
futures = { workspace = true}
jsonwebtoken.workspace = true

View File

@@ -31,12 +31,9 @@ pub enum Scope {
/// The scope used by pageservers in upcalls to storage controller and cloud control plane
#[serde(rename = "generations_api")]
GenerationsApi,
/// Allows access to control plane management API and all storage controller endpoints.
/// Allows access to control plane management API and some storage controller endpoints.
Admin,
/// Allows access to control plane & storage controller endpoints used in infrastructure automation (e.g. node registration)
Infra,
/// Allows access to storage controller APIs used by the scrubber, to interrogate the state
/// of a tenant & post scrub results.
Scrubber,

View File

@@ -2,8 +2,6 @@
//! between other crates in this repository.
#![deny(clippy::undocumented_unsafe_blocks)]
extern crate hyper0 as hyper;
pub mod backoff;
/// `Lsn` type implements common tasks on Log Sequence Numbers

View File

@@ -7,13 +7,11 @@ use axum::{
extract::{ws::WebSocket, State, WebSocketUpgrade},
response::Response,
};
use axum::{routing::get, Router};
use axum::{routing::get, Router, Server};
use clap::Parser;
use futures::Future;
use std::net::SocketAddr;
use std::{fmt::Debug, time::Duration};
use sysinfo::{RefreshKind, System, SystemExt};
use tokio::net::TcpListener;
use tokio::{sync::broadcast, task::JoinHandle};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
@@ -134,14 +132,14 @@ pub async fn start(args: &'static Args, token: CancellationToken) -> anyhow::Res
args,
});
let addr_str = args.addr();
let addr: SocketAddr = addr_str.parse().expect("parsing address should not fail");
let listener = TcpListener::bind(&addr)
.await
let addr = args.addr();
let bound = Server::try_bind(&addr.parse().expect("parsing address should not fail"))
.with_context(|| format!("failed to bind to {addr}"))?;
info!(addr_str, "server bound");
axum::serve(listener, app.into_make_service())
info!(addr, "server bound");
bound
.serve(app.into_make_service())
.await
.context("server exited")?;

View File

@@ -79,7 +79,8 @@ pub struct Config {
/// memory.
///
/// The default value of `0.15` means that we *guarantee* sending upscale requests if the
/// cgroup is using more than 85% of total memory.
/// cgroup is using more than 85% of total memory (even if we're *not* separately reserving
/// memory for the file cache).
cgroup_min_overhead_fraction: f64,
cgroup_downscale_threshold_buffer_bytes: u64,
@@ -96,12 +97,24 @@ impl Default for Config {
}
impl Config {
fn cgroup_threshold(&self, total_mem: u64) -> u64 {
// We want our threshold to be met gracefully instead of letting postgres get OOM-killed
// (or if there's room, spilling to swap).
fn cgroup_threshold(&self, total_mem: u64, file_cache_disk_size: u64) -> u64 {
// If the file cache is in tmpfs, then it will count towards shmem usage of the cgroup,
// and thus be non-reclaimable, so we should allow for additional memory usage.
//
// If the file cache sits on disk, our desired stable system state is for it to be fully
// page cached (its contents should only be paged to/from disk in situations where we can't
// upscale fast enough). Page-cached memory is reclaimable, so we need to lower the
// threshold for non-reclaimable memory so we scale up *before* the kernel starts paging
// out the file cache.
let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
// Even if we're not separately making room for the file cache (if it's in tmpfs), we still
// want our threshold to be met gracefully instead of letting postgres get OOM-killed.
// So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory
// remaining above the threshold.
(total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64
let max_threshold = (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64;
memory_remaining_for_cgroup.min(max_threshold)
}
}
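A worked example of the threshold rule above, as a standalone sketch (illustrative numbers only, not taken from this diff):
fn cgroup_threshold(total_mem: u64, file_cache_disk_size: u64, min_overhead_fraction: f64) -> u64 {
    let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
    let max_threshold = (total_mem as f64 * (1.0 - min_overhead_fraction)) as u64;
    memory_remaining_for_cgroup.min(max_threshold)
}
// cgroup_threshold(4 << 30, 1 << 30, 0.15):
//   max_threshold               = 4 GiB * 0.85  = 3.4 GiB
//   memory_remaining_for_cgroup = 4 GiB - 1 GiB = 3.0 GiB
//   result                      = min(3.0 GiB, 3.4 GiB) = 3.0 GiB
// i.e. the threshold is lowered so the page-cached file cache has room before we upscale.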
@@ -136,6 +149,11 @@ impl Runner {
let mem = get_total_system_memory();
let mut file_cache_disk_size = 0;
// We need to process file cache initialization before cgroup initialization, so that the memory
// allocated to the file cache is appropriately taken into account when we decide the cgroup's
// memory limits.
if let Some(connstr) = &args.pgconnstr {
info!("initializing file cache");
let config = FileCacheConfig::default();
@@ -166,6 +184,7 @@ impl Runner {
info!("file cache size actually got set to {actual_size}")
}
file_cache_disk_size = actual_size;
state.filecache = Some(file_cache);
}
@@ -188,7 +207,7 @@ impl Runner {
cgroup.watch(hist_tx).await
});
let threshold = state.config.cgroup_threshold(mem);
let threshold = state.config.cgroup_threshold(mem, file_cache_disk_size);
info!(threshold, "set initial cgroup threshold",);
state.cgroup = Some(CgroupState {
@@ -240,7 +259,9 @@ impl Runner {
return Ok((false, status.to_owned()));
}
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, expected_file_cache_size);
let current = last_history.avg_non_reclaimable;
@@ -261,11 +282,13 @@ impl Runner {
// The downscaling has been approved. Downscale the file cache, then the cgroup.
let mut status = vec![];
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache {
let actual_usage = file_cache
.set_file_cache_size(expected_file_cache_size)
.await
.context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
let message = format!(
"set file cache size to {} MiB",
bytes_to_mebibytes(actual_usage),
@@ -275,7 +298,9 @@ impl Runner {
}
if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
let message = format!(
"set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB",
@@ -304,6 +329,7 @@ impl Runner {
let new_mem = resources.mem;
let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache {
let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory);
info!(
@@ -316,6 +342,7 @@ impl Runner {
.set_file_cache_size(expected_usage)
.await
.context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
if actual_usage != expected_usage {
warn!(
@@ -327,7 +354,9 @@ impl Runner {
}
if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
info!(
"set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB",

View File

@@ -30,7 +30,7 @@ futures.workspace = true
hex.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
hyper.workspace = true
itertools.workspace = true
md5.workspace = true
nix.workspace = true

View File

@@ -164,7 +164,11 @@ fn criterion_benchmark(c: &mut Criterion) {
let conf: &'static PageServerConf = Box::leak(Box::new(
pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
));
virtual_file::init(16384, virtual_file::io_engine_for_bench());
virtual_file::init(
16384,
virtual_file::io_engine_for_bench(),
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
);
page_cache::init(conf.page_cache_size);
{

View File

@@ -540,13 +540,10 @@ impl Client {
.map_err(Error::ReceiveBody)
}
/// Configs io mode at runtime.
pub async fn put_io_mode(
&self,
mode: &pageserver_api::models::virtual_file::IoMode,
) -> Result<()> {
let uri = format!("{}/v1/io_mode", self.mgmt_api_endpoint);
self.request(Method::PUT, uri, mode)
/// Configs io buffer alignment at runtime.
pub async fn put_io_alignment(&self, align: usize) -> Result<()> {
let uri = format!("{}/v1/io_alignment", self.mgmt_api_endpoint);
self.request(Method::PUT, uri, align)
.await?
.json()
.await

View File

@@ -152,7 +152,11 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
// Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
);
pageserver::page_cache::init(100);
let mut total_delta_layers = 0usize;

View File

@@ -59,7 +59,7 @@ pub(crate) enum LayerCmd {
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs, 1);
page_cache::init(100);
let file = VirtualFile::open(path, ctx).await?;
let file_id = page_cache::next_file_id();
@@ -190,7 +190,11 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
new_tenant_id,
new_timeline_id,
} => {
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
);
pageserver::page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

View File

@@ -26,7 +26,7 @@ use pageserver::{
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file,
};
use pageserver_api::shard::TenantShardId;
use pageserver_api::{config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT, shard::TenantShardId};
use postgres_ffi::ControlFileData;
use remote_storage::{RemotePath, RemoteStorageConfig};
use tokio_util::sync::CancellationToken;
@@ -205,7 +205,11 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
DEFAULT_IO_BUFFER_ALIGNMENT,
);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
dump_layerfile_from_path(path, true, &ctx).await

View File

@@ -59,9 +59,9 @@ pub(crate) struct Args {
#[clap(long)]
set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,
/// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).
/// Before starting the benchmark, live-reconfigure the pageserver to use specified alignment for io buffers.
#[clap(long)]
set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,
set_io_alignment: Option<usize>,
targets: Option<Vec<TenantTimelineId>>,
}
@@ -129,8 +129,8 @@ async fn main_impl(
mgmt_api_client.put_io_engine(engine_str).await?;
}
if let Some(mode) = &args.set_io_mode {
mgmt_api_client.put_io_mode(mode).await?;
if let Some(align) = args.set_io_alignment {
mgmt_api_client.put_io_alignment(align).await?;
}
// discover targets

View File

@@ -14,19 +14,14 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
}
(Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
(Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
(
Scope::Admin
| Scope::SafekeeperData
| Scope::GenerationsApi
| Scope::Infra
| Scope::Scrubber,
_,
) => Err(AuthError(
format!(
"JWT scope '{:?}' is ineligible for Pageserver auth",
claims.scope
)
.into(),
)),
(Scope::Admin | Scope::SafekeeperData | Scope::GenerationsApi | Scope::Scrubber, _) => {
Err(AuthError(
format!(
"JWT scope '{:?}' is ineligible for Pageserver auth",
claims.scope
)
.into(),
))
}
}
}

View File

@@ -125,7 +125,8 @@ fn main() -> anyhow::Result<()> {
// after setting up logging, log the effective IO engine choice and read path implementations
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");
// The tenants directory contains all the pageserver local disk state.
// Create if not exists and make sure all the contents are durable before proceeding.
@@ -167,7 +168,11 @@ fn main() -> anyhow::Result<()> {
let scenario = failpoint_support::init();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
virtual_file::init(
conf.max_file_descriptors,
conf.virtual_file_io_engine,
conf.io_buffer_alignment,
);
page_cache::init(conf.page_cache_size);
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
@@ -570,7 +575,7 @@ fn start_pageserver(
.build()
.map_err(|err| anyhow!(err))?;
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper0::Server::from_tcp(http_listener)?
let server = hyper::Server::from_tcp(http_listener)?
.serve(service)
.with_graceful_shutdown({
let cancel = cancel.clone();

View File

@@ -174,7 +174,9 @@ pub struct PageServerConf {
pub l0_flush: crate::l0_flush::L0FlushConfig,
/// Direct IO settings
pub virtual_file_io_mode: virtual_file::IoMode,
pub virtual_file_direct_io: virtual_file::DirectIoMode,
pub io_buffer_alignment: usize,
}
/// Token for authentication to safekeepers
@@ -323,10 +325,11 @@ impl PageServerConf {
image_compression,
ephemeral_bytes_per_memory_kb,
l0_flush,
virtual_file_io_mode,
virtual_file_direct_io,
concurrent_tenant_warmup,
concurrent_tenant_size_logical_size_queries,
virtual_file_io_engine,
io_buffer_alignment,
tenant_config,
} = config_toml;
@@ -365,6 +368,8 @@ impl PageServerConf {
max_vectored_read_bytes,
image_compression,
ephemeral_bytes_per_memory_kb,
virtual_file_direct_io,
io_buffer_alignment,
// ------------------------------------------------------------
// fields that require additional validation or custom handling
@@ -403,7 +408,6 @@ impl PageServerConf {
l0_flush: l0_flush
.map(crate::l0_flush::L0FlushConfig::from)
.unwrap_or_default(),
virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
};
// ------------------------------------------------------------

View File

@@ -17,7 +17,6 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::models::AuxFilePolicy;
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
use pageserver_api::models::IngestAuxFilesRequest;
@@ -704,8 +703,6 @@ async fn timeline_archival_config_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
@@ -716,7 +713,7 @@ async fn timeline_archival_config_handler(
.get_attached_tenant_shard(tenant_shard_id)?;
tenant
.apply_timeline_archival_config(timeline_id, request_data.state, ctx)
.apply_timeline_archival_config(timeline_id, request_data.state)
.await?;
Ok::<_, ApiError>(())
}
@@ -2382,13 +2379,17 @@ async fn put_io_engine_handler(
json_response(StatusCode::OK, ())
}
async fn put_io_mode_handler(
async fn put_io_alignment_handler(
mut r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&r, None)?;
let mode: IoMode = json_request(&mut r).await?;
crate::virtual_file::set_io_mode(mode);
let align: usize = json_request(&mut r).await?;
crate::virtual_file::set_io_buffer_alignment(align).map_err(|align| {
ApiError::PreconditionFailed(
format!("Requested io alignment ({align}) is not a power of two").into(),
)
})?;
json_response(StatusCode::OK, ())
}
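The handler above relies on `set_io_buffer_alignment` rejecting invalid values; a minimal sketch of that check (assumed implementation detail, not code from this diff) is simply a power-of-two test:
fn is_valid_io_alignment(align: usize) -> bool {
    // Zero is not a power of two, so it is rejected as well.
    align.is_power_of_two()
}
// is_valid_io_alignment(512) == true
// is_valid_io_alignment(513) == false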
@@ -3079,7 +3080,9 @@ pub fn make_router(
|r| api_handler(r, timeline_collect_keyspace),
)
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
.put("/v1/io_mode", |r| api_handler(r, put_io_mode_handler))
.put("/v1/io_alignment", |r| {
api_handler(r, put_io_alignment_handler)
})
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/force_aux_policy_switch",
|r| api_handler(r, force_aux_policy_switch_handler),

View File

@@ -13,8 +13,6 @@ pub mod http;
pub mod import_datadir;
pub mod l0_flush;
extern crate hyper0 as hyper;
use futures::{stream::FuturesUnordered, StreamExt};
pub use pageserver_api::keyspace;
use tokio_util::sync::CancellationToken;

View File

@@ -38,7 +38,6 @@ use std::future::Future;
use std::sync::Weak;
use std::time::SystemTime;
use storage_broker::BrokerClientChannel;
use timeline::offload::offload_timeline;
use tokio::io::BufReader;
use tokio::sync::watch;
use tokio::task::JoinSet;
@@ -288,13 +287,9 @@ pub struct Tenant {
/// During timeline creation, we first insert the TimelineId to the
/// creating map, then `timelines`, then remove it from the creating map.
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_creating`
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_creating`
timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
/// Possibly offloaded and archived timelines
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_offloaded`
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
// This mutex prevents creation of new timelines during GC.
// Adding yet another mutex (in addition to `timelines`) is needed because holding
// `timelines` mutex during all GC iteration
@@ -489,65 +484,6 @@ impl WalRedoManager {
}
}
pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
}
impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
}
}
}
#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
Offloaded(Arc<OffloadedTimeline>),
}
impl TimelineOrOffloaded {
pub fn tenant_shard_id(&self) -> TenantShardId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
}
}
pub fn timeline_id(&self) -> TimelineId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
}
}
pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
}
}
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
}
}
}
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum GetTimelineError {
#[error("Timeline is shutting down")]
@@ -1470,192 +1406,52 @@ impl Tenant {
}
}
fn check_to_be_archived_has_no_unarchived_children(
timeline_id: TimelineId,
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
) -> Result<(), TimelineArchivalError> {
let children: Vec<TimelineId> = timelines
.iter()
.filter_map(|(id, entry)| {
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
return None;
}
if entry.is_archived() == Some(true) {
return None;
}
Some(*id)
})
.collect();
if !children.is_empty() {
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
}
Ok(())
}
fn check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id: TimelineId,
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
offloaded_timelines: &std::sync::MutexGuard<
'_,
HashMap<TimelineId, Arc<OffloadedTimeline>>,
>,
) -> Result<(), TimelineArchivalError> {
let has_archived_parent =
if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {
ancestor_timeline.is_archived() == Some(true)
} else if offloaded_timelines.contains_key(&ancestor_timeline_id) {
true
} else {
error!("ancestor timeline {ancestor_timeline_id} not found");
if cfg!(debug_assertions) {
panic!("ancestor timeline {ancestor_timeline_id} not found");
}
return Err(TimelineArchivalError::NotFound);
};
if has_archived_parent {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline_id,
));
}
Ok(())
}
fn check_to_be_unarchived_timeline_has_no_archived_parent(
timeline: &Arc<Timeline>,
) -> Result<(), TimelineArchivalError> {
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
if ancestor_timeline.is_archived() == Some(true) {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline.timeline_id,
));
}
}
Ok(())
}
/// Loads the specified (offloaded) timeline from S3 and attaches it as a loaded timeline
async fn unoffload_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
ctx: RequestContext,
) -> Result<Arc<Timeline>, TimelineArchivalError> {
let cancel = self.cancel.clone();
let timeline_preload = self
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
.await;
let index_part = match timeline_preload.index_part {
Ok(index_part) => {
debug!("remote index part exists for timeline {timeline_id}");
index_part
}
Err(DownloadError::NotFound) => {
error!(%timeline_id, "index_part not found on remote");
return Err(TimelineArchivalError::NotFound);
}
Err(e) => {
// Some (possibly ephemeral) error happened during index_part download.
warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})");
return Err(TimelineArchivalError::Other(
anyhow::Error::new(e).context("downloading index_part from remote storage"),
));
}
};
let index_part = match index_part {
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
MaybeDeletedIndexPart::Deleted(_index_part) => {
info!("timeline is deleted according to index_part.json");
return Err(TimelineArchivalError::NotFound);
}
};
let remote_metadata = index_part.metadata.clone();
let timeline_resources = self.build_timeline_resources(timeline_id);
self.load_remote_timeline(
timeline_id,
index_part,
remote_metadata,
timeline_resources,
&ctx,
)
.await
.with_context(|| {
format!(
"failed to load remote timeline {} for tenant {}",
timeline_id, self.tenant_shard_id
)
})?;
let timelines = self.timelines.lock().unwrap();
if let Some(timeline) = timelines.get(&timeline_id) {
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
Ok(Arc::clone(timeline))
} else {
warn!("timeline not available directly after attach");
Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)))
}
}
pub(crate) async fn apply_timeline_archival_config(
self: &Arc<Self>,
&self,
timeline_id: TimelineId,
new_state: TimelineArchivalState,
ctx: RequestContext,
state: TimelineArchivalState,
) -> Result<(), TimelineArchivalError> {
info!("setting timeline archival config");
// First part: figure out what is needed to do, and do validation
let timeline_or_unarchive_offloaded = 'outer: {
let timeline = {
let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else {
let offloaded_timelines = self.timelines_offloaded.lock().unwrap();
let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {
return Err(TimelineArchivalError::NotFound);
};
if new_state == TimelineArchivalState::Archived {
// It's offloaded already, so nothing to do
return Ok(());
}
if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {
Self::check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id,
&timelines,
&offloaded_timelines,
)?;
}
break 'outer None;
return Err(TimelineArchivalError::NotFound);
};
// Do some validation. We release the timelines lock below, so there is potential
// for race conditions: these checks exist mainly to prevent misunderstandings of
// the API's capabilities, not to serve as the sole defense of its invariants.
match new_state {
TimelineArchivalState::Unarchived => {
Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?
}
TimelineArchivalState::Archived => {
Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?
if state == TimelineArchivalState::Unarchived {
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
if ancestor_timeline.is_archived() == Some(true) {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline.timeline_id,
));
}
}
}
Some(Arc::clone(timeline))
// Ensure that there are no non-archived child timelines
let children: Vec<TimelineId> = timelines
.iter()
.filter_map(|(id, entry)| {
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
return None;
}
if entry.is_archived() == Some(true) {
return None;
}
Some(*id)
})
.collect();
if !children.is_empty() && state == TimelineArchivalState::Archived {
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
}
Arc::clone(timeline)
};
// Second part: unarchive timeline (if needed)
let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
timeline
} else {
// Turn offloaded timeline into a non-offloaded one
self.unoffload_timeline(timeline_id, ctx).await?
};
// Third part: upload new timeline archival state and block until it is present in S3
let upload_needed = timeline
.remote_client
.schedule_index_upload_for_timeline_archival_state(new_state)?;
.schedule_index_upload_for_timeline_archival_state(state)?;
if upload_needed {
info!("Uploading new state");
@@ -2088,7 +1884,7 @@ impl Tenant {
///
/// Returns whether we have a pending compaction task.
async fn compaction_iteration(
self: &Arc<Self>,
&self,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<bool, timeline::CompactionError> {
@@ -2109,28 +1905,21 @@ impl Tenant {
// while holding the lock. Then drop the lock and actually perform the
// compactions. We don't want to block everything else while the
// compaction runs.
let timelines_to_compact_or_offload;
{
let timelines_to_compact = {
let timelines = self.timelines.lock().unwrap();
timelines_to_compact_or_offload = timelines
let timelines_to_compact = timelines
.iter()
.filter_map(|(timeline_id, timeline)| {
let (is_active, can_offload) = (timeline.is_active(), timeline.can_offload());
let has_no_unoffloaded_children = {
!timelines
.iter()
.any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id))
};
let can_offload = can_offload && has_no_unoffloaded_children;
if (is_active, can_offload) == (false, false) {
None
if timeline.is_active() {
Some((*timeline_id, timeline.clone()))
} else {
Some((*timeline_id, timeline.clone(), (is_active, can_offload)))
None
}
})
.collect::<Vec<_>>();
drop(timelines);
}
timelines_to_compact
};
// Before doing any I/O work, check our circuit breaker
if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
@@ -2140,34 +1929,20 @@ impl Tenant {
let mut has_pending_task = false;
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
{
let pending_task_left = if *can_compact {
Some(
timeline
.compact(cancel, EnumSet::empty(), ctx)
.instrument(info_span!("compact_timeline", %timeline_id))
.await
.inspect_err(|e| match e {
timeline::CompactionError::ShuttingDown => (),
timeline::CompactionError::Other(e) => {
self.compaction_circuit_breaker
.lock()
.unwrap()
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
}
})?,
)
} else {
None
};
has_pending_task |= pending_task_left.unwrap_or(false);
if pending_task_left == Some(false) && *can_offload {
offload_timeline(self, timeline)
.instrument(info_span!("offload_timeline", %timeline_id))
.await
.map_err(timeline::CompactionError::Other)?;
}
for (timeline_id, timeline) in &timelines_to_compact {
has_pending_task |= timeline
.compact(cancel, EnumSet::empty(), ctx)
.instrument(info_span!("compact_timeline", %timeline_id))
.await
.inspect_err(|e| match e {
timeline::CompactionError::ShuttingDown => (),
timeline::CompactionError::Other(e) => {
self.compaction_circuit_breaker
.lock()
.unwrap()
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
}
})?;
}
self.compaction_circuit_breaker
@@ -3077,7 +2852,6 @@ impl Tenant {
constructed_at: Instant::now(),
timelines: Mutex::new(HashMap::new()),
timelines_creating: Mutex::new(HashSet::new()),
timelines_offloaded: Mutex::new(HashMap::new()),
gc_cs: tokio::sync::Mutex::new(()),
walredo_mgr,
remote_storage,

View File

@@ -84,7 +84,7 @@ impl Drop for EphemeralFile {
fn drop(&mut self) {
// unlink the file
// we are clear to do this, because we have entered a gate
let path = self.buffered_writer.as_inner().as_inner().path();
let path = &self.buffered_writer.as_inner().as_inner().path;
let res = std::fs::remove_file(path);
if let Err(e) = res {
if e.kind() != std::io::ErrorKind::NotFound {
@@ -356,7 +356,7 @@ mod tests {
}
let file_contents =
std::fs::read(file.buffered_writer.as_inner().as_inner().path()).unwrap();
std::fs::read(&file.buffered_writer.as_inner().as_inner().path).unwrap();
assert_eq!(file_contents, &content[0..cap]);
let buffer_contents = file.buffered_writer.inspect_buffer();
@@ -392,7 +392,7 @@ mod tests {
.buffered_writer
.as_inner()
.as_inner()
.path()
.path
.metadata()
.unwrap();
assert_eq!(

View File

@@ -141,14 +141,14 @@ impl GcBlock {
Ok(())
}
pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {
pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
let unblocked = {
let mut g = self.reasons.lock().unwrap();
if g.is_empty() {
return;
}
g.remove(timeline_id);
g.remove(&timeline.timeline_id);
BlockingReasons::clean_and_summarize(g).is_none()
};

View File

@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
@@ -153,9 +153,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(src_path, cancel).await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -206,9 +204,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let mut download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let mut download = storage.download(src_path, cancel).await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -348,9 +344,7 @@ async fn do_download_index_part(
let index_part_bytes = download_retry_forever(
|| async {
let download = storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(&remote_path, cancel).await?;
let mut bytes = Vec::new();
@@ -532,15 +526,10 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
let download = match storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
let download = match storage.download(&remote_path, cancel).await {
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
storage
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
storage.download(&remote_preserved_path, cancel).await?
}
Err(other) => Err(other)?,
};

View File

@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument};
@@ -944,35 +944,36 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
..Default::default()
};
backoff::retry(
|| async {
let download = match self
let download = self
.remote_storage
.download(&heatmap_path, &opts, cancel)
.download(&heatmap_path, cancel)
.await
{
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
.map_err(UpdateError::from)?;
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
SECONDARY_MODE.download_heatmap.inc();
if Some(&download.etag) == prev_etag {
Ok(HeatMapDownload::Unmodified)
} else {
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
}
},
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -983,7 +984,6 @@ impl<'a> TenantDownloader<'a> {
.await
.ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
}
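// Minimal sketch (illustrative, not part of the diff) of the conditional-GET
// pattern used above: pass the previously observed etag via `DownloadOpts` and
// treat `DownloadError::Unmodified` as "cached copy is still current". The
// function name and the plain byte-buffer return type are assumptions; the
// usual imports of this module (anyhow, tokio, tokio_util) are assumed.
async fn fetch_if_changed(
    storage: &GenericRemoteStorage,
    path: &RemotePath,
    prev_etag: Option<&Etag>,
    cancel: &CancellationToken,
) -> anyhow::Result<Option<(Etag, Vec<u8>)>> {
    let opts = DownloadOpts {
        etag: prev_etag.cloned(),
        ..Default::default()
    };
    let download = match storage.download(path, &opts, cancel).await {
        Ok(download) => download,
        // The remote object still matches the etag we already have.
        Err(DownloadError::Unmodified) => return Ok(None),
        Err(err) => return Err(err.into()),
    };
    let mut bytes = Vec::new();
    let mut body = tokio_util::io::StreamReader::new(download.download_stream);
    tokio::io::copy_buf(&mut body, &mut bytes).await?;
    Ok(Some((download.etag, bytes)))
}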
/// Download heatmap layers that are not present on local disk, or update their

View File

@@ -53,7 +53,6 @@ use camino::{Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::DBDIR_KEY;
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::shard::TenantShardId;
@@ -573,7 +572,7 @@ impl DeltaLayerWriterInner {
ensure!(
metadata.len() <= S3_UPLOAD_LIMIT,
"Created delta layer file at {} of size {} above limit {S3_UPLOAD_LIMIT}!",
file.path(),
file.path,
metadata.len()
);
@@ -791,7 +790,7 @@ impl DeltaLayerInner {
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
ctx: &RequestContext,
) -> anyhow::Result<Self> {
let file = VirtualFile::open_v2(path, ctx)
let file = VirtualFile::open(path, ctx)
.await
.context("open layer file")?;
@@ -964,25 +963,14 @@ impl DeltaLayerInner {
.blobs_at
.as_slice()
.iter()
.filter_map(|(_, blob_meta)| {
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
// The size of values for these keys is unbounded and can
// grow very large in pathological cases.
None
} else {
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
}
})
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
.join(", ");
if !offenders.is_empty() {
tracing::warn!(
"Oversized vectored read ({} > {}) for keys {}",
largest_read_size,
read_size_soft_max,
offenders
);
}
tracing::warn!(
"Oversized vectored read ({} > {}) for keys {}",
largest_read_size,
read_size_soft_max,
offenders
);
}
largest_read_size
@@ -1022,7 +1010,7 @@ impl DeltaLayerInner {
blob_meta.key,
PageReconstructError::Other(anyhow!(
"Failed to read blobs from virtual file {}: {}",
self.file.path(),
self.file.path,
kind
)),
);
@@ -1048,7 +1036,7 @@ impl DeltaLayerInner {
meta.meta.key,
PageReconstructError::Other(anyhow!(e).context(format!(
"Failed to decompress blob from virtual file {}",
self.file.path(),
self.file.path,
))),
);
@@ -1066,7 +1054,7 @@ impl DeltaLayerInner {
meta.meta.key,
PageReconstructError::Other(anyhow!(e).context(format!(
"Failed to deserialize blob from virtual file {}",
self.file.path(),
self.file.path,
))),
);
@@ -1198,6 +1186,7 @@ impl DeltaLayerInner {
let mut prev: Option<(Key, Lsn, BlobRef)> = None;
let mut read_builder: Option<ChunkedVectoredReadBuilder> = None;
let align = virtual_file::get_io_buffer_alignment();
let max_read_size = self
.max_vectored_read_bytes
@@ -1246,6 +1235,7 @@ impl DeltaLayerInner {
offsets.end.pos(),
meta,
max_read_size,
align,
))
}
} else {

View File

@@ -49,7 +49,6 @@ use camino::{Utf8Path, Utf8PathBuf};
use hex;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::DBDIR_KEY;
use pageserver_api::keyspace::KeySpace;
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use rand::{distributions::Alphanumeric, Rng};
@@ -389,7 +388,7 @@ impl ImageLayerInner {
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
ctx: &RequestContext,
) -> anyhow::Result<Self> {
let file = VirtualFile::open_v2(path, ctx)
let file = VirtualFile::open(path, ctx)
.await
.context("open layer file")?;
let file_id = page_cache::next_file_id();
@@ -588,25 +587,14 @@ impl ImageLayerInner {
.blobs_at
.as_slice()
.iter()
.filter_map(|(_, blob_meta)| {
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
// The size of values for these keys is unbounded and can
// grow very large in pathological cases.
None
} else {
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
}
})
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
.join(", ");
if !offenders.is_empty() {
tracing::warn!(
"Oversized vectored read ({} > {}) for keys {}",
buf_size,
max_vectored_read_bytes,
offenders
);
}
tracing::warn!(
"Oversized vectored read ({} > {}) for keys {}",
buf_size,
max_vectored_read_bytes,
offenders
);
}
let buf = BytesMut::with_capacity(buf_size);
@@ -626,7 +614,7 @@ impl ImageLayerInner {
meta.meta.key,
PageReconstructError::Other(anyhow!(e).context(format!(
"Failed to decompress blob from virtual file {}",
self.file.path(),
self.file.path,
))),
);
@@ -647,7 +635,7 @@ impl ImageLayerInner {
blob_meta.key,
PageReconstructError::from(anyhow!(
"Failed to read blobs from virtual file {}: {}",
self.file.path(),
self.file.path,
kind
)),
);

View File

@@ -7,7 +7,6 @@ pub(crate) mod handle;
mod init;
pub mod layer_manager;
pub(crate) mod logical_size;
pub mod offload;
pub mod span;
pub mod uninit;
mod walreceiver;
@@ -1557,17 +1556,6 @@ impl Timeline {
}
}
/// Checks if the internal state of the timeline is consistent with it being able to be offloaded.
/// This is necessary but not sufficient for offloading the timeline, as it might have
/// child timelines that are not offloaded yet.
pub(crate) fn can_offload(&self) -> bool {
if self.remote_client.is_archived() != Some(true) {
return false;
}
true
}
/// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
/// compaction tasks.
pub(crate) async fn compact(
@@ -1830,6 +1818,7 @@ impl Timeline {
self.current_state() == TimelineState::Active
}
#[allow(unused)]
pub(crate) fn is_archived(&self) -> Option<bool> {
self.remote_client.is_archived()
}

View File

@@ -15,7 +15,7 @@ use crate::{
tenant::{
metadata::TimelineMetadata,
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
CreateTimelineCause, DeleteTimelineError, Tenant,
},
};
@@ -24,14 +24,12 @@ use super::{Timeline, TimelineResources};
/// Mark timeline as deleted in S3 so we won't pick it up next time
/// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index(
timeline: &TimelineOrOffloaded,
) -> Result<(), DeleteTimelineError> {
let res = timeline
.remote_client()
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
match timeline
.remote_client
.persist_index_part_with_deleted_flag()
.await;
match res {
.await
{
// If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise
@@ -129,9 +127,9 @@ pub(super) async fn delete_local_timeline_directory(
}
/// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
timeline
.remote_client()
.remote_client
.delete_all()
.await
.context("delete_all")
@@ -139,41 +137,27 @@ async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyho
/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_maybe_offloaded_timeline_from_tenant(
async fn remove_timeline_from_tenant(
tenant: &Tenant,
timeline: &TimelineOrOffloaded,
timeline: &Timeline,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// Remove the timeline from the map.
// This observes the locking order between timelines and timelines_offloaded
let mut timelines = tenant.timelines.lock().unwrap();
let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
let offloaded_children_exist = timelines_offloaded
.iter()
.any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
let children_exist = timelines
.iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id()));
// XXX this can happen because of race conditions with branch creation.
// We already deleted the remote layer files, so it's probably best to panic.
if children_exist || offloaded_children_exist {
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
// We already deleted the layer files, so it's probably best to panic.
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
if children_exist {
panic!("Timeline grew children while we removed layer files");
}
match timeline {
TimelineOrOffloaded::Timeline(timeline) => {
timelines.remove(&timeline.timeline_id).expect(
"timeline that we were deleting was concurrently removed from 'timelines' map",
);
}
TimelineOrOffloaded::Offloaded(timeline) => {
timelines_offloaded
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
}
}
timelines
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
drop(timelines_offloaded);
drop(timelines);
Ok(())
@@ -223,11 +207,9 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?;
// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.shutdown(super::ShutdownMode::Hard).await;
}
timeline.shutdown(super::ShutdownMode::Hard).await;
tenant.gc_block.before_delete(&timeline.timeline_id());
tenant.gc_block.before_delete(&timeline);
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
Err(anyhow::anyhow!(
@@ -303,16 +285,15 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?;
let timeline = TimelineOrOffloaded::Timeline(timeline);
Self::schedule_background(guard, tenant.conf, tenant, timeline);
Ok(())
}
pub(super) fn prepare(
fn prepare(
tenant: &Tenant,
timeline_id: TimelineId,
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
// Note the interaction between this guard and deletion guard.
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
// This is important because when you take into account `remove_timeline_from_tenant`
@@ -326,14 +307,8 @@ impl DeleteTimelineFlow {
let timelines = tenant.timelines.lock().unwrap();
let timeline = match timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
None => {
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
match offloaded_timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound),
}
}
Some(t) => t,
None => return Err(DeleteTimelineError::NotFound),
};
// Ensure that there are no child timelines **attached to that pageserver**,
@@ -359,32 +334,30 @@ impl DeleteTimelineFlow {
// to remove the timeline from it.
// Always if you have two locks that are taken in different order this can result in a deadlock.
let delete_progress = Arc::clone(timeline.delete_progress());
let delete_progress = Arc::clone(&timeline.delete_progress);
let delete_lock_guard = match delete_progress.try_lock_owned() {
Ok(guard) => DeletionGuard(guard),
Err(_) => {
// Unfortunately if lock fails arc is consumed.
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
timeline.delete_progress(),
&timeline.delete_progress,
)));
}
};
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.set_state(TimelineState::Stopping);
}
timeline.set_state(TimelineState::Stopping);
Ok((timeline, delete_lock_guard))
Ok((Arc::clone(timeline), delete_lock_guard))
}
fn schedule_background(
guard: DeletionGuard,
conf: &'static PageServerConf,
tenant: Arc<Tenant>,
timeline: TimelineOrOffloaded,
timeline: Arc<Timeline>,
) {
let tenant_shard_id = timeline.tenant_shard_id();
let timeline_id = timeline.timeline_id();
let tenant_shard_id = timeline.tenant_shard_id;
let timeline_id = timeline.timeline_id;
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
@@ -395,9 +368,7 @@ impl DeleteTimelineFlow {
async move {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
error!("Error: {err:#}");
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
timeline.set_broken(format!("{err:#}"))
}
timeline.set_broken(format!("{err:#}"))
};
Ok(())
}
@@ -409,19 +380,15 @@ impl DeleteTimelineFlow {
mut guard: DeletionGuard,
conf: &PageServerConf,
tenant: &Tenant,
timeline: &TimelineOrOffloaded,
timeline: &Timeline,
) -> Result<(), DeleteTimelineError> {
// Offloaded timelines have no local state
// TODO: once we persist offloaded information, delete the timeline from there, too
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
}
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
delete_remote_layers_and_index(timeline).await?;
pausable_failpoint!("in_progress_delete");
remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;
remove_timeline_from_tenant(tenant, timeline, &guard).await?;
*guard = Self::Finished;
@@ -433,7 +400,7 @@ impl DeleteTimelineFlow {
}
}
pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
impl Deref for DeletionGuard {
type Target = DeleteTimelineFlow;

View File

@@ -1,69 +0,0 @@
use std::sync::Arc;
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};
use super::{
delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
Timeline,
};
pub(crate) async fn offload_timeline(
tenant: &Tenant,
timeline: &Arc<Timeline>,
) -> anyhow::Result<()> {
tracing::info!("offloading archived timeline");
let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;
let TimelineOrOffloaded::Timeline(timeline) = timeline else {
tracing::error!("timeline already offloaded, but given timeline object");
return Ok(());
};
// TODO extend guard mechanism above with method
// to make deletions possible while offloading is in progress
// TODO mark timeline as offloaded in S3
let conf = &tenant.conf;
delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;
remove_timeline_from_tenant(tenant, &timeline, &guard).await?;
{
let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
offloaded_timelines.insert(
timeline.timeline_id,
Arc::new(OffloadedTimeline::from_timeline(&timeline)),
);
}
Ok(())
}
/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_timeline_from_tenant(
tenant: &Tenant,
timeline: &Timeline,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// Remove the timeline from the map.
let mut timelines = tenant.timelines.lock().unwrap();
let children_exist = timelines
.iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
// We already deleted the layer files, so it's probably best to panic.
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
if children_exist {
panic!("Timeline grew children while we removed layer files");
}
timelines
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
drop(timelines);
Ok(())
}

View File

@@ -194,6 +194,8 @@ pub(crate) struct ChunkedVectoredReadBuilder {
/// Start offset and metadata for each blob in this read
blobs_at: VecMap<u64, BlobMeta>,
max_read_size: Option<usize>,
/// Chunk size reads are coalesced into.
chunk_size: usize,
}
/// Computes x / d rounded up.
@@ -202,7 +204,6 @@ fn div_round_up(x: usize, d: usize) -> usize {
}
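// Illustrative sketch (not part of the diff), assuming the usual integer
// ceiling-division formula; this is the rounding used for chunk boundaries in
// ChunkedVectoredReadBuilder below. Requires d > 0.
//
//     fn div_round_up(x: usize, d: usize) -> usize {
//         (x + d - 1) / d
//     }
//
// e.g. div_round_up(1100, 512) == 3, while 1100 / 512 == 2.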
impl ChunkedVectoredReadBuilder {
const CHUNK_SIZE: usize = virtual_file::get_io_buffer_alignment();
/// Start building a new vectored read.
///
/// Note that by design, this does not check against reading more than `max_read_size` to
@@ -213,19 +214,21 @@ impl ChunkedVectoredReadBuilder {
end_offset: u64,
meta: BlobMeta,
max_read_size: Option<usize>,
chunk_size: usize,
) -> Self {
let mut blobs_at = VecMap::default();
blobs_at
.append(start_offset, meta)
.expect("First insertion always succeeds");
let start_blk_no = start_offset as usize / Self::CHUNK_SIZE;
let end_blk_no = div_round_up(end_offset as usize, Self::CHUNK_SIZE);
let start_blk_no = start_offset as usize / chunk_size;
let end_blk_no = div_round_up(end_offset as usize, chunk_size);
Self {
start_blk_no,
end_blk_no,
blobs_at,
max_read_size,
chunk_size,
}
}
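// Worked example (illustrative, not part of the diff): with chunk_size = 512,
// a blob spanning offsets 100..1100 gives
//   start_blk_no = 100 / 512               = 0
//   end_blk_no   = div_round_up(1100, 512) = 3
// so the builder plans a read over chunks 0..3, i.e. bytes 0..1536 on disk.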
@@ -234,12 +237,18 @@ impl ChunkedVectoredReadBuilder {
end_offset: u64,
meta: BlobMeta,
max_read_size: usize,
align: usize,
) -> Self {
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size))
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), align)
}
pub(crate) fn new_streaming(start_offset: u64, end_offset: u64, meta: BlobMeta) -> Self {
Self::new_impl(start_offset, end_offset, meta, None)
pub(crate) fn new_streaming(
start_offset: u64,
end_offset: u64,
meta: BlobMeta,
align: usize,
) -> Self {
Self::new_impl(start_offset, end_offset, meta, None, align)
}
/// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.
@@ -247,12 +256,12 @@ impl ChunkedVectoredReadBuilder {
/// The resulting size also must be below the max read size.
pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
tracing::trace!(start, end, "trying to extend");
let start_blk_no = start as usize / Self::CHUNK_SIZE;
let end_blk_no = div_round_up(end as usize, Self::CHUNK_SIZE);
let start_blk_no = start as usize / self.chunk_size;
let end_blk_no = div_round_up(end as usize, self.chunk_size);
let not_limited_by_max_read_size = {
if let Some(max_read_size) = self.max_read_size {
let coalesced_size = (end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE;
let coalesced_size = (end_blk_no - self.start_blk_no) * self.chunk_size;
coalesced_size <= max_read_size
} else {
true
@@ -283,12 +292,12 @@ impl ChunkedVectoredReadBuilder {
}
pub(crate) fn size(&self) -> usize {
(self.end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE
(self.end_blk_no - self.start_blk_no) * self.chunk_size
}
pub(crate) fn build(self) -> VectoredRead {
let start = (self.start_blk_no * Self::CHUNK_SIZE) as u64;
let end = (self.end_blk_no * Self::CHUNK_SIZE) as u64;
let start = (self.start_blk_no * self.chunk_size) as u64;
let end = (self.end_blk_no * self.chunk_size) as u64;
VectoredRead {
start,
end,
@@ -319,14 +328,18 @@ pub struct VectoredReadPlanner {
prev: Option<(Key, Lsn, u64, BlobFlag)>,
max_read_size: usize,
align: usize,
}
impl VectoredReadPlanner {
pub fn new(max_read_size: usize) -> Self {
let align = virtual_file::get_io_buffer_alignment();
Self {
blobs: BTreeMap::new(),
prev: None,
max_read_size,
align,
}
}
@@ -405,6 +418,7 @@ impl VectoredReadPlanner {
end_offset,
BlobMeta { key, lsn },
self.max_read_size,
self.align,
);
let prev_read_builder = current_read_builder.replace(next_read_builder);
@@ -458,13 +472,13 @@ impl<'a> VectoredBlobReader<'a> {
);
if cfg!(debug_assertions) {
const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
let align = virtual_file::get_io_buffer_alignment() as u64;
debug_assert_eq!(
read.start % ALIGN,
read.start % align,
0,
"Read start at {} does not satisfy the required io buffer alignment ({} bytes)",
read.start,
ALIGN
align
);
}
@@ -539,18 +553,22 @@ pub struct StreamingVectoredReadPlanner {
max_cnt: usize,
/// Size of the current batch
cnt: usize,
align: usize,
}
impl StreamingVectoredReadPlanner {
pub fn new(max_read_size: u64, max_cnt: usize) -> Self {
assert!(max_cnt > 0);
assert!(max_read_size > 0);
let align = virtual_file::get_io_buffer_alignment();
Self {
read_builder: None,
prev: None,
max_cnt,
max_read_size,
cnt: 0,
align,
}
}
@@ -603,6 +621,7 @@ impl StreamingVectoredReadPlanner {
start_offset,
end_offset,
BlobMeta { key, lsn },
self.align,
))
};
}
@@ -637,9 +656,9 @@ mod tests {
use super::*;
fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) {
const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
assert_eq!(read.start % ALIGN, 0);
assert_eq!(read.start / ALIGN, offset_range.first().unwrap().2 / ALIGN);
let align = virtual_file::get_io_buffer_alignment() as u64;
assert_eq!(read.start % align, 0);
assert_eq!(read.start / align, offset_range.first().unwrap().2 / align);
let expected_offsets_in_read: Vec<_> = offset_range.iter().map(|o| o.2).collect();
@@ -657,27 +676,32 @@ mod tests {
fn planner_chunked_coalesce_all_test() {
use crate::virtual_file;
const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;
let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
let max_read_size = CHUNK_SIZE as usize * 8;
// The test explicitly does not check chunk size < 512
if chunk_size < 512 {
return;
}
let max_read_size = chunk_size as usize * 8;
let key = Key::MIN;
let lsn = Lsn(0);
let blob_descriptions = [
(key, lsn, CHUNK_SIZE / 8, BlobFlag::None), // Read 1 BEGIN
(key, lsn, CHUNK_SIZE / 4, BlobFlag::Ignore), // Gap
(key, lsn, CHUNK_SIZE / 2, BlobFlag::None),
(key, lsn, CHUNK_SIZE - 2, BlobFlag::Ignore), // Gap
(key, lsn, CHUNK_SIZE, BlobFlag::None),
(key, lsn, CHUNK_SIZE * 2 - 1, BlobFlag::None),
(key, lsn, CHUNK_SIZE * 2 + 1, BlobFlag::Ignore), // Gap
(key, lsn, CHUNK_SIZE * 3 + 1, BlobFlag::None),
(key, lsn, CHUNK_SIZE * 5 + 1, BlobFlag::None),
(key, lsn, CHUNK_SIZE * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
(key, lsn, CHUNK_SIZE * 7 + 1, BlobFlag::None),
(key, lsn, CHUNK_SIZE * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
(key, lsn, CHUNK_SIZE * 9, BlobFlag::Ignore), // ==== skipped a chunk
(key, lsn, CHUNK_SIZE * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
(key, lsn, chunk_size / 8, BlobFlag::None), // Read 1 BEGIN
(key, lsn, chunk_size / 4, BlobFlag::Ignore), // Gap
(key, lsn, chunk_size / 2, BlobFlag::None),
(key, lsn, chunk_size - 2, BlobFlag::Ignore), // Gap
(key, lsn, chunk_size, BlobFlag::None),
(key, lsn, chunk_size * 2 - 1, BlobFlag::None),
(key, lsn, chunk_size * 2 + 1, BlobFlag::Ignore), // Gap
(key, lsn, chunk_size * 3 + 1, BlobFlag::None),
(key, lsn, chunk_size * 5 + 1, BlobFlag::None),
(key, lsn, chunk_size * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
(key, lsn, chunk_size * 7 + 1, BlobFlag::None),
(key, lsn, chunk_size * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
(key, lsn, chunk_size * 9, BlobFlag::Ignore), // ==== skipped a chunk
(key, lsn, chunk_size * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
];
let ranges = [
@@ -756,19 +780,19 @@ mod tests {
#[test]
fn planner_replacement_test() {
const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;
let max_read_size = 128 * CHUNK_SIZE as usize;
let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
let max_read_size = 128 * chunk_size as usize;
let first_key = Key::MIN;
let second_key = first_key.next();
let lsn = Lsn(0);
let blob_descriptions = vec![
(first_key, lsn, 0, BlobFlag::None), // First in read 1
(first_key, lsn, CHUNK_SIZE, BlobFlag::None), // Last in read 1
(second_key, lsn, 2 * CHUNK_SIZE, BlobFlag::ReplaceAll),
(second_key, lsn, 3 * CHUNK_SIZE, BlobFlag::None),
(second_key, lsn, 4 * CHUNK_SIZE, BlobFlag::ReplaceAll), // First in read 2
(second_key, lsn, 5 * CHUNK_SIZE, BlobFlag::None), // Last in read 2
(first_key, lsn, chunk_size, BlobFlag::None), // Last in read 1
(second_key, lsn, 2 * chunk_size, BlobFlag::ReplaceAll),
(second_key, lsn, 3 * chunk_size, BlobFlag::None),
(second_key, lsn, 4 * chunk_size, BlobFlag::ReplaceAll), // First in read 2
(second_key, lsn, 5 * chunk_size, BlobFlag::None), // Last in read 2
];
let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]];
@@ -778,7 +802,7 @@ mod tests {
planner.handle(key, lsn, offset, flag);
}
planner.handle_range_end(6 * CHUNK_SIZE);
planner.handle_range_end(6 * chunk_size);
let reads = planner.finish();
assert_eq!(reads.len(), 2);
@@ -923,6 +947,7 @@ mod tests {
let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
let mut buf = BytesMut::with_capacity(reserved_bytes);
let align = virtual_file::get_io_buffer_alignment();
let vectored_blob_reader = VectoredBlobReader::new(&file);
let meta = BlobMeta {
key: Key::MIN,
@@ -934,7 +959,8 @@ mod tests {
if idx + 1 == offsets.len() {
continue;
}
let read_builder = ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096);
let read_builder =
ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, align);
let read = read_builder.build();
let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
assert_eq!(result.blobs.len(), 1);

View File

@@ -23,12 +23,10 @@ use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
use pageserver_api::shard::TenantShardId;
use std::fs::File;
use std::io::{Error, ErrorKind, Seek, SeekFrom};
#[cfg(target_os = "linux")]
use std::os::unix::fs::OpenOptionsExt;
use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use tokio::time::Instant;
@@ -40,7 +38,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
mod metadata;
mod open_options;
use self::owned_buffers_io::write::OwnedAsyncWriter;
pub(crate) use api::IoMode;
pub(crate) use api::DirectIoMode;
pub(crate) use io_engine::IoEngineKind;
pub(crate) use metadata::Metadata;
pub(crate) use open_options::*;
@@ -63,171 +61,6 @@ pub(crate) mod owned_buffers_io {
}
}
#[derive(Debug)]
pub struct VirtualFile {
inner: VirtualFileInner,
_mode: IoMode,
}
impl VirtualFile {
/// Open a file in read-only mode. Like File::open.
pub async fn open<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<Self, std::io::Error> {
let inner = VirtualFileInner::open(path, ctx).await?;
Ok(VirtualFile {
inner,
_mode: IoMode::Buffered,
})
}
/// Open a file in read-only mode. Like File::open.
///
/// `O_DIRECT` will be enabled based on `virtual_file_io_mode`.
pub async fn open_v2<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<Self, std::io::Error> {
Self::open_with_options_v2(path.as_ref(), OpenOptions::new().read(true), ctx).await
}
pub async fn create<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<Self, std::io::Error> {
let inner = VirtualFileInner::create(path, ctx).await?;
Ok(VirtualFile {
inner,
_mode: IoMode::Buffered,
})
}
pub async fn create_v2<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<Self, std::io::Error> {
VirtualFile::open_with_options_v2(
path.as_ref(),
OpenOptions::new().write(true).create(true).truncate(true),
ctx,
)
.await
}
pub async fn open_with_options<P: AsRef<Utf8Path>>(
path: P,
open_options: &OpenOptions,
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<Self, std::io::Error> {
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
Ok(VirtualFile {
inner,
_mode: IoMode::Buffered,
})
}
pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(
path: P,
open_options: &OpenOptions,
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<Self, std::io::Error> {
let file = match get_io_mode() {
IoMode::Buffered => {
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
VirtualFile {
inner,
_mode: IoMode::Buffered,
}
}
#[cfg(target_os = "linux")]
IoMode::Direct => {
let inner = VirtualFileInner::open_with_options(
path,
open_options.clone().custom_flags(nix::libc::O_DIRECT),
ctx,
)
.await?;
VirtualFile {
inner,
_mode: IoMode::Direct,
}
}
};
Ok(file)
}
pub fn path(&self) -> &Utf8Path {
self.inner.path.as_path()
}
pub async fn crashsafe_overwrite<B: BoundedBuf<Buf = Buf> + Send, Buf: IoBuf + Send>(
final_path: Utf8PathBuf,
tmp_path: Utf8PathBuf,
content: B,
) -> std::io::Result<()> {
VirtualFileInner::crashsafe_overwrite(final_path, tmp_path, content).await
}
pub async fn sync_all(&self) -> Result<(), Error> {
self.inner.sync_all().await
}
pub async fn sync_data(&self) -> Result<(), Error> {
self.inner.sync_data().await
}
pub async fn metadata(&self) -> Result<Metadata, Error> {
self.inner.metadata().await
}
pub fn remove(self) {
self.inner.remove();
}
pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
self.inner.seek(pos).await
}
pub async fn read_exact_at<Buf>(
&self,
slice: Slice<Buf>,
offset: u64,
ctx: &RequestContext,
) -> Result<Slice<Buf>, Error>
where
Buf: IoBufMut + Send,
{
self.inner.read_exact_at(slice, offset, ctx).await
}
pub async fn read_exact_at_page(
&self,
page: PageWriteGuard<'static>,
offset: u64,
ctx: &RequestContext,
) -> Result<PageWriteGuard<'static>, Error> {
self.inner.read_exact_at_page(page, offset, ctx).await
}
pub async fn write_all_at<Buf: IoBuf + Send>(
&self,
buf: FullSlice<Buf>,
offset: u64,
ctx: &RequestContext,
) -> (FullSlice<Buf>, Result<(), Error>) {
self.inner.write_all_at(buf, offset, ctx).await
}
pub async fn write_all<Buf: IoBuf + Send>(
&mut self,
buf: FullSlice<Buf>,
ctx: &RequestContext,
) -> (FullSlice<Buf>, Result<usize, Error>) {
self.inner.write_all(buf, ctx).await
}
}
///
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
/// the underlying file is closed if the system is low on file descriptors,
@@ -244,7 +77,7 @@ impl VirtualFile {
/// The 'tag' field is used to detect whether the handle is still valid.
///
#[derive(Debug)]
pub struct VirtualFileInner {
pub struct VirtualFile {
/// Lazy handle to the global file descriptor cache. The slot that this points to
/// might contain our File, or it may be empty, or it may contain a File that
/// belongs to a different VirtualFile.
@@ -517,12 +350,12 @@ macro_rules! with_file {
}};
}
impl VirtualFileInner {
impl VirtualFile {
/// Open a file in read-only mode. Like File::open.
pub async fn open<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<VirtualFileInner, std::io::Error> {
) -> Result<VirtualFile, std::io::Error> {
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await
}
@@ -531,7 +364,7 @@ impl VirtualFileInner {
pub async fn create<P: AsRef<Utf8Path>>(
path: P,
ctx: &RequestContext,
) -> Result<VirtualFileInner, std::io::Error> {
) -> Result<VirtualFile, std::io::Error> {
Self::open_with_options(
path.as_ref(),
OpenOptions::new().write(true).create(true).truncate(true),
@@ -549,7 +382,7 @@ impl VirtualFileInner {
path: P,
open_options: &OpenOptions,
_ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<VirtualFileInner, std::io::Error> {
) -> Result<VirtualFile, std::io::Error> {
let path_ref = path.as_ref();
let path_str = path_ref.to_string();
let parts = path_str.split('/').collect::<Vec<&str>>();
@@ -590,7 +423,7 @@ impl VirtualFileInner {
reopen_options.create_new(false);
reopen_options.truncate(false);
let vfile = VirtualFileInner {
let vfile = VirtualFile {
handle: RwLock::new(handle),
pos: 0,
path: path_ref.to_path_buf(),
@@ -1201,21 +1034,6 @@ impl tokio_epoll_uring::IoFd for FileGuard {
#[cfg(test)]
impl VirtualFile {
pub(crate) async fn read_blk(
&self,
blknum: u32,
ctx: &RequestContext,
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
self.inner.read_blk(blknum, ctx).await
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
self.inner.read_to_end(buf, ctx).await
}
}
#[cfg(test)]
impl VirtualFileInner {
pub(crate) async fn read_blk(
&self,
blknum: u32,
@@ -1249,7 +1067,7 @@ impl VirtualFileInner {
}
}
impl Drop for VirtualFileInner {
impl Drop for VirtualFile {
/// If a VirtualFile is dropped, close the underlying file if it was open.
fn drop(&mut self) {
let handle = self.handle.get_mut();
@@ -1325,10 +1143,15 @@ impl OpenFiles {
/// server startup.
///
#[cfg(not(test))]
pub fn init(num_slots: usize, engine: IoEngineKind) {
pub fn init(num_slots: usize, engine: IoEngineKind, io_buffer_alignment: usize) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
if set_io_buffer_alignment(io_buffer_alignment).is_err() {
panic!(
"IO buffer alignment needs to be a power of two and greater than 512, got {io_buffer_alignment}"
);
}
io_engine::init(engine);
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
}
@@ -1352,20 +1175,47 @@ fn get_open_files() -> &'static OpenFiles {
}
}
static IO_BUFFER_ALIGNMENT: AtomicUsize = AtomicUsize::new(DEFAULT_IO_BUFFER_ALIGNMENT);
/// Returns true if the alignment is a power of two and is at least 512.
fn is_valid_io_buffer_alignment(align: usize) -> bool {
align.is_power_of_two() && align >= 512
}
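// Illustrative examples (not part of the diff) of what the check above
// accepts and rejects:
//   is_valid_io_buffer_alignment(512)  == true   // smallest allowed value
//   is_valid_io_buffer_alignment(4096) == true   // typical filesystem block size
//   is_valid_io_buffer_alignment(0)    == false  // zero is not a power of two
//   is_valid_io_buffer_alignment(1536) == false  // 3 * 512 is not a power of two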
/// Sets the IO buffer alignment requirement. Returns an error if the requested
/// alignment is not a power of two or is less than 512 bytes.
#[allow(unused)]
pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
if is_valid_io_buffer_alignment(align) {
IO_BUFFER_ALIGNMENT.store(align, std::sync::atomic::Ordering::Relaxed);
Ok(())
} else {
Err(align)
}
}
/// Gets the io buffer alignment.
pub(crate) const fn get_io_buffer_alignment() -> usize {
DEFAULT_IO_BUFFER_ALIGNMENT
///
/// This function should be used to obtain the alignment value actually in effect.
pub(crate) fn get_io_buffer_alignment() -> usize {
let align = IO_BUFFER_ALIGNMENT.load(std::sync::atomic::Ordering::Relaxed);
if cfg!(test) {
let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT";
if let Some(test_align) = utils::env::var(env_var_name) {
if is_valid_io_buffer_alignment(test_align) {
test_align
} else {
panic!("IO buffer alignment needs to be a power of two and greater than 512, got {test_align}");
}
} else {
align
}
} else {
align
}
}
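// Example (illustrative, not part of the diff): overriding the alignment for a
// unit-test run via the environment variable read above. The exact cargo
// invocation is an assumption:
//   NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT=4096 cargo test -p pageserver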
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
pub(crate) fn set_io_mode(mode: IoMode) {
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
}
pub(crate) fn get_io_mode() -> IoMode {
IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
}
#[cfg(test)]
mod tests {
use crate::context::DownloadBehavior;
@@ -1674,7 +1524,7 @@ mod tests {
// Open the file many times.
let mut files = Vec::new();
for _ in 0..VIRTUAL_FILES {
let f = VirtualFileInner::open_with_options(
let f = VirtualFile::open_with_options(
&test_file_path,
OpenOptions::new().read(true),
&ctx,
@@ -1726,7 +1576,7 @@ mod tests {
let path = testdir.join("myfile");
let tmp_path = testdir.join("myfile.tmp");
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
.await
.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
@@ -1735,7 +1585,7 @@ mod tests {
assert!(!tmp_path.exists());
drop(file);
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
.await
.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
@@ -1758,7 +1608,7 @@ mod tests {
std::fs::write(&tmp_path, "some preexisting junk that should be removed").unwrap();
assert!(tmp_path.exists());
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
.await
.unwrap();

View File

@@ -146,8 +146,6 @@ ConstructDeltaMessage()
if (RootTable.role_table)
{
JsonbValue roles;
HASH_SEQ_STATUS status;
RoleEntry *entry;
roles.type = jbvString;
roles.val.string.val = "roles";
@@ -155,6 +153,9 @@ ConstructDeltaMessage()
pushJsonbValue(&state, WJB_KEY, &roles);
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
HASH_SEQ_STATUS status;
RoleEntry *entry;
hash_seq_init(&status, RootTable.role_table);
while ((entry = hash_seq_search(&status)) != NULL)
{
@@ -189,12 +190,10 @@ ConstructDeltaMessage()
}
pushJsonbValue(&state, WJB_END_ARRAY, NULL);
}
{
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
Jsonb *jsonb = JsonbValueToJsonb(result);
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
Jsonb *jsonb = JsonbValueToJsonb(result);
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
}
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
}
#define ERROR_SIZE 1024
@@ -273,28 +272,32 @@ SendDeltasToControlPlane()
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
}
char *message = ConstructDeltaMessage();
ErrorString str;
str.size = 0;
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
const int num_retries = 5;
CURLcode curl_status;
for (int i = 0; i < num_retries; i++)
{
if ((curl_status = curl_easy_perform(handle)) == 0)
break;
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
pg_usleep(1000 * 1000);
}
if (curl_status != CURLE_OK)
{
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
}
else
{
char *message = ConstructDeltaMessage();
ErrorString str;
const int num_retries = 5;
CURLcode curl_status;
long response_code;
str.size = 0;
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
for (int i = 0; i < num_retries; i++)
{
if ((curl_status = curl_easy_perform(handle)) == 0)
break;
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
pg_usleep(1000 * 1000);
}
if (curl_status != CURLE_OK)
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
{
if (response_code != 200)
@@ -373,11 +376,10 @@ MergeTable()
if (old_table->db_table)
{
InitDbTableIfNeeded();
DbEntry *entry;
HASH_SEQ_STATUS status;
InitDbTableIfNeeded();
hash_seq_init(&status, old_table->db_table);
while ((entry = hash_seq_search(&status)) != NULL)
{
@@ -419,11 +421,10 @@ MergeTable()
if (old_table->role_table)
{
InitRoleTableIfNeeded();
RoleEntry *entry;
HASH_SEQ_STATUS status;
InitRoleTableIfNeeded();
hash_seq_init(&status, old_table->role_table);
while ((entry = hash_seq_search(&status)) != NULL)
{
@@ -514,12 +515,9 @@ RoleIsNeonSuperuser(const char *role_name)
static void
HandleCreateDb(CreatedbStmt *stmt)
{
InitDbTableIfNeeded();
DefElem *downer = NULL;
ListCell *option;
bool found = false;
DbEntry *entry;
InitDbTableIfNeeded();
foreach(option, stmt->options)
{
@@ -528,11 +526,13 @@ HandleCreateDb(CreatedbStmt *stmt)
if (strcmp(defel->defname, "owner") == 0)
downer = defel;
}
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
entry = hash_search(CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name));
@@ -554,24 +554,21 @@ HandleCreateDb(CreatedbStmt *stmt)
static void
HandleAlterOwner(AlterOwnerStmt *stmt)
{
const char *name;
bool found = false;
DbEntry *entry;
const char *new_owner;
if (stmt->objectType != OBJECT_DATABASE)
return;
InitDbTableIfNeeded();
const char *name = strVal(stmt->object);
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
name,
HASH_ENTER,
&found);
name = strVal(stmt->object);
entry = hash_search(CurrentDdlTable->db_table,
name,
HASH_ENTER,
&found);
if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name));
const char *new_owner = get_rolespec_name(stmt->newowner);
new_owner = get_rolespec_name(stmt->newowner);
if (RoleIsNeonSuperuser(new_owner))
elog(ERROR, "can't alter owner to neon_superuser");
entry->owner = get_role_oid(new_owner, false);
@@ -581,23 +578,21 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
static void
HandleDbRename(RenameStmt *stmt)
{
bool found = false;
DbEntry *entry;
DbEntry *entry_for_new_name;
Assert(stmt->renameType == OBJECT_DATABASE);
InitDbTableIfNeeded();
entry = hash_search(CurrentDdlTable->db_table,
stmt->subname,
HASH_FIND,
&found);
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->subname,
HASH_FIND,
&found);
DbEntry *entry_for_new_name = hash_search(
CurrentDdlTable->db_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name = hash_search(CurrentDdlTable->db_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name->type = Op_Set;
if (found)
{
if (entry->old_name[0] != '\0')
@@ -605,7 +600,8 @@ HandleDbRename(RenameStmt *stmt)
else
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
entry_for_new_name->owner = entry->owner;
hash_search(CurrentDdlTable->db_table,
hash_search(
CurrentDdlTable->db_table,
stmt->subname,
HASH_REMOVE,
NULL);
@@ -620,15 +616,14 @@ HandleDbRename(RenameStmt *stmt)
static void
HandleDropDb(DropdbStmt *stmt)
{
bool found = false;
DbEntry *entry;
InitDbTableIfNeeded();
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
entry = hash_search(CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
entry->type = Op_Delete;
entry->owner = InvalidOid;
if (!found)
@@ -638,14 +633,16 @@ HandleDropDb(DropdbStmt *stmt)
static void
HandleCreateRole(CreateRoleStmt *stmt)
{
InitRoleTableIfNeeded();
bool found = false;
RoleEntry *entry;
DefElem *dpass;
RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
stmt->role,
HASH_ENTER,
&found);
DefElem *dpass = NULL;
ListCell *option;
InitRoleTableIfNeeded();
dpass = NULL;
foreach(option, stmt->options)
{
DefElem *defel = lfirst(option);
@@ -653,11 +650,6 @@ HandleCreateRole(CreateRoleStmt *stmt)
if (strcmp(defel->defname, "password") == 0)
dpass = defel;
}
entry = hash_search(CurrentDdlTable->role_table,
stmt->role,
HASH_ENTER,
&found);
if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass && dpass->arg)
@@ -670,18 +662,14 @@ HandleCreateRole(CreateRoleStmt *stmt)
static void
HandleAlterRole(AlterRoleStmt *stmt)
{
const char *role_name = stmt->role->rolename;
DefElem *dpass;
ListCell *option;
bool found = false;
RoleEntry *entry;
InitRoleTableIfNeeded();
DefElem *dpass = NULL;
ListCell *option;
const char *role_name = stmt->role->rolename;
if (RoleIsNeonSuperuser(role_name) && !superuser())
elog(ERROR, "can't ALTER neon_superuser");
dpass = NULL;
foreach(option, stmt->options)
{
DefElem *defel = lfirst(option);
@@ -692,11 +680,13 @@ HandleAlterRole(AlterRoleStmt *stmt)
/* We only care about updates to the password */
if (!dpass)
return;
bool found = false;
RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
role_name,
HASH_ENTER,
&found);
entry = hash_search(CurrentDdlTable->role_table,
role_name,
HASH_ENTER,
&found);
if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass->arg)
@@ -709,22 +699,20 @@ HandleAlterRole(AlterRoleStmt *stmt)
static void
HandleRoleRename(RenameStmt *stmt)
{
bool found = false;
RoleEntry *entry;
RoleEntry *entry_for_new_name;
Assert(stmt->renameType == OBJECT_ROLE);
InitRoleTableIfNeeded();
Assert(stmt->renameType == OBJECT_ROLE);
bool found = false;
RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
stmt->subname,
HASH_FIND,
&found);
entry = hash_search(CurrentDdlTable->role_table,
stmt->subname,
HASH_FIND,
&found);
entry_for_new_name = hash_search(CurrentDdlTable->role_table,
stmt->newname,
HASH_ENTER,
NULL);
RoleEntry *entry_for_new_name = hash_search(
CurrentDdlTable->role_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name->type = Op_Set;
if (found)
@@ -750,9 +738,8 @@ HandleRoleRename(RenameStmt *stmt)
static void
HandleDropRole(DropRoleStmt *stmt)
{
ListCell *item;
InitRoleTableIfNeeded();
ListCell *item;
foreach(item, stmt->roles)
{

View File

@@ -170,14 +170,12 @@ lfc_disable(char const *op)
if (lfc_desc > 0)
{
int rc;
/*
* If the reason of error is ENOSPC, then truncation of file may
* help to reclaim some space
*/
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE);
rc = ftruncate(lfc_desc, 0);
int rc = ftruncate(lfc_desc, 0);
pgstat_report_wait_end();
if (rc < 0)
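The comment above notes that truncating the cache file may reclaim space after an ENOSPC failure. A minimal, self-contained sketch of that truncate-on-error idea (illustrative only; the file name is made up and the wait-event bookkeeping is omitted):

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical cache file; truncating it to zero hands the space
	 * back to the filesystem, which is the recovery idea described above. */
	int fd = open("demo.cache", O_CREAT | O_RDWR, 0600);

	if (fd < 0)
		return 1;
	if (ftruncate(fd, 0) < 0)
		perror("ftruncate");
	close(fd);
	return 0;
}
```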
@@ -618,7 +616,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
*/
if (entry->bitmap[chunk_offs >> 5] == 0)
{
bool has_remaining_pages = false;
bool has_remaining_pages;
for (int i = 0; i < CHUNK_BITMAP_SIZE; i++)
{
@@ -668,6 +666,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
BufferTag tag;
FileCacheEntry *entry;
ssize_t rc;
bool result = true;
uint32 hash;
uint64 generation;
uint32 entry_offset;
@@ -926,10 +925,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
uint32 offset = hole->offset;
bool hole_found;
bool found;
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
CriticalAssert(hole_found);
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
CriticalAssert(found);
lfc_ctl->used += 1;
entry->offset = offset; /* reuse the hole */
@@ -1005,7 +1004,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
Datum result;
HeapTuple tuple;
char const *key;
uint64 value = 0;
uint64 value;
Datum values[NUM_NEON_GET_STATS_COLS];
bool nulls[NUM_NEON_GET_STATS_COLS];

View File

@@ -116,6 +116,8 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
{
uint8 count;
uint32 index;
size_t i;
size_t j;
TimestampTz now = GetCurrentTimestamp();
/* Use the first "k" (registerWidth) bits as a zero based index */
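The comment above describes the usual HyperLogLog split of a hash into a register index and a leading-zero count. A rough standalone sketch of that split, assuming a register width of 10 bits (the extension's actual value may differ):

```c
#include <stdint.h>
#include <stdio.h>

#define REGISTER_WIDTH 10	/* assumed index width, for illustration only */

/* Split a 32-bit hash into a register index (the first REGISTER_WIDTH bits)
 * and the HyperLogLog "rho": leading zeros of the remaining bits plus one. */
static void
hll_classify(uint32_t hash, uint32_t *index, uint8_t *count)
{
	uint32_t rest;

	*index = hash >> (32 - REGISTER_WIDTH);
	rest = hash << REGISTER_WIDTH;

	if (rest == 0)
	{
		*count = (32 - REGISTER_WIDTH) + 1;	/* all value bits were zero */
		return;
	}
	*count = 1;
	while ((rest & 0x80000000u) == 0)
	{
		(*count)++;
		rest <<= 1;
	}
}

int main(void)
{
	uint32_t idx;
	uint8_t  cnt;

	hll_classify(0xdeadbeefU, &idx, &cnt);
	printf("index=%u count=%u\n", (unsigned) idx, (unsigned) cnt);
	return 0;
}
```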

View File

@@ -89,6 +89,7 @@ typedef struct
#if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook = NULL;
static void walproposer_shmem_request(void);
#endif
static shmem_startup_hook_type prev_shmem_startup_hook;
static PagestoreShmemState *pagestore_shared;
@@ -440,8 +441,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
return false;
}
shard->state = PS_Connecting_Startup;
/* fallthrough */
}
/* FALLTHROUGH */
case PS_Connecting_Startup:
{
char *pagestream_query;
@@ -452,6 +453,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
do
{
WaitEvent event;
switch (poll_result)
{
default: /* unknown/unused states are handled as a failed connection */
@@ -582,8 +585,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
}
shard->state = PS_Connecting_PageStream;
/* fallthrough */
}
/* FALLTHROUGH */
case PS_Connecting_PageStream:
{
neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream");
@@ -628,8 +631,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
}
shard->state = PS_Connected;
/* fallthrough */
}
/* FALLTHROUGH */
case PS_Connected:
/*
* We successfully connected. Future connections to this PageServer
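The connection code above walks a switch statement in which each state deliberately falls through to the next, so a single call can make as much progress as possible. A toy standalone version of that pattern (state names simplified, real I/O replaced with prints):

```c
#include <stdio.h>

typedef enum
{
	PS_DISCONNECTED,
	PS_CONNECTING_STARTUP,
	PS_CONNECTING_PAGESTREAM,
	PS_CONNECTED
} PsState;

/* Advance the connection as far as possible in one call; each case
 * deliberately falls through to the next, as in the hunk above. */
static PsState
advance(PsState state)
{
	switch (state)
	{
		case PS_DISCONNECTED:
			printf("opening socket\n");
			state = PS_CONNECTING_STARTUP;
			/* FALLTHROUGH */
		case PS_CONNECTING_STARTUP:
			printf("sending startup packet\n");
			state = PS_CONNECTING_PAGESTREAM;
			/* FALLTHROUGH */
		case PS_CONNECTING_PAGESTREAM:
			printf("switching to pagestream protocol\n");
			state = PS_CONNECTED;
			/* FALLTHROUGH */
		case PS_CONNECTED:
			break;
	}
	return state;
}

int main(void)
{
	return advance(PS_DISCONNECTED) == PS_CONNECTED ? 0 : 1;
}
```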

View File

@@ -94,6 +94,7 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
uint64 bucket_accum;
int i = 0;
Datum getpage_wait_str;
metrics[i].name = "getpage_wait_seconds_count";
metrics[i].is_bucket = false;
@@ -223,6 +224,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Datum values[3];
bool nulls[3];
Datum getpage_wait_str;
neon_per_backend_counters totals = {0};
metric_t *metrics;

View File

@@ -7,7 +7,6 @@
#define NEON_PGVERSIONCOMPAT_H
#include "fmgr.h"
#include "storage/buf_internals.h"
#if PG_MAJORVERSION_NUM < 17
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
@@ -21,24 +20,11 @@
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
)
/* These macros were turned into static inline functions in v16 */
/* buftag population & RelFileNode/RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16
static inline bool
BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
{
return BUFFERTAGS_EQUAL(*tag1, *tag2);
}
static inline void
InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
ForkNumber forkNum, BlockNumber blockNum)
{
INIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);
}
#endif
#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
/* RelFileNode -> RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16
#define USE_RELFILENODE
#define RELFILEINFO_HDR "storage/relfilenode.h"
@@ -87,6 +73,8 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
#define USE_RELFILELOCATOR
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
#define RELFILEINFO_HDR "storage/relfilelocator.h"
#define NRelFileInfo RelFileLocator

View File

@@ -213,6 +213,32 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra);
/* Neon storage manager functionality */
extern void neon_init(void);
extern void neon_open(SMgrRelation reln);
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
#if PG_MAJORVERSION_NUM < 16
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nbuffers, bool skipFsync);
#endif
#if PG_MAJORVERSION_NUM >=17
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
#else
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#endif
/*
* LSN values associated with each request to the pageserver
*/
@@ -252,7 +278,13 @@ extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum,
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
neon_request_lsns request_lsns, void *buffer);
#endif
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
extern int64 neon_dbsize(Oid dbNode);
extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */
extern void relsize_hash_init(void);

View File

@@ -118,8 +118,6 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
/*
* Prefetch implementation:
*
@@ -217,7 +215,7 @@ typedef struct PrfHashEntry
sizeof(BufferTag) \
)
#define SH_EQUAL(tb, a, b) (BufferTagsEqual(&(a)->buftag, &(b)->buftag))
#define SH_EQUAL(tb, a, b) (BUFFERTAGS_EQUAL((a)->buftag, (b)->buftag))
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
@@ -738,7 +736,7 @@ static void
prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
{
bool found;
uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;
uint64 mySlotNo = slot->my_ring_index;
NeonGetPageRequest request = {
.req.tag = T_NeonGetPageRequest,
@@ -805,19 +803,15 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
bool is_prefetch)
{
uint64 min_ring_index;
PrefetchRequest hashkey;
PrefetchRequest req;
#if USE_ASSERT_CHECKING
bool any_hits = false;
#endif
/* We will never read further ahead than our buffer can store. */
nblocks = Max(1, Min(nblocks, readahead_buffer_size));
/*
* Use an intermediate PrefetchRequest struct as the hash key to ensure
* correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
hashkey.buftag = tag;
/* use an intermediate PrefetchRequest struct to ensure correct alignment */
req.buftag = tag;
Retry:
min_ring_index = UINT64_MAX;
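One side of the hunk above stresses clearing the intermediate struct so padding bytes do not leak into hash or compare operations on the key. A tiny standalone example of why that matters for raw-byte comparison of struct keys (DemoKey is invented for the example):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* A key struct with internal padding: raw-byte hashing or comparison is only
 * deterministic if the padding bytes are cleared before the fields are set. */
typedef struct
{
	uint8_t  fork;		/* 1 byte, typically followed by 3 padding bytes */
	uint32_t blockno;
} DemoKey;

int main(void)
{
	DemoKey a, b;

	memset(&a, 0, sizeof(a));	/* clears padding too */
	memset(&b, 0, sizeof(b));
	a.fork = 1;	a.blockno = 42;
	b.fork = 1;	b.blockno = 42;

	printf("equal: %d\n", memcmp(&a, &b, sizeof(a)) == 0);	/* prints: equal: 1 */
	return 0;
}
```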
@@ -843,8 +837,8 @@ Retry:
slot = NULL;
entry = NULL;
hashkey.buftag.blockNum = tag.blockNum + i;
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
req.buftag.blockNum = tag.blockNum + i;
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &req);
if (entry != NULL)
{
@@ -855,7 +849,7 @@ Retry:
Assert(slot->status != PRFS_UNUSED);
Assert(MyPState->ring_last <= ring_index &&
ring_index < MyPState->ring_unused);
Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));
Assert(BUFFERTAGS_EQUAL(slot->buftag, req.buftag));
/*
* If the caller specified a request LSN to use, only accept
@@ -892,19 +886,12 @@ Retry:
{
min_ring_index = Min(min_ring_index, ring_index);
/* The buffered request is good enough, return that index */
if (is_prefetch)
pgBufferUsage.prefetch.duplicates++;
else
pgBufferUsage.prefetch.hits++;
pgBufferUsage.prefetch.duplicates++;
continue;
}
}
}
else if (!is_prefetch)
{
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
}
/*
* We can only leave the block above by finding that there's
* no entry that can satisfy this request, either because there
@@ -987,7 +974,7 @@ Retry:
* We must update the slot data before insertion, because the hash
* function reads the buffer tag from the slot.
*/
slot->buftag = hashkey.buftag;
slot->buftag = req.buftag;
slot->shard_no = get_shard_number(&tag);
slot->my_ring_index = ring_index;
@@ -1465,6 +1452,7 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
BlockNumber blknos[XLR_MAX_BLOCK_ID];
Page pageptrs[XLR_MAX_BLOCK_ID];
int nregistered = 0;
XLogRecPtr result = 0;
for (int i = 0; i < nblocks; i++)
{
@@ -1777,7 +1765,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
/*
* neon_init() -- Initialize private state
*/
static void
void
neon_init(void)
{
Size prfs_size;
@@ -2167,7 +2155,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
/*
* neon_exists() -- Does the physical file exist?
*/
static bool
bool
neon_exists(SMgrRelation reln, ForkNumber forkNum)
{
bool exists;
@@ -2273,7 +2261,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
*
* If isRedo is true, it's okay for the relation to exist already.
*/
static void
void
neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
{
switch (reln->smgr_relpersistence)
@@ -2349,7 +2337,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* Note: any failure should be reported as WARNING not ERROR, because
* we are usually not in a transaction anymore when this is called.
*/
static void
void
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
{
/*
@@ -2373,7 +2361,7 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
* EOF). Note that we assume writing a block beyond current EOF
* causes intervening file space to become filled with zeroes.
*/
static void
void
#if PG_MAJORVERSION_NUM < 16
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
char *buffer, bool skipFsync)
@@ -2465,7 +2453,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
}
#if PG_MAJORVERSION_NUM >= 16
static void
void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
@@ -2561,7 +2549,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
/*
* neon_open() -- Initialize newly-opened relation.
*/
static void
void
neon_open(SMgrRelation reln)
{
/*
@@ -2579,7 +2567,7 @@ neon_open(SMgrRelation reln)
/*
* neon_close() -- Close the specified relation, if it isn't closed already.
*/
static void
void
neon_close(SMgrRelation reln, ForkNumber forknum)
{
/*
@@ -2594,12 +2582,13 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
/*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/
static bool
bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks)
{
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
BufferTag tag;
bool io_initiated = false;
switch (reln->smgr_relpersistence)
{
@@ -2623,6 +2612,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
while (nblocks > 0)
{
int iterblocks = Min(nblocks, PG_IOV_MAX);
int seqlen = 0;
bits8 lfc_present[PG_IOV_MAX / 8];
memset(lfc_present, 0, sizeof(lfc_present));
@@ -2634,6 +2624,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
continue;
}
io_initiated = true;
tag.blockNum = blocknum;
for (int i = 0; i < PG_IOV_MAX / 8; i++)
@@ -2656,7 +2648,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/
static bool
bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
@@ -2700,7 +2692,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
* This accepts a range of blocks because flushing several pages at once is
* considerably more efficient than doing so individually.
*/
static void
void
neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks)
{
@@ -2750,19 +2742,14 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
uint64 ring_index;
PrfHashEntry *entry;
PrefetchRequest *slot;
PrefetchRequest hashkey;
BufferTag buftag = {0};
Assert(PointerIsValid(request_lsns));
Assert(nblocks >= 1);
/*
* Use an intermediate PrefetchRequest struct as the hash key to ensure
* correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
CopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);
hashkey.buftag.forkNum = forkNum;
hashkey.buftag.blockNum = base_blockno;
CopyNRelFileInfoToBufTag(buftag, rinfo);
buftag.forkNum = forkNum;
buftag.blockNum = base_blockno;
/*
* The redo process does not lock pages that it needs to replay but are
@@ -2780,7 +2767,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* weren't for the behaviour of the LwLsn cache that uses the highest
* value of the LwLsn cache when the entry is not found.
*/
prefetch_register_bufferv(hashkey.buftag, request_lsns, nblocks, mask, false);
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
for (int i = 0; i < nblocks; i++)
{
@@ -2801,8 +2788,8 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* Try to find prefetched page in the list of received pages.
*/
Retry:
hashkey.buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &buftag);
if (entry != NULL)
{
@@ -2810,6 +2797,7 @@ Retry:
if (neon_prefetch_response_usable(reqlsns, slot))
{
ring_index = slot->my_ring_index;
pgBufferUsage.prefetch.hits += 1;
}
else
{
@@ -2839,7 +2827,10 @@ Retry:
{
if (entry == NULL)
{
ring_index = prefetch_register_bufferv(hashkey.buftag, reqlsns, 1, NULL, false);
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
Assert(ring_index != UINT64_MAX);
slot = GetPrfSlot(ring_index);
}
@@ -2864,8 +2855,8 @@ Retry:
} while (!prefetch_wait_for(ring_index));
Assert(slot->status == PRFS_RECEIVED);
Assert(memcmp(&hashkey.buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(hashkey.buftag.blockNum == base_blockno + i);
Assert(memcmp(&buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(buftag.blockNum == base_blockno + i);
resp = slot->response;
@@ -2921,10 +2912,10 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* neon_read() -- Read the specified block from a relation.
*/
#if PG_MAJORVERSION_NUM < 16
static void
void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)
#else
static void
void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
#endif
{
@@ -3033,7 +3024,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
#endif /* PG_MAJORVERSION_NUM <= 16 */
#if PG_MAJORVERSION_NUM >= 17
static void
void
neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks)
{
@@ -3068,9 +3059,6 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
nblocks, read);
if (lfc_result > 0)
MyNeonCounters->file_cache_hits_total += lfc_result;
/* Read all blocks from LFC, so we're done */
if (lfc_result == nblocks)
return;
@@ -3197,7 +3185,6 @@ hexdump_page(char *page)
}
#endif
#if PG_MAJORVERSION_NUM < 17
/*
* neon_write() -- Write the supplied block at the appropriate location.
*
@@ -3205,7 +3192,7 @@ hexdump_page(char *page)
* relation (ie, those before the current EOF). To extend a relation,
* use mdextend().
*/
static void
void
#if PG_MAJORVERSION_NUM < 16
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
#else
@@ -3271,12 +3258,11 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
#endif
#endif
}
#endif
#if PG_MAJORVERSION_NUM >= 17
static void
void
neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
const void **buffers, BlockNumber nblocks, bool skipFsync)
{
@@ -3326,7 +3312,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
/*
* neon_nblocks() -- Get the number of blocks stored in a relation.
*/
static BlockNumber
BlockNumber
neon_nblocks(SMgrRelation reln, ForkNumber forknum)
{
NeonResponse *resp;
@@ -3463,7 +3449,7 @@ neon_dbsize(Oid dbNode)
/*
* neon_truncate() -- Truncate relation to specified number of blocks.
*/
static void
void
neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
XLogRecPtr lsn;
@@ -3532,7 +3518,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* crash before the next checkpoint syncs the newly-inactive segment, that
* segment may survive recovery, reintroducing unwanted data into the table.
*/
static void
void
neon_immedsync(SMgrRelation reln, ForkNumber forknum)
{
switch (reln->smgr_relpersistence)
@@ -3562,8 +3548,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
}
#if PG_MAJORVERSION_NUM >= 17
static void
neon_registersync(SMgrRelation reln, ForkNumber forknum)
void
neon_regisersync(SMgrRelation reln, ForkNumber forknum)
{
switch (reln->smgr_relpersistence)
{
@@ -3747,8 +3733,6 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
SlruKind kind;
int n_blocks;
shardno_t shard_no = 0; /* All SLRUs are at shard 0 */
NeonResponse *resp;
NeonGetSlruSegmentRequest request;
/*
* Compute a request LSN to use, similar to neon_get_request_lsns() but the
@@ -3787,7 +3771,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
else
return -1;
request = (NeonGetSlruSegmentRequest) {
NeonResponse *resp;
NeonGetSlruSegmentRequest request = {
.req.tag = T_NeonGetSlruSegmentRequest,
.req.lsn = request_lsn,
.req.not_modified_since = not_modified_since,
@@ -3894,7 +3879,7 @@ static const struct f_smgr neon_smgr =
.smgr_truncate = neon_truncate,
.smgr_immedsync = neon_immedsync,
#if PG_MAJORVERSION_NUM >= 17
.smgr_registersync = neon_registersync,
.smgr_registersync = neon_regisersync,
#endif
.smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,

View File

@@ -252,6 +252,8 @@ WalProposerPoll(WalProposer *wp)
/* timeout expired: poll state */
if (rc == 0 || TimeToReconnect(wp, now) <= 0)
{
TimestampTz now;
/*
* If no WAL was generated during timeout (and we have already
* collected the quorum), then send empty keepalive message
@@ -267,7 +269,8 @@ WalProposerPoll(WalProposer *wp)
now = wp->api.get_current_timestamp(wp);
for (int i = 0; i < wp->n_safekeepers; i++)
{
sk = &wp->safekeeper[i];
Safekeeper *sk = &wp->safekeeper[i];
if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
wp->config->safekeeper_connection_timeout))
{
@@ -1077,7 +1080,7 @@ SendProposerElected(Safekeeper *sk)
ProposerElected msg;
TermHistory *th;
term_t lastCommonTerm;
int idx;
int i;
/* Now that we are ready to send it's a good moment to create WAL reader */
wp->api.wal_reader_allocate(sk);
@@ -1096,15 +1099,15 @@ SendProposerElected(Safekeeper *sk)
/* We must start somewhere. */
Assert(wp->propTermHistory.n_entries >= 1);
for (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++)
for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++)
{
if (wp->propTermHistory.entries[idx].term != th->entries[idx].term)
if (wp->propTermHistory.entries[i].term != th->entries[i].term)
break;
/* term must begin everywhere at the same point */
Assert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn);
Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn);
}
idx--; /* step back to the last common term */
if (idx < 0)
i--; /* step back to the last common term */
if (i < 0)
{
/* safekeeper is empty or no common point, start from the beginning */
sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
@@ -1125,14 +1128,14 @@ SendProposerElected(Safekeeper *sk)
* proposer, LSN it is currently writing, but then we just pick
* safekeeper pos as it obviously can't be higher.
*/
if (wp->propTermHistory.entries[idx].term == wp->propTerm)
if (wp->propTermHistory.entries[i].term == wp->propTerm)
{
sk->startStreamingAt = sk->voteResponse.flushLsn;
}
else
{
XLogRecPtr propEndLsn = wp->propTermHistory.entries[idx + 1].lsn;
XLogRecPtr skEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn);
XLogRecPtr propEndLsn = wp->propTermHistory.entries[i + 1].lsn;
XLogRecPtr skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn);
sk->startStreamingAt = Min(propEndLsn, skEndLsn);
}
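The loop above searches for the last term on which the proposer's and the safekeeper's histories agree, then steps back by one. A standalone sketch of that divergence-point search with simplified, invented types (TermEntry stands in for the real history entry):

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t term_t;
typedef uint64_t XLogRecPtr;

/* Invented, simplified stand-in for the proposer/safekeeper history entries. */
typedef struct
{
	term_t		term;
	XLogRecPtr	lsn;
} TermEntry;

/* Index of the last entry on which both histories agree, or -1 if none. */
static int
last_common_term(const TermEntry *a, int na, const TermEntry *b, int nb)
{
	int i;
	int n = na < nb ? na : nb;

	for (i = 0; i < n; i++)
	{
		if (a[i].term != b[i].term)
			break;
	}
	return i - 1;	/* step back to the last common term */
}

int main(void)
{
	TermEntry prop[] = {{1, 0}, {2, 100}, {4, 250}};
	TermEntry sk[]   = {{1, 0}, {2, 100}, {3, 200}};

	printf("last common index: %d\n", last_common_term(prop, 3, sk, 3));	/* prints 1 */
	return 0;
}
```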
@@ -1146,7 +1149,7 @@ SendProposerElected(Safekeeper *sk)
msg.termHistory = &wp->propTermHistory;
msg.timelineStartLsn = wp->timelineStartLsn;
lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0;
lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0;
wp_log(LOG,
"sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
@@ -1638,7 +1641,7 @@ UpdateDonorShmem(WalProposer *wp)
* Process AppendResponse message from safekeeper.
*/
static void
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
{
XLogRecPtr candidateTruncateLsn;
XLogRecPtr newCommitLsn;
@@ -1657,7 +1660,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
* and WAL is committed by the quorum. BroadcastAppendRequest() should be
* called to notify safekeepers about the new commitLsn.
*/
wp->api.process_safekeeper_feedback(wp, fromsk);
wp->api.process_safekeeper_feedback(wp, sk);
/*
* Try to advance truncateLsn -- the last record flushed to all

View File

@@ -725,7 +725,7 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
extern void WalProposerPoll(WalProposer *wp);
extern void WalProposerFree(WalProposer *wp);
extern WalproposerShmemState *GetWalpropShmemState(void);
extern WalproposerShmemState *GetWalpropShmemState();
/*
* WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
@@ -745,7 +745,7 @@ extern TimeLineID walprop_pg_get_timeline_id(void);
* catch logging.
*/
#ifdef WALPROPOSER_LIB
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4);
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...);
#define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)
#else
#define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)

View File

@@ -286,9 +286,6 @@ safekeepers_cmp(char *old, char *new)
static void
assign_neon_safekeepers(const char *newval, void *extra)
{
char *newval_copy;
char *oldval;
if (!am_walproposer)
return;
@@ -298,8 +295,8 @@ assign_neon_safekeepers(const char *newval, void *extra)
}
/* Copy values because we will modify them in split_safekeepers_list() */
newval_copy = pstrdup(newval);
oldval = pstrdup(wal_acceptors_list);
char *newval_copy = pstrdup(newval);
char *oldval = pstrdup(wal_acceptors_list);
/*
* TODO: restarting through FATAL is stupid and introduces 1s delay before
@@ -541,7 +538,7 @@ nwp_shmem_startup_hook(void)
}
WalproposerShmemState *
GetWalpropShmemState(void)
GetWalpropShmemState()
{
Assert(walprop_shared != NULL);
return walprop_shared;

View File

@@ -191,14 +191,13 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
if (!wal_reader)
{
XLogRecPtr basebackupLsn = GetRedoStartLsn();
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
/* should never happen */
if (basebackupLsn == 0)
if (epochStartLsn == 0)
{
elog(ERROR, "unable to start walsender when basebackupLsn is 0");
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
}
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
}
xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen;

View File

@@ -44,6 +44,27 @@ infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
appendStringInfoString(buf, "]");
}
static void
truncate_flags_desc(StringInfo buf, uint8 flags)
{
appendStringInfoString(buf, "flags: [");
if (flags & XLH_TRUNCATE_CASCADE)
appendStringInfoString(buf, "CASCADE, ");
if (flags & XLH_TRUNCATE_RESTART_SEQS)
appendStringInfoString(buf, "RESTART_SEQS, ");
if (buf->data[buf->len - 1] == ' ')
{
/* Truncate-away final unneeded ", " */
Assert(buf->data[buf->len - 2] == ',');
buf->len -= 2;
buf->data[buf->len] = '\0';
}
appendStringInfoString(buf, "]");
}
void
neon_rm_desc(StringInfo buf, XLogReaderState *record)
{
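truncate_flags_desc above appends each flag with a trailing ", " and then trims the final separator before closing the bracket. The same trick in a self-contained form, without StringInfo (purely illustrative):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	char   buf[64] = "flags: [";
	size_t len;
	int    cascade = 1, restart_seqs = 0;

	if (cascade)
		strcat(buf, "CASCADE, ");
	if (restart_seqs)
		strcat(buf, "RESTART_SEQS, ");

	len = strlen(buf);
	if (buf[len - 1] == ' ')
		buf[len - 2] = '\0';	/* drop the trailing ", " */

	strcat(buf, "]");
	printf("%s\n", buf);		/* prints: flags: [CASCADE] */
	return 0;
}
```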

View File

@@ -136,7 +136,7 @@ static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
static void GetPage(StringInfo input_message);
static void Ping(StringInfo input_message);
static ssize_t buffered_read(void *buf, size_t count);
static void CreateFakeSharedMemoryAndSemaphores(void);
static void CreateFakeSharedMemoryAndSemaphores();
static BufferTag target_redo_tag;
@@ -170,40 +170,6 @@ close_range_syscall(unsigned int start_fd, unsigned int count, unsigned int flag
return syscall(__NR_close_range, start_fd, count, flags);
}
static PgSeccompRule allowed_syscalls[] =
{
/* Hard requirements */
PG_SCMP_ALLOW(exit_group),
PG_SCMP_ALLOW(pselect6),
PG_SCMP_ALLOW(read),
PG_SCMP_ALLOW(select),
PG_SCMP_ALLOW(write),
/* Memory allocation */
PG_SCMP_ALLOW(brk),
#ifndef MALLOC_NO_MMAP
/* TODO: musl doesn't have mallopt */
PG_SCMP_ALLOW(mmap),
PG_SCMP_ALLOW(munmap),
#endif
/*
* getpid() is called on assertion failure, in ExceptionalCondition.
* It's not really needed, but seems pointless to hide it either. The
* system call unlikely to expose a kernel vulnerability, and the PID
* is stored in MyProcPid anyway.
*/
PG_SCMP_ALLOW(getpid),
/* Enable those for a proper shutdown. */
#if 0
PG_SCMP_ALLOW(munmap),
PG_SCMP_ALLOW(shmctl),
PG_SCMP_ALLOW(shmdt),
PG_SCMP_ALLOW(unlink), /* shm_unlink */
#endif
};
static void
enter_seccomp_mode(void)
{
@@ -217,12 +183,44 @@ enter_seccomp_mode(void)
(errcode(ERRCODE_SYSTEM_ERROR),
errmsg("seccomp: could not close files >= fd 3")));
PgSeccompRule syscalls[] =
{
/* Hard requirements */
PG_SCMP_ALLOW(exit_group),
PG_SCMP_ALLOW(pselect6),
PG_SCMP_ALLOW(read),
PG_SCMP_ALLOW(select),
PG_SCMP_ALLOW(write),
/* Memory allocation */
PG_SCMP_ALLOW(brk),
#ifndef MALLOC_NO_MMAP
/* TODO: musl doesn't have mallopt */
PG_SCMP_ALLOW(mmap),
PG_SCMP_ALLOW(munmap),
#endif
/*
* getpid() is called on assertion failure, in ExceptionalCondition.
* It's not really needed, but seems pointless to hide it either. The
* system call is unlikely to expose a kernel vulnerability, and the PID
* is stored in MyProcPid anyway.
*/
PG_SCMP_ALLOW(getpid),
/* Enable those for a proper shutdown.
PG_SCMP_ALLOW(munmap),
PG_SCMP_ALLOW(shmctl),
PG_SCMP_ALLOW(shmdt),
PG_SCMP_ALLOW(unlink), // shm_unlink
*/
};
#ifdef MALLOC_NO_MMAP
/* Ask glibc not to use mmap() */
mallopt(M_MMAP_MAX, 0);
#endif
seccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls));
seccomp_load_rules(syscalls, lengthof(syscalls));
}
#endif /* HAVE_LIBSECCOMP */
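The allowlist above is loaded through the PG_SCMP_* wrappers; with plain libseccomp the equivalent shape looks roughly like the sketch below (illustrative only, not the extension's code; the rule set is trimmed to three calls, build with -lseccomp):

```c
/* Build: cc seccomp_sketch.c -lseccomp */
#include <seccomp.h>
#include <unistd.h>

int main(void)
{
	/* Kill the process on any syscall that is not explicitly allowed. */
	scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_KILL);

	if (ctx == NULL)
		return 1;

	/* Trimmed allowlist in the spirit of the WAL-redo rules above. */
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(read), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(write), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(exit_group), 0);

	if (seccomp_load(ctx) != 0)
		return 1;

	write(STDOUT_FILENO, "sandboxed\n", 10);	/* still permitted */
	return 0;					/* exits via the allowed exit_group */
}
```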
@@ -451,7 +449,7 @@ WalRedoMain(int argc, char *argv[])
* half-initialized postgres.
*/
static void
CreateFakeSharedMemoryAndSemaphores(void)
CreateFakeSharedMemoryAndSemaphores()
{
PGShmemHeader *shim = NULL;
PGShmemHeader *hdr;
@@ -994,7 +992,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
* If this block isn't one we are currently restoring, then return 'true'
* so that this gets ignored
*/
return !BufferTagsEqual(&target_tag, &target_redo_tag);
return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag);
}
/*

poetry.lock generated
View File

@@ -2095,7 +2095,6 @@ files = [
{file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"},
{file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"},
@@ -2104,8 +2103,6 @@ files = [
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"},
@@ -2587,7 +2584,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2733,22 +2729,21 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "responses"
version = "0.25.3"
version = "0.21.0"
description = "A utility library for mocking out the `requests` Python library."
optional = false
python-versions = ">=3.8"
python-versions = ">=3.7"
files = [
{file = "responses-0.25.3-py3-none-any.whl", hash = "sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb"},
{file = "responses-0.25.3.tar.gz", hash = "sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba"},
{file = "responses-0.21.0-py3-none-any.whl", hash = "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487"},
{file = "responses-0.21.0.tar.gz", hash = "sha256:b82502eb5f09a0289d8e209e7bad71ef3978334f56d09b444253d5ad67bf5253"},
]
[package.dependencies]
pyyaml = "*"
requests = ">=2.30.0,<3.0"
urllib3 = ">=1.25.10,<3.0"
requests = ">=2.0,<3.0"
urllib3 = ">=1.25.10"
[package.extras]
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"]
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"]
[[package]]
name = "rfc3339-validator"
@@ -3142,16 +3137,6 @@ files = [
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},

View File

@@ -1,12 +1,11 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import enum
import os
import subprocess
import sys
from typing import List
@enum.unique
@@ -56,12 +55,12 @@ def mypy() -> str:
return "poetry run mypy"
def get_commit_files() -> list[str]:
def get_commit_files() -> List[str]:
files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split())
return files.decode().splitlines()
def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False):
def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: bool = False):
print(f"Checking: {name} ", end="")
applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))
if not applicable_files:

View File

@@ -38,8 +38,8 @@ hostname.workspace = true
http.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
hyper = { workspace = true, features = ["server", "http1", "http2"] }
hyper.workspace = true
hyper1 = { package = "hyper", version = "1.2", features = ["server"] }
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
http-body-util = { version = "0.1" }
indexmap.workspace = true
@@ -77,7 +77,7 @@ subtle.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
tokio-postgres = { workspace = true, features = ["with-serde_json-1"] }
tokio-postgres.workspace = true
tokio-postgres-rustls.workspace = true
tokio-rustls.workspace = true
tokio-util.workspace = true
@@ -101,7 +101,7 @@ jose-jwa = "0.1.2"
jose-jwk = { version = "0.1.2", features = ["p256", "p384", "rsa"] }
signature = "2"
ecdsa = "0.16"
p256 = { version = "0.13", features = ["jwk"] }
p256 = "0.13"
rsa = "0.9"
workspace_hack.workspace = true

View File

@@ -18,7 +18,7 @@ pub(crate) use flow::*;
use tokio::time::error::Elapsed;
use crate::{
control_plane,
console,
error::{ReportableError, UserFacingError},
};
use std::{io, net::IpAddr};
@@ -34,7 +34,7 @@ pub(crate) enum AuthErrorImpl {
Web(#[from] backend::WebAuthError),
#[error(transparent)]
GetAuthInfo(#[from] control_plane::errors::GetAuthInfoError),
GetAuthInfo(#[from] console::errors::GetAuthInfoError),
/// SASL protocol errors (includes [SCRAM](crate::scram)).
#[error(transparent)]

View File

@@ -1,27 +1,27 @@
mod classic;
mod console_redirect;
mod hacks;
pub mod jwt;
pub mod local;
mod web;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Duration;
pub(crate) use console_redirect::WebAuthError;
use ipnet::{Ipv4Net, Ipv6Net};
use local::LocalBackend;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_postgres::config::AuthKeys;
use tracing::{info, warn};
pub(crate) use web::WebAuthError;
use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::{validate_password_and_exchange, AuthError};
use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
use crate::console::{AuthSecret, NodeInfo};
use crate::context::RequestMonitoring;
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::provider::{CachedRoleSecret, ControlPlaneBackend};
use crate::control_plane::{AuthSecret, NodeInfo};
use crate::intern::EndpointIdInt;
use crate::metrics::Metrics;
use crate::proxy::connect_compute::ComputeConnectBackend;
@@ -31,7 +31,7 @@ use crate::stream::Stream;
use crate::{
auth::{self, ComputeUserInfoMaybeEndpoint},
config::AuthenticationConfig,
control_plane::{
console::{
self,
provider::{CachedAllowedIps, CachedNodeInfo},
Api,
@@ -67,19 +67,19 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
/// backends which require them for the authentication process.
pub enum Backend<'a, T, D> {
/// Cloud API (V2).
ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T),
Console(MaybeOwned<'a, ConsoleBackend>, T),
/// Authentication via a web browser.
ConsoleRedirect(MaybeOwned<'a, url::ApiUrl>, D),
Web(MaybeOwned<'a, url::ApiUrl>, D),
/// Local proxy uses configured auth credentials and does not wake compute
Local(MaybeOwned<'a, LocalBackend>),
}
#[cfg(test)]
pub(crate) trait TestBackend: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>;
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
fn get_allowed_ips_and_secret(
&self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>;
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
fn dyn_clone(&self) -> Box<dyn TestBackend>;
}
@@ -93,23 +93,18 @@ impl Clone for Box<dyn TestBackend> {
impl std::fmt::Display for Backend<'_, (), ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::ControlPlane(api, ()) => match &**api {
ControlPlaneBackend::Management(endpoint) => fmt
.debug_tuple("ControlPlane::Management")
.field(&endpoint.url())
.finish(),
Self::Console(api, ()) => match &**api {
ConsoleBackend::Console(endpoint) => {
fmt.debug_tuple("Console").field(&endpoint.url()).finish()
}
#[cfg(any(test, feature = "testing"))]
ControlPlaneBackend::PostgresMock(endpoint) => fmt
.debug_tuple("ControlPlane::PostgresMock")
.field(&endpoint.url())
.finish(),
ConsoleBackend::Postgres(endpoint) => {
fmt.debug_tuple("Postgres").field(&endpoint.url()).finish()
}
#[cfg(test)]
ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
ConsoleBackend::Test(_) => fmt.debug_tuple("Test").finish(),
},
Self::ConsoleRedirect(url, ()) => fmt
.debug_tuple("ConsoleRedirect")
.field(&url.as_str())
.finish(),
Self::Web(url, ()) => fmt.debug_tuple("Web").field(&url.as_str()).finish(),
Self::Local(_) => fmt.debug_tuple("Local").finish(),
}
}
@@ -120,8 +115,8 @@ impl<T, D> Backend<'_, T, D> {
/// This helps us pass structured config to async tasks.
pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> {
match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(MaybeOwned::Borrowed(c), x),
Self::Console(c, x) => Backend::Console(MaybeOwned::Borrowed(c), x),
Self::Web(c, x) => Backend::Web(MaybeOwned::Borrowed(c), x),
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
}
}
@@ -133,8 +128,8 @@ impl<'a, T, D> Backend<'a, T, D> {
/// a function to a contained value.
pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> {
match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(c, x),
Self::Console(c, x) => Backend::Console(c, f(x)),
Self::Web(c, x) => Backend::Web(c, x),
Self::Local(l) => Backend::Local(l),
}
}
@@ -144,8 +139,8 @@ impl<'a, T, D, E> Backend<'a, Result<T, E>, D> {
/// This is most useful for error handling.
pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> {
match self {
Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
Self::ConsoleRedirect(c, x) => Ok(Backend::ConsoleRedirect(c, x)),
Self::Console(c, x) => x.map(|x| Backend::Console(c, x)),
Self::Web(c, x) => Ok(Backend::Web(c, x)),
Self::Local(l) => Ok(Backend::Local(l)),
}
}
@@ -175,12 +170,10 @@ impl ComputeUserInfo {
}
}
#[cfg_attr(test, derive(Debug))]
pub(crate) enum ComputeCredentialKeys {
#[cfg(any(test, feature = "testing"))]
Password(Vec<u8>),
AuthKeys(AuthKeys),
JwtPayload(Vec<u8>),
None,
}
@@ -297,7 +290,7 @@ impl AuthenticationConfig {
/// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks(
ctx: &RequestMonitoring,
api: &impl control_plane::Api,
api: &impl console::Api,
user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
@@ -419,8 +412,8 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
/// Get username from the credentials.
pub(crate) fn get_user(&self) -> &str {
match self {
Self::ControlPlane(_, user_info) => &user_info.user,
Self::ConsoleRedirect(_, ()) => "web",
Self::Console(_, user_info) => &user_info.user,
Self::Web(_, ()) => "web",
Self::Local(_) => "local",
}
}
@@ -436,7 +429,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> {
let res = match self {
Self::ControlPlane(api, user_info) => {
Self::Console(api, user_info) => {
info!(
user = &*user_info.user,
project = user_info.endpoint(),
@@ -453,15 +446,15 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter,
)
.await?;
Backend::ControlPlane(api, credentials)
Backend::Console(api, credentials)
}
// NOTE: this auth backend doesn't use client credentials.
Self::ConsoleRedirect(url, ()) => {
Self::Web(url, ()) => {
info!("performing web authentication");
let info = console_redirect::authenticate(ctx, config, &url, client).await?;
let info = web::authenticate(ctx, config, &url, client).await?;
Backend::ConsoleRedirect(url, info)
Backend::Web(url, info)
}
Self::Local(_) => {
return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
@@ -479,8 +472,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring,
) -> Result<CachedRoleSecret, GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
Self::ConsoleRedirect(_, ()) => Ok(Cached::new_uncached(None)),
Self::Console(api, user_info) => api.get_role_secret(ctx, user_info).await,
Self::Web(_, ()) => Ok(Cached::new_uncached(None)),
Self::Local(_) => Ok(Cached::new_uncached(None)),
}
}
@@ -490,10 +483,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => {
api.get_allowed_ips_and_secret(ctx, user_info).await
}
Self::ConsoleRedirect(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
Self::Console(api, user_info) => api.get_allowed_ips_and_secret(ctx, user_info).await,
Self::Web(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
}
}
@@ -504,18 +495,18 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, NodeInfo> {
async fn wake_compute(
&self,
ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
match self {
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::ConsoleRedirect(_, info) => Ok(Cached::new_uncached(info.clone())),
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::Web(_, info) => Ok(Cached::new_uncached(info.clone())),
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
}
}
fn get_keys(&self) -> &ComputeCredentialKeys {
match self {
Self::ControlPlane(_, creds) => &creds.keys,
Self::ConsoleRedirect(_, _) => &ComputeCredentialKeys::None,
Self::Console(_, creds) => &creds.keys,
Self::Web(_, _) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None,
}
}
@@ -526,10 +517,10 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
async fn wake_compute(
&self,
ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
match self {
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::ConsoleRedirect(_, ()) => {
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::Web(_, ()) => {
unreachable!("web auth flow doesn't support waking the compute")
}
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
@@ -538,8 +529,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
fn get_keys(&self) -> &ComputeCredentialKeys {
match self {
Self::ControlPlane(_, creds) => &creds.keys,
Self::ConsoleRedirect(_, ()) => &ComputeCredentialKeys::None,
Self::Console(_, creds) => &creds.keys,
Self::Web(_, ()) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None,
}
}
@@ -562,12 +553,12 @@ mod tests {
use crate::{
auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern},
config::AuthenticationConfig,
context::RequestMonitoring,
control_plane::{
console::{
self,
provider::{self, CachedAllowedIps, CachedRoleSecret},
CachedNodeInfo,
},
context::RequestMonitoring,
proxy::NeonOptions,
rate_limiter::{EndpointRateLimiter, RateBucketInfo},
scram::{threadpool::ThreadPool, ServerSecret},
@@ -581,12 +572,12 @@ mod tests {
secret: AuthSecret,
}
impl control_plane::Api for Auth {
impl console::Api for Auth {
async fn get_role_secret(
&self,
_ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> {
) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
}
@@ -594,10 +585,8 @@ mod tests {
&self,
_ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<
(CachedAllowedIps, Option<CachedRoleSecret>),
control_plane::errors::GetAuthInfoError,
> {
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
{
Ok((
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
@@ -616,7 +605,7 @@ mod tests {
&self,
_ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
unimplemented!()
}
}

View File

@@ -3,8 +3,8 @@ use crate::{
auth::{self, backend::ComputeCredentialKeys, AuthFlow},
compute,
config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring,
control_plane::AuthSecret,
sasl,
stream::{PqStream, Stream},
};

View File

@@ -2,8 +2,8 @@ use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint};
use crate::{
auth::{self, AuthFlow},
config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt,
sasl,
stream::{self, Stream},

View File

@@ -17,8 +17,6 @@ use crate::{
RoleName,
};
use super::ComputeCredentialKeys;
// TODO(conrad): make these configurable.
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
const MIN_RENEW: Duration = Duration::from_secs(30);
@@ -243,7 +241,7 @@ impl JwkCacheEntryLock {
endpoint: EndpointId,
role_name: &RoleName,
fetch: &F,
) -> Result<ComputeCredentialKeys, anyhow::Error> {
) -> Result<(), anyhow::Error> {
// JWT compact form is defined to be
// <B64(Header)> || . || <B64(Payload)> || . || <B64(Signature)>
// where Signature = alg(<B64(Header)> || . || <B64(Payload)>);
@@ -302,9 +300,9 @@ impl JwkCacheEntryLock {
key => bail!("unsupported key type {key:?}"),
};
let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
.context("Provided authentication token is not a valid JWT encoding")?;
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payload)
.context("Provided authentication token is not a valid JWT encoding")?;
tracing::debug!(?payload, "JWT signature valid with claims");
@@ -329,7 +327,7 @@ impl JwkCacheEntryLock {
);
}
Ok(ComputeCredentialKeys::JwtPayload(payloadb))
Ok(())
}
}
@@ -341,7 +339,7 @@ impl JwkCache {
role_name: &RoleName,
fetch: &F,
jwt: &str,
) -> Result<ComputeCredentialKeys, anyhow::Error> {
) -> Result<(), anyhow::Error> {
// try with just a read lock first
let key = (endpoint.clone(), role_name.clone());
let entry = self.map.get(&key).as_deref().map(Arc::clone);
@@ -573,7 +571,7 @@ mod tests {
use bytes::Bytes;
use http::Response;
use http_body_util::Full;
use hyper::service::service_fn;
use hyper1::service::service_fn;
use hyper_util::rt::TokioIo;
use rand::rngs::OsRng;
use rsa::pkcs8::DecodePrivateKey;
@@ -738,7 +736,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
});
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
let server = hyper::server::conn::http1::Builder::new();
let server = hyper1::server::conn::http1::Builder::new();
let addr = listener.local_addr().unwrap();
tokio::spawn(async move {
loop {

View File

@@ -5,11 +5,11 @@ use arc_swap::ArcSwapOption;
use crate::{
compute::ConnCfg,
context::RequestMonitoring,
control_plane::{
console::{
messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo},
NodeInfo,
},
context::RequestMonitoring,
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
EndpointId,
};

View File

@@ -1,8 +1,8 @@
use crate::{
auth, compute,
config::AuthenticationConfig,
console::{self, provider::NodeInfo},
context::RequestMonitoring,
control_plane::{self, provider::NodeInfo},
error::{ReportableError, UserFacingError},
stream::PqStream,
waiters,
@@ -70,7 +70,7 @@ pub(super) async fn authenticate(
let (psql_session_id, waiter) = loop {
let psql_session_id = new_psql_session_id();
match control_plane::mgmt::get_waiter(&psql_session_id) {
match console::mgmt::get_waiter(&psql_session_id) {
Ok(waiter) => break (psql_session_id, waiter),
Err(_e) => continue,
}

View File

@@ -3,8 +3,8 @@
use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload};
use crate::{
config::TlsServerEndPoint,
console::AuthSecret,
context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt,
sasl,
scram::{self, threadpool::ThreadPool},

View File

@@ -12,7 +12,7 @@ use proxy::{
},
cancellation::CancellationHandlerMain,
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
control_plane::{
console::{
locks::ApiLocks,
messages::{EndpointJwksResponse, JwksSettings},
},
@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr,
/// Path of the local proxy config file
#[clap(long, default_value = "./local_proxy.json")]
#[clap(long, default_value = "./localproxy.json")]
config_path: Utf8PathBuf,
/// Path of the local proxy PID file
#[clap(long, default_value = "./local_proxy.pid")]
#[clap(long, default_value = "./localproxy.pid")]
pid_path: Utf8PathBuf,
}
@@ -109,7 +109,7 @@ struct SqlOverHttpArgs {
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let _logging_guard = proxy::logging::init_local_proxy()?;
let _logging_guard = proxy::logging::init().await?;
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
@@ -138,7 +138,7 @@ async fn main() -> anyhow::Result<()> {
// in order to trigger the appropriate SIGHUP on config change.
//
// This also claims a "lock" that makes sure only one instance
// of local_proxy runs at a time.
// of local-proxy runs at a time.
let _process_guard = loop {
match pid_file::claim_for_current_process(&args.pid_path) {
Ok(guard) => break guard,
@@ -164,6 +164,12 @@ async fn main() -> anyhow::Result<()> {
16,
));
// write the process ID to a file so that compute-ctl can find our process later
// in order to trigger the appropriate SIGHUP on config change.
let pid = std::process::id();
info!("process running in PID {pid}");
std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
let mut maintenance_tasks = JoinSet::new();
let refresh_config_notify = Arc::new(Notify::new());
@@ -176,9 +182,9 @@ async fn main() -> anyhow::Result<()> {
// trigger the first config load **after** setting up the signal hook
// to avoid the race condition where:
// 1. No config file registered when local_proxy starts up
// 1. No config file registered when local-proxy starts up
// 2. The config file is written but the signal hook is not yet received
// 3. local_proxy completes startup but has no config loaded, despite there being a registered config.
// 3. local-proxy completes startup but has no config loaded, despite there being a registered config.
refresh_config_notify.notify_one();
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
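
The hunks above all touch the same mechanism: the process writes its PID to a file so compute-ctl can send it SIGHUP when the config file changes, and a SIGHUP handler installed before the first load turns the signal into a Notify wake-up for the reload loop, avoiding the startup race spelled out in the comments. A condensed sketch of that pattern, with assumed paths and simplified error handling:

use std::sync::Arc;
use tokio::signal::unix::{signal, SignalKind};
use tokio::sync::Notify;

async fn run_config_reloader(pid_path: &str, config_path: String) -> anyhow::Result<()> {
    // Publish our PID so an external agent can signal us on config change.
    std::fs::write(pid_path, format!("{}\n", std::process::id()))?;

    let notify = Arc::new(Notify::new());

    // SIGHUP handler: translate the signal into a Notify wake-up.
    let mut hup = signal(SignalKind::hangup())?;
    let hup_notify = Arc::clone(&notify);
    tokio::spawn(async move {
        while hup.recv().await.is_some() {
            hup_notify.notify_one();
        }
    });

    // Trigger the first load only after the handler is installed, so a config
    // written between startup and hook installation is not missed.
    notify.notify_one();

    loop {
        notify.notified().await;
        let cfg = tokio::fs::read_to_string(&config_path).await?;
        // ... parse and swap the active config here ...
        let _ = cfg;
    }
}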
@@ -305,7 +311,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
let mut jwks_set = vec![];
for jwks in data.jwks.into_iter().flatten() {
for jwks in data.jwks {
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
ensure!(
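
On the hunk above: the two forms differ in the type of the jwks field. Iterating `data.jwks.into_iter().flatten()` is the usual way to walk a field that may be an Option<Vec<_>> without unwrapping it, since None simply contributes no items, while the plain `for jwks in data.jwks` form only fits a bare Vec. A tiny illustration with a made-up struct:

struct Settings {
    jwks: Option<Vec<String>>,
}

fn urls(settings: Settings) -> Vec<String> {
    // None behaves like an empty list; Some(v) yields v's items.
    settings.jwks.into_iter().flatten().collect()
}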


@@ -19,8 +19,8 @@ use proxy::config::CacheOptions;
use proxy::config::HttpConfig;
use proxy::config::ProjectInfoCacheOptions;
use proxy::config::ProxyProtocolV2;
use proxy::console;
use proxy::context::parquet::ParquetUploadArgs;
use proxy::control_plane;
use proxy::http;
use proxy::http::health_server::AppMetrics;
use proxy::metrics::Metrics;
@@ -495,7 +495,7 @@ async fn main() -> anyhow::Result<()> {
proxy: proxy::metrics::Metrics::get(),
},
));
maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));
maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
if let Some(metrics_config) = &config.metric_collection {
// TODO: Add gc regardless of the metric collection being enabled.
@@ -506,8 +506,8 @@ async fn main() -> anyhow::Result<()> {
));
}
if let auth::Backend::ControlPlane(api, _) = &config.auth_backend {
if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api {
if let auth::Backend::Console(api, _) = &config.auth_backend {
if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {}
(client1, client2) => {
@@ -623,7 +623,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
);
info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
let caches = Box::leak(Box::new(console::caches::ApiCaches::new(
wake_compute_cache_config,
project_info_cache_config,
endpoint_cache_config,
@@ -636,7 +636,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
timeout,
} = args.wake_compute_lock.parse()?;
info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
"wake_compute_lock",
limiter,
shards,
@@ -653,27 +653,27 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
let wake_compute_endpoint_rate_limiter =
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
let api = control_plane::provider::neon::Api::new(
let api = console::provider::neon::Api::new(
endpoint,
caches,
locks,
wake_compute_endpoint_rate_limiter,
);
let api = control_plane::provider::ControlPlaneBackend::Management(api);
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
let api = console::provider::ConsoleBackend::Console(api);
auth::Backend::Console(MaybeOwned::Owned(api), ())
}
AuthBackendType::Web => {
let url = args.uri.parse()?;
auth::Backend::ConsoleRedirect(MaybeOwned::Owned(url), ())
auth::Backend::Web(MaybeOwned::Owned(url), ())
}
#[cfg(feature = "testing")]
AuthBackendType::Postgres => {
let url = args.auth_endpoint.parse()?;
let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy);
let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
let api = console::provider::mock::Api::new(url, !args.is_private_access_proxy);
let api = console::provider::ConsoleBackend::Postgres(api);
auth::Backend::Console(MaybeOwned::Owned(api), ())
}
};
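
Throughout this configuration code, caches and lock tables are wrapped in Box::leak(Box::new(..)), which trades a one-time heap allocation for a &'static reference that any task can hold for the lifetime of the process. A minimal sketch of that idiom with a made-up config type:

struct ProxyConfig {
    max_conns: usize,
}

// Leak the allocation on purpose: the config lives for the whole process,
// so tasks can hold plain &'static references instead of Arc clones.
fn build_config(max_conns: usize) -> &'static ProxyConfig {
    Box::leak(Box::new(ProxyConfig { max_conns }))
}

fn main() {
    let cfg = build_config(100);
    std::thread::spawn(move || assert_eq!(cfg.max_conns, 100))
        .join()
        .unwrap();
}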
@@ -689,7 +689,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
?epoch,
"Using NodeLocks (connect_compute)"
);
let connect_compute_locks = control_plane::locks::ApiLocks::new(
let connect_compute_locks = console::locks::ApiLocks::new(
"connect_compute_lock",
limiter,
shards,


@@ -16,7 +16,7 @@ use tracing::{debug, info};
use crate::{
auth::IpPattern,
config::ProjectInfoCacheOptions,
control_plane::AuthSecret,
console::AuthSecret,
intern::{EndpointIdInt, ProjectIdInt, RoleNameInt},
EndpointId, RoleName,
};


@@ -1,8 +1,8 @@
use crate::{
auth::parse_endpoint_param,
cancellation::CancelClosure,
console::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
context::RequestMonitoring,
control_plane::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
error::{ReportableError, UserFacingError},
metrics::{Metrics, NumDbConnectionsGuard},
proxy::neon_option,
@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn};
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
#[derive(Debug, Error)]
pub(crate) enum ConnectionError {


@@ -3,7 +3,7 @@ use crate::{
self,
backend::{jwt::JwkCache, AuthRateLimiter},
},
control_plane::locks::ApiLocks,
console::locks::ApiLocks,
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
scram::threadpool::ThreadPool,
serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
@@ -372,7 +372,7 @@ pub struct EndpointCacheConfig {
}
impl EndpointCacheConfig {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
/// Default options for [`crate::console::provider::NodeInfoCache`].
/// Notice that by default the limiter is empty, which means that cache is disabled.
pub const CACHE_DEFAULT_OPTIONS: &'static str =
"initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
@@ -447,7 +447,7 @@ pub struct CacheOptions {
}
impl CacheOptions {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
/// Default options for [`crate::console::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
/// Parse cache options passed via cmdline.
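
The CACHE_DEFAULT_OPTIONS and DEFAULT_OPTIONS_* constants in this file are comma-separated key=value strings (for example size=4000,ttl=4m or shards=64,permits=100,epoch=10m,timeout=10ms) that get parsed into the corresponding options structs at startup. The crate's real parser is not part of this diff; a simplified sketch of parsing such a string:

use std::collections::HashMap;

fn parse_options(s: &str) -> HashMap<&str, &str> {
    s.split(',')
        .filter(|kv| !kv.is_empty())
        .filter_map(|kv| kv.split_once('='))
        .collect()
}

fn main() {
    let opts = parse_options("shards=64,permits=100,epoch=10m,timeout=10ms");
    assert_eq!(opts["shards"], "64");
    assert_eq!(opts.get("timeout"), Some(&"10ms"));
}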
@@ -503,7 +503,7 @@ pub struct ProjectInfoCacheOptions {
}
impl ProjectInfoCacheOptions {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
/// Default options for [`crate::console::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str =
"size=10000,ttl=4m,max_roles=10,gc_interval=60m";
@@ -622,9 +622,9 @@ pub struct ConcurrencyLockOptions {
}
impl ConcurrencyLockOptions {
/// Default options for [`crate::control_plane::provider::ApiLocks`].
/// Default options for [`crate::console::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
/// Default options for [`crate::control_plane::provider::ApiLocks`].
/// Default options for [`crate::console::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
"shards=64,permits=100,epoch=10m,timeout=10ms";

Some files were not shown because too many files have changed in this diff.