add sudo package to build-tools image

2026-05-21 15:10:44 +00:00 · 2025-05-16 17:13:15 +02:00
64 changed files with 655 additions and 2168 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -963,7 +963,7 @@ jobs:
          fi

      - name: Verify docker-compose example and test extensions
-        timeout-minutes: 60
+        timeout-minutes: 20
        env:
          TAG: >-
            ${{
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3794,16 +3794,6 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"

-[[package]]
-name = "neon-shmem"
-version = "0.1.0"
-dependencies = [
- "nix 0.30.1",
- "tempfile",
- "thiserror 1.0.69",
- "workspace_hack",
-]
-
 [[package]]
 name = "never-say-never"
 version = "6.6.666"
@@ -3898,16 +3888,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "nu-ansi-term"
-version = "0.46.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
-dependencies = [
- "overload",
- "winapi",
-]
-
 [[package]]
 name = "num"
 version = "0.4.1"
@@ -4192,12 +4172,6 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"

-[[package]]
-name = "overload"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
-
 [[package]]
 name = "p256"
 version = "0.11.1"
@@ -4302,7 +4276,6 @@ dependencies = [
 "enumset",
 "fail",
 "futures",
- "hashlink",
 "hex",
 "hex-literal",
 "http-utils",
@@ -4451,16 +4424,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "pageserver_page_api"
-version = "0.1.0"
-dependencies = [
- "prost 0.13.3",
- "tonic",
- "tonic-build",
- "workspace_hack",
-]
-
 [[package]]
 name = "papaya"
 version = "0.2.1"
@@ -5255,7 +5218,6 @@ dependencies = [
 "tracing-log",
 "tracing-opentelemetry",
 "tracing-subscriber",
- "tracing-test",
 "tracing-utils",
 "try-lock",
 "typed-json",
@@ -7706,7 +7668,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
 dependencies = [
 "matchers",
- "nu-ansi-term",
 "once_cell",
 "regex",
 "serde",
@@ -7720,27 +7681,6 @@ dependencies = [
 "tracing-serde",
 ]

-[[package]]
-name = "tracing-test"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68"
-dependencies = [
- "tracing-core",
- "tracing-subscriber",
- "tracing-test-macro",
-]
-
-[[package]]
-name = "tracing-test-macro"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568"
-dependencies = [
- "quote",
- "syn 2.0.100",
-]
-
 [[package]]
 name = "tracing-utils"
 version = "0.1.0"
@@ -8542,7 +8482,6 @@ dependencies = [
 "log",
 "memchr",
 "nix 0.26.4",
- "nix 0.30.1",
 "nom",
 "num",
 "num-bigint",
@@ -8593,7 +8532,6 @@ dependencies = [
 "tracing",
 "tracing-core",
 "tracing-log",
- "tracing-subscriber",
 "url",
 "uuid",
 "zeroize",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,6 @@ members = [
    "pageserver/ctl",
    "pageserver/client",
    "pageserver/pagebench",
-    "pageserver/page_api",
    "proxy",
    "safekeeper",
    "safekeeper/client",
@@ -24,7 +23,6 @@ members = [
    "libs/postgres_ffi",
    "libs/safekeeper_api",
    "libs/desim",
-    "libs/neon-shmem",
    "libs/utils",
    "libs/consumption_metrics",
    "libs/postgres_backend",
@@ -129,7 +127,7 @@ md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
 memoffset = "0.9"
-nix = { version = "0.30.1", features = ["dir", "fs", "mman", "process", "socket", "signal", "poll"] }
+nix = { version = "0.30.1", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
 # Do not update to >= 7.0.0, at least. The update will have a significant impact
 # on compute startup metrics (start_postgres_ms), >= 25% degradation.
 notify = "6.0.0"
@@ -253,7 +251,6 @@ pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
-pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -144,6 +144,7 @@ RUN set -e \
        openssh-client \
        parallel \
        pkg-config \
+        sudo \
        unzip \
        wget \
        xz-utils \
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1439,38 +1439,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control

-#########################################################################################
-# Layer "pg_tracing"
-# compile pg_tracing extension
-#
-#########################################################################################
-FROM build-deps AS pg_tracing-src
-ARG PG_VERSION
-WORKDIR /ext-src
-RUN case "${PG_VERSION:?}" in \
-      "v14" | "v15") \
-        echo "pg_tracing not supported on this PostgreSQL version." && exit 0 \
-	;; \
-      *) \
-      ;; \
-    esac && \
-    wget https://github.com/DataDog/pg_tracing/archive/refs/tags/v0.1.3.tar.gz -O pg_tracing.tar.gz && \
-    echo "d0a7cca7279bb29601ba6c4c1aaeb3a44d71e6afa3b78aae1e3b7269e688f907 pg_tracing.tar.gz" | sha256sum --check && \
-    mkdir pg_tracing-src && cd pg_tracing-src && tar xzf ../pg_tracing.tar.gz --strip-components=1 -C .
-
-FROM pg-build AS pg_tracing-build
-COPY --from=pg_tracing-src /ext-src/ /ext-src/
-WORKDIR /ext-src/pg_tracing-src
-RUN case "${PG_VERSION:?}" in \
-      "v14" | "v15") \
-        echo "pg_tracing not supported on this PostgreSQL version." && exit 0 \
-        ;; \
-      *) \
-      ;; \
-    esac && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install
-
 #########################################################################################
 #
 # Layer "pg_mooncake"
@@ -1701,7 +1669,6 @@ COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
 COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg_tracing-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1878,7 +1845,6 @@ COPY --from=pg_semver-src /ext-src/ /ext-src/
 #COPY --from=wal2json-src /ext-src/ /ext-src/
 COPY --from=pg_ivm-src /ext-src/ /ext-src/
 COPY --from=pg_partman-src /ext-src/ /ext-src/
-COPY --from=pg_tracing-src /ext-src/ /ext-src/
 #COPY --from=pg_mooncake-src /ext-src/ /ext-src/
 COPY --from=pg_repack-src /ext-src/ /ext-src/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
--- a/compute/patches/rum.patch
+++ b/compute/patches/rum.patch
@@ -7,7 +7,7 @@ index 255e616..1c6edb7 100644
 			 RelationGetRelationName(index));
 
 +#ifdef NEON_SMGR
-+	smgr_start_unlogged_build(RelationGetSmgr(index));
++	smgr_start_unlogged_build(index->rd_smgr);
 +#endif
 +
 	initRumState(&buildstate.rumstate, index);
@@ -18,7 +18,7 @@ index 255e616..1c6edb7 100644
 	rumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);
 
 +#ifdef NEON_SMGR
-+	smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
++	smgr_finish_unlogged_build_phase_1(index->rd_smgr);
 +#endif
 +
 	/*
@@ -29,7 +29,7 @@ index 255e616..1c6edb7 100644
 	}
 
 +#ifdef NEON_SMGR
-+	smgr_end_unlogged_build(RelationGetSmgr(index));
++	smgr_end_unlogged_build(index->rd_smgr);
 +#endif
 +
 	/*
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -213,10 +213,8 @@ impl Escaping for PgIdent {

        // Find the first suitable tag that is not present in the string.
        // Postgres' max role/DB name length is 63 bytes, so even in the
-        // worst case it won't take long. Outer tag is always `tag + "x"`,
-        // so if `tag` is not present in the string, `outer_tag` is not
-        // present in the string either.
-        while self.contains(&tag.to_string()) {
+        // worst case it won't take long.
+        while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) {
            tag += "x";
            outer_tag = tag.clone() + "x";
        }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -71,14 +71,6 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
            ("name$$$", ("$x$name$$$$x$", "xx")),
            ("name$$$$", ("$x$name$$$$$x$", "xx")),
            ("name$x$", ("$xx$name$x$$xx$", "xxx")),
-            ("x", ("$xx$x$xx$", "xxx")),
-            ("xx", ("$xxx$xx$xxx$", "xxxx")),
-            ("$x", ("$xx$$x$xx$", "xxx")),
-            ("x$", ("$xx$x$$xx$", "xxx")),
-            ("$x$", ("$xx$$x$$xx$", "xxx")),
-            ("xx$", ("$xxx$xx$$xxx$", "xxxx")),
-            ("$xx", ("$xxx$$xx$xxx$", "xxxx")),
-            ("$xx$", ("$xxx$$xx$$xxx$", "xxxx")),
        ];

        for (input, expected) in test_cases {
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -546,11 +546,6 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("Falied to parse 'sampling_ratio'")?,
-            relsize_snapshot_cache_capacity: settings
-                .remove("relsize snapshot cache capacity")
-                .map(|x| x.parse::<usize>())
-                .transpose()
-                .context("Falied to parse 'relsize_snapshot_cache_capacity' as integer")?,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
--- a/endpoint_storage/src/app.rs
+++ b/endpoint_storage/src/app.rs
@@ -462,8 +462,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
        if var(REAL_S3_ENV).is_ok() {
            assert!(body.contains("remote_storage_s3_deleted_objects_total"));
        }
-
-        #[cfg(target_os = "linux")]
        assert!(body.contains("process_threads"));
    }

--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -1,13 +0,0 @@
-[package]
-name = "neon-shmem"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-thiserror.workspace = true
-nix.workspace=true
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
-
-[target.'cfg(target_os = "macos")'.dependencies]
-tempfile = "3.14.0"
--- a/libs/neon-shmem/src/lib.rs
+++ b/libs/neon-shmem/src/lib.rs
@@ -1,418 +0,0 @@
-//! Shared memory utilities for neon communicator
-
-use std::num::NonZeroUsize;
-use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use nix::errno::Errno;
-use nix::sys::mman::MapFlags;
-use nix::sys::mman::ProtFlags;
-use nix::sys::mman::mmap as nix_mmap;
-use nix::sys::mman::munmap as nix_munmap;
-use nix::unistd::ftruncate as nix_ftruncate;
-
-/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
-/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
-/// specified at creation.
-///
-/// The area is backed by an anonymous file created with memfd_create(). The full address space for
-/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
-/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
-/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
-/// future.
-pub struct ShmemHandle {
-    /// memfd file descriptor
-    fd: OwnedFd,
-
-    max_size: usize,
-
-    // Pointer to the beginning of the shared memory area. The header is stored there.
-    shared_ptr: NonNull<SharedStruct>,
-
-    // Pointer to the beginning of the user data
-    pub data_ptr: NonNull<u8>,
-}
-
-/// This is stored at the beginning in the shared memory area.
-struct SharedStruct {
-    max_size: usize,
-
-    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
-    current_size: AtomicUsize,
-}
-
-const RESIZE_IN_PROGRESS: usize = 1 << 63;
-
-const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
-
-/// Error type returned by the ShmemHandle functions.
-#[derive(thiserror::Error, Debug)]
-#[error("{msg}: {errno}")]
-pub struct Error {
-    pub msg: String,
-    pub errno: Errno,
-}
-
-impl Error {
-    fn new(msg: &str, errno: Errno) -> Error {
-        Error {
-            msg: msg.to_string(),
-            errno,
-        }
-    }
-}
-
-impl ShmemHandle {
-    /// Create a new shared memory area. To communicate between processes, the processes need to be
-    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
-    ///
-    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
-    /// processes can continue using it, however.
-    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
-        // create the backing anonymous file.
-        let fd = create_backing_file(name)?;
-
-        Self::new_with_fd(fd, initial_size, max_size)
-    }
-
-    fn new_with_fd(
-        fd: OwnedFd,
-        initial_size: usize,
-        max_size: usize,
-    ) -> Result<ShmemHandle, Error> {
-        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
-        // is a little larger than this because of the SharedStruct header. Make the upper limit
-        // somewhat smaller than that, because with anything close to that, you'll run out of
-        // memory anyway.
-        if max_size >= 1 << 48 {
-            panic!("max size {} too large", max_size);
-        }
-        if initial_size > max_size {
-            panic!("initial size {initial_size} larger than max size {max_size}");
-        }
-
-        // The actual initial / max size is the one given by the caller, plus the size of
-        // 'SharedStruct'.
-        let initial_size = HEADER_SIZE + initial_size;
-        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
-
-        // Reserve address space for it with mmap
-        //
-        // TODO: Use MAP_HUGETLB if possible
-        let start_ptr = unsafe {
-            nix_mmap(
-                None,
-                max_size,
-                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
-                MapFlags::MAP_SHARED,
-                &fd,
-                0,
-            )
-        }
-        .map_err(|e| Error::new("mmap failed: {e}", e))?;
-
-        // Reserve space for the initial size
-        enlarge_file(fd.as_fd(), initial_size as u64)?;
-
-        // Initialize the header
-        let shared: NonNull<SharedStruct> = start_ptr.cast();
-        unsafe {
-            shared.write(SharedStruct {
-                max_size: max_size.into(),
-                current_size: AtomicUsize::new(initial_size),
-            })
-        };
-
-        // The user data begins after the header
-        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
-
-        Ok(ShmemHandle {
-            fd,
-            max_size: max_size.into(),
-            shared_ptr: shared,
-            data_ptr,
-        })
-    }
-
-    // return reference to the header
-    fn shared(&self) -> &SharedStruct {
-        unsafe { self.shared_ptr.as_ref() }
-    }
-
-    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
-    /// when creating the area.
-    ///
-    /// This may only be called from one process/thread concurrently. We detect that case
-    /// and return an Error.
-    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
-        let new_size = new_size + HEADER_SIZE;
-        let shared = self.shared();
-
-        if new_size > self.max_size {
-            panic!(
-                "new size ({} is greater than max size ({})",
-                new_size, self.max_size
-            );
-        }
-        assert_eq!(self.max_size, shared.max_size);
-
-        // Lock the area by setting the bit in 'current_size'
-        //
-        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
-        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
-        // since this is not performance-critical, better safe than sorry .
-        let mut old_size = shared.current_size.load(Ordering::Acquire);
-        loop {
-            if (old_size & RESIZE_IN_PROGRESS) != 0 {
-                return Err(Error::new(
-                    "concurrent resize detected",
-                    Errno::UnknownErrno,
-                ));
-            }
-            match shared.current_size.compare_exchange(
-                old_size,
-                new_size,
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => break,
-                Err(x) => old_size = x,
-            }
-        }
-
-        // Ok, we got the lock.
-        //
-        // NB: If anything goes wrong, we *must* clear the bit!
-        let result = {
-            use std::cmp::Ordering::{Equal, Greater, Less};
-            match new_size.cmp(&old_size) {
-                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
-                    Error::new("could not shrink shmem segment, ftruncate failed: {e}", e)
-                }),
-                Equal => Ok(()),
-                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
-            }
-        };
-
-        // Unlock
-        shared.current_size.store(
-            if result.is_ok() { new_size } else { old_size },
-            Ordering::Release,
-        );
-
-        result
-    }
-
-    /// Returns the current user-visible size of the shared memory segment.
-    ///
-    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
-    /// responsibility not to access the area beyond the current size.
-    pub fn current_size(&self) -> usize {
-        let total_current_size =
-            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
-        total_current_size - HEADER_SIZE
-    }
-}
-
-impl Drop for ShmemHandle {
-    fn drop(&mut self) {
-        // SAFETY: The pointer was obtained from mmap() with the given size.
-        // We unmap the entire region.
-        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
-        // The fd is dropped automatically by OwnedFd.
-    }
-}
-
-/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
-/// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for
-/// development and testing, but in production we want the file to stay in memory.
-///
-/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
-#[allow(unused_variables)]
-fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
-            .map_err(|e| Error::new("memfd_create failed: {e}", e))
-    }
-    #[cfg(target_os = "macos")]
-    {
-        let file = tempfile::tempfile().map_err(|e| {
-            Error::new(
-                "could not create temporary file to back shmem area: {e}",
-                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
-            )
-        })?;
-        Ok(OwnedFd::from(file))
-    }
-}
-
-fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
-    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
-    // we don't get a segfault later when trying to actually use it.
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
-            Error::new(
-                "could not grow shmem segment, posix_fallocate failed: {e}",
-                e,
-            )
-        })
-    }
-    // As a fallback on macos, which doesn't have posix_fallocate, use plain 'fallocate'
-    #[cfg(target_os = "macos")]
-    {
-        nix::unistd::ftruncate(fd, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed: {e}", e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use nix::unistd::ForkResult;
-    use std::ops::Range;
-
-    /// check that all bytes in given range have the expected value.
-    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
-        for i in range {
-            let b = unsafe { *(ptr.add(i)) };
-            assert_eq!(expected, b, "unexpected byte at offset {}", i);
-        }
-    }
-
-    /// Write 'b' to all bytes in the given range
-    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
-        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
-    }
-
-    // simple single-process test of growing and shrinking
-    #[test]
-    fn test_shmem_resize() -> Result<(), Error> {
-        let max_size = 1024 * 1024;
-        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
-
-        assert_eq!(init_struct.current_size(), 0);
-
-        // Initial grow
-        let size1 = 10000;
-        init_struct.set_size(size1).unwrap();
-        assert_eq!(init_struct.current_size(), size1);
-
-        // Write some data
-        let data_ptr = init_struct.data_ptr.as_ptr();
-        write_range(data_ptr, 0xAA, 0..size1);
-        assert_range(data_ptr, 0xAA, 0..size1);
-
-        // Shrink
-        let size2 = 5000;
-        init_struct.set_size(size2).unwrap();
-        assert_eq!(init_struct.current_size(), size2);
-
-        // Grow again
-        let size3 = 20000;
-        init_struct.set_size(size3).unwrap();
-        assert_eq!(init_struct.current_size(), size3);
-
-        // Try to read it. The area that was shrunk and grown again should read as all zeros now
-        assert_range(data_ptr, 0xAA, 0..5000);
-        assert_range(data_ptr, 0, 5000..size1);
-
-        // Try to grow beyond max_size
-        //let size4 = max_size + 1;
-        //assert!(init_struct.set_size(size4).is_err());
-
-        // Dropping init_struct should unmap the memory
-        drop(init_struct);
-
-        Ok(())
-    }
-
-    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
-    /// but is stored in the shared memory area and works across processes. It's implemented by
-    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
-    struct SimpleBarrier {
-        num_procs: usize,
-        count: AtomicUsize,
-    }
-
-    impl SimpleBarrier {
-        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
-            unsafe {
-                *ptr = SimpleBarrier {
-                    num_procs,
-                    count: AtomicUsize::new(0),
-                }
-            }
-        }
-
-        pub fn wait(&self) {
-            let old = self.count.fetch_add(1, Ordering::Relaxed);
-
-            let generation = old / self.num_procs;
-
-            let mut current = old + 1;
-            while current < (generation + 1) * self.num_procs {
-                std::thread::sleep(std::time::Duration::from_millis(10));
-                current = self.count.load(Ordering::Relaxed);
-            }
-        }
-    }
-
-    #[test]
-    fn test_multi_process() {
-        // Initialize
-        let max_size = 1_000_000_000_000;
-        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
-        let ptr = init_struct.data_ptr.as_ptr();
-
-        // Store the SimpleBarrier in the first 1k of the area.
-        init_struct.set_size(10000).unwrap();
-        let barrier_ptr: *mut SimpleBarrier = unsafe {
-            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
-                .cast()
-        };
-        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
-        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
-
-        // Fork another test process. The code after this runs in both processes concurrently.
-        let fork_result = unsafe { nix::unistd::fork().unwrap() };
-
-        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, 1000..2000);
-        } else {
-            write_range(ptr, 0xBB, 2000..3000);
-        }
-        barrier.wait();
-        // Verify the contents. (in both processes)
-        assert_range(ptr, 0xAA, 1000..2000);
-        assert_range(ptr, 0xBB, 2000..3000);
-
-        // Grow, from the child this time
-        let size = 10_000_000;
-        if !fork_result.is_parent() {
-            init_struct.set_size(size).unwrap();
-        }
-        barrier.wait();
-
-        // make some writes at the end
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, (size - 10)..size);
-        } else {
-            write_range(ptr, 0xBB, (size - 20)..(size - 10));
-        }
-        barrier.wait();
-
-        // Verify the contents. (This runs in both processes)
-        assert_range(ptr, 0, (size - 1000)..(size - 20));
-        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
-        assert_range(ptr, 0xAA, (size - 10)..size);
-
-        if let ForkResult::Parent { child } = fork_result {
-            nix::sys::wait::waitpid(child, None).unwrap();
-        }
-    }
-}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -235,7 +235,7 @@ pub enum PageServiceProtocolPipelinedBatchingStrategy {
    ScatteredLsn,
 }

-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(tag = "mode", rename_all = "kebab-case")]
 pub enum GetVectoredConcurrentIo {
    /// The read path is fully sequential: layers are visited
@@ -491,8 +491,6 @@ pub struct TenantConfigToml {
    /// Tenant level performance sampling ratio override. Controls the ratio of get page requests
    /// that will get perf sampling for the tenant.
    pub sampling_ratio: Option<Ratio>,
-    /// Capacity of relsize snapshot cache (used by replicas).
-    pub relsize_snapshot_cache_capacity: usize,
 }

 pub mod defaults {
@@ -732,7 +730,6 @@ pub mod tenant_conf_defaults {
    pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
    pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
    pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
-    pub const DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY: usize = 1000;
 }

 impl Default for TenantConfigToml {
@@ -790,7 +787,6 @@ impl Default for TenantConfigToml {
            gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
            gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
            sampling_ratio: None,
-            relsize_snapshot_cache_capacity: DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY,
        }
    }
 }
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -630,8 +630,6 @@ pub struct TenantConfigPatch {
    pub gc_compaction_ratio_percent: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub sampling_ratio: FieldPatch<Option<Ratio>>,
-    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
-    pub relsize_snapshot_cache_capacity: FieldPatch<usize>,
 }

 /// Like [`crate::config::TenantConfigToml`], but preserves the information
@@ -761,9 +759,6 @@ pub struct TenantConfig {

    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_ratio: Option<Option<Ratio>>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub relsize_snapshot_cache_capacity: Option<usize>,
 }

 impl TenantConfig {
@@ -809,7 +804,6 @@ impl TenantConfig {
            mut gc_compaction_initial_threshold_kb,
            mut gc_compaction_ratio_percent,
            mut sampling_ratio,
-            mut relsize_snapshot_cache_capacity,
        } = self;

        patch.checkpoint_distance.apply(&mut checkpoint_distance);
@@ -911,9 +905,6 @@ impl TenantConfig {
            .gc_compaction_ratio_percent
            .apply(&mut gc_compaction_ratio_percent);
        patch.sampling_ratio.apply(&mut sampling_ratio);
-        patch
-            .relsize_snapshot_cache_capacity
-            .apply(&mut relsize_snapshot_cache_capacity);

        Ok(Self {
            checkpoint_distance,
@@ -953,7 +944,6 @@ impl TenantConfig {
            gc_compaction_initial_threshold_kb,
            gc_compaction_ratio_percent,
            sampling_ratio,
-            relsize_snapshot_cache_capacity,
        })
    }

@@ -1062,9 +1052,6 @@ impl TenantConfig {
                .gc_compaction_ratio_percent
                .unwrap_or(global_conf.gc_compaction_ratio_percent),
            sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
-            relsize_snapshot_cache_capacity: self
-                .relsize_snapshot_cache_capacity
-                .unwrap_or(global_conf.relsize_snapshot_cache_capacity),
        }
    }
 }
--- a/libs/proxy/tokio-postgres2/src/error/mod.rs
+++ b/libs/proxy/tokio-postgres2/src/error/mod.rs
@@ -86,27 +86,6 @@ pub struct DbError {
 }

 impl DbError {
-    pub fn new_test_error(code: SqlState, message: String) -> Self {
-        DbError {
-            severity: "ERROR".to_string(),
-            parsed_severity: Some(Severity::Error),
-            code,
-            message,
-            detail: None,
-            hint: None,
-            position: None,
-            where_: None,
-            schema: None,
-            table: None,
-            column: None,
-            datatype: None,
-            constraint: None,
-            file: None,
-            line: None,
-            routine: None,
-        }
-    }
-
    pub(crate) fn parse(fields: &mut ErrorFields<'_>) -> io::Result<DbError> {
        let mut severity = None;
        let mut parsed_severity = None;
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -30,7 +30,6 @@ crc32c.workspace = true
 either.workspace = true
 fail.workspace = true
 futures.workspace = true
-hashlink.workspace = true
 hex.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -1,13 +0,0 @@
-[package]
-name = "pageserver_page_api"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-prost.workspace = true
-tonic.workspace = true
-workspace_hack.workspace = true
-
-[build-dependencies]
-tonic-build.workspace = true
--- a/pageserver/page_api/build.rs
+++ b/pageserver/page_api/build.rs
@@ -1,13 +0,0 @@
-use std::env;
-use std::path::PathBuf;
-
-/// Generates Rust code from .proto Protobuf schemas, along with a binary file
-/// descriptor set for Protobuf schema reflection.
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let out_dir = PathBuf::from(env::var("OUT_DIR")?);
-    tonic_build::configure()
-        .bytes(["."])
-        .file_descriptor_set_path(out_dir.join("page_api_descriptor.bin"))
-        .compile_protos(&["proto/page_service.proto"], &["proto"])
-        .map_err(|err| err.into())
-}
--- a/pageserver/page_api/proto/page_service.proto
+++ b/pageserver/page_api/proto/page_service.proto
@@ -1,233 +0,0 @@
-// Page service, presented by pageservers for computes.
-//
-// This is the compute read path. It primarily serves page versions at given
-// LSNs, but also base backups, SLRU segments, and relation metadata.
-//
-// EXPERIMENTAL: this is still under development and subject to change.
-//
-// Request metadata headers:
-// - authorization: JWT token ("Bearer <token>"), if auth is enabled
-// - neon-tenant-id: tenant ID ("7c4a1f9e3bd6470c8f3e21a65bd2e980")
-// - neon-shard-id: shard ID, as <number><count> in hex ("0b10" = shard 11 of 16, 0-based)
-// - neon-timeline-id: timeline ID ("f08c4e9a2d5f76b1e3a7c2d8910f4b3e")
-//
-// The service can be accessed via e.g. grpcurl:
-//
-//    ```
-//    grpcurl \
-//      -plaintext \
-//      -H "neon-tenant-id: 7c4a1f9e3bd6470c8f3e21a65bd2e980" \
-//      -H "neon-shard-id: 0b10" \
-//      -H "neon-timeline-id: f08c4e9a2d5f76b1e3a7c2d8910f4b3e" \
-//      -H "authorization: Bearer $JWT" \
-//      -d '{"read_lsn": {"request_lsn": 1234567890}, "rel": {"spc_oid": 1663, "db_oid": 1234, "rel_number": 5678, "fork_number": 0}}'
-//      localhost:51051 page_api.PageService/CheckRelExists
-//    ```
-//
-// TODO: consider adding neon-compute-mode ("primary", "static", "replica").
-// However, this will require reconnecting when changing modes.
-//
-// TODO: write implementation guidance on
-// - Health checks
-// - Tracing, OpenTelemetry
-// - Compression
-
-syntax = "proto3";
-package page_api;
-
-service PageService {
-  // Returns whether a relation exists.
-  rpc CheckRelExists(CheckRelExistsRequest) returns (CheckRelExistsResponse);
-
-  // Fetches a base backup.
-  rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);
-
-  // Returns the total size of a database, as # of bytes.
-  rpc GetDbSize (GetDbSizeRequest) returns (GetDbSizeResponse);
-
-  // Fetches pages.
-  //
-  // This is implemented as a bidirectional streaming RPC for performance. Unary
-  // requests incur costs for e.g. HTTP/2 stream setup, header parsing,
-  // authentication, and so on -- with streaming, we only pay these costs during
-  // the initial stream setup. This ~doubles throughput in benchmarks. Other
-  // RPCs use regular unary requests, since they are not as frequent and
-  // performance-critical, and this simplifies implementation.
-  //
-  // NB: a status response (e.g. errors) will terminate the stream. The stream
-  // may be shared by e.g. multiple Postgres backends, so we should avoid this.
-  // Most errors are therefore sent as GetPageResponse.status instead.
-  rpc GetPages (stream GetPageRequest) returns (stream GetPageResponse);
-
-  // Returns the size of a relation, as # of blocks.
-  rpc GetRelSize (GetRelSizeRequest) returns (GetRelSizeResponse);
-
-  // Fetches an SLRU segment.
-  rpc GetSlruSegment (GetSlruSegmentRequest) returns (GetSlruSegmentResponse);
-}
-
-// The LSN a request should read at.
-message ReadLsn {
-  // The request's read LSN. Required.
-  uint64 request_lsn = 1;
-  // If given, the caller guarantees that the page has not been modified since
-  // this LSN. Must be smaller than or equal to request_lsn. This allows the
-  // Pageserver to serve an old page without waiting for the request LSN to
-  // arrive. Valid for all request types.
-  //
-  // It is undefined behaviour to make a request such that the page was, in
-  // fact, modified between request_lsn and not_modified_since_lsn. The
-  // Pageserver might detect it and return an error, or it might return the old
-  // page version or the new page version. Setting not_modified_since_lsn equal
-  // to request_lsn is always safe, but can lead to unnecessary waiting.
-  uint64 not_modified_since_lsn = 2;
-}
-
-// A relation identifier.
-message RelTag {
-    uint32 spc_oid = 1;
-    uint32 db_oid = 2;
-    uint32 rel_number = 3;
-    uint32 fork_number = 4;
-}
-
-// Checks whether a relation exists, at the given LSN. Only valid on shard 0,
-// other shards will error.
-message CheckRelExistsRequest {
-  ReadLsn read_lsn = 1;
-  RelTag rel = 2;
-}
-
-message CheckRelExistsResponse {
-  bool exists = 1;
-}
-
-// Requests a base backup at a given LSN.
-message GetBaseBackupRequest {
-  // The LSN to fetch a base backup at.
-  ReadLsn read_lsn = 1;
-  // If true, logical replication slots will not be created.
-  bool replica = 2;
-}
-
-// Base backup response chunk, returned as an ordered stream.
-message GetBaseBackupResponseChunk {
-  // A basebackup data chunk. The size is undefined, but bounded by the 4 MB
-  // gRPC message size limit.
-  bytes chunk = 1;
-}
-
-// Requests the size of a database, as # of bytes. Only valid on shard 0, other
-// shards will error.
-message GetDbSizeRequest {
-  ReadLsn read_lsn = 1;
-  uint32 db_oid = 2;
-}
-
-message GetDbSizeResponse {
-  uint64 num_bytes = 1;
-}
-
-// Requests one or more pages.
-message GetPageRequest {
-  // A request ID. Will be included in the response. Should be unique for
-  // in-flight requests on the stream.
-  uint64 request_id = 1;
-  // The request class.
-  GetPageClass request_class = 2;
-  // The LSN to read at.
-  ReadLsn read_lsn = 3;
-  // The relation to read from.
-  RelTag rel = 4;
-  // Page numbers to read. Must belong to the remote shard.
-  //
-  // Multiple pages will be executed as a single batch by the Pageserver,
-  // amortizing layer access costs and parallelizing them. This may increase the
-  // latency of any individual request, but improves the overall latency and
-  // throughput of the batch as a whole.
-  //
-  // TODO: this causes an allocation in the common single-block case. The sender
-  // can use a SmallVec to stack-allocate it, but Prost will always deserialize
-  // into a heap-allocated Vec. Consider optimizing this.
-  //
-  // TODO: we might be able to avoid a sort or something if we mandate that these
-  // are always in order. But we can't currenly rely on this on the server, because
-  // of compatibility with the libpq protocol handler.
-  repeated uint32 block_number = 5;
-}
-
-// A GetPageRequest class. Primarily intended for observability, but may also be
-// used for prioritization in the future.
-enum GetPageClass {
-  // Unknown class. For forwards compatibility: used when the client sends a
-  // class that the server doesn't know about.
-  GET_PAGE_CLASS_UNKNOWN = 0;
-  // A normal request. This is the default.
-  GET_PAGE_CLASS_NORMAL = 1;
-  // A prefetch request. NB: can only be classified on pg < 18.
-  GET_PAGE_CLASS_PREFETCH = 2;
-  // A background request (e.g. vacuum).
-  GET_PAGE_CLASS_BACKGROUND = 3;
-}
-
-// A GetPage response.
-//
-// A batch response will contain all of the requested pages. We could eagerly
-// emit individual pages as soon as they are ready, but on a readv() Postgres
-// holds buffer pool locks on all pages in the batch and we'll only return once
-// the entire batch is ready, so no one can make use of the individual pages.
-message GetPageResponse {
-  // The original request's ID.
-  uint64 request_id = 1;
-  // The response status code.
-  GetPageStatus status = 2;
-  // A string describing the status, if any.
-  string reason = 3;
-  // The 8KB page images, in the same order as the request. Empty if status != OK.
-  repeated bytes page_image = 4;
-}
-
-// A GetPageResponse status code. Since we use a bidirectional stream, we don't
-// want to send errors as gRPC statuses, since this would terminate the stream.
-enum GetPageStatus {
-  // Unknown status. For forwards compatibility: used when the server sends a
-  // status code that the client doesn't know about.
-  GET_PAGE_STATUS_UNKNOWN = 0;
-  // The request was successful.
-  GET_PAGE_STATUS_OK = 1;
-  // The page did not exist. The tenant/timeline/shard has already been
-  // validated during stream setup.
-  GET_PAGE_STATUS_NOT_FOUND = 2;
-  // The request was invalid.
-  GET_PAGE_STATUS_INVALID = 3;
-  // The tenant is rate limited. Slow down and retry later.
-  GET_PAGE_STATUS_SLOW_DOWN = 4;
-  // TODO: consider adding a GET_PAGE_STATUS_LAYER_DOWNLOAD in the case of a
-  // layer download. This could free up the server task to process other
-  // requests while the layer download is in progress.
-}
-
-// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on
-// shard 0, other shards will error.
-message GetRelSizeRequest {
-  ReadLsn read_lsn = 1;
-  RelTag rel = 2;
-}
-
-message GetRelSizeResponse {
-  uint32 num_blocks = 1;
-}
-
-// Requests an SLRU segment. Only valid on shard 0, other shards will error.
-message GetSlruSegmentRequest {
-  ReadLsn read_lsn = 1;
-  uint32 kind = 2;
-  uint32 segno = 3;
-}
-
-// Returns an SLRU segment.
-//
-// These are up 32 pages (256 KB), so we can send them as a single response.
-message GetSlruSegmentResponse {
-  bytes segment = 1;
-}
--- a/pageserver/page_api/src/lib.rs
+++ b/pageserver/page_api/src/lib.rs
@@ -1,19 +0,0 @@
-//! This crate provides the Pageserver's page API. It contains:
-//!
-//! * proto/page_service.proto: the Protobuf schema for the page API.
-//! * proto: auto-generated Protobuf types for gRPC.
-//!
-//! This crate is used by both the client and the server. Try to keep it slim.
-
-// Code generated by protobuf.
-pub mod proto {
-    tonic::include_proto!("page_api");
-
-    /// File descriptor set for Protobuf schema reflection. This allows using
-    /// e.g. grpcurl with the API.
-    pub const FILE_DESCRIPTOR_SET: &[u8] =
-        tonic::include_file_descriptor_set!("page_api_descriptor");
-
-    pub use page_service_client::PageServiceClient;
-    pub use page_service_server::{PageService, PageServiceServer};
-}
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -144,7 +144,7 @@ where
        replica,
        ctx,
        io_concurrency: IoConcurrency::spawn_from_conf(
-            timeline.conf.get_vectored_concurrent_io,
+            timeline.conf,
            timeline
                .gate
                .enter()
@@ -343,7 +343,7 @@ where
            // Gather non-relational files from object storage pages.
            let slru_partitions = self
                .timeline
-                .get_slru_keyspace(Version::at(self.lsn), self.ctx)
+                .get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
                .await?
                .partition(
                    self.timeline.get_shard_identity(),
@@ -378,7 +378,7 @@ where
            // Otherwise only include init forks of unlogged relations.
            let rels = self
                .timeline
-                .list_rels(spcnode, dbnode, Version::at(self.lsn), self.ctx)
+                .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
                .await?;
            for &rel in rels.iter() {
                // Send init fork as main fork to provide well formed empty
@@ -517,7 +517,7 @@ where
    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> Result<(), BasebackupError> {
        let nblocks = self
            .timeline
-            .get_rel_size(src, Version::at(self.lsn), self.ctx)
+            .get_rel_size(src, Version::Lsn(self.lsn), self.ctx)
            .await?;

        // If the relation is empty, create an empty file
@@ -577,7 +577,7 @@ where
        let relmap_img = if has_relmap_file {
            let img = self
                .timeline
-                .get_relmap_file(spcnode, dbnode, Version::at(self.lsn), self.ctx)
+                .get_relmap_file(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
                .await?;

            if img.len()
@@ -631,7 +631,7 @@ where
            if !has_relmap_file
                && self
                    .timeline
-                    .list_rels(spcnode, dbnode, Version::at(self.lsn), self.ctx)
+                    .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
                    .await?
                    .is_empty()
            {
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -544,23 +544,6 @@ impl PageServerConf {
                    ratio.numerator, ratio.denominator
                )
            );
-
-            let url = Url::parse(&tracing_config.export_config.endpoint)
-                .map_err(anyhow::Error::msg)
-                .with_context(|| {
-                    format!(
-                        "tracing endpoint URL is invalid : {}",
-                        tracing_config.export_config.endpoint
-                    )
-                })?;
-
-            ensure!(
-                url.scheme() == "http" || url.scheme() == "https",
-                format!(
-                    "tracing endpoint URL must start with http:// or https://: {}",
-                    tracing_config.export_config.endpoint
-                )
-            );
        }

        IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)
@@ -677,25 +660,4 @@ mod tests {
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect("parse_and_validate");
    }
-
-    #[test]
-    fn test_config_tracing_endpoint_is_invalid() {
-        let input = r#"
-            control_plane_api = "http://localhost:6666"
-
-            [tracing]
-
-            sampling_ratio = { numerator = 1, denominator = 0 }
-
-            [tracing.export_config]
-            endpoint = "localhost:4317"
-            protocol = "http-binary"
-            timeout = "1ms"
-        "#;
-        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
-            .expect("config has valid fields");
-        let workdir = Utf8PathBuf::from("/nonexistent");
-        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
-            .expect_err("parse_and_validate should fail for endpoint without scheme");
-    }
 }
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -449,7 +449,7 @@ async fn build_timeline_info_common(
    // Internally we distinguish between the planned GC cutoff (PITR point) and the "applied" GC cutoff (where we
    // actually trimmed data to), which can pass each other when PITR is changed.
    let min_readable_lsn = std::cmp::max(
-        timeline.get_gc_cutoff_lsn().unwrap_or_default(),
+        timeline.get_gc_cutoff_lsn(),
        *timeline.get_applied_gc_cutoff_lsn(),
    );

@@ -3199,7 +3199,7 @@ async fn list_aux_files(
            .await?;

    let io_concurrency = IoConcurrency::spawn_from_conf(
-        state.conf.get_vectored_concurrent_io,
+        state.conf,
        timeline.gate.enter().map_err(|_| ApiError::Cancelled)?,
    );

--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -843,50 +843,23 @@ pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|
    .expect("failed to define a metric")
 });

-pub(crate) static RELSIZE_LATEST_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
+pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
    register_uint_gauge!(
-        "pageserver_relsize_latest_cache_entries",
-        "Number of entries in the latest relation size cache",
+        "pageserver_relsize_cache_entries",
+        "Number of entries in the relation size cache",
    )
    .expect("failed to define a metric")
 });

-pub(crate) static RELSIZE_LATEST_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
+        .expect("failed to define a metric")
+});
+
+pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "pageserver_relsize_latest_cache_hits",
-        "Latest relation size cache hits",
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static RELSIZE_LATEST_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
-        "pageserver_relsize_latest_cache_misses",
-        "Relation size latest cache misses",
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static RELSIZE_SNAPSHOT_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
-    register_uint_gauge!(
-        "pageserver_relsize_snapshot_cache_entries",
-        "Number of entries in the pitr relation size cache",
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static RELSIZE_SNAPSHOT_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
-        "pageserver_relsize_snapshot_cache_hits",
-        "Pitr relation size cache hits",
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static RELSIZE_SNAPSHOT_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
-        "pageserver_relsize_snapshot_cache_misses",
-        "Relation size snapshot cache misses",
+        "pageserver_relsize_cache_misses",
+        "Relation size cache misses",
    )
    .expect("failed to define a metric")
 });
@@ -1066,15 +1039,6 @@ pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|
    .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
 });

-pub(crate) static TENANT_OFFLOADED_TIMELINES: Lazy<UIntGaugeVec> = Lazy::new(|| {
-    register_uint_gauge_vec!(
-        "pageserver_tenant_offloaded_timelines",
-        "Number of offloaded timelines of a tenant",
-        &["tenant_id", "shard_id"]
-    )
-    .expect("Failed to register pageserver_tenant_offloaded_timelines metric")
-});
-
 pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_eviction_iteration_duration_seconds_global",
@@ -3560,14 +3524,11 @@ impl TimelineMetrics {
 }

 pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
-    let tid = tenant_shard_id.tenant_id.to_string();
-    let shard_id = tenant_shard_id.shard_slug().to_string();
-
    // Only shard zero deals in synthetic sizes
    if tenant_shard_id.is_shard_zero() {
+        let tid = tenant_shard_id.tenant_id.to_string();
        let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
    }
-    let _ = TENANT_OFFLOADED_TIMELINES.remove_label_values(&[&tid, &shard_id]);

    tenant_throttling::remove_tenant_metrics(tenant_shard_id);

--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -18,7 +18,7 @@ use itertools::Itertools;
 use jsonwebtoken::TokenData;
 use once_cell::sync::OnceCell;
 use pageserver_api::config::{
-    GetVectoredConcurrentIo, PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
+    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
    PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
 };
 use pageserver_api::key::rel_block_to_key;
@@ -62,7 +62,7 @@ use crate::metrics::{
    self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, GetPageBatchBreakReason, LIVE_CONNECTIONS,
    SmgrOpTimer, TimelineMetrics,
 };
-use crate::pgdatadir_mapping::{LsnRange, Version};
+use crate::pgdatadir_mapping::Version;
 use crate::span::{
    debug_assert_current_span_has_tenant_and_timeline_id,
    debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
@@ -331,10 +331,10 @@ async fn page_service_conn_main(
    // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
    // and create the per-query context in process_query ourselves.
    let mut conn_handler = PageServerHandler::new(
+        conf,
        tenant_manager,
        auth,
        pipelining_config,
-        conf.get_vectored_concurrent_io,
        perf_span_fields,
        connection_ctx,
        cancel.clone(),
@@ -371,6 +371,7 @@ async fn page_service_conn_main(
 }

 struct PageServerHandler {
+    conf: &'static PageServerConf,
    auth: Option<Arc<SwappableJwtAuth>>,
    claims: Option<Claims>,

@@ -388,7 +389,6 @@ struct PageServerHandler {
    timeline_handles: Option<TimelineHandles>,

    pipelining_config: PageServicePipeliningConfig,
-    get_vectored_concurrent_io: GetVectoredConcurrentIo,

    gate_guard: GateGuard,
 }
@@ -642,7 +642,7 @@ impl std::fmt::Display for BatchedPageStreamError {
 struct BatchedGetPageRequest {
    req: PagestreamGetPageRequest,
    timer: SmgrOpTimer,
-    lsn_range: LsnRange,
+    effective_request_lsn: Lsn,
    ctx: RequestContext,
 }

@@ -764,12 +764,12 @@ impl BatchedFeMessage {
                match batching_strategy {
                    PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => {
                        if let Some(last_in_batch) = accum_pages.last() {
-                            if last_in_batch.lsn_range.effective_lsn
-                                != this_pages[0].lsn_range.effective_lsn
+                            if last_in_batch.effective_request_lsn
+                                != this_pages[0].effective_request_lsn
                            {
                                trace!(
-                                    accum_lsn = %last_in_batch.lsn_range.effective_lsn,
-                                    this_lsn = %this_pages[0].lsn_range.effective_lsn,
+                                    accum_lsn = %last_in_batch.effective_request_lsn,
+                                    this_lsn = %this_pages[0].effective_request_lsn,
                                    "stopping batching because LSN changed"
                                );

@@ -784,15 +784,15 @@ impl BatchedFeMessage {
                        let same_page_different_lsn = accum_pages.iter().any(|batched| {
                            batched.req.rel == this_pages[0].req.rel
                                && batched.req.blkno == this_pages[0].req.blkno
-                                && batched.lsn_range.effective_lsn
-                                    != this_pages[0].lsn_range.effective_lsn
+                                && batched.effective_request_lsn
+                                    != this_pages[0].effective_request_lsn
                        });

                        if same_page_different_lsn {
                            trace!(
                                rel=%this_pages[0].req.rel,
                                blkno=%this_pages[0].req.blkno,
-                                lsn=%this_pages[0].lsn_range.effective_lsn,
+                                lsn=%this_pages[0].effective_request_lsn,
                                "stopping batching because same page was requested at different LSNs"
                            );

@@ -844,16 +844,17 @@ impl BatchedFeMessage {
 impl PageServerHandler {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
+        conf: &'static PageServerConf,
        tenant_manager: Arc<TenantManager>,
        auth: Option<Arc<SwappableJwtAuth>>,
        pipelining_config: PageServicePipeliningConfig,
-        get_vectored_concurrent_io: GetVectoredConcurrentIo,
        perf_span_fields: ConnectionPerfSpanFields,
        connection_ctx: RequestContext,
        cancel: CancellationToken,
        gate_guard: GateGuard,
    ) -> Self {
        PageServerHandler {
+            conf,
            auth,
            claims: None,
            connection_ctx,
@@ -861,7 +862,6 @@ impl PageServerHandler {
            timeline_handles: Some(TimelineHandles::new(tenant_manager)),
            cancel,
            pipelining_config,
-            get_vectored_concurrent_io,
            gate_guard,
        }
    }
@@ -1158,7 +1158,7 @@ impl PageServerHandler {
                .await?;

                // We're holding the Handle
-                let effective_lsn = match Self::effective_request_lsn(
+                let effective_request_lsn = match Self::effective_request_lsn(
                    &shard,
                    shard.get_last_record_lsn(),
                    req.hdr.request_lsn,
@@ -1177,10 +1177,7 @@ impl PageServerHandler {
                    pages: smallvec::smallvec![BatchedGetPageRequest {
                        req,
                        timer,
-                        lsn_range: LsnRange {
-                            effective_lsn,
-                            request_lsn: req.hdr.request_lsn
-                        },
+                        effective_request_lsn,
                        ctx,
                    }],
                    // The executor grabs the batch when it becomes idle.
@@ -1281,7 +1278,7 @@ impl PageServerHandler {
    }

    #[instrument(level = tracing::Level::DEBUG, skip_all)]
-    async fn pagestream_handle_batched_message<IO>(
+    async fn pagesteam_handle_batched_message<IO>(
        &mut self,
        pgb_writer: &mut PostgresBackend<IO>,
        batch: BatchedFeMessage,
@@ -1626,7 +1623,7 @@ impl PageServerHandler {
        }

        let io_concurrency = IoConcurrency::spawn_from_conf(
-            self.get_vectored_concurrent_io,
+            self.conf,
            match self.gate_guard.try_clone() {
                Ok(guard) => guard,
                Err(_) => {
@@ -1736,7 +1733,7 @@ impl PageServerHandler {
            };

            let result = self
-                .pagestream_handle_batched_message(
+                .pagesteam_handle_batched_message(
                    pgb_writer,
                    msg,
                    io_concurrency.clone(),
@@ -1912,7 +1909,7 @@ impl PageServerHandler {
                            return Err(e);
                        }
                    };
-                    self.pagestream_handle_batched_message(
+                    self.pagesteam_handle_batched_message(
                        pgb_writer,
                        batch,
                        io_concurrency.clone(),
@@ -2130,14 +2127,7 @@ impl PageServerHandler {
        .await?;

        let exists = timeline
-            .get_rel_exists(
-                req.rel,
-                Version::LsnRange(LsnRange {
-                    effective_lsn: lsn,
-                    request_lsn: req.hdr.request_lsn,
-                }),
-                ctx,
-            )
+            .get_rel_exists(req.rel, Version::Lsn(lsn), ctx)
            .await?;

        Ok(PagestreamBeMessage::Exists(PagestreamExistsResponse {
@@ -2164,14 +2154,7 @@ impl PageServerHandler {
        .await?;

        let n_blocks = timeline
-            .get_rel_size(
-                req.rel,
-                Version::LsnRange(LsnRange {
-                    effective_lsn: lsn,
-                    request_lsn: req.hdr.request_lsn,
-                }),
-                ctx,
-            )
+            .get_rel_size(req.rel, Version::Lsn(lsn), ctx)
            .await?;

        Ok(PagestreamBeMessage::Nblocks(PagestreamNblocksResponse {
@@ -2198,15 +2181,7 @@ impl PageServerHandler {
        .await?;

        let total_blocks = timeline
-            .get_db_size(
-                DEFAULTTABLESPACE_OID,
-                req.dbnode,
-                Version::LsnRange(LsnRange {
-                    effective_lsn: lsn,
-                    request_lsn: req.hdr.request_lsn,
-                }),
-                ctx,
-            )
+            .get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, Version::Lsn(lsn), ctx)
            .await?;
        let db_size = total_blocks as i64 * BLCKSZ as i64;

@@ -2239,7 +2214,7 @@ impl PageServerHandler {
                // Ignore error (trace buffer may be full or tracer may have disconnected).
                _ = page_trace.try_send(PageTraceEvent {
                    key,
-                    effective_lsn: batch.lsn_range.effective_lsn,
+                    effective_lsn: batch.effective_request_lsn,
                    time,
                });
            }
@@ -2254,7 +2229,7 @@ impl PageServerHandler {
                    perf_instrument = true;
                }

-                req.lsn_range.effective_lsn
+                req.effective_request_lsn
            })
            .max()
            .expect("batch is never empty");
@@ -2308,7 +2283,7 @@ impl PageServerHandler {
                    (
                        &p.req.rel,
                        &p.req.blkno,
-                        p.lsn_range,
+                        p.effective_request_lsn,
                        p.ctx.attached_child(),
                    )
                }),
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -43,9 +43,7 @@ use crate::aux_file;
 use crate::context::{PerfInstrumentFutureExt, RequestContext, RequestContextBuilder};
 use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::metrics::{
-    RELSIZE_CACHE_MISSES_OLD, RELSIZE_LATEST_CACHE_ENTRIES, RELSIZE_LATEST_CACHE_HITS,
-    RELSIZE_LATEST_CACHE_MISSES, RELSIZE_SNAPSHOT_CACHE_ENTRIES, RELSIZE_SNAPSHOT_CACHE_HITS,
-    RELSIZE_SNAPSHOT_CACHE_MISSES,
+    RELSIZE_CACHE_ENTRIES, RELSIZE_CACHE_HITS, RELSIZE_CACHE_MISSES, RELSIZE_CACHE_MISSES_OLD,
 };
 use crate::span::{
    debug_assert_current_span_has_tenant_and_timeline_id,
@@ -92,28 +90,6 @@ pub enum LsnForTimestamp {
    NoData(Lsn),
 }

-/// Each request to page server contains LSN range: `not_modified_since..request_lsn`.
-/// See comments libs/pageserver_api/src/models.rs.
-/// Based on this range and `last_record_lsn` PS calculates `effective_lsn`.
-/// But to distinguish requests from primary and replicas we need also to pass `request_lsn`.
-#[derive(Debug, Clone, Copy, Default)]
-pub struct LsnRange {
-    pub effective_lsn: Lsn,
-    pub request_lsn: Lsn,
-}
-
-impl LsnRange {
-    pub fn at(lsn: Lsn) -> LsnRange {
-        LsnRange {
-            effective_lsn: lsn,
-            request_lsn: lsn,
-        }
-    }
-    pub fn is_latest(&self) -> bool {
-        self.request_lsn == Lsn::MAX
-    }
-}
-
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum CalculateLogicalSizeError {
    #[error("cancelled")]
@@ -226,13 +202,13 @@ impl Timeline {
        io_concurrency: IoConcurrency,
    ) -> Result<Bytes, PageReconstructError> {
        match version {
-            Version::LsnRange(lsns) => {
+            Version::Lsn(effective_lsn) => {
                let pages: smallvec::SmallVec<[_; 1]> = smallvec::smallvec![(tag, blknum)];
                let res = self
                    .get_rel_page_at_lsn_batched(
-                        pages
-                            .iter()
-                            .map(|(tag, blknum)| (tag, blknum, lsns, ctx.attached_child())),
+                        pages.iter().map(|(tag, blknum)| {
+                            (tag, blknum, effective_lsn, ctx.attached_child())
+                        }),
                        io_concurrency.clone(),
                        ctx,
                    )
@@ -270,7 +246,7 @@ impl Timeline {
    /// The ordering of the returned vec corresponds to the ordering of `pages`.
    pub(crate) async fn get_rel_page_at_lsn_batched(
        &self,
-        pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, LsnRange, RequestContext)>,
+        pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, Lsn, RequestContext)>,
        io_concurrency: IoConcurrency,
        ctx: &RequestContext,
    ) -> Vec<Result<Bytes, PageReconstructError>> {
@@ -289,7 +265,7 @@ impl Timeline {
        let mut req_keyspaces: HashMap<Lsn, KeySpaceRandomAccum> =
            HashMap::with_capacity(pages.len());

-        for (response_slot_idx, (tag, blknum, lsns, ctx)) in pages.enumerate() {
+        for (response_slot_idx, (tag, blknum, lsn, ctx)) in pages.enumerate() {
            if tag.relnode == 0 {
                result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
                    RelationError::InvalidRelnode.into(),
@@ -298,7 +274,7 @@ impl Timeline {
                slots_filled += 1;
                continue;
            }
-            let lsn = lsns.effective_lsn;
+
            let nblocks = {
                let ctx = RequestContextBuilder::from(&ctx)
                    .perf_span(|crnt_perf_span| {
@@ -313,7 +289,7 @@ impl Timeline {
                    .attached_child();

                match self
-                    .get_rel_size(*tag, Version::LsnRange(lsns), &ctx)
+                    .get_rel_size(*tag, Version::Lsn(lsn), &ctx)
                    .maybe_perf_instrument(&ctx, |crnt_perf_span| crnt_perf_span.clone())
                    .await
                {
@@ -494,7 +470,7 @@ impl Timeline {
            ));
        }

-        if let Some(nblocks) = self.get_cached_rel_size(&tag, version) {
+        if let Some(nblocks) = self.get_cached_rel_size(&tag, version.get_lsn()) {
            return Ok(nblocks);
        }

@@ -512,7 +488,7 @@ impl Timeline {
        let mut buf = version.get(self, key, ctx).await?;
        let nblocks = buf.get_u32_le();

-        self.update_cached_rel_size(tag, version, nblocks);
+        self.update_cached_rel_size(tag, version.get_lsn(), nblocks);

        Ok(nblocks)
    }
@@ -534,7 +510,7 @@ impl Timeline {
        }

        // first try to lookup relation in cache
-        if let Some(_nblocks) = self.get_cached_rel_size(&tag, version) {
+        if let Some(_nblocks) = self.get_cached_rel_size(&tag, version.get_lsn()) {
            return Ok(true);
        }
        // then check if the database was already initialized.
@@ -610,7 +586,7 @@ impl Timeline {
        // scan directory listing (new), merge with the old results
        let key_range = rel_tag_sparse_key_range(spcnode, dbnode);
        let io_concurrency = IoConcurrency::spawn_from_conf(
-            self.conf.get_vectored_concurrent_io,
+            self.conf,
            self.gate
                .enter()
                .map_err(|_| PageReconstructError::Cancelled)?,
@@ -656,7 +632,7 @@ impl Timeline {
    ) -> Result<Bytes, PageReconstructError> {
        assert!(self.tenant_shard_id.is_shard_zero());
        let n_blocks = self
-            .get_slru_segment_size(kind, segno, Version::at(lsn), ctx)
+            .get_slru_segment_size(kind, segno, Version::Lsn(lsn), ctx)
            .await?;

        let keyspace = KeySpace::single(
@@ -669,7 +645,7 @@ impl Timeline {
        );

        let io_concurrency = IoConcurrency::spawn_from_conf(
-            self.conf.get_vectored_concurrent_io,
+            self.conf,
            self.gate
                .enter()
                .map_err(|_| PageReconstructError::Cancelled)?,
@@ -891,11 +867,11 @@ impl Timeline {
        mut f: impl FnMut(TimestampTz) -> ControlFlow<T>,
    ) -> Result<T, PageReconstructError> {
        for segno in self
-            .list_slru_segments(SlruKind::Clog, Version::at(probe_lsn), ctx)
+            .list_slru_segments(SlruKind::Clog, Version::Lsn(probe_lsn), ctx)
            .await?
        {
            let nblocks = self
-                .get_slru_segment_size(SlruKind::Clog, segno, Version::at(probe_lsn), ctx)
+                .get_slru_segment_size(SlruKind::Clog, segno, Version::Lsn(probe_lsn), ctx)
                .await?;

            let keyspace = KeySpace::single(
@@ -909,7 +885,7 @@ impl Timeline {
            );

            let io_concurrency = IoConcurrency::spawn_from_conf(
-                self.conf.get_vectored_concurrent_io,
+                self.conf,
                self.gate
                    .enter()
                    .map_err(|_| PageReconstructError::Cancelled)?,
@@ -1161,7 +1137,7 @@ impl Timeline {
        let mut total_size: u64 = 0;
        for (spcnode, dbnode) in dbdir.dbdirs.keys() {
            for rel in self
-                .list_rels(*spcnode, *dbnode, Version::at(lsn), ctx)
+                .list_rels(*spcnode, *dbnode, Version::Lsn(lsn), ctx)
                .await?
            {
                if self.cancel.is_cancelled() {
@@ -1236,7 +1212,7 @@ impl Timeline {
            result.add_key(rel_dir_to_key(spcnode, dbnode));

            let mut rels: Vec<RelTag> = self
-                .list_rels(spcnode, dbnode, Version::at(lsn), ctx)
+                .list_rels(spcnode, dbnode, Version::Lsn(lsn), ctx)
                .await?
                .into_iter()
                .collect();
@@ -1353,75 +1329,59 @@ impl Timeline {
        Ok((dense_keyspace, sparse_keyspace))
    }

-    /// Get cached size of relation. There are two caches: one for primary updates, it captures the latest state of
-    /// of the timeline and snapshot cache, which key includes LSN and so can be used by replicas to get relation size
-    /// at the particular LSN (snapshot).
-    pub fn get_cached_rel_size(&self, tag: &RelTag, version: Version<'_>) -> Option<BlockNumber> {
-        let lsn = version.get_lsn();
-        {
-            let rel_size_cache = self.rel_size_latest_cache.read().unwrap();
-            if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) {
-                if lsn >= *cached_lsn {
-                    RELSIZE_LATEST_CACHE_HITS.inc();
-                    return Some(*nblocks);
-                }
-                RELSIZE_CACHE_MISSES_OLD.inc();
+    /// Get the cached size of a relation if it has not been updated after the specified LSN
+    pub fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber> {
+        let rel_size_cache = self.rel_size_cache.read().unwrap();
+        if let Some((cached_lsn, nblocks)) = rel_size_cache.map.get(tag) {
+            if lsn >= *cached_lsn {
+                RELSIZE_CACHE_HITS.inc();
+                return Some(*nblocks);
            }
+            RELSIZE_CACHE_MISSES_OLD.inc();
        }
-        {
-            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();
-            if let Some(nblock) = rel_size_cache.get(&(lsn, *tag)) {
-                RELSIZE_SNAPSHOT_CACHE_HITS.inc();
-                return Some(*nblock);
-            }
-        }
-        if version.is_latest() {
-            RELSIZE_LATEST_CACHE_MISSES.inc();
-        } else {
-            RELSIZE_SNAPSHOT_CACHE_MISSES.inc();
-        }
+        RELSIZE_CACHE_MISSES.inc();
        None
    }

    /// Update cached relation size if there is no more recent update
-    pub fn update_cached_rel_size(&self, tag: RelTag, version: Version<'_>, nblocks: BlockNumber) {
-        let lsn = version.get_lsn();
-        if version.is_latest() {
-            let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();
-            match rel_size_cache.entry(tag) {
-                hash_map::Entry::Occupied(mut entry) => {
-                    let cached_lsn = entry.get_mut();
-                    if lsn >= cached_lsn.0 {
-                        *cached_lsn = (lsn, nblocks);
-                    }
-                }
-                hash_map::Entry::Vacant(entry) => {
-                    entry.insert((lsn, nblocks));
-                    RELSIZE_LATEST_CACHE_ENTRIES.inc();
+    pub fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
+        let mut rel_size_cache = self.rel_size_cache.write().unwrap();
+
+        if lsn < rel_size_cache.complete_as_of {
+            // Do not cache old values. It's safe to cache the size on read, as long as
+            // the read was at an LSN since we started the WAL ingestion. Reasoning: we
+            // never evict values from the cache, so if the relation size changed after
+            // 'lsn', the new value is already in the cache.
+            return;
+        }
+
+        match rel_size_cache.map.entry(tag) {
+            hash_map::Entry::Occupied(mut entry) => {
+                let cached_lsn = entry.get_mut();
+                if lsn >= cached_lsn.0 {
+                    *cached_lsn = (lsn, nblocks);
                }
            }
-        } else {
-            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();
-            if rel_size_cache.capacity() != 0 {
-                rel_size_cache.insert((lsn, tag), nblocks);
-                RELSIZE_SNAPSHOT_CACHE_ENTRIES.set(rel_size_cache.len() as u64);
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert((lsn, nblocks));
+                RELSIZE_CACHE_ENTRIES.inc();
            }
        }
    }

    /// Store cached relation size
    pub fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
-        let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();
-        if rel_size_cache.insert(tag, (lsn, nblocks)).is_none() {
-            RELSIZE_LATEST_CACHE_ENTRIES.inc();
+        let mut rel_size_cache = self.rel_size_cache.write().unwrap();
+        if rel_size_cache.map.insert(tag, (lsn, nblocks)).is_none() {
+            RELSIZE_CACHE_ENTRIES.inc();
        }
    }

    /// Remove cached relation size
    pub fn remove_cached_rel_size(&self, tag: &RelTag) {
-        let mut rel_size_cache = self.rel_size_latest_cache.write().unwrap();
-        if rel_size_cache.remove(tag).is_some() {
-            RELSIZE_LATEST_CACHE_ENTRIES.dec();
+        let mut rel_size_cache = self.rel_size_cache.write().unwrap();
+        if rel_size_cache.map.remove(tag).is_some() {
+            RELSIZE_CACHE_ENTRIES.dec();
        }
    }
 }
@@ -1625,10 +1585,7 @@ impl DatadirModification<'_> {
        //       check the cache too. This is because eagerly checking the cache results in
        //       less work overall and 10% better performance. It's more work on cache miss
        //       but cache miss is rare.
-        if let Some(nblocks) = self
-            .tline
-            .get_cached_rel_size(&rel, Version::Modified(self))
-        {
+        if let Some(nblocks) = self.tline.get_cached_rel_size(&rel, self.get_lsn()) {
            Ok(nblocks)
        } else if !self
            .tline
@@ -2710,7 +2667,7 @@ pub struct DatadirModificationStats {
 /// timeline to not miss the latest updates.
 #[derive(Clone, Copy)]
 pub enum Version<'a> {
-    LsnRange(LsnRange),
+    Lsn(Lsn),
    Modified(&'a DatadirModification<'a>),
 }

@@ -2722,7 +2679,7 @@ impl Version<'_> {
        ctx: &RequestContext,
    ) -> Result<Bytes, PageReconstructError> {
        match self {
-            Version::LsnRange(lsns) => timeline.get(key, lsns.effective_lsn, ctx).await,
+            Version::Lsn(lsn) => timeline.get(key, *lsn, ctx).await,
            Version::Modified(modification) => modification.get(key, ctx).await,
        }
    }
@@ -2744,26 +2701,12 @@ impl Version<'_> {
        }
    }

-    pub fn is_latest(&self) -> bool {
+    fn get_lsn(&self) -> Lsn {
        match self {
-            Version::LsnRange(lsns) => lsns.is_latest(),
-            Version::Modified(_) => true,
-        }
-    }
-
-    pub fn get_lsn(&self) -> Lsn {
-        match self {
-            Version::LsnRange(lsns) => lsns.effective_lsn,
+            Version::Lsn(lsn) => *lsn,
            Version::Modified(modification) => modification.lsn,
        }
    }
-
-    pub fn at(lsn: Lsn) -> Self {
-        Version::LsnRange(LsnRange {
-            effective_lsn: lsn,
-            request_lsn: lsn,
-        })
-    }
 }

 //--- Metadata structs stored in key-value pairs in the repository.
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -86,8 +86,8 @@ use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
 use crate::l0_flush::L0FlushGlobalState;
 use crate::metrics::{
    BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS,
-    INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_OFFLOADED_TIMELINES,
-    TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics,
+    INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_STATE_METRIC,
+    TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics,
 };
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::LocationMode;
@@ -3348,13 +3348,6 @@ impl TenantShard {
                activated_timelines += 1;
            }

-            let tid = self.tenant_shard_id.tenant_id.to_string();
-            let shard_id = self.tenant_shard_id.shard_slug().to_string();
-            let offloaded_timeline_count = timelines_offloaded_accessor.len();
-            TENANT_OFFLOADED_TIMELINES
-                .with_label_values(&[&tid, &shard_id])
-                .set(offloaded_timeline_count as u64);
-
            self.state.send_modify(move |current_state| {
                assert!(
                    matches!(current_state, TenantState::Activating(_)),
@@ -4594,7 +4587,7 @@ impl TenantShard {

            target.cutoffs = GcCutoffs {
                space: space_cutoff,
-                time: None,
+                time: Lsn::INVALID,
            };
        }
    }
@@ -4678,7 +4671,7 @@ impl TenantShard {
                if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
                    if let Some(ancestor_gc_cutoffs) = gc_cutoffs.get(&ancestor_id) {
                        target.within_ancestor_pitr =
-                            Some(timeline.get_ancestor_lsn()) >= ancestor_gc_cutoffs.time;
+                            timeline.get_ancestor_lsn() >= ancestor_gc_cutoffs.time;
                    }
                }

@@ -4691,15 +4684,13 @@ impl TenantShard {
                    } else {
                        0
                    });
-                if let Some(time_cutoff) = target.cutoffs.time {
-                    timeline.metrics.pitr_history_size.set(
-                        timeline
-                            .get_last_record_lsn()
-                            .checked_sub(time_cutoff)
-                            .unwrap_or_default()
-                            .0,
-                    );
-                }
+                timeline.metrics.pitr_history_size.set(
+                    timeline
+                        .get_last_record_lsn()
+                        .checked_sub(target.cutoffs.time)
+                        .unwrap_or(Lsn(0))
+                        .0,
+                );

                // Apply the cutoffs we found to the Timeline's GcInfo.  Why might we _not_ have cutoffs for a timeline?
                // - this timeline was created while we were finding cutoffs
@@ -4708,8 +4699,8 @@ impl TenantShard {
                    let original_cutoffs = target.cutoffs.clone();
                    // GC cutoffs should never go back
                    target.cutoffs = GcCutoffs {
-                        space: cutoffs.space.max(original_cutoffs.space),
-                        time: cutoffs.time.max(original_cutoffs.time),
+                        space: Lsn(cutoffs.space.0.max(original_cutoffs.space.0)),
+                        time: Lsn(cutoffs.time.0.max(original_cutoffs.time.0)),
                    }
                }
            }
@@ -5569,14 +5560,6 @@ impl TenantShard {
            }
        }

-        // Update metrics
-        let tid = self.tenant_shard_id.to_string();
-        let shard_id = self.tenant_shard_id.shard_slug().to_string();
-        let set_key = &[tid.as_str(), shard_id.as_str()][..];
-        TENANT_OFFLOADED_TIMELINES
-            .with_label_values(set_key)
-            .set(manifest.offloaded_timelines.len() as u64);
-
        // Upload the manifest. Remote storage does no retries internally, so retry here.
        match backoff::retry(
            || async {
@@ -8613,10 +8596,8 @@ mod tests {
        lsn: Lsn,
        ctx: &RequestContext,
    ) -> Result<Option<Bytes>, GetVectoredError> {
-        let io_concurrency = IoConcurrency::spawn_from_conf(
-            tline.conf.get_vectored_concurrent_io,
-            tline.gate.enter().unwrap(),
-        );
+        let io_concurrency =
+            IoConcurrency::spawn_from_conf(tline.conf, tline.gate.enter().unwrap());
        let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);
        let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);
        let mut res = tline
@@ -8954,7 +8935,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Some(Lsn(0x30));
+            guard.cutoffs.time = Lsn(0x30);
            guard.cutoffs.space = Lsn(0x30);
        }

@@ -9062,7 +9043,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Some(Lsn(0x40));
+            guard.cutoffs.time = Lsn(0x40);
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
@@ -9480,7 +9461,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -9564,7 +9545,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Some(Lsn(0x40));
+            guard.cutoffs.time = Lsn(0x40);
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
@@ -10035,7 +10016,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -10098,7 +10079,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time.unwrap_or_default()
+                gc_info.cutoffs.time
            };
            for idx in 0..10 {
                assert_eq!(
@@ -10176,7 +10157,7 @@ mod tests {
                .await;
            // Update GC info
            let mut guard = tline.gc_info.write().unwrap();
-            guard.cutoffs.time = Some(Lsn(0x38));
+            guard.cutoffs.time = Lsn(0x38);
            guard.cutoffs.space = Lsn(0x38);
        }
        tline
@@ -10284,7 +10265,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -10347,7 +10328,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time.unwrap_or_default()
+                gc_info.cutoffs.time
            };
            for idx in 0..10 {
                assert_eq!(
@@ -10533,7 +10514,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x10)),
+                    time: Lsn(0x10),
                    space: Lsn(0x10),
                },
                leases: Default::default(),
@@ -10553,7 +10534,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x50)),
+                    time: Lsn(0x50),
                    space: Lsn(0x50),
                },
                leases: Default::default(),
@@ -11274,7 +11255,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No)],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11663,7 +11644,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11726,7 +11707,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time.unwrap_or_default()
+                gc_info.cutoffs.time
            };
            for idx in 0..10 {
                assert_eq!(
@@ -11915,7 +11896,7 @@ mod tests {
                    (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
                ],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
@@ -11978,7 +11959,7 @@ mod tests {
        let verify_result = || async {
            let gc_horizon = {
                let gc_info = tline.gc_info.read().unwrap();
-                gc_info.cutoffs.time.unwrap_or_default()
+                gc_info.cutoffs.time
            };
            for idx in 0..10 {
                assert_eq!(
@@ -12241,7 +12222,7 @@ mod tests {
            *guard = GcInfo {
                retain_lsns: vec![],
                cutoffs: GcCutoffs {
-                    time: Some(Lsn(0x30)),
+                    time: Lsn(0x30),
                    space: Lsn(0x30),
                },
                leases: Default::default(),
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -235,7 +235,7 @@ pub(super) async fn gather_inputs(
        // than our internal space cutoff.  This means that if someone drops a database and waits for their
        // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
        // the space cutoff.
-        let mut next_pitr_cutoff = gc_info.cutoffs.time.unwrap_or_default(); // TODO: handle None
+        let mut next_pitr_cutoff = gc_info.cutoffs.time;

        // If the caller provided a shorter retention period, use that instead of the GC cutoff.
        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -31,7 +31,6 @@ pub use inmemory_layer::InMemoryLayer;
 pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
 pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
 pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
-use pageserver_api::config::GetVectoredConcurrentIo;
 use pageserver_api::key::Key;
 use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
 use pageserver_api::record::NeonWalRecord;
@@ -44,6 +43,7 @@ use self::inmemory_layer::InMemoryLayerFileId;
 use super::PageReconstructError;
 use super::layer_map::InMemoryLayerDesc;
 use super::timeline::{GetVectoredError, ReadPath};
+use crate::config::PageServerConf;
 use crate::context::{
    AccessStatsBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
 };
@@ -318,10 +318,11 @@ impl IoConcurrency {
    }

    pub(crate) fn spawn_from_conf(
-        conf: GetVectoredConcurrentIo,
+        conf: &'static PageServerConf,
        gate_guard: GateGuard,
    ) -> IoConcurrency {
-        let selected = match conf {
+        use pageserver_api::config::GetVectoredConcurrentIo;
+        let selected = match conf.get_vectored_concurrent_io {
            GetVectoredConcurrentIo::Sequential => SelectedIoConcurrency::Sequential,
            GetVectoredConcurrentIo::SidecarTask => SelectedIoConcurrency::SidecarTask(gate_guard),
        };
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -63,28 +63,7 @@ pub struct InMemoryLayer {

    opened_at: Instant,

-    /// All versions of all pages in the layer are kept here. Indexed
-    /// by block number and LSN. The [`IndexEntry`] is an offset into the
-    /// ephemeral file where the page version is stored.
-    ///
-    /// We use a separate lock for the index to reduce the critical section
-    /// during which reads cannot be planned.
-    ///
-    /// If you need access to both the index and the underlying file at the same time,
-    /// respect the following locking order to avoid deadlocks:
-    /// 1. [`InMemoryLayer::inner`]
-    /// 2. [`InMemoryLayer::index`]
-    ///
-    /// Note that the file backing [`InMemoryLayer::inner`] is append-only,
-    /// so it is not necessary to hold simultaneous locks on index.
-    /// This avoids holding index locks across IO, and is crucial for avoiding read tail latency.
-    /// In particular:
-    /// 1. It is safe to read and release [`InMemoryLayer::index`] before locking and reading from [`InMemoryLayer::inner`].
-    /// 2. It is safe to write and release [`InMemoryLayer::inner`] before locking and updating [`InMemoryLayer::index`].
-    index: RwLock<BTreeMap<CompactKey, VecMap<Lsn, IndexEntry>>>,
-
-    /// The above fields never change, except for `end_lsn`, which is only set once,
-    /// and `index` (see rationale there).
+    /// The above fields never change, except for `end_lsn`, which is only set once.
    /// All other changing parts are in `inner`, and protected by a mutex.
    inner: RwLock<InMemoryLayerInner>,

@@ -102,6 +81,11 @@ impl std::fmt::Debug for InMemoryLayer {
 }

 pub struct InMemoryLayerInner {
+    /// All versions of all pages in the layer are kept here. Indexed
+    /// by block number and LSN. The [`IndexEntry`] is an offset into the
+    /// ephemeral file where the page version is stored.
+    index: BTreeMap<CompactKey, VecMap<Lsn, IndexEntry>>,
+
    /// The values are stored in a serialized format in this file.
    /// Each serialized Value is preceded by a 'u32' length field.
    /// PerSeg::page_versions map stores offsets into this file.
@@ -121,7 +105,7 @@ const MAX_SUPPORTED_BLOB_LEN_BITS: usize = {
    trailing_ones
 };

-/// See [`InMemoryLayer::index`].
+/// See [`InMemoryLayerInner::index`].
 ///
 /// For memory efficiency, the data is packed into a u64.
 ///
@@ -441,7 +425,7 @@ impl InMemoryLayer {
            .page_content_kind(PageContentKind::InMemoryLayer)
            .attached_child();

-        let index = self.index.read().await;
+        let inner = self.inner.read().await;

        struct ValueRead {
            entry_lsn: Lsn,
@@ -451,7 +435,10 @@ impl InMemoryLayer {
        let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();

        for range in keyspace.ranges.iter() {
-            for (key, vec_map) in index.range(range.start.to_compact()..range.end.to_compact()) {
+            for (key, vec_map) in inner
+                .index
+                .range(range.start.to_compact()..range.end.to_compact())
+            {
                let key = Key::from_compact(*key);
                let slice = vec_map.slice_range(lsn_range.clone());

@@ -479,7 +466,7 @@ impl InMemoryLayer {
                }
            }
        }
-        drop(index); // release the lock before we spawn the IO; if it's serial-mode IO we will deadlock on the read().await below
+        drop(inner); // release the lock before we spawn the IO; if it's serial-mode IO we will deadlock on the read().await below
        let read_from = Arc::clone(self);
        let read_ctx = ctx.attached_child();
        reconstruct_state
@@ -586,8 +573,8 @@ impl InMemoryLayer {
            start_lsn,
            end_lsn: OnceLock::new(),
            opened_at: Instant::now(),
-            index: RwLock::new(BTreeMap::new()),
            inner: RwLock::new(InMemoryLayerInner {
+                index: BTreeMap::new(),
                file,
                resource_units: GlobalResourceUnits::new(),
            }),
@@ -605,39 +592,31 @@ impl InMemoryLayer {
        serialized_batch: SerializedValueBatch,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        let (base_offset, metadata) = {
-            let mut inner = self.inner.write().await;
-            self.assert_writable();
+        let mut inner = self.inner.write().await;
+        self.assert_writable();

-            let base_offset = inner.file.len();
+        let base_offset = inner.file.len();

-            let SerializedValueBatch {
-                raw,
-                metadata,
-                max_lsn: _,
-                len: _,
-            } = serialized_batch;
+        let SerializedValueBatch {
+            raw,
+            metadata,
+            max_lsn: _,
+            len: _,
+        } = serialized_batch;

-            // Write the batch to the file
-            inner.file.write_raw(&raw, ctx).await?;
-            let new_size = inner.file.len();
+        // Write the batch to the file
+        inner.file.write_raw(&raw, ctx).await?;
+        let new_size = inner.file.len();

-            let expected_new_len = base_offset
-                .checked_add(raw.len().into_u64())
-                // write_raw would error if we were to overflow u64.
-                // also IndexEntry and higher levels in
-                //the code don't allow the file to grow that large
-                .unwrap();
-            assert_eq!(new_size, expected_new_len);
-
-            inner.resource_units.maybe_publish_size(new_size);
-
-            (base_offset, metadata)
-        };
+        let expected_new_len = base_offset
+            .checked_add(raw.len().into_u64())
+            // write_raw would error if we were to overflow u64.
+            // Also, IndexEntry and higher levels in
+            // the code don't allow the file to grow that large.
+            .unwrap();
+        assert_eq!(new_size, expected_new_len);

        // Update the index with the new entries
-        let mut index = self.index.write().await;
-
        for meta in metadata {
            let SerializedValueMeta {
                key,
@@ -660,7 +639,7 @@ impl InMemoryLayer {
                will_init,
            })?;

-            let vec_map = index.entry(key).or_default();
+            let vec_map = inner.index.entry(key).or_default();
            let old = vec_map.append_or_update_last(lsn, index_entry).unwrap().0;
            if old.is_some() {
                // This should not break anything, but is unexpected: ingestion code aims to filter out
@@ -679,6 +658,8 @@ impl InMemoryLayer {
            );
        }

+        inner.resource_units.maybe_publish_size(new_size);
+
        Ok(())
    }

@@ -699,18 +680,6 @@ impl InMemoryLayer {

    /// Records the end_lsn for non-dropped layers.
    /// `end_lsn` is exclusive
-    ///
-    /// A note on locking:
-    /// The current API of [`InMemoryLayer`] does not ensure that there's no ongoing
-    /// writes while freezing the layer. This is enforced at a higher level via
-    /// [`crate::tenant::Timeline::write_lock`]. Freeze might be called via two code paths:
-    /// 1. Via the active [`crate::tenant::timeline::TimelineWriter`]. This holds the
-    ///    Timeline::write_lock for its lifetime. The rolling is handled in
-    ///    [`crate::tenant::timeline::TimelineWriter::put_batch`]. It's a &mut self function
-    ///    so can't be called from different threads.
-    /// 2. In the background via [`crate::tenant::Timeline::maybe_freeze_ephemeral_layer`].
-    ///    This only proceeds if try_lock on Timeline::write_lock succeeds (i.e. there's no active writer),
-    ///    hence there can be no concurrent writes
    pub async fn freeze(&self, end_lsn: Lsn) {
        assert!(
            self.start_lsn < end_lsn,
@@ -731,8 +700,8 @@ impl InMemoryLayer {

        #[cfg(debug_assertions)]
        {
-            let index = self.index.read().await;
-            for vec_map in index.values() {
+            let inner = self.inner.write().await;
+            for vec_map in inner.index.values() {
                for (lsn, _) in vec_map.as_slice() {
                    assert!(*lsn < end_lsn);
                }
@@ -755,11 +724,14 @@ impl InMemoryLayer {
    ) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {
        // Grab the lock in read-mode. We hold it over the I/O, but because this
        // layer is not writeable anymore, no one should be trying to acquire the
-        // write lock on it, so we shouldn't block anyone. See the comment on
-        // [`InMemoryLayer::freeze`] to understand how locking between the append path
-        // and layer flushing works.
+        // write lock on it, so we shouldn't block anyone. There's one exception
+        // though: another thread might have grabbed a reference to this layer
+        // in `get_layer_for_write` just before the checkpointer called
+        // `freeze`, and then `write_to_disk` on it. When the thread gets the
+        // lock, it will see that it's not writeable anymore and retry, but it
+        // would have to wait until we release it. That race condition is very
+        // rare though, so we just accept the potential latency hit for now.
        let inner = self.inner.read().await;
-        let index = self.index.read().await;

        use l0_flush::Inner;
        let _concurrency_permit = match l0_flush_global_state {
@@ -771,9 +743,13 @@ impl InMemoryLayer {
        let key_count = if let Some(key_range) = key_range {
            let key_range = key_range.start.to_compact()..key_range.end.to_compact();

-            index.iter().filter(|(k, _)| key_range.contains(k)).count()
+            inner
+                .index
+                .iter()
+                .filter(|(k, _)| key_range.contains(k))
+                .count()
        } else {
-            index.len()
+            inner.index.len()
        };
        if key_count == 0 {
            return Ok(None);
@@ -796,7 +772,7 @@ impl InMemoryLayer {
                let file_contents = inner.file.load_to_io_buf(ctx).await?;
                let file_contents = file_contents.freeze();

-                for (key, vec_map) in index.iter() {
+                for (key, vec_map) in inner.index.iter() {
                    // Write all page versions
                    for (lsn, entry) in vec_map
                        .as_slice()
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -14,7 +14,6 @@ pub mod span;
 pub mod uninit;
 mod walreceiver;

-use hashlink::LruCache;
 use std::array;
 use std::cmp::{max, min};
 use std::collections::btree_map::Entry;
@@ -198,6 +197,16 @@ pub struct TimelineResources {
    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
 }

+/// The relation size cache caches relation sizes at the end of the timeline. It speeds up WAL
+/// ingestion considerably, because WAL ingestion needs to check on most records if the record
+/// implicitly extends the relation.  At startup, `complete_as_of` is initialized to the current end
+/// of the timeline (disk_consistent_lsn).  It's used on reads of relation sizes to check if the
+/// value can be used to also update the cache, see [`Timeline::update_cached_rel_size`].
+pub(crate) struct RelSizeCache {
+    pub(crate) complete_as_of: Lsn,
+    pub(crate) map: HashMap<RelTag, (Lsn, BlockNumber)>,
+}
+
 pub struct Timeline {
    pub(crate) conf: &'static PageServerConf,
    tenant_conf: Arc<ArcSwap<AttachedTenantConf>>,
@@ -356,8 +365,7 @@ pub struct Timeline {
    pub walreceiver: Mutex<Option<WalReceiver>>,

    /// Relation size cache
-    pub(crate) rel_size_latest_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
-    pub(crate) rel_size_snapshot_cache: Mutex<LruCache<(Lsn, RelTag), BlockNumber>>,
+    pub(crate) rel_size_cache: RwLock<RelSizeCache>,

    download_all_remote_layers_task_info: RwLock<Option<DownloadRemoteLayersTaskInfo>>,

@@ -529,24 +537,29 @@ impl GcInfo {
 /// The `GcInfo` component describing which Lsns need to be retained.  Functionally, this
 /// is a single number (the oldest LSN which we must retain), but it internally distinguishes
 /// between time-based and space-based retention for observability and consumption metrics purposes.
-#[derive(Clone, Debug, Default)]
+#[derive(Debug, Clone)]
 pub(crate) struct GcCutoffs {
    /// Calculated from the [`pageserver_api::models::TenantConfig::gc_horizon`], this LSN indicates how much
    /// history we must keep to retain a specified number of bytes of WAL.
    pub(crate) space: Lsn,

-    /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates
-    /// how much history we must keep to enable reading back at least the PITR interval duration.
-    ///
-    /// None indicates that the PITR cutoff has not been computed. A PITR interval of 0 will yield
-    /// Some(last_record_lsn).
-    pub(crate) time: Option<Lsn>,
+    /// Calculated from [`pageserver_api::models::TenantConfig::pitr_interval`], this LSN indicates how much
+    /// history we must keep to enable reading back at least the PITR interval duration.
+    pub(crate) time: Lsn,
+}
+
+impl Default for GcCutoffs {
+    fn default() -> Self {
+        Self {
+            space: Lsn::INVALID,
+            time: Lsn::INVALID,
+        }
+    }
 }

 impl GcCutoffs {
    fn select_min(&self) -> Lsn {
-        // NB: if we haven't computed the PITR cutoff yet, we can't GC anything.
-        self.space.min(self.time.unwrap_or_default())
+        std::cmp::min(self.space, self.time)
    }
 }

@@ -1083,14 +1096,11 @@ impl Timeline {
    /// Get the bytes written since the PITR cutoff on this branch, and
    /// whether this branch's ancestor_lsn is within its parent's PITR.
    pub(crate) fn get_pitr_history_stats(&self) -> (u64, bool) {
-        // TODO: for backwards compatibility, we return the full history back to 0 when the PITR
-        // cutoff has not yet been initialized. This should return None instead, but this is exposed
-        // in external HTTP APIs and callers may not handle a null value.
        let gc_info = self.gc_info.read().unwrap();
        let history = self
            .get_last_record_lsn()
-            .checked_sub(gc_info.cutoffs.time.unwrap_or_default())
-            .unwrap_or_default()
+            .checked_sub(gc_info.cutoffs.time)
+            .unwrap_or(Lsn(0))
            .0;
        (history, gc_info.within_ancestor_pitr)
    }
@@ -1100,10 +1110,9 @@ impl Timeline {
        self.applied_gc_cutoff_lsn.read()
    }

-    /// Read timeline's planned GC cutoff: this is the logical end of history that users are allowed
-    /// to read (based on configured PITR), even if physically we have more history. Returns None
-    /// if the PITR cutoff has not yet been initialized.
-    pub(crate) fn get_gc_cutoff_lsn(&self) -> Option<Lsn> {
+    /// Read timeline's planned GC cutoff: this is the logical end of history that users
+    /// are allowed to read (based on configured PITR), even if physically we have more history.
+    pub(crate) fn get_gc_cutoff_lsn(&self) -> Lsn {
        self.gc_info.read().unwrap().cutoffs.time
    }

@@ -2811,13 +2820,6 @@ impl Timeline {

            self.remote_client.update_config(&new_conf.location);

-            let mut rel_size_cache = self.rel_size_snapshot_cache.lock().unwrap();
-            if let Some(new_capacity) = new_conf.tenant_conf.relsize_snapshot_cache_capacity {
-                if new_capacity != rel_size_cache.capacity() {
-                    rel_size_cache.set_capacity(new_capacity);
-                }
-            }
-
            self.metrics
                .evictions_with_low_residence_duration
                .write()
@@ -2876,14 +2878,6 @@ impl Timeline {
            ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn(), is_offloaded);
        }

-        let relsize_snapshot_cache_capacity = {
-            let loaded_tenant_conf = tenant_conf.load();
-            loaded_tenant_conf
-                .tenant_conf
-                .relsize_snapshot_cache_capacity
-                .unwrap_or(conf.default_tenant_conf.relsize_snapshot_cache_capacity)
-        };
-
        Arc::new_cyclic(|myself| {
            let metrics = Arc::new(TimelineMetrics::new(
                &tenant_shard_id,
@@ -2975,8 +2969,10 @@ impl Timeline {
                last_image_layer_creation_check_instant: Mutex::new(None),

                last_received_wal: Mutex::new(None),
-                rel_size_latest_cache: RwLock::new(HashMap::new()),
-                rel_size_snapshot_cache: Mutex::new(LruCache::new(relsize_snapshot_cache_capacity)),
+                rel_size_cache: RwLock::new(RelSizeCache {
+                    complete_as_of: disk_consistent_lsn,
+                    map: HashMap::new(),
+                }),

                download_all_remote_layers_task_info: RwLock::new(None),

@@ -3534,7 +3530,7 @@ impl Timeline {
                };

                let io_concurrency = IoConcurrency::spawn_from_conf(
-                    self_ref.conf.get_vectored_concurrent_io,
+                    self_ref.conf,
                    self_ref
                        .gate
                        .enter()
@@ -5563,7 +5559,7 @@ impl Timeline {
            });

            let io_concurrency = IoConcurrency::spawn_from_conf(
-                self.conf.get_vectored_concurrent_io,
+                self.conf,
                self.gate
                    .enter()
                    .map_err(|_| CreateImageLayersError::Cancelled)?,
@@ -6234,12 +6230,14 @@ impl Timeline {

        pausable_failpoint!("Timeline::find_gc_cutoffs-pausable");

-        if cfg!(test) && pitr == Duration::ZERO {
+        if cfg!(test) {
            // Unit tests which specify zero PITR interval expect to avoid doing any I/O for timestamp lookup
-            return Ok(GcCutoffs {
-                time: Some(self.get_last_record_lsn()),
-                space: space_cutoff,
-            });
+            if pitr == Duration::ZERO {
+                return Ok(GcCutoffs {
+                    time: self.get_last_record_lsn(),
+                    space: space_cutoff,
+                });
+            }
        }

        // Calculate a time-based limit on how much to retain:
@@ -6253,14 +6251,14 @@ impl Timeline {
                // PITR is not set. Retain the size-based limit, or the default time retention,
                // whichever requires less data.
                GcCutoffs {
-                    time: Some(self.get_last_record_lsn()),
+                    time: self.get_last_record_lsn(),
                    space: std::cmp::max(time_cutoff, space_cutoff),
                }
            }
            (Duration::ZERO, None) => {
                // PITR is not set, and time lookup failed
                GcCutoffs {
-                    time: Some(self.get_last_record_lsn()),
+                    time: self.get_last_record_lsn(),
                    space: space_cutoff,
                }
            }
@@ -6268,7 +6266,7 @@ impl Timeline {
                // PITR interval is set & we didn't look up a timestamp successfully.  Conservatively assume PITR
                // cannot advance beyond what was already GC'd, and respect space-based retention
                GcCutoffs {
-                    time: Some(*self.get_applied_gc_cutoff_lsn()),
+                    time: *self.get_applied_gc_cutoff_lsn(),
                    space: space_cutoff,
                }
            }
@@ -6276,7 +6274,7 @@ impl Timeline {
                // PITR interval is set and we looked up timestamp successfully.  Ignore
                // size based retention and make time cutoff authoritative
                GcCutoffs {
-                    time: Some(time_cutoff),
+                    time: time_cutoff,
                    space: time_cutoff,
                }
            }
@@ -6329,7 +6327,7 @@ impl Timeline {
            )
        };

-        let mut new_gc_cutoff = space_cutoff.min(time_cutoff.unwrap_or_default());
+        let mut new_gc_cutoff = Lsn::min(space_cutoff, time_cutoff);
        let standby_horizon = self.standby_horizon.load();
        // Hold GC for the standby, but as a safety guard do it only within some
        // reasonable lag.
@@ -6378,7 +6376,7 @@ impl Timeline {
    async fn gc_timeline(
        &self,
        space_cutoff: Lsn,
-        time_cutoff: Option<Lsn>, // None if uninitialized
+        time_cutoff: Lsn,
        retain_lsns: Vec<Lsn>,
        max_lsn_with_valid_lease: Option<Lsn>,
        new_gc_cutoff: Lsn,
@@ -6397,12 +6395,6 @@ impl Timeline {
            return Ok(result);
        }

-        let Some(time_cutoff) = time_cutoff else {
-            // The GC cutoff should have been computed by now, but let's be defensive.
-            info!("Nothing to GC: time_cutoff not yet computed");
-            return Ok(result);
-        };
-
        // We need to ensure that no one tries to read page versions or create
        // branches at a point before latest_gc_cutoff_lsn. See branch_timeline()
        // for details. This will block until the old value is no longer in use.
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1526,7 +1526,7 @@ impl Timeline {
        info!(
            "starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \
                checked {layers_checked}/{layers_total} layers \
-                (latest_gc_cutoff={} pitr_cutoff={:?})",
+                (latest_gc_cutoff={} pitr_cutoff={})",
            layers_to_rewrite.len(),
            drop_layers.len(),
            *latest_gc_cutoff,
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -188,7 +188,7 @@ pub(crate) async fn generate_tombstone_image_layer(
        "removing non-inherited keys by writing an image layer with tombstones at the detach LSN"
    );
    let io_concurrency = IoConcurrency::spawn_from_conf(
-        detached.conf.get_vectored_concurrent_io,
+        detached.conf,
        detached.gate.enter().map_err(|_| Error::ShuttingDown)?,
    );
    let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -1684,31 +1684,31 @@ mod tests {
        // The relation was created at LSN 2, not visible at LSN 1 yet.
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x10)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx)
                .await?,
            false
        );
        assert!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x10)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx)
                .await
                .is_err()
        );
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            true
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            1
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x50)), &ctx)
                .await?,
            3
        );
@@ -1719,7 +1719,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x20)),
+                    Version::Lsn(Lsn(0x20)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1733,7 +1733,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x30)),
+                    Version::Lsn(Lsn(0x30)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1747,7 +1747,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x40)),
+                    Version::Lsn(Lsn(0x40)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1760,7 +1760,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    1,
-                    Version::at(Lsn(0x40)),
+                    Version::Lsn(Lsn(0x40)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1774,7 +1774,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x50)),
+                    Version::Lsn(Lsn(0x50)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1787,7 +1787,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    1,
-                    Version::at(Lsn(0x50)),
+                    Version::Lsn(Lsn(0x50)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1800,7 +1800,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    2,
-                    Version::at(Lsn(0x50)),
+                    Version::Lsn(Lsn(0x50)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1820,7 +1820,7 @@ mod tests {
        // Check reported size and contents after truncation
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x60)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x60)), &ctx)
                .await?,
            2
        );
@@ -1829,7 +1829,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x60)),
+                    Version::Lsn(Lsn(0x60)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1842,7 +1842,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    1,
-                    Version::at(Lsn(0x60)),
+                    Version::Lsn(Lsn(0x60)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1854,7 +1854,7 @@ mod tests {
        // should still see the truncated block with older LSN
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x50)), &ctx)
                .await?,
            3
        );
@@ -1863,7 +1863,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    2,
-                    Version::at(Lsn(0x50)),
+                    Version::Lsn(Lsn(0x50)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1880,7 +1880,7 @@ mod tests {
        m.commit(&ctx).await?;
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x68)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x68)), &ctx)
                .await?,
            0
        );
@@ -1893,7 +1893,7 @@ mod tests {
        m.commit(&ctx).await?;
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x70)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x70)), &ctx)
                .await?,
            2
        );
@@ -1902,7 +1902,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    0,
-                    Version::at(Lsn(0x70)),
+                    Version::Lsn(Lsn(0x70)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1915,7 +1915,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    1,
-                    Version::at(Lsn(0x70)),
+                    Version::Lsn(Lsn(0x70)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1932,7 +1932,7 @@ mod tests {
        m.commit(&ctx).await?;
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x80)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x80)), &ctx)
                .await?,
            1501
        );
@@ -1942,7 +1942,7 @@ mod tests {
                    .get_rel_page_at_lsn(
                        TESTREL_A,
                        blk,
-                        Version::at(Lsn(0x80)),
+                        Version::Lsn(Lsn(0x80)),
                        &ctx,
                        io_concurrency.clone()
                    )
@@ -1956,7 +1956,7 @@ mod tests {
                .get_rel_page_at_lsn(
                    TESTREL_A,
                    1500,
-                    Version::at(Lsn(0x80)),
+                    Version::Lsn(Lsn(0x80)),
                    &ctx,
                    io_concurrency.clone()
                )
@@ -1990,13 +1990,13 @@ mod tests {
        // Check that rel exists and size is correct
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            true
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            1
        );
@@ -2011,7 +2011,7 @@ mod tests {
        // Check that rel is not visible anymore
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x30)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x30)), &ctx)
                .await?,
            false
        );
@@ -2029,13 +2029,13 @@ mod tests {
        // Check that rel exists and size is correct
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x40)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x40)), &ctx)
                .await?,
            true
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x40)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x40)), &ctx)
                .await?,
            1
        );
@@ -2077,26 +2077,26 @@ mod tests {
        // The relation was created at LSN 20, not visible at LSN 1 yet.
        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x10)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx)
                .await?,
            false
        );
        assert!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x10)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx)
                .await
                .is_err()
        );

        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            true
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x20)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx)
                .await?,
            relsize
        );
@@ -2110,7 +2110,7 @@ mod tests {
                    .get_rel_page_at_lsn(
                        TESTREL_A,
                        blkno,
-                        Version::at(lsn),
+                        Version::Lsn(lsn),
                        &ctx,
                        io_concurrency.clone()
                    )
@@ -2131,7 +2131,7 @@ mod tests {
        // Check reported size and contents after truncation
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x60)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x60)), &ctx)
                .await?,
            1
        );
@@ -2144,7 +2144,7 @@ mod tests {
                    .get_rel_page_at_lsn(
                        TESTREL_A,
                        blkno,
-                        Version::at(Lsn(0x60)),
+                        Version::Lsn(Lsn(0x60)),
                        &ctx,
                        io_concurrency.clone()
                    )
@@ -2157,7 +2157,7 @@ mod tests {
        // should still see all blocks with older LSN
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x50)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x50)), &ctx)
                .await?,
            relsize
        );
@@ -2169,7 +2169,7 @@ mod tests {
                    .get_rel_page_at_lsn(
                        TESTREL_A,
                        blkno,
-                        Version::at(Lsn(0x50)),
+                        Version::Lsn(Lsn(0x50)),
                        &ctx,
                        io_concurrency.clone()
                    )
@@ -2193,13 +2193,13 @@ mod tests {

        assert_eq!(
            tline
-                .get_rel_exists(TESTREL_A, Version::at(Lsn(0x80)), &ctx)
+                .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x80)), &ctx)
                .await?,
            true
        );
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(0x80)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x80)), &ctx)
                .await?,
            relsize
        );
@@ -2212,7 +2212,7 @@ mod tests {
                    .get_rel_page_at_lsn(
                        TESTREL_A,
                        blkno,
-                        Version::at(Lsn(0x80)),
+                        Version::Lsn(Lsn(0x80)),
                        &ctx,
                        io_concurrency.clone()
                    )
@@ -2250,7 +2250,7 @@ mod tests {

        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(lsn)), &ctx)
                .await?,
            RELSEG_SIZE + 1
        );
@@ -2264,7 +2264,7 @@ mod tests {
        m.commit(&ctx).await?;
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(lsn)), &ctx)
                .await?,
            RELSEG_SIZE
        );
@@ -2279,7 +2279,7 @@ mod tests {
        m.commit(&ctx).await?;
        assert_eq!(
            tline
-                .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)
+                .get_rel_size(TESTREL_A, Version::Lsn(Lsn(lsn)), &ctx)
                .await?,
            RELSEG_SIZE - 1
        );
@@ -2297,7 +2297,7 @@ mod tests {
            m.commit(&ctx).await?;
            assert_eq!(
                tline
-                    .get_rel_size(TESTREL_A, Version::at(Lsn(lsn)), &ctx)
+                    .get_rel_size(TESTREL_A, Version::Lsn(Lsn(lsn)), &ctx)
                    .await?,
                size as BlockNumber
            );
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -936,44 +936,6 @@ lfc_prewarm_main(Datum main_arg)
 	lfc_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();
 }

-void
-lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
-{
-	BufferTag	tag;
-	FileCacheEntry *entry;
-	uint32		hash;
-
-	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
-		return;
-
-	CopyNRelFileInfoToBufTag(tag, rinfo);
-	tag.forkNum = forkNum;
-
-	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
-
-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-	if (LFC_ENABLED())
-	{
-		for (BlockNumber blkno = 0; blkno < nblocks; blkno += lfc_blocks_per_chunk)
-		{
-			tag.blockNum = blkno;
-			hash = get_hash_value(lfc_hash, &tag);
-			entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
-			if (entry != NULL)
-			{
-				for (int i = 0; i < lfc_blocks_per_chunk; i++)
-				{
-					if (GET_STATE(entry, i) == AVAILABLE)
-					{
-						lfc_ctl->used_pages -= 1;
-						SET_STATE(entry, i, UNAVAILABLE);
-					}
-				}
-			}
-		}
-	}
-	LWLockRelease(lfc_lock);
-}

 /*
 * Check if page is present in the cache.
--- a/pgxn/neon/file_cache.h
+++ b/pgxn/neon/file_cache.h
@@ -28,7 +28,6 @@ typedef struct FileCacheState
 extern bool lfc_store_prefetch_result;

 /* functions for local file cache */
-extern void lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
 extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
 					   BlockNumber blkno, const void *const *buffers,
 					   BlockNumber nblocks);
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -86,7 +86,7 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,

 #define InvalidRelFileNumber InvalidOid

-#define SMgrRelGetRelInfo(reln)				\
+#define SMgrRelGetRelInfo(reln) \
 	(reln->smgr_rnode.node)

 #define DropRelationAllLocalBuffers DropRelFileNodeAllLocalBuffers
@@ -148,12 +148,6 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
 #define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
 #endif

-#define NRelFileInfoInvalidate(rinfo) do { \
-		NInfoGetSpcOid(rinfo) = InvalidOid; \
-		NInfoGetDbOid(rinfo) = InvalidOid; \
-		NInfoGetRelNumber(rinfo) = InvalidRelFileNumber; \
-	} while (0)
-
 #if PG_MAJORVERSION_NUM < 17
 #define ProcNumber BackendId
 #define INVALID_PROC_NUMBER InvalidBackendId
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -108,7 +108,7 @@ typedef enum
 	UNLOGGED_BUILD_NOT_PERMANENT
 } UnloggedBuildPhase;

-static NRelFileInfo unlogged_build_rel_info;
+static SMgrRelation unlogged_build_rel = NULL;
 static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;

 static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
@@ -912,19 +912,16 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 	{
 		case 0:
 			neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
-			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdextend(reln, forkNum, blkno, buffer, skipFsync);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
 		case RELPERSISTENCE_UNLOGGED:
 			mdextend(reln, forkNum, blkno, buffer, skipFsync);
+			/* Update LFC in case of unlogged index build */
+			if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
+				lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
 			return;

 		default:
@@ -1006,19 +1003,21 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 	{
 		case 0:
 			neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
-			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
 		case RELPERSISTENCE_UNLOGGED:
 			mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
+			/* Update LFC in case of unlogged index build */
+			if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
+			{
+				for (int i = 0; i < nblocks; i++)
+				{
+					lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
+				}
+			}
 			return;

 		default:
@@ -1388,14 +1387,8 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 	{
 		case 0:
 			neon_log(ERROR, "cannot call smgrread() on rel with unknown persistence");
-			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdread(reln, forkNum, blkno, buffer);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
@@ -1481,14 +1474,8 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	{
 		case 0:
 			neon_log(ERROR, "cannot call smgrread() on rel with unknown persistence");
-			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdreadv(reln, forknum, blocknum, buffers, nblocks);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
@@ -1621,15 +1608,6 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
 			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-#if PG_MAJORVERSION_NUM >= 17
-				mdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);
-#else
-				mdwrite(reln, forknum, blocknum, buffer, skipFsync);
-#endif
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
@@ -1639,6 +1617,9 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
 			#else
 			mdwrite(reln, forknum, blocknum, buffer, skipFsync);
 			#endif
+			/* Update LFC in case of unlogged index build */
+			if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
+				lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
 			return;
 		default:
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
@@ -1699,16 +1680,14 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
 		case RELPERSISTENCE_UNLOGGED:
 			mdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync);
+			/* Update LFC in case of unlogged index build */
+			if (reln == unlogged_build_rel && unlogged_build_phase == UNLOGGED_BUILD_PHASE_2)
+				lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);
 			return;
 		default:
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
@@ -1744,10 +1723,6 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				return mdnblocks(reln, forknum);
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
@@ -1817,11 +1792,6 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, Blo
 			break;

 		case RELPERSISTENCE_PERMANENT:
-			if (RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)))
-			{
-				mdtruncate(reln, forknum, old_blocks, nblocks);
-				return;
-			}
 			break;

 		case RELPERSISTENCE_TEMP:
@@ -1960,6 +1930,7 @@ neon_start_unlogged_build(SMgrRelation reln)
 	 */
 	if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
 		neon_log(ERROR, "unlogged relation build is already in progress");
+	Assert(unlogged_build_rel == NULL);

 	ereport(SmgrTrace,
 			(errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u",
@@ -1976,7 +1947,7 @@ neon_start_unlogged_build(SMgrRelation reln)

 		case RELPERSISTENCE_TEMP:
 		case RELPERSISTENCE_UNLOGGED:
-			unlogged_build_rel_info = InfoFromSMgrRel(reln);
+			unlogged_build_rel = reln;
 			unlogged_build_phase = UNLOGGED_BUILD_NOT_PERMANENT;
 #ifdef DEBUG_COMPARE_LOCAL
 			if (!IsParallelWorker())
@@ -1997,9 +1968,12 @@ neon_start_unlogged_build(SMgrRelation reln)
 		neon_log(ERROR, "cannot perform unlogged index build, index is not empty ");
 #endif

-	unlogged_build_rel_info = InfoFromSMgrRel(reln);
+	unlogged_build_rel = reln;
 	unlogged_build_phase = UNLOGGED_BUILD_PHASE_1;

+	/* Make the relation look like it's unlogged */
+	reln->smgr_relpersistence = RELPERSISTENCE_UNLOGGED;
+
 	/*
 	 * Create the local file. In a parallel build, the leader is expected to
 	 * call this first and do it.
@@ -2026,16 +2000,17 @@ neon_start_unlogged_build(SMgrRelation reln)
 static void
 neon_finish_unlogged_build_phase_1(SMgrRelation reln)
 {
-	Assert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)));
+	Assert(unlogged_build_rel == reln);

 	ereport(SmgrTrace,
 			(errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u",
-					RelFileInfoFmt((unlogged_build_rel_info)))));
+					RelFileInfoFmt(InfoFromSMgrRel(reln)))));

 	if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
 		return;

 	Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_1);
+	Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);

 	/*
 	 * In a parallel build, (only) the leader process performs the 2nd
@@ -2043,7 +2018,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
 	 */
 	if (IsParallelWorker())
 	{
-		NRelFileInfoInvalidate(unlogged_build_rel_info);
+		unlogged_build_rel = NULL;
 		unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 	}
 	else
@@ -2064,11 +2039,11 @@ neon_end_unlogged_build(SMgrRelation reln)
 {
 	NRelFileInfoBackend rinfob = InfoBFromSMgrRel(reln);

-	Assert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)));
+	Assert(unlogged_build_rel == reln);

 	ereport(SmgrTrace,
 			(errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u",
-					RelFileInfoFmt(unlogged_build_rel_info))));
+					RelFileInfoFmt(InfoFromNInfoB(rinfob)))));

 	if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
 	{
@@ -2076,6 +2051,7 @@ neon_end_unlogged_build(SMgrRelation reln)
 		BlockNumber nblocks;

 		Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
+		Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);

 		/*
 		 * Update the last-written LSN cache.
@@ -2096,6 +2072,9 @@ neon_end_unlogged_build(SMgrRelation reln)
 								InfoFromNInfoB(rinfob),
 								MAIN_FORKNUM);

+		/* Make the relation look permanent again */
+		reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;
+
 		/* Remove local copy */
 		for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
 		{
@@ -2104,8 +2083,6 @@ neon_end_unlogged_build(SMgrRelation reln)
 				 forknum);

 			forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
-			lfc_invalidate(InfoFromNInfoB(rinfob), forknum, nblocks);
-
 			mdclose(reln, forknum);
 #ifndef DEBUG_COMPARE_LOCAL
 			/* use isRedo == true, so that we drop it immediately */
@@ -2116,7 +2093,7 @@ neon_end_unlogged_build(SMgrRelation reln)
 		mdunlink(rinfob, INIT_FORKNUM, true);
 #endif
 	}
-	NRelFileInfoInvalidate(unlogged_build_rel_info);
+	unlogged_build_rel = NULL;
 	unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 }

@@ -2189,7 +2166,7 @@ AtEOXact_neon(XactEvent event, void *arg)
 			 * Forget about any build we might have had in progress. The local
 			 * file will be unlinked by smgrDoPendingDeletes()
 			 */
-			NRelFileInfoInvalidate(unlogged_build_rel_info);
+			unlogged_build_rel = NULL;
 			unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 			break;

@@ -2201,7 +2178,7 @@ AtEOXact_neon(XactEvent event, void *arg)
 		case XACT_EVENT_PRE_PREPARE:
 			if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
 			{
-				NRelFileInfoInvalidate(unlogged_build_rel_info);
+				unlogged_build_rel = NULL;
 				unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 				ereport(ERROR,
 						(errcode(ERRCODE_INTERNAL_ERROR),
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@@ -1145,19 +1145,18 @@ dotenv = ["python-dotenv"]

 [[package]]
 name = "flask-cors"
-version = "6.0.0"
-description = "A Flask extension simplifying CORS support"
+version = "5.0.0"
+description = "A Flask extension adding a decorator for CORS support"
 optional = false
-python-versions = "<4.0,>=3.9"
+python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "flask_cors-6.0.0-py3-none-any.whl", hash = "sha256:6332073356452343a8ccddbfec7befdc3fdd040141fe776ec9b94c262f058657"},
-    {file = "flask_cors-6.0.0.tar.gz", hash = "sha256:4592c1570246bf7beee96b74bc0adbbfcb1b0318f6ba05c412e8909eceec3393"},
+    {file = "Flask_Cors-5.0.0-py2.py3-none-any.whl", hash = "sha256:b9e307d082a9261c100d8fb0ba909eec6a228ed1b60a8315fd85f783d61910bc"},
+    {file = "flask_cors-5.0.0.tar.gz", hash = "sha256:5aadb4b950c4e93745034594d9f3ea6591f734bb3662e16e255ffbf5e89c88ef"},
 ]

 [package.dependencies]
-flask = ">=0.9"
-Werkzeug = ">=0.7"
+Flask = ">=0.9"

 [[package]]
 name = "frozenlist"
@@ -3170,24 +3169,19 @@ pbr = "*"

 [[package]]
 name = "setuptools"
-version = "78.1.1"
+version = "70.0.0"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "setuptools-78.1.1-py3-none-any.whl", hash = "sha256:c3a9c4211ff4c309edb8b8c4f1cbfa7ae324c4ba9f91ff254e3d305b9fd54561"},
-    {file = "setuptools-78.1.1.tar.gz", hash = "sha256:fcc17fd9cd898242f6b4adfaca46137a9edef687f43e6f78469692a5e70d851d"},
+    {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"},
+    {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"},
 ]

 [package.extras]
-check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
-core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"]
-cover = ["pytest-cov"]
-doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
-enabler = ["pytest-enabler (>=2.2)"]
-test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
-type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]

 [[package]]
 name = "six"
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -127,4 +127,3 @@ rstest.workspace = true
 walkdir.workspace = true
 rand_distr = "0.4"
 tokio-postgres.workspace = true
-tracing-test = "0.2"
--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -80,22 +80,10 @@ impl std::fmt::Display for Backend<'_, ()> {
                    .field(&endpoint.url())
                    .finish(),
                #[cfg(any(test, feature = "testing"))]
-                ControlPlaneClient::PostgresMock(endpoint) => {
-                    let url = endpoint.url();
-                    match url::Url::parse(url) {
-                        Ok(mut url) => {
-                            let _ = url.set_password(Some("_redacted_"));
-                            let url = url.as_str();
-                            fmt.debug_tuple("ControlPlane::PostgresMock")
-                                .field(&url)
-                                .finish()
-                        }
-                        Err(_) => fmt
-                            .debug_tuple("ControlPlane::PostgresMock")
-                            .field(&url)
-                            .finish(),
-                    }
-                }
+                ControlPlaneClient::PostgresMock(endpoint) => fmt
+                    .debug_tuple("ControlPlane::PostgresMock")
+                    .field(&endpoint.url())
+                    .finish(),
                #[cfg(test)]
                ControlPlaneClient::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
            },
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -1,13 +1,9 @@
-#[cfg(any(test, feature = "testing"))]
-use std::env;
 use std::net::SocketAddr;
 use std::path::PathBuf;
 use std::pin::pin;
 use std::sync::Arc;
 use std::time::Duration;

-#[cfg(any(test, feature = "testing"))]
-use anyhow::Context;
 use anyhow::{bail, ensure};
 use arc_swap::ArcSwapOption;
 use futures::future::Either;
@@ -39,8 +35,6 @@ use crate::scram::threadpool::ThreadPool;
 use crate::serverless::GlobalConnPoolOptions;
 use crate::serverless::cancel_set::CancelSet;
 use crate::tls::client_config::compute_client_config_with_root_certs;
-#[cfg(any(test, feature = "testing"))]
-use crate::url::ApiUrl;
 use crate::{auth, control_plane, http, serverless, usage_metrics};

 project_git_version!(GIT_VERSION);
@@ -167,11 +161,8 @@ struct ProxyCliArgs {
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
    redis_rps_limit: Vec<RateBucketInfo>,
    /// Cancellation channel size (max queue size for redis kv client)
-    #[clap(long, default_value_t = 1024)]
+    #[clap(long, default_value = "1024")]
    cancellation_ch_size: usize,
-    /// Cancellation ops batch size for redis
-    #[clap(long, default_value_t = 8)]
-    cancellation_batch_size: usize,
    /// cache for `allowed_ips` (use `size=0` to disable)
    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
    allowed_ips_cache: String,
@@ -551,12 +542,7 @@ pub async fn run() -> anyhow::Result<()> {
            if let Some(mut redis_kv_client) = redis_kv_client {
                maintenance_tasks.spawn(async move {
                    redis_kv_client.try_connect().await?;
-                    handle_cancel_messages(
-                        &mut redis_kv_client,
-                        rx_cancel,
-                        args.cancellation_batch_size,
-                    )
-                    .await?;
+                    handle_cancel_messages(&mut redis_kv_client, rx_cancel).await?;

                    drop(redis_kv_client);

@@ -783,13 +769,7 @@ fn build_auth_backend(

        #[cfg(any(test, feature = "testing"))]
        AuthBackendType::Postgres => {
-            let mut url: ApiUrl = args.auth_endpoint.parse()?;
-            if url.password().is_none() {
-                let password = env::var("PGPASSWORD")
-                    .with_context(|| "auth-endpoint does not contain a password and environment variable `PGPASSWORD` is not set")?;
-                url.set_password(Some(&password))
-                    .expect("Failed to set password");
-            }
+            let url = args.auth_endpoint.parse()?;
            let api = control_plane::client::mock::MockControlPlane::new(
                url,
                !args.is_private_access_proxy,
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -30,6 +30,8 @@ use crate::tls::postgres_rustls::MakeRustlsConnect;
 type IpSubnetKey = IpNet;

 const CANCEL_KEY_TTL: i64 = 1_209_600; // 2 weeks cancellation key expire time
+const REDIS_SEND_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(10);
+const BATCH_SIZE: usize = 8;

 // Message types for sending through mpsc channel
 pub enum CancelKeyOp {
@@ -229,13 +231,12 @@ impl CancelReplyOp {
 pub async fn handle_cancel_messages(
    client: &mut RedisKVClient,
    mut rx: mpsc::Receiver<CancelKeyOp>,
-    batch_size: usize,
 ) -> anyhow::Result<()> {
-    let mut batch = Vec::with_capacity(batch_size);
-    let mut pipeline = Pipeline::with_capacity(batch_size);
+    let mut batch = Vec::with_capacity(BATCH_SIZE);
+    let mut pipeline = Pipeline::with_capacity(BATCH_SIZE);

    loop {
-        if rx.recv_many(&mut batch, batch_size).await == 0 {
+        if rx.recv_many(&mut batch, BATCH_SIZE).await == 0 {
            warn!("shutting down cancellation queue");
            break Ok(());
        }
@@ -366,7 +367,8 @@ impl CancellationHandler {
            return Err(CancelError::InternalError);
        };

-        tx.try_send(op)
+        tx.send_timeout(op, REDIS_SEND_TIMEOUT)
+            .await
            .map_err(|e| {
                tracing::warn!("failed to send GetCancelData for {key}: {e}");
            })
@@ -568,7 +570,7 @@ impl Session {
    }

    // Send the store key op to the cancellation handler and set TTL for the key
-    pub(crate) fn write_cancel_key(
+    pub(crate) async fn write_cancel_key(
        &self,
        cancel_closure: CancelClosure,
    ) -> Result<(), CancelError> {
@@ -594,14 +596,14 @@ impl Session {
            expire: CANCEL_KEY_TTL,
        };

-        let _ = tx.try_send(op).map_err(|e| {
+        let _ = tx.send_timeout(op, REDIS_SEND_TIMEOUT).await.map_err(|e| {
            let key = self.key;
            tracing::warn!("failed to send StoreCancelKey for {key}: {e}");
        });
        Ok(())
    }

-    pub(crate) fn remove_cancel_key(&self) -> Result<(), CancelError> {
+    pub(crate) async fn remove_cancel_key(&self) -> Result<(), CancelError> {
        let Some(tx) = &self.cancellation_handler.tx else {
            tracing::warn!("cancellation handler is not available");
            return Err(CancelError::InternalError);
@@ -617,7 +619,7 @@ impl Session {
                .guard(RedisMsgKind::HDel),
        };

-        let _ = tx.try_send(op).map_err(|e| {
+        let _ = tx.send_timeout(op, REDIS_SEND_TIMEOUT).await.map_err(|e| {
            let key = self.key;
            tracing::warn!("failed to send RemoveCancelKey for {key}: {e}");
        });
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -244,7 +244,9 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
    let session = cancellation_handler_clone.get_key();

-    session.write_cancel_key(node.cancel_closure.clone())?;
+    session
+        .write_cancel_key(node.cancel_closure.clone())
+        .await?;

    prepare_client_connection(&node, *session.key(), &mut stream).await?;

--- a/proxy/src/logging.rs
+++ b/proxy/src/logging.rs
@@ -1,11 +1,13 @@
-use std::cell::RefCell;
+use std::cell::{Cell, RefCell};
 use std::collections::HashMap;
-use std::sync::Arc;
+use std::hash::BuildHasher;
 use std::sync::atomic::{AtomicU32, Ordering};
-use std::{env, io};
+use std::{array, env, fmt, io};

 use chrono::{DateTime, Utc};
+use indexmap::IndexSet;
 use opentelemetry::trace::TraceContextExt;
+use scopeguard::defer;
 use serde::ser::{SerializeMap, Serializer};
 use tracing::subscriber::Interest;
 use tracing::{Event, Metadata, Span, Subscriber, callsite, span};
@@ -17,6 +19,7 @@ use tracing_subscriber::fmt::{FormatEvent, FormatFields};
 use tracing_subscriber::layer::{Context, Layer};
 use tracing_subscriber::prelude::*;
 use tracing_subscriber::registry::{LookupSpan, SpanRef};
+use try_lock::TryLock;

 /// Initialize logging and OpenTelemetry tracing and exporter.
 ///
@@ -52,7 +55,7 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
            StderrWriter {
                stderr: std::io::stderr(),
            },
-            &["request_id", "session_id", "conn_id"],
+            ["request_id", "session_id", "conn_id"],
        ))
    } else {
        None
@@ -180,65 +183,50 @@ impl Clock for RealClock {
 /// Name of the field used by tracing crate to store the event message.
 const MESSAGE_FIELD: &str = "message";

-/// Tracing used to enforce that spans/events have no more than 32 fields.
-/// It seems this is no longer the case, but it's still documented in some places.
-/// Generally, we shouldn't expect more than 32 fields anyway, so we can try and
-/// rely on it for some (minor) performance gains.
-const MAX_TRACING_FIELDS: usize = 32;
-
 thread_local! {
+    /// Protects against deadlocks and double panics during log writing.
+    /// The current panic handler will use tracing to log panic information.
+    static REENTRANCY_GUARD: Cell<bool> = const { Cell::new(false) };
    /// Thread-local instance with per-thread buffer for log writing.
-    static EVENT_FORMATTER: RefCell<EventFormatter> = const { RefCell::new(EventFormatter::new()) };
+    static EVENT_FORMATTER: RefCell<EventFormatter> = RefCell::new(EventFormatter::new());
    /// Cached OS thread ID.
    static THREAD_ID: u64 = gettid::gettid();
 }

-/// Map for values fixed at callsite registration.
-// We use papaya here because registration rarely happens post-startup.
-// papaya is good for read-heavy workloads.
-//
-// We use rustc_hash here because callsite::Identifier will always be an integer with low-bit entropy,
-// since it's always a pointer to static mutable data. rustc_hash was designed for low-bit entropy.
-type CallsiteMap<T> =
-    papaya::HashMap<callsite::Identifier, T, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
-
 /// Implements tracing layer to handle events specific to logging.
-struct JsonLoggingLayer<C: Clock, W: MakeWriter> {
+struct JsonLoggingLayer<C: Clock, W: MakeWriter, const F: usize> {
    clock: C,
+    skipped_field_indices: papaya::HashMap<callsite::Identifier, SkippedFieldIndices>,
+    callsite_ids: papaya::HashMap<callsite::Identifier, CallsiteId>,
    writer: W,
-
-    /// tracks which fields of each **event** are duplicates
-    skipped_field_indices: CallsiteMap<SkippedFieldIndices>,
-
-    span_info: CallsiteMap<CallsiteSpanInfo>,
-
-    /// Fields we want to keep track of in a separate json object.
-    extract_fields: &'static [&'static str],
+    // We use a const generic and arrays to bypass one heap allocation.
+    extract_fields: IndexSet<&'static str>,
+    _marker: std::marker::PhantomData<[&'static str; F]>,
 }

-impl<C: Clock, W: MakeWriter> JsonLoggingLayer<C, W> {
-    fn new(clock: C, writer: W, extract_fields: &'static [&'static str]) -> Self {
+impl<C: Clock, W: MakeWriter, const F: usize> JsonLoggingLayer<C, W, F> {
+    fn new(clock: C, writer: W, extract_fields: [&'static str; F]) -> Self {
        JsonLoggingLayer {
            clock,
-            skipped_field_indices: CallsiteMap::default(),
-            span_info: CallsiteMap::default(),
+            skipped_field_indices: papaya::HashMap::default(),
+            callsite_ids: papaya::HashMap::default(),
            writer,
-            extract_fields,
+            extract_fields: IndexSet::from_iter(extract_fields),
+            _marker: std::marker::PhantomData,
        }
    }

    #[inline]
-    fn span_info(&self, metadata: &'static Metadata<'static>) -> CallsiteSpanInfo {
-        self.span_info
+    fn callsite_id(&self, cs: callsite::Identifier) -> CallsiteId {
+        *self
+            .callsite_ids
            .pin()
-            .get_or_insert_with(metadata.callsite(), || {
-                CallsiteSpanInfo::new(metadata, self.extract_fields)
-            })
-            .clone()
+            .get_or_insert_with(cs, CallsiteId::next)
    }
 }

-impl<S, C: Clock + 'static, W: MakeWriter + 'static> Layer<S> for JsonLoggingLayer<C, W>
+impl<S, C: Clock + 'static, W: MakeWriter + 'static, const F: usize> Layer<S>
+    for JsonLoggingLayer<C, W, F>
 where
    S: Subscriber + for<'a> LookupSpan<'a>,
 {
@@ -249,25 +237,35 @@ where
        //       early, before OTel machinery, and add as event extension.
        let now = self.clock.now();

-        let res: io::Result<()> = EVENT_FORMATTER.with(|f| {
-            let mut borrow = f.try_borrow_mut();
-            let formatter = match borrow.as_deref_mut() {
-                Ok(formatter) => formatter,
-                // If the thread local formatter is borrowed,
-                // then we likely hit an edge case were we panicked during formatting.
-                // We allow the logging to proceed with an uncached formatter.
-                Err(_) => &mut EventFormatter::new(),
-            };
+        let res: io::Result<()> = REENTRANCY_GUARD.with(move |entered| {
+            if entered.get() {
+                let mut formatter = EventFormatter::new();
+                formatter.format::<S, F>(
+                    now,
+                    event,
+                    &ctx,
+                    &self.skipped_field_indices,
+                    &self.callsite_ids,
+                    &self.extract_fields,
+                )?;
+                self.writer.make_writer().write_all(formatter.buffer())
+            } else {
+                entered.set(true);
+                defer!(entered.set(false););

-            formatter.reset();
-            formatter.format(
-                now,
-                event,
-                &ctx,
-                &self.skipped_field_indices,
-                self.extract_fields,
-            )?;
-            self.writer.make_writer().write_all(formatter.buffer())
+                EVENT_FORMATTER.with_borrow_mut(move |formatter| {
+                    formatter.reset();
+                    formatter.format::<S, F>(
+                        now,
+                        event,
+                        &ctx,
+                        &self.skipped_field_indices,
+                        &self.callsite_ids,
+                        &self.extract_fields,
+                    )?;
+                    self.writer.make_writer().write_all(formatter.buffer())
+                })
+            }
        });

        // In case logging fails we generate a simpler JSON object.
@@ -289,48 +287,50 @@ where
    /// Registers a SpanFields instance as span extension.
    fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
        let span = ctx.span(id).expect("span must exist");
+        let fields = SpanFields::default();
+        fields.record_fields(attrs);

-        let mut fields = SpanFields::new(self.span_info(span.metadata()));
-        attrs.record(&mut fields);
+        // This could deadlock when there's a panic somewhere in the tracing
+        // event handling and a read or write guard is still held. This includes
+        // the OTel subscriber.
+        let mut exts = span.extensions_mut();

-        // This is a new span: the extensions should not be locked
-        // unless some layer spawned a thread to process this span.
-        // I don't think any layers do that.
-        span.extensions_mut().insert(fields);
+        exts.insert(fields);
    }

    fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) {
        let span = ctx.span(id).expect("span must exist");
-
-        // assumption: `on_record` is rarely called.
-        // assumption: a span being updated by one thread,
-        //             and formatted by another thread is even rarer.
-        let mut ext = span.extensions_mut();
-        if let Some(fields) = ext.get_mut::<SpanFields>() {
-            values.record(fields);
+        let ext = span.extensions();
+        if let Some(data) = ext.get::<SpanFields>() {
+            data.record_fields(values);
        }
    }

-    /// Called (lazily) roughly once per event/span instance. We quickly check
-    /// for duplicate field names and record duplicates as skippable. Last field wins.
+    /// Called (lazily) whenever a new log call is executed. We quickly check
+    /// for duplicate field names and record duplicates as skippable. Last one
+    /// wins.
    fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
-        debug_assert!(
-            metadata.fields().len() <= MAX_TRACING_FIELDS,
-            "callsite {metadata:?} has too many fields."
-        );
-
        if !metadata.is_event() {
-            // register the span info.
-            self.span_info(metadata);
+            self.callsite_id(metadata.callsite());
            // Must not be never because we wouldn't get trace and span data.
            return Interest::always();
        }

        let mut field_indices = SkippedFieldIndices::default();
-        let mut seen_fields = HashMap::new();
+        let mut seen_fields = HashMap::<&'static str, usize>::new();
        for field in metadata.fields() {
-            if let Some(old_index) = seen_fields.insert(field.name(), field.index()) {
-                field_indices.set(old_index);
+            use std::collections::hash_map::Entry;
+            match seen_fields.entry(field.name()) {
+                Entry::Vacant(entry) => {
+                    // field not seen yet
+                    entry.insert(field.index());
+                }
+                Entry::Occupied(mut entry) => {
+                    // replace currently stored index
+                    let old_index = entry.insert(field.index());
+                    // ... and append it to list of skippable indices
+                    field_indices.push(old_index);
+                }
            }
        }

@@ -344,113 +344,110 @@ where
    }
 }

-/// Any span info that is fixed to a particular callsite. Not variable between span instances.
-#[derive(Clone)]
-struct CallsiteSpanInfo {
-    /// index of each field to extract. usize::MAX if not found.
-    extract: Arc<[usize]>,
+#[derive(Copy, Clone, Debug, Default)]
+#[repr(transparent)]
+struct CallsiteId(u32);

-    /// tracks the fixed "callsite ID" for each span.
-    /// note: this is not stable between runs.
-    normalized_name: Arc<str>,
-}
-
-impl CallsiteSpanInfo {
-    fn new(metadata: &'static Metadata<'static>, extract_fields: &[&'static str]) -> Self {
+impl CallsiteId {
+    #[inline]
+    fn next() -> Self {
        // Start at 1 to reserve 0 for default.
        static COUNTER: AtomicU32 = AtomicU32::new(1);
+        CallsiteId(COUNTER.fetch_add(1, Ordering::Relaxed))
+    }
+}

-        let names: Vec<&'static str> = metadata.fields().iter().map(|f| f.name()).collect();
-
-        // get all the indices of span fields we want to focus
-        let extract = extract_fields
-            .iter()
-            // use rposition, since we want last match wins.
-            .map(|f1| names.iter().rposition(|f2| f1 == f2).unwrap_or(usize::MAX))
-            .collect();
-
-        // normalized_name is unique for each callsite, but it is not
-        // unified across separate proxy instances.
-        // todo: can we do better here?
-        let cid = COUNTER.fetch_add(1, Ordering::Relaxed);
-        let normalized_name = format!("{}#{cid}", metadata.name()).into();
-
-        Self {
-            extract,
-            normalized_name,
-        }
+impl fmt::Display for CallsiteId {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
    }
 }

 /// Stores span field values recorded during the spans lifetime.
+#[derive(Default)]
 struct SpanFields {
-    values: [serde_json::Value; MAX_TRACING_FIELDS],
-
-    /// cached span info so we can avoid extra hashmap lookups in the hot path.
-    span_info: CallsiteSpanInfo,
+    // TODO: Switch to custom enum with lasso::Spur for Strings?
+    fields: papaya::HashMap<&'static str, serde_json::Value>,
 }

 impl SpanFields {
-    fn new(span_info: CallsiteSpanInfo) -> Self {
-        Self {
-            span_info,
-            values: [const { serde_json::Value::Null }; MAX_TRACING_FIELDS],
-        }
+    #[inline]
+    fn record_fields<R: tracing_subscriber::field::RecordFields>(&self, fields: R) {
+        fields.record(&mut SpanFieldsRecorder {
+            fields: self.fields.pin(),
+        });
    }
 }

-impl tracing::field::Visit for SpanFields {
+/// Implements a tracing field visitor to convert and store values.
+struct SpanFieldsRecorder<'m, S, G> {
+    fields: papaya::HashMapRef<'m, &'static str, serde_json::Value, S, G>,
+}
+
+impl<S: BuildHasher, G: papaya::Guard> tracing::field::Visit for SpanFieldsRecorder<'_, S, G> {
    #[inline]
    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
        if let Ok(value) = i64::try_from(value) {
-            self.values[field.index()] = serde_json::Value::from(value);
+            self.fields
+                .insert(field.name(), serde_json::Value::from(value));
        } else {
-            self.values[field.index()] = serde_json::Value::from(format!("{value}"));
+            self.fields
+                .insert(field.name(), serde_json::Value::from(format!("{value}")));
        }
    }

    #[inline]
    fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
        if let Ok(value) = u64::try_from(value) {
-            self.values[field.index()] = serde_json::Value::from(value);
+            self.fields
+                .insert(field.name(), serde_json::Value::from(value));
        } else {
-            self.values[field.index()] = serde_json::Value::from(format!("{value}"));
+            self.fields
+                .insert(field.name(), serde_json::Value::from(format!("{value}")));
        }
    }

    #[inline]
    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
-        self.values[field.index()] = serde_json::Value::from(value);
+        self.fields
+            .insert(field.name(), serde_json::Value::from(value));
    }

    #[inline]
    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
-        self.values[field.index()] = serde_json::Value::from(format!("{value:?}"));
+        self.fields
+            .insert(field.name(), serde_json::Value::from(format!("{value:?}")));
    }

    #[inline]
@@ -459,33 +456,38 @@ impl tracing::field::Visit for SpanFields {
        field: &tracing::field::Field,
        value: &(dyn std::error::Error + 'static),
    ) {
-        self.values[field.index()] = serde_json::Value::from(format!("{value}"));
+        self.fields
+            .insert(field.name(), serde_json::Value::from(format!("{value}")));
    }
 }

 /// List of field indices skipped during logging. Can list duplicate fields or
 /// metafields not meant to be logged.
-#[derive(Copy, Clone, Default)]
+#[derive(Clone, Default)]
 struct SkippedFieldIndices {
-    // 32-bits is large enough for `MAX_TRACING_FIELDS`
-    bits: u32,
+    bits: u64,
 }

 impl SkippedFieldIndices {
    #[inline]
-    fn is_empty(self) -> bool {
+    fn is_empty(&self) -> bool {
        self.bits == 0
    }

    #[inline]
-    fn set(&mut self, index: usize) {
-        debug_assert!(index <= 32, "index out of bounds of 32-bit set");
-        self.bits |= 1 << index;
+    fn push(&mut self, index: usize) {
+        self.bits |= 1u64
+            .checked_shl(index as u32)
+            .expect("field index too large");
    }

    #[inline]
-    fn contains(self, index: usize) -> bool {
-        self.bits & (1 << index) != 0
+    fn contains(&self, index: usize) -> bool {
+        self.bits
+            & 1u64
+                .checked_shl(index as u32)
+                .expect("field index too large")
+            != 0
    }
 }

@@ -497,7 +499,7 @@ struct EventFormatter {

 impl EventFormatter {
    #[inline]
-    const fn new() -> Self {
+    fn new() -> Self {
        EventFormatter {
            logline_buffer: Vec::new(),
        }
@@ -513,13 +515,14 @@ impl EventFormatter {
        self.logline_buffer.clear();
    }

-    fn format<S>(
+    fn format<S, const F: usize>(
        &mut self,
        now: DateTime<Utc>,
        event: &Event<'_>,
        ctx: &Context<'_, S>,
-        skipped_field_indices: &CallsiteMap<SkippedFieldIndices>,
-        extract_fields: &'static [&'static str],
+        skipped_field_indices: &papaya::HashMap<callsite::Identifier, SkippedFieldIndices>,
+        callsite_ids: &papaya::HashMap<callsite::Identifier, CallsiteId>,
+        extract_fields: &IndexSet<&'static str>,
    ) -> io::Result<()>
    where
        S: Subscriber + for<'a> LookupSpan<'a>,
@@ -530,11 +533,8 @@ impl EventFormatter {
        let normalized_meta = event.normalized_metadata();
        let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata());

-        let skipped_field_indices = skipped_field_indices
-            .pin()
-            .get(&meta.callsite())
-            .copied()
-            .unwrap_or_default();
+        let skipped_field_indices = skipped_field_indices.pin();
+        let skipped_field_indices = skipped_field_indices.get(&meta.callsite());

        let mut serialize = || {
            let mut serializer = serde_json::Serializer::new(&mut self.logline_buffer);
@@ -565,11 +565,9 @@ impl EventFormatter {
            }

            let spans = SerializableSpans {
-                // collect all spans from parent to root.
-                spans: ctx
-                    .event_span(event)
-                    .map_or(vec![], |parent| parent.scope().collect()),
-                extracted: ExtractedSpanFields::new(extract_fields),
+                ctx,
+                callsite_ids,
+                extract: ExtractedSpanFields::<'_, F>::new(extract_fields),
            };
            serializer.serialize_entry("spans", &spans)?;

@@ -622,9 +620,9 @@ impl EventFormatter {
                }
            }

-            if spans.extracted.has_values() {
+            if spans.extract.has_values() {
                // TODO: add fields from event, too?
-                serializer.serialize_entry("extract", &spans.extracted)?;
+                serializer.serialize_entry("extract", &spans.extract)?;
            }

            serializer.end()
@@ -637,15 +635,15 @@ impl EventFormatter {
 }

 /// Extracts the message field that's mixed will other fields.
-struct MessageFieldExtractor<S: serde::ser::SerializeMap> {
+struct MessageFieldExtractor<'a, S: serde::ser::SerializeMap> {
    serializer: S,
-    skipped_field_indices: SkippedFieldIndices,
+    skipped_field_indices: Option<&'a SkippedFieldIndices>,
    state: Option<Result<(), S::Error>>,
 }

-impl<S: serde::ser::SerializeMap> MessageFieldExtractor<S> {
+impl<'a, S: serde::ser::SerializeMap> MessageFieldExtractor<'a, S> {
    #[inline]
-    fn new(serializer: S, skipped_field_indices: SkippedFieldIndices) -> Self {
+    fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self {
        Self {
            serializer,
            skipped_field_indices,
@@ -667,11 +665,13 @@ impl<S: serde::ser::SerializeMap> MessageFieldExtractor<S> {
    fn accept_field(&self, field: &tracing::field::Field) -> bool {
        self.state.is_none()
            && field.name() == MESSAGE_FIELD
-            && !self.skipped_field_indices.contains(field.index())
+            && !self
+                .skipped_field_indices
+                .is_some_and(|i| i.contains(field.index()))
    }
 }

-impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtractor<S> {
+impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtractor<'_, S> {
    #[inline]
    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
        if self.accept_field(field) {
@@ -751,14 +751,14 @@ impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtracto
 /// can be skipped.
 // This is entirely optional and only cosmetic, though maybe helps a
 // bit during log parsing in dashboards when there's no field with empty object.
-struct FieldsPresent(pub bool, SkippedFieldIndices);
+struct FieldsPresent<'a>(pub bool, Option<&'a SkippedFieldIndices>);

 // Even though some methods have an overhead (error, bytes) it is assumed the
 // compiler won't include this since we ignore the value entirely.
-impl tracing::field::Visit for FieldsPresent {
+impl tracing::field::Visit for FieldsPresent<'_> {
    #[inline]
    fn record_debug(&mut self, field: &tracing::field::Field, _: &dyn std::fmt::Debug) {
-        if !self.1.contains(field.index())
+        if !self.1.is_some_and(|i| i.contains(field.index()))
            && field.name() != MESSAGE_FIELD
            && !field.name().starts_with("log.")
        {
@@ -768,7 +768,10 @@ impl tracing::field::Visit for FieldsPresent {
 }

 /// Serializes the fields directly supplied with a log event.
-struct SerializableEventFields<'a, 'event>(&'a tracing::Event<'event>, SkippedFieldIndices);
+struct SerializableEventFields<'a, 'event>(
+    &'a tracing::Event<'event>,
+    Option<&'a SkippedFieldIndices>,
+);

 impl serde::ser::Serialize for SerializableEventFields<'_, '_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
@@ -785,15 +788,15 @@ impl serde::ser::Serialize for SerializableEventFields<'_, '_> {
 }

 /// A tracing field visitor that skips the message field.
-struct MessageFieldSkipper<S: serde::ser::SerializeMap> {
+struct MessageFieldSkipper<'a, S: serde::ser::SerializeMap> {
    serializer: S,
-    skipped_field_indices: SkippedFieldIndices,
+    skipped_field_indices: Option<&'a SkippedFieldIndices>,
    state: Result<(), S::Error>,
 }

-impl<S: serde::ser::SerializeMap> MessageFieldSkipper<S> {
+impl<'a, S: serde::ser::SerializeMap> MessageFieldSkipper<'a, S> {
    #[inline]
-    fn new(serializer: S, skipped_field_indices: SkippedFieldIndices) -> Self {
+    fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self {
        Self {
            serializer,
            skipped_field_indices,
@@ -806,7 +809,9 @@ impl<S: serde::ser::SerializeMap> MessageFieldSkipper<S> {
        self.state.is_ok()
            && field.name() != MESSAGE_FIELD
            && !field.name().starts_with("log.")
-            && !self.skipped_field_indices.contains(field.index())
+            && !self
+                .skipped_field_indices
+                .is_some_and(|i| i.contains(field.index()))
    }

    #[inline]
@@ -816,7 +821,7 @@ impl<S: serde::ser::SerializeMap> MessageFieldSkipper<S> {
    }
 }

-impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<S> {
+impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<'_, S> {
    #[inline]
    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
        if self.accept_field(field) {
@@ -900,17 +905,18 @@ impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<
 /// with the span names as keys. To prevent collision we append a numberic value
 /// to the name. Also, collects any span fields we're interested in. Last one
 /// wins.
-struct SerializableSpans<'ctx, S>
+struct SerializableSpans<'a, 'ctx, Span, const F: usize>
 where
-    S: for<'lookup> LookupSpan<'lookup>,
+    Span: Subscriber + for<'lookup> LookupSpan<'lookup>,
 {
-    spans: Vec<SpanRef<'ctx, S>>,
-    extracted: ExtractedSpanFields,
+    ctx: &'a Context<'ctx, Span>,
+    callsite_ids: &'a papaya::HashMap<callsite::Identifier, CallsiteId>,
+    extract: ExtractedSpanFields<'a, F>,
 }

-impl<S> serde::ser::Serialize for SerializableSpans<'_, S>
+impl<Span, const F: usize> serde::ser::Serialize for SerializableSpans<'_, '_, Span, F>
 where
-    S: for<'lookup> LookupSpan<'lookup>,
+    Span: Subscriber + for<'lookup> LookupSpan<'lookup>,
 {
    fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
    where
@@ -918,22 +924,25 @@ where
    {
        let mut serializer = serializer.serialize_map(None)?;

-        for span in self.spans.iter().rev() {
-            let ext = span.extensions();
+        if let Some(leaf_span) = self.ctx.lookup_current() {
+            for span in leaf_span.scope().from_root() {
+                // Append a numeric callsite ID to the span name to keep the name unique
+                // in the JSON object.
+                let cid = self
+                    .callsite_ids
+                    .pin()
+                    .get(&span.metadata().callsite())
+                    .copied()
+                    .unwrap_or_default();

-            // all spans should have this extension.
-            let Some(fields) = ext.get() else { continue };
+                // Loki turns the # into an underscore during field name concatenation.
+                serializer.serialize_key(&format_args!("{}#{}", span.metadata().name(), &cid))?;

-            self.extracted.layer_span(fields);
-
-            let SpanFields { values, span_info } = fields;
-            serializer.serialize_entry(
-                &*span_info.normalized_name,
-                &SerializableSpanFields {
-                    fields: span.metadata().fields(),
-                    values,
-                },
-            )?;
+                serializer.serialize_value(&SerializableSpanFields {
+                    span: &span,
+                    extract: &self.extract,
+                })?;
+            }
        }

        serializer.end()
@@ -941,77 +950,80 @@ where
 }

 /// Serializes the span fields as object.
-struct SerializableSpanFields<'span> {
-    fields: &'span tracing::field::FieldSet,
-    values: &'span [serde_json::Value; MAX_TRACING_FIELDS],
+struct SerializableSpanFields<'a, 'span, Span, const F: usize>
+where
+    Span: for<'lookup> LookupSpan<'lookup>,
+{
+    span: &'a SpanRef<'span, Span>,
+    extract: &'a ExtractedSpanFields<'a, F>,
 }

-impl serde::ser::Serialize for SerializableSpanFields<'_> {
+impl<Span, const F: usize> serde::ser::Serialize for SerializableSpanFields<'_, '_, Span, F>
+where
+    Span: for<'lookup> LookupSpan<'lookup>,
+{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::ser::Serializer,
    {
        let mut serializer = serializer.serialize_map(None)?;

-        for (field, value) in std::iter::zip(self.fields, self.values) {
-            if value.is_null() {
-                continue;
+        let ext = self.span.extensions();
+        if let Some(data) = ext.get::<SpanFields>() {
+            for (name, value) in &data.fields.pin() {
+                serializer.serialize_entry(name, value)?;
+                // TODO: replace clone with reference, if possible.
+                self.extract.set(name, value.clone());
            }
-            serializer.serialize_entry(field.name(), value)?;
        }

        serializer.end()
    }
 }

-struct ExtractedSpanFields {
-    names: &'static [&'static str],
-    values: RefCell<Vec<serde_json::Value>>,
+struct ExtractedSpanFields<'a, const F: usize> {
+    names: &'a IndexSet<&'static str>,
+    // TODO: replace TryLock with something thread-local that provides interior
+    //       mutability. The serde API only hands us `&self`, so we can't use `mut`.
+    values: TryLock<([Option<serde_json::Value>; F], bool)>,
 }

-impl ExtractedSpanFields {
-    fn new(names: &'static [&'static str]) -> Self {
+impl<'a, const F: usize> ExtractedSpanFields<'a, F> {
+    fn new(names: &'a IndexSet<&'static str>) -> Self {
        ExtractedSpanFields {
            names,
-            values: RefCell::new(vec![serde_json::Value::Null; names.len()]),
+            values: TryLock::new((array::from_fn(|_| Option::default()), false)),
        }
    }

-    fn layer_span(&self, fields: &SpanFields) {
-        let mut v = self.values.borrow_mut();
-        let SpanFields { values, span_info } = fields;
-
-        // extract the fields
-        for (i, &j) in span_info.extract.iter().enumerate() {
-            let Some(value) = values.get(j) else { continue };
-
-            if !value.is_null() {
-                // TODO: replace clone with reference, if possible.
-                v[i] = value.clone();
-            }
+    #[inline]
+    fn set(&self, name: &'static str, value: serde_json::Value) {
+        if let Some((index, _)) = self.names.get_full(name) {
+            let mut fields = self.values.try_lock().expect("thread-local use");
+            fields.0[index] = Some(value);
+            fields.1 = true;
        }
    }

    #[inline]
    fn has_values(&self) -> bool {
-        self.values.borrow().iter().any(|v| !v.is_null())
+        self.values.try_lock().expect("thread-local use").1
    }
 }

-impl serde::ser::Serialize for ExtractedSpanFields {
+impl<const F: usize> serde::ser::Serialize for ExtractedSpanFields<'_, F> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::ser::Serializer,
    {
        let mut serializer = serializer.serialize_map(None)?;

-        let values = self.values.borrow();
-        for (key, value) in std::iter::zip(self.names, &*values) {
-            if value.is_null() {
-                continue;
+        let values = self.values.try_lock().expect("thread-local use");
+        for (i, value) in values.0.iter().enumerate() {
+            if let Some(value) = value {
+                let key = self.names[i];
+                serializer.serialize_entry(key, value)?;
            }
-
-            serializer.serialize_entry(key, value)?;
        }

        serializer.end()
@@ -1020,6 +1032,7 @@ impl serde::ser::Serialize for ExtractedSpanFields {

 #[cfg(test)]
 mod tests {
+    use std::marker::PhantomData;
    use std::sync::{Arc, Mutex, MutexGuard};

    use assert_json_diff::assert_json_eq;
@@ -1068,9 +1081,10 @@ mod tests {
        let log_layer = JsonLoggingLayer {
            clock: clock.clone(),
            skipped_field_indices: papaya::HashMap::default(),
-            span_info: papaya::HashMap::default(),
+            callsite_ids: papaya::HashMap::default(),
            writer: buffer.clone(),
-            extract_fields: &["x"],
+            extract_fields: IndexSet::from_iter(["x"]),
+            _marker: PhantomData::<[&'static str; 1]>,
        };

        let registry = tracing_subscriber::Registry::default().with(log_layer);
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -383,7 +383,9 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
    let session = cancellation_handler_clone.get_key();

-    session.write_cancel_key(node.cancel_closure.clone())?;
+    session
+        .write_cancel_key(node.cancel_closure.clone())
+        .await?;

    prepare_client_connection(&node, *session.key(), &mut stream).await?;

--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -94,7 +94,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
            tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
        }

-        drop(self.cancel.remove_cancel_key()); // we don't need a result. If the queue is full, we just log the error
+        drop(self.cancel.remove_cancel_key().await); // we don't need a result. If the queue is full, we just log the error

        res
    }
--- a/proxy/src/proxy/retry.rs
+++ b/proxy/src/proxy/retry.rs
@@ -48,7 +48,7 @@ impl ShouldRetryWakeCompute for postgres_client::error::DbError {
        use postgres_client::error::SqlState;
        // Here are errors that happens after the user successfully authenticated to the database.
        // TODO: there are pgbouncer errors that should be retried, but they are not listed here.
-        let non_retriable_pg_errors = matches!(
+        !matches!(
            self.code(),
            &SqlState::TOO_MANY_CONNECTIONS
                | &SqlState::OUT_OF_MEMORY
@@ -56,20 +56,8 @@ impl ShouldRetryWakeCompute for postgres_client::error::DbError {
                | &SqlState::T_R_SERIALIZATION_FAILURE
                | &SqlState::INVALID_CATALOG_NAME
                | &SqlState::INVALID_SCHEMA_NAME
-                | &SqlState::INVALID_PARAMETER_VALUE,
-        );
-        if non_retriable_pg_errors {
-            return false;
-        }
-        // PGBouncer errors that should not trigger a wake_compute retry.
-        if self.code() == &SqlState::PROTOCOL_VIOLATION {
-            // Source for the error message:
-            // https://github.com/pgbouncer/pgbouncer/blob/f15997fe3effe3a94ba8bcc1ea562e6117d1a131/src/client.c#L1070
-            return !self
-                .message()
-                .contains("no more connections allowed (max_client_conn)");
-        }
-        true
+                | &SqlState::INVALID_PARAMETER_VALUE
+        )
    }
 }

@@ -122,55 +110,3 @@ pub(crate) fn retry_after(num_retries: u32, config: RetryConfig) -> time::Durati
        .base_delay
        .mul_f64(config.backoff_factor.powi((num_retries as i32) - 1))
 }
-
-#[cfg(test)]
-mod tests {
-    use super::ShouldRetryWakeCompute;
-    use postgres_client::error::{DbError, SqlState};
-
-    #[test]
-    fn should_retry_wake_compute_for_db_error() {
-        // These SQLStates should NOT trigger a wake_compute retry.
-        let non_retry_states = [
-            SqlState::TOO_MANY_CONNECTIONS,
-            SqlState::OUT_OF_MEMORY,
-            SqlState::SYNTAX_ERROR,
-            SqlState::T_R_SERIALIZATION_FAILURE,
-            SqlState::INVALID_CATALOG_NAME,
-            SqlState::INVALID_SCHEMA_NAME,
-            SqlState::INVALID_PARAMETER_VALUE,
-        ];
-        for state in non_retry_states {
-            let err = DbError::new_test_error(state.clone(), "oops".to_string());
-            assert!(
-                !err.should_retry_wake_compute(),
-                "State {state:?} unexpectedly retried"
-            );
-        }
-
-        // Errors coming from pgbouncer should not trigger a wake_compute retry
-        let non_retry_pgbouncer_errors = ["no more connections allowed (max_client_conn)"];
-        for error in non_retry_pgbouncer_errors {
-            let err = DbError::new_test_error(SqlState::PROTOCOL_VIOLATION, error.to_string());
-            assert!(
-                !err.should_retry_wake_compute(),
-                "PGBouncer error {error:?} unexpectedly retried"
-            );
-        }
-
-        // These SQLStates should trigger a wake_compute retry.
-        let retry_states = [
-            SqlState::CONNECTION_FAILURE,
-            SqlState::CONNECTION_EXCEPTION,
-            SqlState::CONNECTION_DOES_NOT_EXIST,
-            SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION,
-        ];
-        for state in retry_states {
-            let err = DbError::new_test_error(state.clone(), "oops".to_string());
-            assert!(
-                err.should_retry_wake_compute(),
-                "State {state:?} unexpectedly skipped retry"
-            );
-        }
-    }
-}
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -15,7 +15,6 @@ use rstest::rstest;
 use rustls::crypto::ring;
 use rustls::pki_types;
 use tokio::io::DuplexStream;
-use tracing_test::traced_test;

 use super::connect_compute::ConnectMechanism;
 use super::retry::CouldRetry;
@@ -382,14 +381,8 @@ enum ConnectAction {
    WakeFail,
    WakeRetry,
    Connect,
-    // connect_once -> Err, could_retry = true, should_retry_wake_compute = true
    Retry,
-    // connect_once -> Err, could_retry = true, should_retry_wake_compute = false
-    RetryNoWake,
-    // connect_once -> Err, could_retry = false, should_retry_wake_compute = true
    Fail,
-    // connect_once -> Err, could_retry = false, should_retry_wake_compute = false
-    FailNoWake,
 }

 #[derive(Clone)]
@@ -431,7 +424,6 @@ struct TestConnection;
 #[derive(Debug)]
 struct TestConnectError {
    retryable: bool,
-    wakeable: bool,
    kind: crate::error::ErrorKind,
 }

@@ -456,7 +448,7 @@ impl CouldRetry for TestConnectError {
 }
 impl ShouldRetryWakeCompute for TestConnectError {
    fn should_retry_wake_compute(&self) -> bool {
-        self.wakeable
+        true
    }
 }

@@ -479,22 +471,10 @@ impl ConnectMechanism for TestConnectMechanism {
            ConnectAction::Connect => Ok(TestConnection),
            ConnectAction::Retry => Err(TestConnectError {
                retryable: true,
-                wakeable: true,
-                kind: ErrorKind::Compute,
-            }),
-            ConnectAction::RetryNoWake => Err(TestConnectError {
-                retryable: true,
-                wakeable: false,
                kind: ErrorKind::Compute,
            }),
            ConnectAction::Fail => Err(TestConnectError {
                retryable: false,
-                wakeable: true,
-                kind: ErrorKind::Compute,
-            }),
-            ConnectAction::FailNoWake => Err(TestConnectError {
-                retryable: false,
-                wakeable: false,
                kind: ErrorKind::Compute,
            }),
            x => panic!("expecting action {x:?}, connect is called instead"),
@@ -729,92 +709,3 @@ async fn wake_non_retry() {
        .unwrap_err();
    mechanism.verify();
 }
-
-#[tokio::test]
-#[traced_test]
-async fn fail_but_wake_invalidates_cache() {
-    let ctx = RequestContext::test();
-    let mech = TestConnectMechanism::new(vec![
-        ConnectAction::Wake,
-        ConnectAction::Fail,
-        ConnectAction::Wake,
-        ConnectAction::Connect,
-    ]);
-    let user = helper_create_connect_info(&mech);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
-        .await
-        .unwrap();
-
-    assert!(logs_contain(
-        "invalidating stalled compute node info cache entry"
-    ));
-}
-
-#[tokio::test]
-#[traced_test]
-async fn fail_no_wake_skips_cache_invalidation() {
-    let ctx = RequestContext::test();
-    let mech = TestConnectMechanism::new(vec![
-        ConnectAction::Wake,
-        ConnectAction::FailNoWake,
-        ConnectAction::Connect,
-    ]);
-    let user = helper_create_connect_info(&mech);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
-        .await
-        .unwrap();
-
-    assert!(!logs_contain(
-        "invalidating stalled compute node info cache entry"
-    ));
-}
-
-#[tokio::test]
-#[traced_test]
-async fn retry_but_wake_invalidates_cache() {
-    let _ = env_logger::try_init();
-    use ConnectAction::*;
-
-    let ctx = RequestContext::test();
-    // Wake → Retry (retryable + wakeable) → Wake → Connect
-    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);
-    let user_info = helper_create_connect_info(&mechanism);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
-        .await
-        .unwrap();
-    mechanism.verify();
-
-    // Because Retry has wakeable=true, we should see invalidate_cache
-    assert!(logs_contain(
-        "invalidating stalled compute node info cache entry"
-    ));
-}
-
-#[tokio::test]
-#[traced_test]
-async fn retry_no_wake_skips_invalidation() {
-    let _ = env_logger::try_init();
-    use ConnectAction::*;
-
-    let ctx = RequestContext::test();
-    // Wake → RetryNoWake (retryable + NOT wakeable)
-    let mechanism = TestConnectMechanism::new(vec![Wake, RetryNoWake]);
-    let user_info = helper_create_connect_info(&mechanism);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
-        .await
-        .unwrap_err();
-    mechanism.verify();
-
-    // Because RetryNoWake has wakeable=false, we must NOT see invalidate_cache
-    assert!(!logs_contain(
-        "invalidating stalled compute node info cache entry"
-    ));
-}
--- a/proxy/src/scram/pbkdf2.rs
+++ b/proxy/src/scram/pbkdf2.rs
@@ -13,19 +13,22 @@ pub(crate) struct Pbkdf2 {
 // inspired from <https://github.com/neondatabase/rust-postgres/blob/20031d7a9ee1addeae6e0968e3899ae6bf01cee2/postgres-protocol/src/authentication/sasl.rs#L36-L61>
 impl Pbkdf2 {
    pub(crate) fn start(str: &[u8], salt: &[u8], iterations: u32) -> Self {
-        // key the HMAC and derive the first block in-place
-        let mut hmac =
+        let hmac =
            Hmac::<Sha256>::new_from_slice(str).expect("HMAC is able to accept all key sizes");
-        hmac.update(salt);
-        hmac.update(&1u32.to_be_bytes());
-        let init_block = hmac.finalize_reset().into_bytes();
+
+        let prev = hmac
+            .clone()
+            .chain_update(salt)
+            .chain_update(1u32.to_be_bytes())
+            .finalize()
+            .into_bytes();

        Self {
            hmac,
-            // one iteration spent above
+            // one consumed for the hash above
            iterations: iterations - 1,
-            hi: init_block,
-            prev: init_block,
+            hi: prev,
+            prev,
        }
    }

@@ -41,17 +44,14 @@ impl Pbkdf2 {
            iterations,
        } = self;

-        // only do up to 4096 iterations per turn for fairness
+        // only do 4096 iterations per turn before sharing the thread for fairness
        let n = (*iterations).clamp(0, 4096);
        for _ in 0..n {
-            hmac.update(prev);
-            let block = hmac.finalize_reset().into_bytes();
+            *prev = hmac.clone().chain_update(*prev).finalize().into_bytes();

-            for (hi_byte, &b) in hi.iter_mut().zip(block.iter()) {
-                *hi_byte ^= b;
+            for (hi, prev) in hi.iter_mut().zip(*prev) {
+                *hi ^= prev;
            }
-
-            *prev = block;
        }

        *iterations -= n;
--- a/proxy/src/url.rs
+++ b/proxy/src/url.rs
@@ -43,12 +43,6 @@ impl std::ops::Deref for ApiUrl {
    }
 }

-impl std::ops::DerefMut for ApiUrl {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.0
-    }
-}
-
 impl std::fmt::Display for ApiUrl {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -184,7 +184,6 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
    "pageserver_evictions_with_low_residence_duration_total",
    "pageserver_aux_file_estimated_size",
    "pageserver_valid_lsn_lease_count",
-    "pageserver_tenant_offloaded_timelines",
    counter("pageserver_tenant_throttling_count_accounted_start"),
    counter("pageserver_tenant_throttling_count_accounted_finish"),
    counter("pageserver_tenant_throttling_wait_usecs_sum"),
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -103,7 +103,7 @@ class AbstractNeonCli:
            else:
                stdout = ""

-            log.warning(f"CLI timeout: stderr={stderr}, stdout={stdout}")
+            log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
            raise

        indent = "  "
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -187,7 +187,6 @@ def test_fully_custom_config(positive_env: NeonEnv):
            "args": {"format": "bincode", "compression": {"zstd": {"level": 1}}},
        },
        "rel_size_v2_enabled": True,
-        "relsize_snapshot_cache_capacity": 10000,
        "gc_compaction_enabled": True,
        "gc_compaction_verification": False,
        "gc_compaction_initial_threshold_kb": 1024000,
--- a/test_runner/regress/test_compute_catalog.py
+++ b/test_runner/regress/test_compute_catalog.py
@@ -19,16 +19,6 @@ TEST_ROLE_NAMES = [
    {"name": "role$"},
    {"name": "role$$"},
    {"name": "role$x$"},
-    {"name": "x"},
-    {"name": "xx"},
-    {"name": "$x"},
-    {"name": "x$"},
-    {"name": "$x$"},
-    {"name": "xx$"},
-    {"name": "$xx"},
-    {"name": "$xx$"},
-    # 63 bytes is the limit for role/DB names in Postgres
-    {"name": "x" * 63},
 ]

 TEST_DB_NAMES = [
@@ -84,43 +74,6 @@ TEST_DB_NAMES = [
        "name": "db name$x$",
        "owner": "role$x$",
    },
-    {
-        "name": "x",
-        "owner": "x",
-    },
-    {
-        "name": "xx",
-        "owner": "xx",
-    },
-    {
-        "name": "$x",
-        "owner": "$x",
-    },
-    {
-        "name": "x$",
-        "owner": "x$",
-    },
-    {
-        "name": "$x$",
-        "owner": "$x$",
-    },
-    {
-        "name": "xx$",
-        "owner": "xx$",
-    },
-    {
-        "name": "$xx",
-        "owner": "$xx",
-    },
-    {
-        "name": "$xx$",
-        "owner": "$xx$",
-    },
-    # 63 bytes is the limit for role/DB names in Postgres
-    {
-        "name": "x" * 63,
-        "owner": "x" * 63,
-    },
 ]


@@ -193,10 +146,6 @@ def test_compute_create_drop_dbs_and_roles(neon_simple_env: NeonEnv):
    """
    Test that compute_ctl can create and work with databases and roles
    with special characters (whitespaces, %, tabs, etc.) in the name.
-    Also use `drop_subscriptions_before_start: true`. We do not actually
-    have any subscriptions in this test, so it should be no-op, but it
-    i) simulates the case when we create a second dev branch together with
-    a new project creation, and ii) just generally stresses more code paths.
    """
    env = neon_simple_env

@@ -210,7 +159,6 @@ def test_compute_create_drop_dbs_and_roles(neon_simple_env: NeonEnv):
        **{
            "spec": {
                "skip_pg_catalog_updates": False,
-                "drop_subscriptions_before_start": True,
                "cluster": {
                    "roles": TEST_ROLE_NAMES,
                    "databases": TEST_DB_NAMES,
@@ -254,7 +202,6 @@ def test_compute_create_drop_dbs_and_roles(neon_simple_env: NeonEnv):
        **{
            "spec": {
                "skip_pg_catalog_updates": False,
-                "drop_subscriptions_before_start": True,
                "cluster": {
                    "roles": [],
                    "databases": [],
--- a/test_runner/regress/test_pageserver_secondary.py
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -510,7 +510,7 @@ def list_elegible_layers(
        except KeyError:
            # Unexpected: tests should call this when pageservers are in a quiet state such that the layer map
            # matches what's on disk.
-            log.warning(f"Lookup {layer_file_name} from {list(visible_map.keys())}")
+            log.warn(f"Lookup {layer_file_name} from {list(visible_map.keys())}")
            raise

    return list(c for c in candidates if is_visible(c))
@@ -636,7 +636,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
    except:
        # On assertion failures, log some details to help with debugging
        heatmap = env.pageserver_remote_storage.heatmap_content(tenant_id)
-        log.warning(f"heatmap contents: {json.dumps(heatmap, indent=2)}")
+        log.warn(f"heatmap contents: {json.dumps(heatmap, indent=2)}")
        raise

    # Scrub the remote storage
--- a/test_runner/regress/test_replica_start.py
+++ b/test_runner/regress/test_replica_start.py
@@ -27,9 +27,8 @@ from contextlib import closing

 import psycopg2
 import pytest
-from fixtures.common_types import Lsn
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, PgBin, wait_for_last_flush_lsn, wait_replica_caughtup
+from fixtures.neon_fixtures import NeonEnv, wait_for_last_flush_lsn, wait_replica_caughtup
 from fixtures.pg_version import PgVersion
 from fixtures.utils import query_scalar, skip_on_postgres, wait_until

@@ -696,110 +695,3 @@ def test_replica_start_with_too_many_unused_xids(neon_simple_env: NeonEnv):
    with secondary.cursor() as secondary_cur:
        secondary_cur.execute("select count(*) from t")
        assert secondary_cur.fetchone() == (n_restarts,)
-
-
-def test_ephemeral_endpoints_vacuum(neon_simple_env: NeonEnv, pg_bin: PgBin):
-    env = neon_simple_env
-    endpoint = env.endpoints.create_start("main")
-
-    sql = """
-CREATE TABLE CHAR_TBL(f1 char(4));
-CREATE TABLE FLOAT8_TBL(f1 float8);
-CREATE TABLE INT2_TBL(f1 int2);
-CREATE TABLE INT4_TBL(f1 int4);
-CREATE TABLE INT8_TBL(q1 int8, q2 int8);
-CREATE TABLE POINT_TBL(f1 point);
-CREATE TABLE TEXT_TBL (f1 text);
-CREATE TABLE VARCHAR_TBL(f1 varchar(4));
-CREATE TABLE onek (unique1		int4);
-CREATE TABLE onek2 AS SELECT * FROM onek;
-CREATE TABLE tenk1 (unique1		int4);
-CREATE TABLE tenk2 AS SELECT * FROM tenk1;
-CREATE TABLE person (name text, age int4,location point);
-CREATE TABLE emp (salary int4, manager name) INHERITS (person);
-CREATE TABLE student (gpa float8) INHERITS (person);
-CREATE TABLE stud_emp (	percent 	int4) INHERITS (emp, student);
-CREATE TABLE road (name		text,thepath 	path);
-CREATE TABLE ihighway () INHERITS (road);
-CREATE TABLE shighway(surface		text) INHERITS (road);
-CREATE TABLE BOOLTBL3 (d text, b bool, o int);
-CREATE TABLE booltbl4(isfalse bool, istrue bool, isnul bool);
-DROP TABLE BOOLTBL3;
-DROP TABLE BOOLTBL4;
-CREATE TABLE ceil_floor_round (a numeric);
-DROP TABLE ceil_floor_round;
-CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
-DROP TABLE width_bucket_test;
-CREATE TABLE num_input_test (n1 numeric);
-CREATE TABLE num_variance (a numeric);
-INSERT INTO num_variance VALUES (0);
-CREATE TABLE snapshot_test (nr	integer, snap	txid_snapshot);
-CREATE TABLE guid1(guid_field UUID, text_field TEXT DEFAULT(now()));
-CREATE TABLE guid2(guid_field UUID, text_field TEXT DEFAULT(now()));
-CREATE INDEX guid1_btree ON guid1 USING BTREE (guid_field);
-CREATE INDEX guid1_hash  ON guid1 USING HASH  (guid_field);
-TRUNCATE guid1;
-DROP TABLE guid1;
-DROP TABLE guid2 CASCADE;
-CREATE TABLE numrange_test (nr NUMRANGE);
-CREATE INDEX numrange_test_btree on numrange_test(nr);
-CREATE TABLE numrange_test2(nr numrange);
-CREATE INDEX numrange_test2_hash_idx on numrange_test2 using hash (nr);
-INSERT INTO numrange_test2 VALUES('[, 5)');
-CREATE TABLE textrange_test (tr text);
-CREATE INDEX textrange_test_btree on textrange_test(tr);
-CREATE TABLE test_range_gist(ir int4range);
-CREATE INDEX test_range_gist_idx on test_range_gist using gist (ir);
-DROP INDEX test_range_gist_idx;
-CREATE INDEX test_range_gist_idx on test_range_gist using gist (ir);
-CREATE TABLE test_range_spgist(ir int4range);
-CREATE INDEX test_range_spgist_idx on test_range_spgist using spgist (ir);
-DROP INDEX test_range_spgist_idx;
-CREATE INDEX test_range_spgist_idx on test_range_spgist using spgist (ir);
-CREATE TABLE test_range_elem(i int4);
-CREATE INDEX test_range_elem_idx on test_range_elem (i);
-CREATE INDEX ON test_range_elem using spgist(int4range(i,i+10));
-DROP TABLE test_range_elem;
-CREATE TABLE test_range_excl(room int4range, speaker int4range, during tsrange, exclude using gist (room with =, during with &&), exclude using gist (speaker with =, during with &&));
-CREATE TABLE f_test(f text, i int);
-CREATE TABLE i8r_array (f1 int, f2 text);
-CREATE TYPE arrayrange as range (subtype=int4[]);
-CREATE TYPE two_ints as (a int, b int);
-DROP TYPE two_ints cascade;
-CREATE TABLE text_support_test (t text);
-CREATE TABLE TEMP_FLOAT (f1 FLOAT8);
-CREATE TABLE TEMP_INT4 (f1 INT4);
-CREATE TABLE TEMP_INT2 (f1 INT2);
-CREATE TABLE TEMP_GROUP (f1 INT4, f2 INT4, f3 FLOAT8);
-CREATE TABLE POLYGON_TBL(f1 polygon);
-CREATE TABLE quad_poly_tbl (id int, p polygon);
-INSERT INTO quad_poly_tbl SELECT (x - 1) * 100 + y, polygon(circle(point(x * 10, y * 10), 1 + (x + y) % 10)) FROM generate_series(1, 200) x, generate_series(1, 100) y;
-CREATE TABLE quad_poly_tbl_ord_seq2 AS SELECT 1 FROM quad_poly_tbl;
-CREATE TABLE quad_poly_tbl_ord_idx2 AS SELECT 1 FROM quad_poly_tbl;
-"""
-
-    with endpoint.cursor() as cur:
-        lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
-        env.endpoints.create_start(branch_name="main", lsn=lsn)
-        log.info(f"lsn: {lsn}")
-
-        for line in sql.split("\n"):
-            if len(line.strip()) == 0 or line.startswith("--"):
-                continue
-            cur.execute(line)
-
-        lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
-        env.endpoints.create_start(branch_name="main", lsn=lsn)
-        log.info(f"lsn: {lsn}")
-
-        cur.execute("VACUUM FULL pg_class;")
-
-    for ep in env.endpoints.endpoints:
-        log.info(f"{ep.endpoint_id} / {ep.pg_port}")
-        pg_dump_command = ["pg_dumpall", "-f", f"/tmp/dump-{ep.endpoint_id}.sql"]
-        env_vars = {
-            "PGPORT": str(ep.pg_port),
-            "PGUSER": endpoint.default_options["user"],
-            "PGHOST": endpoint.default_options["host"],
-        }
-        pg_bin.run_capture(pg_dump_command, env=env_vars)
--- a/test_runner/regress/test_timeline_archive.py
+++ b/test_runner/regress/test_timeline_archive.py
@@ -193,11 +193,6 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
        "test_ancestor_branch_archive_branch1", tenant_id, "test_ancestor_branch_archive_parent"
    )

-    offloaded_count = ps_http.get_metric_value(
-        "pageserver_tenant_offloaded_timelines", {"tenant_id": f"{tenant_id}"}
-    )
-    assert offloaded_count == 0
-
    ps_http.timeline_archival_config(
        tenant_id,
        leaf_timeline_id,
@@ -249,11 +244,6 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
    wait_until(leaf_offloaded)
    wait_until(parent_offloaded)

-    offloaded_count = ps_http.get_metric_value(
-        "pageserver_tenant_offloaded_timelines", {"tenant_id": f"{tenant_id}"}
-    )
-    assert offloaded_count == 2
-
    # Offloaded child timelines should still prevent deletion
    with pytest.raises(
        PageserverApiException,
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
    "17.5",
-    "8be779fd3ab9e87206da96a7e4842ef1abf04f44"
+    "e5374b72997b0afc8374137674e873f7a558120a"
  ],
  "v16": [
    "16.9",
-    "0bf96bd6d70301a0b43b0b3457bb3cf8fb43c198"
+    "15710a76b7d07912110fcbbaf0c8ad6d7e5a9fbc"
  ],
  "v15": [
    "15.13",
-    "de7640f55da07512834d5cc40c4b3fb376b5f04f"
+    "daa81cffcf063c54b29a9aabdb6604625f675ad0"
  ],
  "v14": [
    "14.18",
-    "55c0d45abe6467c02084c2192bca117eda6ce1e7"
+    "4cca6f8083483dda9e12eae292cf788d45bd561f"
  ]
 }
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -60,8 +60,7 @@ lazy_static = { version = "1", default-features = false, features = ["spin_no_st
 libc = { version = "0.2", features = ["extra_traits", "use_std"] }
 log = { version = "0.4", default-features = false, features = ["std"] }
 memchr = { version = "2" }
-nix-2f80eeee3b1b6c7e = { package = "nix", version = "0.26" }
-nix-fa1f6196edfd7249 = { package = "nix", version = "0.30", features = ["dir", "ioctl", "mman", "poll", "signal", "socket"] }
+nix = { version = "0.26" }
 nom = { version = "7" }
 num = { version = "0.4" }
 num-bigint = { version = "0.4" }
@@ -107,7 +106,6 @@ tower = { version = "0.4", default-features = false, features = ["balance", "buf
 tracing = { version = "0.1", features = ["log"] }
 tracing-core = { version = "0.1" }
 tracing-log = { version = "0.2" }
-tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
 url = { version = "2", features = ["serde"] }
 uuid = { version = "1", features = ["serde", "v4", "v7"] }
 zeroize = { version = "1", features = ["derive", "serde"] }