diff --git a/.circleci/config.yml b/.circleci/config.yml
index 24d151f765..c94dd20ff0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -7,7 +7,7 @@ executors:
   zenith-build-executor:
     resource_class: xlarge
     docker:
-      - image: cimg/rust:1.52.1
+      - image: cimg/rust:1.55.0
 
 jobs:
   check-codestyle:
@@ -110,7 +110,7 @@ jobs:
             # Require an exact match. While an out of date cache might speed up the build,
             # there's no way to clean out old packages, so the cache grows every time something
             # changes.
-            - v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
+            - v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
 
         # Build the rust code, including test binaries
       - run:
@@ -128,7 +128,7 @@ jobs:
 
       - save_cache:
           name: Save rust cache
-          key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
+          key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
           paths:
             - ~/.cargo/registry
             - ~/.cargo/git
@@ -182,6 +182,21 @@ jobs:
           paths:
             - "*"
 
+  check-python:  # Checks the formatting of the Python code under test_runner with yapf
+    executor: python/default
+    steps:
+      - checkout
+      - run:
+          name: Install pipenv & deps
+          working_directory: test_runner
+          command: |
+            pip install pipenv
+            pipenv install --dev
+      - run:
+          name: Run yapf to ensure code format
+          working_directory: test_runner
+          command: pipenv run yapf --recursive --diff .  # --diff prints the needed changes instead of rewriting files
+
   run-pytest:
     #description: "Run pytest"
     executor: python/default
@@ -245,13 +260,13 @@ jobs:
             #
             # The junit.xml file allows CircleCI to display more fine-grained test information
             # in its "Tests" tab in the results page.
-            # -s prevents pytest from capturing output, which helps to see
-            # what's going on if the test hangs
             # --verbose prints name of each test (helpful when there are
             # multiple tests in one file)
             # -rA prints summary in the end
             # -n4 uses four processes to run tests via pytest-xdist
-            pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short -s --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
+            # -s (which would stop pytest from capturing output) is deliberately not used:
+            # tests run in parallel, so uncaptured logs of different tests would get mixed together
+            pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
       - run:
           # CircleCI artifacts are preserved one file at a time, so skipping
           # this step isn't a good idea. If you want to extract the
@@ -260,7 +275,7 @@ jobs:
           when: always
           command: |
             du -sh /tmp/test_output/*
-            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "wal_acceptor.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
+            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
             du -sh /tmp/test_output/*
       - store_artifacts:
           path: /tmp/test_output
@@ -325,8 +340,7 @@ jobs:
                 \"inputs\": {
                   \"ci_job_name\": \"zenith-remote-ci\",
                   \"commit_hash\": \"$CIRCLE_SHA1\",
-                  \"remote_repo\": \"$LOCAL_REPO\",
-                  \"zenith_image_branch\": \"$CIRCLE_BRANCH\"
+                  \"remote_repo\": \"$LOCAL_REPO\"
                 }
               }"
 
@@ -334,6 +348,7 @@ workflows:
   build_and_test:
     jobs:
       - check-codestyle
+      - check-python
       - build-postgres:
           name: build-postgres-<< matrix.build_type >>
           matrix:
diff --git a/Cargo.lock b/Cargo.lock
index c217dfbebb..5f36f48966 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -26,18 +26,21 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "anyhow"
 version = "1.0.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
 
-[[package]]
-name = "arc-swap"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e906254e445520903e7fc9da4f709886c84ae4bc4ddaf0e093188d66df4dc820"
-
 [[package]]
 name = "async-trait"
 version = "0.1.50"
@@ -298,7 +301,7 @@ version = "2.33.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
 dependencies = [
- "ansi_term",
+ "ansi_term 0.11.0",
  "atty",
  "bitflags",
  "strsim",
@@ -387,26 +390,6 @@ dependencies = [
  "rustc_version",
 ]
 
-[[package]]
-name = "crossbeam-channel"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
-dependencies = [
- "cfg-if 1.0.0",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-utils"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
-dependencies = [
- "cfg-if 1.0.0",
- "lazy_static",
-]
-
 [[package]]
 name = "crypto-mac"
 version = "0.10.0"
@@ -445,16 +428,6 @@ dependencies = [
  "dirs-sys",
 ]
 
-[[package]]
-name = "dirs-next"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
-dependencies = [
- "cfg-if 1.0.0",
- "dirs-sys-next",
-]
-
 [[package]]
 name = "dirs-sys"
 version = "0.3.6"
@@ -466,17 +439,6 @@ dependencies = [
  "winapi",
 ]
 
-[[package]]
-name = "dirs-sys-next"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
-dependencies = [
- "libc",
- "redox_users",
- "winapi",
-]
-
 [[package]]
 name = "dlv-list"
 version = "0.2.3"
@@ -956,6 +918,15 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
+[[package]]
+name = "matchers"
+version = "0.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1"
+dependencies = [
+ "regex-automata",
+]
+
 [[package]]
 name = "matches"
 version = "0.1.8"
@@ -1220,10 +1191,12 @@ dependencies = [
  "scopeguard",
  "serde",
  "serde_json",
+ "signal-hook",
  "tar",
  "thiserror",
  "tokio",
  "toml",
+ "tracing",
  "workspace_hack",
  "zenith_metrics",
  "zenith_utils",
@@ -1531,6 +1504,15 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+dependencies = [
+ "regex-syntax",
+]
+
 [[package]]
 name = "regex-syntax"
 version = "0.6.25"
@@ -1689,12 +1671,6 @@ dependencies = [
  "webpki",
 ]
 
-[[package]]
-name = "rustversion"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
-
 [[package]]
 name = "ryu"
 version = "1.0.5"
@@ -1852,12 +1828,32 @@ dependencies = [
  "opaque-debug",
 ]
 
+[[package]]
+name = "sharded-slab"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982"
+dependencies = [
+ "lazy_static",
+]
+
 [[package]]
 name = "shlex"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d"
 
+[[package]]
+name = "signal-hook"
+version = "0.3.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c98891d737e271a2954825ef19e46bd16bdb98e2746f2eec4f7a4ef7946efd1"
+dependencies = [
+ "cc",
+ "libc",
+ "signal-hook-registry",
+]
+
 [[package]]
 name = "signal-hook-registry"
 version = "1.4.0"
@@ -1890,59 +1886,6 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527"
 
-[[package]]
-name = "slog"
-version = "2.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
-
-[[package]]
-name = "slog-async"
-version = "2.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c60813879f820c85dbc4eabf3269befe374591289019775898d56a81a804fbdc"
-dependencies = [
- "crossbeam-channel",
- "slog",
- "take_mut",
- "thread_local",
-]
-
-[[package]]
-name = "slog-scope"
-version = "4.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
-dependencies = [
- "arc-swap",
- "lazy_static",
- "slog",
-]
-
-[[package]]
-name = "slog-stdlog"
-version = "4.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a"
-dependencies = [
- "log",
- "slog",
- "slog-scope",
-]
-
-[[package]]
-name = "slog-term"
-version = "2.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95c1e7e5aab61ced6006149ea772770b84a0d16ce0f7885def313e4829946d76"
-dependencies = [
- "atty",
- "chrono",
- "slog",
- "term",
- "thread_local",
-]
-
 [[package]]
 name = "smallvec"
 version = "1.6.1"
@@ -1998,12 +1941,6 @@ dependencies = [
  "unicode-xid",
 ]
 
-[[package]]
-name = "take_mut"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
-
 [[package]]
 name = "tap"
 version = "1.0.1"
@@ -2035,17 +1972,6 @@ dependencies = [
  "winapi",
 ]
 
-[[package]]
-name = "term"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
-dependencies = [
- "dirs-next",
- "rustversion",
- "winapi",
-]
-
 [[package]]
 name = "termcolor"
 version = "1.1.2"
@@ -2223,24 +2149,79 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
 
 [[package]]
 name = "tracing"
-version = "0.1.26"
+version = "0.1.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d"
+checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
 dependencies = [
  "cfg-if 1.0.0",
  "pin-project-lite",
+ "tracing-attributes",
  "tracing-core",
 ]
 
 [[package]]
-name = "tracing-core"
+name = "tracing-attributes"
 version = "0.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052"
+checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
 dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "tracing-log"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
+dependencies = [
+ "lazy_static",
+ "log",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-serde"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b"
+dependencies = [
+ "serde",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.2.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71"
+dependencies = [
+ "ansi_term 0.12.1",
+ "chrono",
+ "lazy_static",
+ "matchers",
+ "regex",
+ "serde",
+ "serde_json",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
+ "tracing-serde",
+]
+
 [[package]]
 name = "try-lock"
 version = "0.2.3"
@@ -2339,11 +2320,13 @@ dependencies = [
  "byteorder",
  "bytes",
  "clap",
+ "const_format",
  "crc32c",
  "daemonize",
  "fs2",
  "hex",
  "humantime",
+ "hyper",
  "lazy_static",
  "log",
  "pageserver",
@@ -2351,6 +2334,7 @@ dependencies = [
  "postgres-protocol",
  "postgres_ffi",
  "regex",
+ "routerify",
  "rust-s3",
  "serde",
  "serde_json",
@@ -2358,6 +2342,7 @@ dependencies = [
  "tokio-stream",
  "walkdir",
  "workspace_hack",
+ "zenith_metrics",
  "zenith_utils",
 ]
 
@@ -2603,14 +2588,12 @@ dependencies = [
  "rustls-split",
  "serde",
  "serde_json",
- "slog",
- "slog-async",
- "slog-scope",
- "slog-stdlog",
- "slog-term",
  "tempfile",
  "thiserror",
  "tokio",
+ "tracing",
+ "tracing-log",
+ "tracing-subscriber",
  "webpki",
  "workspace_hack",
  "zenith_metrics",
diff --git a/Dockerfile b/Dockerfile
index b38bac4480..528f29597f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,7 +37,7 @@ RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl
     mkdir zenith_install
 
 COPY --from=build /zenith/target/release/pageserver /usr/local/bin
-COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
+COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
 COPY --from=build /zenith/target/release/proxy /usr/local/bin
 COPY --from=pg-build /zenith/tmp_install postgres_install
 COPY docker-entrypoint.sh /docker-entrypoint.sh
diff --git a/Dockerfile.alpine b/Dockerfile.alpine
index a2a2fea1a4..dafb7eaf6b 100644
--- a/Dockerfile.alpine
+++ b/Dockerfile.alpine
@@ -81,7 +81,7 @@ FROM alpine:3.13
 RUN apk add --update openssl build-base libseccomp-dev
 RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
 COPY --from=build /zenith/target/release/pageserver /usr/local/bin
-COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
+COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
 COPY --from=build /zenith/target/release/proxy /usr/local/bin
 COPY --from=pg-build /zenith/tmp_install /usr/local
 COPY docker-entrypoint.sh /docker-entrypoint.sh
diff --git a/Makefile b/Makefile
index 2edf2a6b4a..ef26ceee2d 100644
--- a/Makefile
+++ b/Makefile
@@ -10,32 +10,43 @@ endif
 # We differentiate between release / debug build types using the BUILD_TYPE
 # environment variable.
 #
+BUILD_TYPE ?= debug
 ifeq ($(BUILD_TYPE),release)
 	PG_CONFIGURE_OPTS = --enable-debug
-	PG_CFLAGS = -O2 -g3 ${CFLAGS}
-else
+	PG_CFLAGS = -O2 -g3 $(CFLAGS)
+	# Unfortunately, `--profile=...` is a nightly feature
+	CARGO_BUILD_FLAGS += --release
+else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
-	PG_CFLAGS = -O0 -g3 ${CFLAGS}
+	PG_CFLAGS = -O0 -g3 $(CFLAGS)
+else
+$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
 endif
 
+# Be quiet under `make -s`, verbose otherwise. Short flags are grouped in MAKEFLAGS' first word (e.g. "ks").
+CARGO_BUILD_FLAGS += --$(if $(findstring s,$(firstword -$(MAKEFLAGS))),quiet,verbose)
+# Fix for a corner case when make doesn't pass a jobserver
+CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
+
+# This option has a side effect of passing make jobserver to cargo.
+# However, we shouldn't do this if `make -n` (--dry-run) has been asked.
+CARGO_CMD_PREFIX += $(if $(findstring n,$(firstword -$(MAKEFLAGS))),,+)
+# Force cargo not to print progress bar
+CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
+
 #
 # Top level Makefile to build Zenith and PostgreSQL
 #
+.PHONY: all
 all: zenith postgres
 
-# We don't want to run 'cargo build' in parallel with the postgres build,
-# because interleaving cargo build output with postgres build output looks
-# confusing. Also, 'cargo build' is parallel on its own, so it would be too
-# much parallelism. (Recursive invocation of postgres target still gets any
-# '-j' flag from the command line, so 'make -j' is still useful.)
-.NOTPARALLEL:
-
 ### Zenith Rust bits
 #
 # The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: zenith
 zenith: postgres-headers
-	cargo build
+	+@echo "Compiling Zenith"
+	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
 
 ### PostgreSQL parts
 tmp_install/build/config.status:
@@ -57,10 +68,10 @@ postgres-headers: postgres-configure
 	+@echo "Installing PostgreSQL headers"
 	$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
 
-
 # Compile and install PostgreSQL and contrib/zenith
 .PHONY: postgres
-postgres: postgres-configure
+postgres: postgres-configure \
+		  postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
 	+@echo "Compiling PostgreSQL"
 	$(MAKE) -C tmp_install/build MAKELEVEL=0 install
 	+@echo "Compiling contrib/zenith"
@@ -68,18 +79,21 @@ postgres: postgres-configure
 	+@echo "Compiling contrib/zenith_test_utils"
 	$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
 
+.PHONY: postgres-clean
 postgres-clean:
 	$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
 
 # This doesn't remove the effects of 'configure'.
+.PHONY: clean
 clean:
-	cd tmp_install/build && ${MAKE} clean
-	cargo clean
+	cd tmp_install/build && $(MAKE) clean
+	$(CARGO_CMD_PREFIX) cargo clean
 
 # This removes everything
+.PHONY: distclean
 distclean:
 	rm -rf tmp_install
-	cargo clean
+	$(CARGO_CMD_PREFIX) cargo clean
 
 .PHONY: fmt
 fmt:
diff --git a/README.md b/README.md
index 1e0f20fd45..977d015bfc 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
 libssl-dev clang pkg-config libpq-dev
 ```
 
-[Rust] 1.52 or later is also required.
+[Rust] 1.55 or later is also required.
 
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
 
diff --git a/control_plane/src/compute.rs b/control_plane/src/compute.rs
index 5b4313494b..fb98eeca03 100644
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -84,25 +84,53 @@ impl ComputeControlPlane {
         }
     }
 
+    // FIXME: see also parse_point_in_time in branches.rs.
+    /// Splits a `<branch>[@<lsn>]` spec into the branch's timeline ID and an optional LSN.
+    ///
+    /// The LSN text is parsed before the branch is looked up, so a malformed LSN is
+    /// reported first. Anything after a second '@' is ignored.
+    fn parse_point_in_time(
+        &self,
+        tenantid: ZTenantId,
+        s: &str,
+    ) -> Result<(ZTimelineId, Option<Lsn>)> {
+        let mut parts = s.split('@');
+        // `split` yields at least one item even for an empty string, so this cannot panic.
+        let branch_name = parts.next().unwrap();
+
+        let lsn = match parts.next() {
+            Some(text) => Some(
+                Lsn::from_str(text)
+                    .with_context(|| "invalid LSN in point-in-time specification")?,
+            ),
+            None => None,
+        };
+
+        // Resolve the timeline ID, given the human-readable branch name
+        let branch = self.pageserver.branch_get_by_name(&tenantid, branch_name)?;
+
+        Ok((branch.timeline_id, lsn))
+    }
+
     pub fn new_node(
         &mut self,
         tenantid: ZTenantId,
-        branch_name: &str,
+        name: &str,
+        timeline_spec: &str,
         port: Option<u16>,
     ) -> Result<Arc<PostgresNode>> {
-        let timeline_id = self
-            .pageserver
-            .branch_get_by_name(&tenantid, branch_name)?
-            .timeline_id;
+        // Resolve the human-readable timeline spec into timeline ID and LSN
+        let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?;
 
         let port = port.unwrap_or_else(|| self.get_port());
         let node = Arc::new(PostgresNode {
-            name: branch_name.to_owned(),
+            name: name.to_owned(),
             address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
             env: self.env.clone(),
             pageserver: Arc::clone(&self.pageserver),
             is_test: false,
-            timelineid: timeline_id,
+            timelineid,
+            lsn,
             tenantid,
             uses_wal_proposer: false,
         });
@@ -127,6 +155,7 @@ pub struct PostgresNode {
     pageserver: Arc<PageServerNode>,
     is_test: bool,
     pub timelineid: ZTimelineId,
+    pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
     pub tenantid: ZTenantId,
     uses_wal_proposer: bool,
 }
@@ -161,9 +190,12 @@ impl PostgresNode {
         let port: u16 = conf.parse_field("port", &context)?;
         let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
         let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
-
         let uses_wal_proposer = conf.get("wal_acceptors").is_some();
 
+        // parse recovery_target_lsn, if any
+        let recovery_target_lsn: Option<Lsn> =
+            conf.parse_field_optional("recovery_target_lsn", &context)?;
+
         // ok now
         Ok(PostgresNode {
             address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
@@ -172,6 +204,7 @@ impl PostgresNode {
             pageserver: Arc::clone(pageserver),
             is_test: false,
             timelineid,
+            lsn: recovery_target_lsn,
             tenantid,
             uses_wal_proposer,
         })
@@ -233,7 +266,7 @@ impl PostgresNode {
         // Read the archive directly from the `CopyOutReader`
         tar::Archive::new(copyreader)
             .unpack(&self.pgdata())
-            .with_context(|| "extracting page backup failed")?;
+            .with_context(|| "extracting base backup failed")?;
 
         Ok(())
     }
@@ -301,6 +334,9 @@ impl PostgresNode {
         conf.append("zenith.page_server_connstring", &pageserver_connstr);
         conf.append("zenith.zenith_tenant", &self.tenantid.to_string());
         conf.append("zenith.zenith_timeline", &self.timelineid.to_string());
+        if let Some(lsn) = self.lsn {
+            conf.append("recovery_target_lsn", &lsn.to_string());
+        }
         conf.append_line("");
 
         // Configure the node to stream WAL directly to the pageserver
@@ -314,7 +350,9 @@ impl PostgresNode {
     }
 
     fn load_basebackup(&self) -> Result<()> {
-        let lsn = if self.uses_wal_proposer {
+        let backup_lsn = if let Some(lsn) = self.lsn {
+            Some(lsn)
+        } else if self.uses_wal_proposer {
             // LSN 0 means that it is bootstrap and we need to download just
             // latest data from the pageserver. That is a bit clumsy but whole bootstrap
             // procedure evolves quite actively right now, so let's think about it again
@@ -329,7 +367,7 @@ impl PostgresNode {
             None
         };
 
-        self.do_basebackup(lsn)?;
+        self.do_basebackup(backup_lsn)?;
 
         Ok(())
     }
@@ -406,6 +444,10 @@ impl PostgresNode {
         // 3. Load basebackup
         self.load_basebackup()?;
 
+        if self.lsn.is_some() {
+            File::create(self.pgdata().join("standby.signal"))?;
+        }
+
         // 4. Finally start the compute node postgres
         println!("Starting postgres node at '{}'", self.connstr());
         self.pg_ctl(&["start"], auth_token)
diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs
index bcd463999b..7f50fe9c2f 100644
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -83,6 +83,22 @@ impl PostgresConf {
             .with_context(|| format!("could not parse '{}' option {}", field_name, context))
     }
 
+    /// Parses option `field_name` as `T`; yields `Ok(None)` when `get` has no value for it.
+    pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
+    where
+        T: FromStr,
+        <T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
+    {
+        let raw = match self.get(field_name) {
+            Some(raw) => raw,
+            None => return Ok(None),
+        };
+        let parsed = raw
+            .parse::<T>()
+            .with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
+        Ok(Some(parsed))
+    }
+
     ///
     /// Note: if you call this multiple times for the same option, the config
     /// file will a line for each call. It would be nice to have a function
diff --git a/control_plane/src/storage.rs b/control_plane/src/storage.rs
index 9d762c360f..3d331ca2a7 100644
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -199,23 +199,45 @@ impl PageServerNode {
         bail!("pageserver failed to start in {} seconds", RETRIES);
     }
 
-    pub fn stop(&self) -> anyhow::Result<()> {
+    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
         let pid = read_pidfile(&self.pid_file())?;
         let pid = Pid::from_raw(pid);
-        if kill(pid, Signal::SIGTERM).is_err() {
-            bail!("Failed to kill pageserver with pid {}", pid);
+        if immediate {
+            println!("Stop pageserver immediately");
+            if kill(pid, Signal::SIGQUIT).is_err() {
+                bail!("Failed to kill pageserver with pid {}", pid);
+            }
+        } else {
+            println!("Stop pageserver gracefully");
+            if kill(pid, Signal::SIGTERM).is_err() {
+                bail!("Failed to stop pageserver with pid {}", pid);
+            }
         }
 
-        // wait for pageserver stop
         let address = connection_address(&self.pg_connection_config);
-        for _ in 0..5 {
-            let stream = TcpStream::connect(&address);
-            thread::sleep(Duration::from_secs(1));
-            if let Err(_e) = stream {
-                println!("Pageserver stopped");
-                return Ok(());
+
+        // TODO Remove this "timeout" and handle it on caller side instead.
+        // Shutting down may take a long time,
+        // if pageserver checkpoints a lot of data
+        for _ in 0..100 {
+            if let Err(_e) = TcpStream::connect(&address) {
+                println!("Pageserver stopped receiving connections");
+
+                //Now check status
+                match self.check_status() {
+                    Ok(_) => {
+                        println!("Pageserver status is OK. Wait a bit.");
+                        thread::sleep(Duration::from_secs(1));
+                    }
+                    Err(err) => {
+                        println!("Pageserver status is: {}", err);
+                        return Ok(());
+                    }
+                }
+            } else {
+                println!("Pageserver still receives connections");
+                thread::sleep(Duration::from_secs(1));
             }
-            println!("Stopping pageserver on {}", address);
         }
 
         bail!("Failed to stop pageserver with pid {}", pid);
@@ -313,8 +335,9 @@ impl PageServerNode {
 
 impl Drop for PageServerNode {
     fn drop(&mut self) {
+        // TODO Looks like this flag is never set
         if self.kill_on_exit {
-            let _ = self.stop();
+            let _ = self.stop(true);
         }
     }
 }
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 3754c18193..566e77c1a4 100755
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -7,7 +7,7 @@ if [ "$1" = 'pageserver' ]; then
         pageserver --init -D /data --postgres-distrib /usr/local
     fi
     echo "Staring pageserver at 0.0.0.0:6400"
-    pageserver -l 0.0.0.0:6400 -D /data
+    pageserver -l 0.0.0.0:6400 --listen-http 0.0.0.0:9898 -D /data
 else
     "$@"
 fi
diff --git a/docs/docker.md b/docs/docker.md
index 9a909ebfe3..14ba2146cb 100644
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -4,7 +4,7 @@
 
 Currently we build two main images:
 
-- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `wal_acceptor` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
+- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
 - [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).
 
 And two intermediate images used either to reduce build time or to deliver some additional binary tools from other repos:
diff --git a/docs/multitenancy.md b/docs/multitenancy.md
index c9a95116c5..4f1d45e970 100644
--- a/docs/multitenancy.md
+++ b/docs/multitenancy.md
@@ -56,4 +56,4 @@ Tenant id is passed to postgres via GUC the same way as the timeline. Tenant id
 
 ### Safety
 
-For now particular tenant can only appear on a particular pageserver. Set of WAL acceptors are also pinned to particular (tenantid, timeline) pair so there can only be one writer for particular (tenantid, timeline).
+For now, a particular tenant can only appear on a particular pageserver. The set of safekeepers is also pinned to a particular (tenantid, timeline) pair, so there can be only one writer for a particular (tenantid, timeline).
diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml
index 0e5a82df88..33c911c840 100644
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -17,7 +17,7 @@ lazy_static = "1.4.0"
 log = "0.4.14"
 clap = "2.33.0"
 daemonize = "0.4.1"
-tokio = { version = "1.11", features = ["process", "macros", "fs"] }
+tokio = { version = "1.11", features = ["process", "macros", "fs", "rt"] }
 postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
 postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
@@ -35,6 +35,8 @@ scopeguard = "1.1.0"
 rust-s3 = { version = "0.27.0-rc4", features = ["no-verify-ssl"] }
 async-trait = "0.1"
 const_format = "0.2.21"
+tracing = "0.1.27"
+signal-hook = {version = "0.3.10", features = ["extended-siginfo"] }
 
 postgres_ffi = { path = "../postgres_ffi" }
 zenith_metrics = { path = "../zenith_metrics" }
diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs
index 5389d609a5..def815a32d 100644
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -13,6 +13,7 @@
 use anyhow::Result;
 use bytes::{BufMut, BytesMut};
 use log::*;
+use std::fmt::Write as FmtWrite;
 use std::io;
 use std::io::Write;
 use std::sync::Arc;
@@ -31,7 +32,7 @@ use zenith_utils::lsn::Lsn;
 pub struct Basebackup<'a> {
     ar: Builder<&'a mut dyn Write>,
     timeline: &'a Arc<dyn Timeline>,
-    lsn: Lsn,
+    pub lsn: Lsn,
     prev_record_lsn: Lsn,
 }
 
@@ -83,7 +84,7 @@ impl<'a> Basebackup<'a> {
 
         info!(
             "taking basebackup lsn={}, prev_lsn={}",
-            backup_prev, backup_lsn
+            backup_lsn, backup_prev
         );
 
         Ok(Basebackup {
@@ -97,7 +98,6 @@ impl<'a> Basebackup<'a> {
     pub fn send_tarball(&mut self) -> anyhow::Result<()> {
         // Create pgdata subdirs structure
         for dir in pg_constants::PGDATA_SUBDIRS.iter() {
-            info!("send subdir {:?}", *dir);
             let header = new_tar_header_dir(*dir)?;
             self.ar.append(&header, &mut io::empty())?;
         }
@@ -249,13 +249,7 @@ impl<'a> Basebackup<'a> {
         let mut pg_control = ControlFileData::decode(&pg_control_bytes)?;
         let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
 
-        // Generate new pg_control and WAL needed for bootstrap
-        let checkpoint_segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
-        let checkpoint_lsn = XLogSegNoOffsetToRecPtr(
-            checkpoint_segno,
-            XLOG_SIZE_OF_XLOG_LONG_PHD as u32,
-            pg_constants::WAL_SEGMENT_SIZE,
-        );
+        // Generate new pg_control needed for bootstrap
         checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
 
         //reset some fields we don't want to preserve
@@ -264,19 +258,24 @@ impl<'a> Basebackup<'a> {
         checkpoint.oldestActiveXid = 0;
 
         //save new values in pg_control
-        pg_control.checkPoint = checkpoint_lsn;
+        pg_control.checkPoint = 0;
         pg_control.checkPointCopy = checkpoint;
         pg_control.state = pg_constants::DB_SHUTDOWNED;
 
         // add zenith.signal file
-        let xl_prev = if self.prev_record_lsn == Lsn(0) {
-            0xBAD0 // magic value to indicate that we don't know prev_lsn
+        let mut zenith_signal = String::new();
+        if self.prev_record_lsn == Lsn(0) {
+            if self.lsn == self.timeline.get_ancestor_lsn() {
+                write!(zenith_signal, "PREV LSN: none")?;
+            } else {
+                write!(zenith_signal, "PREV LSN: invalid")?;
+            }
         } else {
-            self.prev_record_lsn.0
-        };
+            write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
+        }
         self.ar.append(
-            &new_tar_header("zenith.signal", 8)?,
-            &xl_prev.to_le_bytes()[..],
+            &new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
+            zenith_signal.as_bytes(),
         )?;
 
         //send pg_control
@@ -285,14 +284,15 @@ impl<'a> Basebackup<'a> {
         self.ar.append(&header, &pg_control_bytes[..])?;
 
         //send wal segment
+        let segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
         let wal_file_name = XLogFileName(
             1, // FIXME: always use Postgres timeline 1
-            checkpoint_segno,
+            segno,
             pg_constants::WAL_SEGMENT_SIZE,
         );
         let wal_file_path = format!("pg_wal/{}", wal_file_name);
         let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?;
-        let wal_seg = generate_wal_segment(&pg_control);
+        let wal_seg = generate_wal_segment(segno, pg_control.system_identifier);
         assert!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE);
         self.ar.append(&header, &wal_seg[..])?;
         Ok(())
diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index c763f98a7f..3a577476dc 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -2,8 +2,6 @@
 // Main entry point for the Page Server executable
 //
 
-use log::*;
-use pageserver::defaults::*;
 use serde::{Deserialize, Serialize};
 use std::{
     env,
@@ -12,27 +10,33 @@ use std::{
     str::FromStr,
     thread,
 };
+use tracing::*;
 use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType};
 
 use anyhow::{bail, ensure, Context, Result};
+use signal_hook::consts::signal::*;
+use signal_hook::consts::TERM_SIGNALS;
+use signal_hook::flag;
+use signal_hook::iterator::exfiltrator::WithOrigin;
+use signal_hook::iterator::SignalsInfo;
+use std::process::exit;
+use std::sync::atomic::AtomicBool;
+use std::sync::Arc;
+
 use clap::{App, Arg, ArgMatches};
 use daemonize::Daemonize;
 
 use pageserver::{
-    branches,
-    defaults::{
-        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR,
-        DEFAULT_RELISH_STORAGE_MAX_CONCURRENT_SYNC_LIMITS,
-    },
-    http, page_service, relish_storage, tenant_mgr, PageServerConf, RelishStorageConfig,
-    RelishStorageKind, S3Config, LOG_FILE_NAME,
+    branches, defaults::*, http, page_service, relish_storage, tenant_mgr, PageServerConf,
+    RelishStorageConfig, RelishStorageKind, S3Config, LOG_FILE_NAME,
 };
 use zenith_utils::http::endpoint;
+use zenith_utils::postgres_backend;
 
 use const_format::formatcp;
 
 /// String arguments that can be declared via CLI or config file
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
 struct CfgFileParams {
     listen_pg_addr: Option<String>,
     listen_http_addr: Option<String>,
@@ -43,12 +47,21 @@ struct CfgFileParams {
     pg_distrib_dir: Option<String>,
     auth_validation_public_key_path: Option<String>,
     auth_type: Option<String>,
-    // see https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for enum deserialisation examples
-    relish_storage: Option<RelishStorage>,
     relish_storage_max_concurrent_sync: Option<String>,
+    /////////////////////////////////
+    //// Don't put `Option<String>` and other "simple" values below.
+    ////
+    /// `Option<RelishStorage>` is a <a href='https://toml.io/en/v1.0.0#table'>table</a> in TOML.
+    /// Values in TOML cannot be defined after tables (other tables can),
+    /// and [`toml`] crate serializes all fields in the order of their appearance.
+    ////////////////////////////////
+    relish_storage: Option<RelishStorage>,
 }
 
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
+// Without this attribute, enums with values won't be serialized by the `toml` library (but can be deserialized nonetheless!).
+// See https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for the examples
+#[serde(untagged)]
 enum RelishStorage {
     Local {
         local_path: String,
@@ -447,7 +460,18 @@ fn main() -> Result<()> {
 
 fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
     // Initialize logger
-    let (_scope_guard, log_file) = logging::init(LOG_FILE_NAME, conf.daemonize)?;
+    let log_file = logging::init(LOG_FILE_NAME, conf.daemonize)?;
+
+    let term_now = Arc::new(AtomicBool::new(false));
+    for sig in TERM_SIGNALS {
+        // When terminated by a second term signal, exit with exit code 1.
+        // This will do nothing the first time (because term_now is false).
+        flag::register_conditional_shutdown(*sig, 1, Arc::clone(&term_now))?;
+        // But this will "arm" the above for the second time, by setting it to true.
+        // The order of registering these is important, if you put this one first, it will
+        // first arm and then terminate ‒ all in the first round.
+        flag::register(*sig, Arc::clone(&term_now))?;
+    }
 
     // TODO: Check that it looks like a valid repository before going further
 
@@ -480,7 +504,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
 
         match daemonize.start() {
             Ok(_) => info!("Success, daemonized"),
-            Err(e) => error!("could not daemonize: {:#}", e),
+            Err(err) => error!(%err, "could not daemonize"),
         }
     }
 
@@ -525,13 +549,173 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
             page_service::thread_main(conf, auth, pageserver_listener, conf.auth_type)
         })?;
 
-    join_handles.push(page_service_thread);
+    for info in SignalsInfo::<WithOrigin>::new(TERM_SIGNALS)?.into_iter() {
+        match info.signal {
+            SIGQUIT => {
+                info!("Got SIGQUIT. Terminate pageserver in immediate shutdown mode");
+                exit(111);
+            }
+            SIGINT | SIGTERM => {
+                info!("Got SIGINT/SIGTERM. Terminate gracefully in fast shutdown mode");
+                // Terminate postgres backends
+                postgres_backend::set_pgbackend_shutdown_requested();
+                // Stop all tenants and flush their data
+                tenant_mgr::shutdown_all_tenants()?;
+                // Wait for pageservice thread to complete the job
+                page_service_thread
+                    .join()
+                    .expect("thread panicked")
+                    .expect("thread exited with an error");
 
-    for handle in join_handles.into_iter() {
-        handle
-            .join()
-            .expect("thread panicked")
-            .expect("thread exited with an error")
+                // Shut down http router
+                endpoint::shutdown();
+
+                // Wait for all threads
+                for handle in join_handles.into_iter() {
+                    handle
+                        .join()
+                        .expect("thread panicked")
+                        .expect("thread exited with an error");
+                }
+                info!("Pageserver shut down successfully completed");
+                exit(0);
+            }
+            unknown_signal => {
+                debug!("Unknown signal {}", unknown_signal);
+            }
+        }
     }
+
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn page_server_conf_toml_serde() {
+        let params = CfgFileParams {
+            listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
+            listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
+            checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
+            checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
+            gc_horizon: Some("gc_horizon_VALUE".to_string()),
+            gc_period: Some("gc_period_VALUE".to_string()),
+            pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
+            auth_validation_public_key_path: Some(
+                "auth_validation_public_key_path_VALUE".to_string(),
+            ),
+            auth_type: Some("auth_type_VALUE".to_string()),
+            relish_storage: Some(RelishStorage::Local {
+                local_path: "relish_storage_local_VALUE".to_string(),
+            }),
+            relish_storage_max_concurrent_sync: Some(
+                "relish_storage_max_concurrent_sync_VALUE".to_string(),
+            ),
+        };
+
+        let toml_string = toml::to_string(&params).expect("Failed to serialize correct config");
+        let toml_pretty_string =
+            toml::to_string_pretty(&params).expect("Failed to serialize correct config");
+        assert_eq!(
+            r#"listen_pg_addr = 'listen_pg_addr_VALUE'
+listen_http_addr = 'listen_http_addr_VALUE'
+checkpoint_distance = 'checkpoint_distance_VALUE'
+checkpoint_period = 'checkpoint_period_VALUE'
+gc_horizon = 'gc_horizon_VALUE'
+gc_period = 'gc_period_VALUE'
+pg_distrib_dir = 'pg_distrib_dir_VALUE'
+auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
+auth_type = 'auth_type_VALUE'
+relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
+
+[relish_storage]
+local_path = 'relish_storage_local_VALUE'
+"#,
+            toml_pretty_string
+        );
+
+        let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
+            .expect("Failed to deserialize the serialization result of the config");
+        let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
+            .expect("Failed to deserialize the prettified serialization result of the config");
+        assert!(
+            params_from_serialized == params,
+            "Expected the same config in the end of config -> serialize -> deserialize chain"
+        );
+        assert!(
+            params_from_serialized_pretty == params,
+            "Expected the same config in the end of config -> serialize pretty -> deserialize chain"
+        );
+    }
+
+    #[test]
+    fn credentials_omitted_during_serialization() {
+        let params = CfgFileParams {
+            listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
+            listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
+            checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
+            checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
+            gc_horizon: Some("gc_horizon_VALUE".to_string()),
+            gc_period: Some("gc_period_VALUE".to_string()),
+            pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
+            auth_validation_public_key_path: Some(
+                "auth_validation_public_key_path_VALUE".to_string(),
+            ),
+            auth_type: Some("auth_type_VALUE".to_string()),
+            relish_storage: Some(RelishStorage::AwsS3 {
+                bucket_name: "bucket_name_VALUE".to_string(),
+                bucket_region: "bucket_region_VALUE".to_string(),
+                access_key_id: Some("access_key_id_VALUE".to_string()),
+                secret_access_key: Some("secret_access_key_VALUE".to_string()),
+            }),
+            relish_storage_max_concurrent_sync: Some(
+                "relish_storage_max_concurrent_sync_VALUE".to_string(),
+            ),
+        };
+
+        let toml_string = toml::to_string(&params).expect("Failed to serialize correct config");
+        let toml_pretty_string =
+            toml::to_string_pretty(&params).expect("Failed to serialize correct config");
+        assert_eq!(
+            r#"listen_pg_addr = 'listen_pg_addr_VALUE'
+listen_http_addr = 'listen_http_addr_VALUE'
+checkpoint_distance = 'checkpoint_distance_VALUE'
+checkpoint_period = 'checkpoint_period_VALUE'
+gc_horizon = 'gc_horizon_VALUE'
+gc_period = 'gc_period_VALUE'
+pg_distrib_dir = 'pg_distrib_dir_VALUE'
+auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
+auth_type = 'auth_type_VALUE'
+relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
+
+[relish_storage]
+bucket_name = 'bucket_name_VALUE'
+bucket_region = 'bucket_region_VALUE'
+"#,
+            toml_pretty_string
+        );
+
+        let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
+            .expect("Failed to deserialize the serialization result of the config");
+        let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
+            .expect("Failed to deserialize the prettified serialization result of the config");
+
+        let mut expected_params = params;
+        expected_params.relish_storage = Some(RelishStorage::AwsS3 {
+            bucket_name: "bucket_name_VALUE".to_string(),
+            bucket_region: "bucket_region_VALUE".to_string(),
+            access_key_id: None,
+            secret_access_key: None,
+        });
+        assert!(
+            params_from_serialized == expected_params,
+            "Expected the config without credentials in the end of a 'config -> serialize -> deserialize' chain"
+        );
+        assert!(
+            params_from_serialized_pretty == expected_params,
+            "Expected the config without credentials in the end of a 'config -> serialize pretty -> deserialize' chain"
+        );
+    }
+}
diff --git a/pageserver/src/branches.rs b/pageserver/src/branches.rs
index 57adf479ca..15e56b18e5 100644
--- a/pageserver/src/branches.rs
+++ b/pageserver/src/branches.rs
@@ -14,12 +14,12 @@ use std::{
     str::FromStr,
     sync::Arc,
 };
-use zenith_utils::zid::{ZTenantId, ZTimelineId};
+use tracing::*;
 
-use log::*;
 use zenith_utils::crashsafe_dir;
 use zenith_utils::logging;
 use zenith_utils::lsn::Lsn;
+use zenith_utils::zid::{ZTenantId, ZTimelineId};
 
 use crate::tenant_mgr;
 use crate::walredo::WalRedoManager;
@@ -100,7 +100,7 @@ pub struct PointInTime {
 pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
     // Initialize logger
     // use true as daemonize parameter because otherwise we pollute zenith cli output with a few pages long output of info messages
-    let (_scope_guard, _log_file) = logging::init(LOG_FILE_NAME, true)?;
+    let _log_file = logging::init(LOG_FILE_NAME, true)?;
 
     // We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
     // process during repository initialization.
@@ -176,13 +176,16 @@ fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
 // to get bootstrap data for timeline initialization.
 //
 fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
-    info!("running initdb... ");
+    info!("running initdb in {}... ", initdbpath.display());
 
     let initdb_path = conf.pg_bin_dir().join("initdb");
     let initdb_output = Command::new(initdb_path)
         .args(&["-D", initdbpath.to_str().unwrap()])
         .args(&["-U", &conf.superuser])
         .arg("--no-instructions")
+        // This is only used for a temporary installation that is deleted shortly after,
+        // so no need to fsync it
+        .arg("--no-sync")
         .env_clear()
         .env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
         .env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
@@ -195,7 +198,6 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
             String::from_utf8_lossy(&initdb_output.stderr)
         );
     }
-    info!("initdb succeeded");
 
     Ok(())
 }
@@ -210,6 +212,8 @@ fn bootstrap_timeline(
     tli: ZTimelineId,
     repo: &dyn Repository,
 ) -> Result<()> {
+    let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
+
     let initdb_path = conf.tenant_path(&tenantid).join("tmp");
 
     // Init temporarily repo to get bootstrap data
@@ -218,14 +222,12 @@ fn bootstrap_timeline(
 
     let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
 
-    info!("bootstrap_timeline {:?} at lsn {}", pgdata_path, lsn);
-
     // Import the contents of the data directory at the initial checkpoint
     // LSN, and any WAL after that.
     let timeline = repo.create_empty_timeline(tli)?;
     restore_local_repo::import_timeline_from_postgres_datadir(
         &pgdata_path,
-        timeline.as_ref(),
+        timeline.writer().as_ref(),
         lsn,
     )?;
     timeline.checkpoint()?;
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index cd6b84b22f..cacb98ec84 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1,4 +1,3 @@
-use std::str::FromStr;
 use std::sync::Arc;
 
 use anyhow::Result;
@@ -6,6 +5,7 @@ use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use routerify::{ext::RequestExt, RouterBuilder};
+use tracing::*;
 use zenith_utils::auth::JwtAuth;
 use zenith_utils::http::endpoint::attach_openapi_ui;
 use zenith_utils::http::endpoint::auth_middleware;
@@ -15,6 +15,8 @@ use zenith_utils::http::{
     endpoint,
     error::HttpErrorBody,
     json::{json_request, json_response},
+    request::get_request_param,
+    request::parse_request_param,
 };
 
 use super::models::BranchCreateRequest;
@@ -56,33 +58,6 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
     get_state(request).conf
 }
 
-fn get_request_param<'a>(
-    request: &'a Request<Body>,
-    param_name: &str,
-) -> Result<&'a str, ApiError> {
-    match request.param(param_name) {
-        Some(arg) => Ok(arg),
-        None => {
-            return Err(ApiError::BadRequest(format!(
-                "no {} specified in path param",
-                param_name
-            )))
-        }
-    }
-}
-
-fn parse_request_param<T: FromStr>(
-    request: &Request<Body>,
-    param_name: &str,
-) -> Result<T, ApiError> {
-    match get_request_param(request, param_name)?.parse() {
-        Ok(v) => Ok(v),
-        Err(_) => Err(ApiError::BadRequest(
-            "failed to parse tenant id".to_string(),
-        )),
-    }
-}
-
 // healthcheck handler
 async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
     Ok(Response::builder()
@@ -98,6 +73,7 @@ async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Bo
     check_permission(&request, Some(request_data.tenant_id))?;
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("/branch_create", name = %request_data.name, tenant = %request_data.tenant_id, startpoint=%request_data.start_point).entered();
         branches::create_branch(
             get_config(&request),
             &request_data.name,
@@ -116,6 +92,7 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
     check_permission(&request, Some(tenantid))?;
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("branch_list", tenant = %tenantid).entered();
         crate::branches::get_branches(get_config(&request), &tenantid)
     })
     .await
@@ -126,11 +103,12 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
 // TODO add to swagger
 async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
     let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
-    let branch_name: &str = get_request_param(&request, "branch_name")?;
+    let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
     let conf = get_state(&request).conf;
-    let path = conf.branch_path(branch_name, &tenantid);
+    let path = conf.branch_path(&branch_name, &tenantid);
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
         let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
         BranchInfo::from_path(path, conf, &tenantid, &repo)
     })
@@ -144,10 +122,13 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
     // check for management permission
     check_permission(&request, None)?;
 
-    let response_data =
-        tokio::task::spawn_blocking(move || crate::branches::get_tenants(get_config(&request)))
-            .await
-            .map_err(ApiError::from_err)??;
+    let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("tenant_list").entered();
+        crate::branches::get_tenants(get_config(&request))
+    })
+    .await
+    .map_err(ApiError::from_err)??;
+
     Ok(json_response(StatusCode::OK, response_data)?)
 }
 
@@ -158,6 +139,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
     let request_data: TenantCreateRequest = json_request(&mut request).await?;
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
         tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
     })
     .await
diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs
index 1a7216c1c0..c78da6dfc5 100644
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -15,9 +15,9 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
 use bookfile::Book;
 use bytes::Bytes;
 use lazy_static::lazy_static;
-use log::*;
 use postgres_ffi::pg_constants::BLCKSZ;
 use serde::{Deserialize, Serialize};
+use tracing::*;
 
 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
@@ -26,16 +26,18 @@ use std::convert::TryInto;
 use std::fs;
 use std::fs::{File, OpenOptions};
 use std::io::Write;
-use std::ops::Bound::Included;
+use std::ops::{Bound::Included, Deref};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex, MutexGuard};
+use std::thread::JoinHandle;
 use std::time::{Duration, Instant};
 
-use crate::layered_repository::inmemory_layer::FreezeLayers;
 use crate::relish::*;
 use crate::relish_storage::schedule_timeline_upload;
-use crate::repository::{GcResult, Repository, Timeline, WALRecord};
+use crate::repository::{GcResult, Repository, Timeline, TimelineWriter, WALRecord};
+use crate::tenant_mgr;
+use crate::walreceiver;
 use crate::walreceiver::IS_WAL_RECEIVER;
 use crate::walredo::WalRedoManager;
 use crate::PageServerConf;
@@ -57,6 +59,7 @@ mod image_layer;
 mod inmemory_layer;
 mod interval_tree;
 mod layer_map;
+mod page_versions;
 mod storage_layer;
 
 use delta_layer::DeltaLayer;
@@ -68,8 +71,6 @@ use storage_layer::{
     Layer, PageReconstructData, PageReconstructResult, SegmentTag, RELISH_SEG_SIZE,
 };
 
-use self::inmemory_layer::{NonWriteableError, WriteResult};
-
 static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
 
 // Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
@@ -110,6 +111,9 @@ lazy_static! {
     .expect("failed to define a metric");
 }
 
+/// The name of the metadata file pageserver creates per timeline.
+pub const METADATA_FILE_NAME: &str = "metadata";
+
 ///
 /// Repository consists of multiple timelines. Keep them in a hash table.
 ///
@@ -211,6 +215,23 @@ impl Repository for LayeredRepository {
                 self.gc_iteration_internal(target_timelineid, horizon, checkpoint_before_gc)
             })
     }
+
+    // Wait for all threads to complete and persist repository data before pageserver shutdown.
+    fn shutdown(&self) -> Result<()> {
+        trace!("LayeredRepository shutdown for tenant {}", self.tenantid);
+
+        let timelines = self.timelines.lock().unwrap();
+        for (timelineid, timeline) in timelines.iter() {
+            walreceiver::stop_wal_receiver(*timelineid);
+            // Wait for syncing data to disk
+            trace!("repo shutdown. checkpoint timeline {}", timelineid);
+            timeline.checkpoint()?;
+
+            // TODO: Wait for the walredo process to shut down too
+        }
+
+        Ok(())
+    }
 }
 
 /// Private functions
@@ -239,6 +260,10 @@ impl LayeredRepository {
                     None
                 };
 
+                let _enter =
+                    info_span!("loading timeline", timeline = %timelineid, tenant = %self.tenantid)
+                        .entered();
+
                 let mut timeline = LayeredTimeline::new(
                     self.conf,
                     metadata,
@@ -251,7 +276,16 @@ impl LayeredRepository {
                 )?;
 
                 // List the layers on disk, and load them into the layer map
-                timeline.load_layer_map(disk_consistent_lsn)?;
+                let _loaded_layers = timeline.load_layer_map(disk_consistent_lsn)?;
+                if self.upload_relishes {
+                    schedule_timeline_upload(());
+                    // schedule_timeline_upload(
+                    //     self.tenantid,
+                    //     timelineid,
+                    //     loaded_layers,
+                    //     disk_consistent_lsn,
+                    // );
+                }
 
                 // needs to be after load_layer_map
                 timeline.init_current_logical_size()?;
@@ -281,21 +315,24 @@ impl LayeredRepository {
     ///
     /// Launch the checkpointer thread in given repository.
     ///
-    pub fn launch_checkpointer_thread(conf: &'static PageServerConf, rc: Arc<LayeredRepository>) {
-        let _thread = std::thread::Builder::new()
+    pub fn launch_checkpointer_thread(
+        conf: &'static PageServerConf,
+        rc: Arc<LayeredRepository>,
+    ) -> JoinHandle<()> {
+        std::thread::Builder::new()
             .name("Checkpointer thread".into())
             .spawn(move || {
                 // FIXME: relaunch it? Panic is not good.
                 rc.checkpoint_loop(conf).expect("Checkpointer thread died");
             })
-            .unwrap();
+            .unwrap()
     }
 
     ///
     /// Checkpointer thread's main loop
     ///
     fn checkpoint_loop(&self, conf: &'static PageServerConf) -> Result<()> {
-        loop {
+        while !tenant_mgr::shutdown_requested() {
             std::thread::sleep(conf.checkpoint_period);
             info!("checkpointer thread for tenant {} waking up", self.tenantid);
 
@@ -303,44 +340,60 @@ impl LayeredRepository {
             // bytes of WAL since last checkpoint.
             {
                 let timelines = self.timelines.lock().unwrap();
-                for (_timelineid, timeline) in timelines.iter() {
+                for (timelineid, timeline) in timelines.iter() {
+                    let _entered =
+                        info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid)
+                            .entered();
+
                     STORAGE_TIME
                         .with_label_values(&["checkpoint_timed"])
                         .observe_closure_duration(|| {
-                            timeline.checkpoint_internal(conf.checkpoint_distance)
+                            timeline.checkpoint_internal(conf.checkpoint_distance, false)
                         })?
                 }
                 // release lock on 'timelines'
             }
         }
+        trace!("Checkpointer thread shut down");
+        Ok(())
     }
 
     ///
     /// Launch the GC thread in given repository.
     ///
-    pub fn launch_gc_thread(conf: &'static PageServerConf, rc: Arc<LayeredRepository>) {
-        let _thread = std::thread::Builder::new()
+    pub fn launch_gc_thread(
+        conf: &'static PageServerConf,
+        rc: Arc<LayeredRepository>,
+    ) -> JoinHandle<()> {
+        std::thread::Builder::new()
             .name("GC thread".into())
             .spawn(move || {
                 // FIXME: relaunch it? Panic is not good.
                 rc.gc_loop(conf).expect("GC thread died");
             })
-            .unwrap();
+            .unwrap()
     }
 
     ///
     /// GC thread's main loop
     ///
     fn gc_loop(&self, conf: &'static PageServerConf) -> Result<()> {
-        loop {
-            std::thread::sleep(conf.gc_period);
-            info!("gc thread for tenant {} waking up", self.tenantid);
-
+        while !tenant_mgr::shutdown_requested() {
             // Garbage collect old files that are not needed for PITR anymore
             if conf.gc_horizon > 0 {
                 self.gc_iteration(None, conf.gc_horizon, false).unwrap();
             }
+
+            // TODO: Write this in a more adequate way, e.g. using
+            // Condvar::wait_timeout() instead of polling once per second
+            let mut sleep_time = conf.gc_period.as_secs();
+            while sleep_time > 0 && !tenant_mgr::shutdown_requested() {
+                sleep_time -= 1;
+                std::thread::sleep(Duration::from_secs(1));
+            }
+            info!("gc thread for tenant {} waking up", self.tenantid);
         }
+        Ok(())
     }
 
     /// Save timeline metadata to file
@@ -350,17 +403,15 @@ impl LayeredRepository {
         tenantid: ZTenantId,
         data: &TimelineMetadata,
         first_save: bool,
-    ) -> Result<PathBuf> {
-        let timeline_path = conf.timeline_path(&timelineid, &tenantid);
-        let path = timeline_path.join("metadata");
+    ) -> Result<()> {
+        let _enter = info_span!("saving metadata").entered();
+        let path = metadata_path(conf, timelineid, tenantid);
         // use OpenOptions to ensure file presence is consistent with first_save
         let mut file = OpenOptions::new()
             .write(true)
             .create_new(first_save)
             .open(&path)?;
 
-        info!("saving metadata {}", path.display());
-
         let mut metadata_bytes = TimelineMetadata::ser(data)?;
 
         assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
@@ -376,11 +427,15 @@ impl LayeredRepository {
 
         // fsync the parent directory to ensure the directory entry is durable
         if first_save {
-            let timeline_dir = File::open(&timeline_path)?;
+            let timeline_dir = File::open(
+                &path
+                    .parent()
+                    .expect("Metadata should always have a parent dir"),
+            )?;
             timeline_dir.sync_all()?;
         }
 
-        Ok(path)
+        Ok(())
     }
 
     fn load_metadata(
@@ -388,7 +443,7 @@ impl LayeredRepository {
         timelineid: ZTimelineId,
         tenantid: ZTenantId,
     ) -> Result<TimelineMetadata> {
-        let path = conf.timeline_path(&timelineid, &tenantid).join("metadata");
+        let path = metadata_path(conf, timelineid, tenantid);
         let metadata_bytes = std::fs::read(&path)?;
         ensure!(metadata_bytes.len() == METADATA_MAX_SAFE_SIZE);
 
@@ -468,7 +523,7 @@ impl LayeredRepository {
             let timeline = self.get_timeline_locked(*timelineid, &mut *timelines)?;
 
             if let Some(ancestor_timeline) = &timeline.ancestor_timeline {
-                // If target_timeline is specified, we only need to know branchpoints of its childs
+                // If target_timeline is specified, we only need to know branchpoints of its children
                 if let Some(timelineid) = target_timelineid {
                     if ancestor_timeline.timelineid == timelineid {
                         all_branchpoints
@@ -485,6 +540,10 @@ impl LayeredRepository {
         // Ok, we now know all the branch points.
         // Perform GC for each timeline.
         for timelineid in timelineids {
+            if tenant_mgr::shutdown_requested() {
+                return Ok(totals);
+            }
+
             // We have already loaded all timelines above
             // so this operation is just a quick map lookup.
             let timeline = self.get_timeline_locked(timelineid, &mut *timelines)?;
@@ -608,10 +667,21 @@ pub struct LayeredTimeline {
 
     /// If `true`, will backup its timeline files to remote storage after freezing.
     upload_relishes: bool,
+
+    /// Ensures layers aren't frozen by checkpointer between
+    /// [`LayeredTimeline::get_layer_for_write`] and layer reads.
+    /// Locked automatically by [`LayeredTimelineWriter`] and checkpointer.
+    /// Must always be acquired before the layer map/individual layer lock
+    /// to avoid deadlock.
+    write_lock: Mutex<()>,
 }
 
 /// Public interface functions
 impl Timeline for LayeredTimeline {
+    fn get_ancestor_lsn(&self) -> Lsn {
+        self.ancestor_lsn
+    }
+
     /// Wait until WAL has been received up to the given LSN.
     fn wait_lsn(&self, lsn: Lsn) -> Result<()> {
         // This should never be called from the WAL receiver thread, because that could lead
@@ -670,13 +740,7 @@ impl Timeline for LayeredTimeline {
             let segsize;
             if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
                 segsize = layer.get_seg_size(lsn)?;
-                trace!(
-                    "get_seg_size: {} at {}/{} -> {}",
-                    seg,
-                    self.timelineid,
-                    lsn,
-                    segsize
-                );
+                trace!("get_seg_size: {} at {} -> {}", seg, lsn, segsize);
             } else {
                 if segno == 0 {
                     return Ok(None);
@@ -778,138 +842,13 @@ impl Timeline for LayeredTimeline {
                 result.insert(new_relish);
                 trace!("List object {}", new_relish);
             } else {
-                trace!("Filter out droped object {}", new_relish);
+                trace!("Filtered out dropped object {}", new_relish);
             }
         }
 
         Ok(result)
     }
 
-    fn put_wal_record(&self, rel: RelishTag, blknum: u32, rec: WALRecord) -> Result<()> {
-        if !rel.is_blocky() && blknum != 0 {
-            bail!(
-                "invalid request for block {} for non-blocky relish {}",
-                blknum,
-                rel
-            );
-        }
-        ensure!(rec.lsn.is_aligned(), "unaligned record LSN");
-
-        let seg = SegmentTag::from_blknum(rel, blknum);
-        let delta_size = self.perform_write_op(seg, rec.lsn, |layer| {
-            layer.put_wal_record(blknum, rec.clone())
-        })?;
-        self.increase_current_logical_size(delta_size * BLCKSZ as u32);
-        Ok(())
-    }
-
-    fn put_truncation(&self, rel: RelishTag, lsn: Lsn, relsize: u32) -> anyhow::Result<()> {
-        if !rel.is_blocky() {
-            bail!("invalid truncation for non-blocky relish {}", rel);
-        }
-        ensure!(lsn.is_aligned(), "unaligned record LSN");
-
-        debug!("put_truncation: {} to {} blocks at {}", rel, relsize, lsn);
-
-        let oldsize = self
-            .get_relish_size(rel, self.get_last_record_lsn())?
-            .ok_or_else(|| {
-                anyhow!(
-                    "attempted to truncate non-existent relish {} at {}",
-                    rel,
-                    lsn
-                )
-            })?;
-
-        if oldsize <= relsize {
-            return Ok(());
-        }
-        let old_last_seg = (oldsize - 1) / RELISH_SEG_SIZE;
-
-        let last_remain_seg = if relsize == 0 {
-            0
-        } else {
-            (relsize - 1) / RELISH_SEG_SIZE
-        };
-
-        // Drop segments beyond the last remaining segment.
-        for remove_segno in (last_remain_seg + 1)..=old_last_seg {
-            let seg = SegmentTag {
-                rel,
-                segno: remove_segno,
-            };
-            self.perform_write_op(seg, lsn, |layer| layer.drop_segment(lsn))?;
-        }
-
-        // Truncate the last remaining segment to the specified size
-        if relsize == 0 || relsize % RELISH_SEG_SIZE != 0 {
-            let seg = SegmentTag {
-                rel,
-                segno: last_remain_seg,
-            };
-            self.perform_write_op(seg, lsn, |layer| {
-                layer.put_truncation(lsn, relsize % RELISH_SEG_SIZE)
-            })?;
-        }
-        self.decrease_current_logical_size((oldsize - relsize) * BLCKSZ as u32);
-        Ok(())
-    }
-
-    fn drop_relish(&self, rel: RelishTag, lsn: Lsn) -> Result<()> {
-        trace!("drop_segment: {} at {}", rel, lsn);
-
-        if rel.is_blocky() {
-            if let Some(oldsize) = self.get_relish_size(rel, self.get_last_record_lsn())? {
-                let old_last_seg = if oldsize == 0 {
-                    0
-                } else {
-                    (oldsize - 1) / RELISH_SEG_SIZE
-                };
-
-                // Drop all segments of the relish
-                for remove_segno in 0..=old_last_seg {
-                    let seg = SegmentTag {
-                        rel,
-                        segno: remove_segno,
-                    };
-                    self.perform_write_op(seg, lsn, |layer| layer.drop_segment(lsn))?;
-                }
-                self.decrease_current_logical_size(oldsize * BLCKSZ as u32);
-            } else {
-                warn!(
-                    "drop_segment called on non-existent relish {} at {}",
-                    rel, lsn
-                );
-            }
-        } else {
-            // TODO handle TwoPhase relishes
-            let seg = SegmentTag::from_blknum(rel, 0);
-            self.perform_write_op(seg, lsn, |layer| layer.drop_segment(lsn))?;
-        }
-
-        Ok(())
-    }
-
-    fn put_page_image(&self, rel: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()> {
-        if !rel.is_blocky() && blknum != 0 {
-            bail!(
-                "invalid request for block {} for non-blocky relish {}",
-                blknum,
-                rel
-            );
-        }
-        ensure!(lsn.is_aligned(), "unaligned record LSN");
-
-        let seg = SegmentTag::from_blknum(rel, blknum);
-
-        let delta_size = self.perform_write_op(seg, lsn, |layer| {
-            layer.put_page_image(blknum, lsn, img.clone())
-        })?;
-
-        self.increase_current_logical_size(delta_size * BLCKSZ as u32);
-        Ok(())
-    }
-
     /// Public entry point for checkpoint(). All the logic is in the private
     /// checkpoint_internal function, this public facade just wraps it for
     /// metrics collection.
@@ -917,16 +856,7 @@ impl Timeline for LayeredTimeline {
         STORAGE_TIME
             .with_label_values(&["checkpoint_force"])
             //pass checkpoint_distance=0 to force checkpoint
-            .observe_closure_duration(|| self.checkpoint_internal(0))
-    }
-
-    ///
-    /// Remember the (end of) last valid WAL record remembered in the timeline.
-    ///
-    fn advance_last_record_lsn(&self, new_lsn: Lsn) {
-        assert!(new_lsn.is_aligned());
-
-        self.last_record_lsn.advance(new_lsn);
+            .observe_closure_duration(|| self.checkpoint_internal(0, true))
     }
 
     fn get_last_record_lsn(&self) -> Lsn {
@@ -956,6 +886,8 @@ impl Timeline for LayeredTimeline {
     fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
         let mut total_blocks: usize = 0;
 
+        let _enter = info_span!("calc logical size", %lsn).entered();
+
         // list of all relations in this timeline, including ancestor timelines
         let all_rels = self.list_rels(0, 0, lsn)?;
 
@@ -980,6 +912,13 @@ impl Timeline for LayeredTimeline {
 
     fn get_disk_consistent_lsn(&self) -> Lsn {
         self.disk_consistent_lsn.load()
+	}
+
+    fn writer<'a>(&'a self) -> Box<dyn TimelineWriter + 'a> {
+        Box::new(LayeredTimelineWriter {
+            tl: self,
+            _write_guard: self.write_lock.lock().unwrap(),
+        })
     }
 }
 
@@ -1021,26 +960,26 @@ impl LayeredTimeline {
             current_logical_size: AtomicUsize::new(current_logical_size),
             current_logical_size_gauge,
             upload_relishes,
+
+            write_lock: Mutex::new(()),
         };
         Ok(timeline)
     }
 
     ///
-    /// Scan the timeline directory to populate the layer map
+    /// Scan the timeline directory to populate the layer map.
+    /// Returns all timeline-related files that were found and loaded.
     ///
-    fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
-        info!(
-            "loading layer map for timeline {} into memory",
-            self.timelineid
-        );
+    fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<Vec<PathBuf>> {
         let mut layers = self.layers.lock().unwrap();
+        let mut num_layers = 0;
         let (imgfilenames, deltafilenames) =
             filename::list_files(self.conf, self.timelineid, self.tenantid)?;
 
         let timeline_path = self.conf.timeline_path(&self.timelineid, &self.tenantid);
-
+        let mut local_layers = Vec::with_capacity(imgfilenames.len() + deltafilenames.len());
         // First create ImageLayer structs for each image file.
-        for filename in imgfilenames.iter() {
+        for filename in &imgfilenames {
             if filename.lsn > disk_consistent_lsn {
                 warn!(
                     "found future image layer {} on timeline {}",
@@ -1053,17 +992,13 @@ impl LayeredTimeline {
 
             let layer = ImageLayer::new(self.conf, self.timelineid, self.tenantid, filename);
 
-            info!(
-                "found layer {} {} on timeline {}",
-                layer.get_seg_tag(),
-                layer.get_start_lsn(),
-                self.timelineid
-            );
+            trace!("found layer {}", layer.filename().display());
+            local_layers.push(layer.path());
             layers.insert_historic(Arc::new(layer));
+            num_layers += 1;
         }
 
-        // Then for the Delta files.
-        for filename in deltafilenames.iter() {
+        for filename in &deltafilenames {
             ensure!(filename.start_lsn < filename.end_lsn);
             if filename.end_lsn > disk_consistent_lsn {
                 warn!(
@@ -1077,15 +1012,14 @@ impl LayeredTimeline {
 
             let layer = DeltaLayer::new(self.conf, self.timelineid, self.tenantid, filename);
 
-            info!(
-                "found layer {} on timeline {}",
-                layer.filename().display(),
-                self.timelineid,
-            );
+            trace!("found layer {}", layer.filename().display());
+            local_layers.push(layer.path());
             layers.insert_historic(Arc::new(layer));
+            num_layers += 1;
         }
+        info!("loaded layer map with {} layers", num_layers);
 
-        Ok(())
+        Ok(local_layers)
     }
 
     ///
@@ -1134,12 +1068,7 @@ impl LayeredTimeline {
         lsn: Lsn,
         self_layers: &MutexGuard<LayerMap>,
     ) -> Result<Option<(Arc<dyn Layer>, Lsn)>> {
-        trace!(
-            "get_layer_for_read called for {} at {}/{}",
-            seg,
-            self.timelineid,
-            lsn
-        );
+        trace!("get_layer_for_read called for {} at {}", seg, lsn);
 
         // If you requested a page at an older LSN, before the branch point, dig into
         // the right ancestor timeline. This can only happen if you launch a read-only
@@ -1257,17 +1186,15 @@ impl LayeredTimeline {
                 // First modification on this timeline
                 start_lsn = self.ancestor_lsn + 1;
                 trace!(
-                    "creating layer for write for {} at branch point {}/{}",
+                    "creating layer for write for {} at branch point {}",
                     seg,
-                    self.timelineid,
                     start_lsn
                 );
             } else {
                 start_lsn = prev_layer.get_end_lsn();
                 trace!(
-                    "creating layer for write for {} after previous layer {}/{}",
+                    "creating layer for write for {} after previous layer {}",
                     seg,
-                    self.timelineid,
                     start_lsn
                 );
             }
@@ -1308,31 +1235,20 @@ impl LayeredTimeline {
     /// Flush to disk all data that was written with the put_* functions
     ///
     /// NOTE: This has nothing to do with checkpoint in PostgreSQL.
-    fn checkpoint_internal(&self, checkpoint_distance: u64) -> Result<()> {
-        // Grab lock on the layer map.
-        //
-        // TODO: We hold it locked throughout the checkpoint operation. That's bad,
-        // the checkpointing could take many seconds, and any incoming get_page_at_lsn()
-        // requests will block.
+    fn checkpoint_internal(&self, checkpoint_distance: u64, forced: bool) -> Result<()> {
+        let mut write_guard = self.write_lock.lock().unwrap();
         let mut layers = self.layers.lock().unwrap();
 
         // Bump the generation number in the layer map, so that we can distinguish
         // entries inserted after the checkpoint started
         let current_generation = layers.increment_generation();
 
-        // Read 'last_record_lsn'. That becomes the cutoff LSN for frozen layers.
         let RecordLsn {
             last: last_record_lsn,
             prev: prev_record_lsn,
         } = self.last_record_lsn.load();
 
-        trace!(
-            "checkpointing timeline {} at {}",
-            self.timelineid,
-            last_record_lsn
-        );
-
-        let timeline_dir = File::open(self.conf.timeline_path(&self.timelineid, &self.tenantid))?;
+        trace!("checkpoint starting at {}", last_record_lsn);
 
         // Take the in-memory layer with the oldest WAL record. If it's older
         // than the threshold, write it out to disk as a new image and delta file.
@@ -1346,10 +1262,14 @@ impl LayeredTimeline {
         let mut disk_consistent_lsn = last_record_lsn;
 
         let mut created_historics = false;
-
+        let mut layer_uploads = Vec::new();
         while let Some((oldest_layer, oldest_generation)) = layers.peek_oldest_open() {
             let oldest_pending_lsn = oldest_layer.get_oldest_pending_lsn();
 
+            if tenant_mgr::shutdown_requested() && !forced {
+                return Ok(());
+            }
+
             // Does this layer need freezing?
             //
             // Write out all in-memory layers that contain WAL older than CHECKPOINT_DISTANCE.
@@ -1372,32 +1292,24 @@ impl LayeredTimeline {
                 break;
             }
 
-            // Freeze the layer.
-            //
-            // This is a two-step process. First, we "freeze" the in-memory
-            // layer, to close it for new writes, and replace the original
-            // layer with the new frozen in-memory layer (and possibly a new
-            // open layer to hold changes newer than the cutoff.) Then we write
-            // the frozen layer to disk, and replace the in-memory frozen layer
-            // with the new on-disk layers.
-            let FreezeLayers {
-                frozen,
-                open: maybe_new_open,
-            } = oldest_layer.freeze(last_record_lsn)?;
+            // Mark the layer as no longer accepting writes and record the end_lsn.
+            // This happens in-place, no new layers are created now.
+            // We call `get_last_record_lsn` again, which may be different from the
+            // original load, as we may have released the write lock since then.
+            oldest_layer.freeze(self.get_last_record_lsn());
 
-            // replace this layer with the new layers that 'freeze' returned
+            // The layer is no longer open, update the layer map to reflect this.
+            // We will replace it with on-disk historics below.
             layers.pop_oldest_open();
-            if let Some(new_open) = maybe_new_open.clone() {
-                layers.insert_open(new_open);
-            }
-
-            // We temporarily insert InMemory layer into historic list here.
-            // TODO: check that all possible concurrent users of 'historic' treat it right
-            layers.insert_historic(frozen.clone());
+            layers.insert_historic(oldest_layer.clone());
 
             // Write the now-frozen layer to disk. That could take a while, so release the lock while do it
             drop(layers);
-            let new_historics = frozen.write_to_disk(self)?;
+            drop(write_guard);
+
+            let new_historics = oldest_layer.write_to_disk(self)?;
+
+            write_guard = self.write_lock.lock().unwrap();
             layers = self.layers.lock().unwrap();
 
             if !new_historics.is_empty() {
@@ -1405,11 +1317,16 @@ impl LayeredTimeline {
             }
 
             // Finally, replace the frozen in-memory layer with the new on-disk layers
-            layers.remove_historic(frozen.clone());
+            layers.remove_historic(oldest_layer);
 
             // Add the historics to the LayerMap
-            for n in new_historics {
-                layers.insert_historic(n);
+            for delta_layer in new_historics.delta_layers {
+                layer_uploads.push(delta_layer.path());
+                layers.insert_historic(Arc::new(delta_layer));
+            }
+            for image_layer in new_historics.image_layers {
+                layer_uploads.push(image_layer.path());
+                layers.insert_historic(Arc::new(image_layer));
             }
         }
 
@@ -1421,59 +1338,64 @@ impl LayeredTimeline {
         }
 
         drop(layers);
+        drop(write_guard);
 
         if created_historics {
             // We must fsync the timeline dir to ensure the directory entries for
             // new layer files are durable
+            let timeline_dir =
+                File::open(self.conf.timeline_path(&self.timelineid, &self.tenantid))?;
             timeline_dir.sync_all()?;
         }
 
-        // Save the metadata, with updated 'disk_consistent_lsn', to a
-        // file in the timeline dir. After crash, we will restart WAL
-        // streaming and processing from that point.
+        // If we were able to advance 'disk_consistent_lsn', save it to the metadata file.
+        // After crash, we will restart WAL streaming and processing from that point.
+        let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
+        if disk_consistent_lsn != old_disk_consistent_lsn {
+            assert!(disk_consistent_lsn > old_disk_consistent_lsn);
 
-        // We can only save a valid 'prev_record_lsn' value on disk if we
-        // flushed *all* in-memory changes to disk. We only track
-        // 'prev_record_lsn' in memory for the latest processed record, so we
-        // don't remember what the correct value that corresponds to some old
-        // LSN is. But if we flush everything, then the value corresponding
-        // current 'last_record_lsn' is correct and we can store it on disk.
-        let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
-            Some(prev_record_lsn)
-        } else {
-            None
-        };
+            // We can only save a valid 'prev_record_lsn' value on disk if we
+            // flushed *all* in-memory changes to disk. We only track
+            // 'prev_record_lsn' in memory for the latest processed record, so we
+            // don't remember the correct value that corresponds to some old
+            // LSN. But if we flush everything, then the value corresponding to the
+            // current 'last_record_lsn' is correct and we can store it on disk.
+            let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
+                Some(prev_record_lsn)
+            } else {
+                None
+            };
 
-        let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid);
+            let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid);
 
-        let metadata = TimelineMetadata {
-            disk_consistent_lsn,
-            prev_record_lsn: ondisk_prev_record_lsn,
-            ancestor_timeline: ancestor_timelineid,
-            ancestor_lsn: self.ancestor_lsn,
-        };
-        let _metadata_path = LayeredRepository::save_metadata(
-            self.conf,
-            self.timelineid,
-            self.tenantid,
-            &metadata,
-            false,
-        )?;
-        if self.upload_relishes {
-            schedule_timeline_upload(())
-            // schedule_timeline_upload(LocalTimeline {
-            //     tenant_id: self.tenantid,
-            //     timeline_id: self.timelineid,
-            //     metadata_path,
-            //     image_layers: image_layer_uploads,
-            //     delta_layers: delta_layer_uploads,
-            //     disk_consistent_lsn,
-            // });
+            let metadata = TimelineMetadata {
+                disk_consistent_lsn,
+                prev_record_lsn: ondisk_prev_record_lsn,
+                ancestor_timeline: ancestor_timelineid,
+                ancestor_lsn: self.ancestor_lsn,
+            };
+            LayeredRepository::save_metadata(
+                self.conf,
+                self.timelineid,
+                self.tenantid,
+                &metadata,
+                false,
+            )?;
+
+            // Also update the in-memory copy
+            self.disk_consistent_lsn.store(disk_consistent_lsn);
+
+            if self.upload_relishes {
+                schedule_timeline_upload(())
+                // schedule_timeline_upload(
+                //     self.tenantid,
+                //     self.timelineid,
+                //     layer_uploads,
+                //     disk_consistent_lsn,
+                // );
+            }
         }
 
-        // Also update the in-memory copy
-        self.disk_consistent_lsn.store(disk_consistent_lsn);
-
         Ok(())
     }
 
@@ -1503,11 +1425,11 @@ impl LayeredTimeline {
         let now = Instant::now();
         let mut result: GcResult = Default::default();
 
-        info!(
-            "running GC on timeline {}, cutoff {}",
-            self.timelineid, cutoff
-        );
-        info!("retain_lsns:  {:?}", retain_lsns);
+        let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();
+
+        info!("GC starting");
+
+        debug!("retain_lsns: {:?}", retain_lsns);
 
         let mut layers_to_remove: Vec<Arc<dyn Layer>> = Vec::new();
 
@@ -1769,10 +1691,9 @@ impl LayeredTimeline {
         if data.records.is_empty() {
             if let Some(img) = &data.page_img {
                 trace!(
-                    "found page image for blk {} in {} at {}/{}, no WAL redo required",
+                    "found page image for blk {} in {} at {}, no WAL redo required",
                     blknum,
                     rel,
-                    self.timelineid,
                     request_lsn
                 );
                 Ok(img.clone())
@@ -1786,7 +1707,7 @@ impl LayeredTimeline {
             //
             // If we don't have a base image, then the oldest WAL record better initialize
             // the page
-            if data.page_img.is_none() && !data.records.first().unwrap().will_init {
+            if data.page_img.is_none() && !data.records.first().unwrap().1.will_init {
                 // FIXME: this ought to be an error?
                 warn!(
                     "Base image for page {}/{} at {} not found, but got {} WAL records",
@@ -1798,9 +1719,9 @@ impl LayeredTimeline {
                 Ok(ZERO_PAGE.clone())
             } else {
                 if data.page_img.is_some() {
-                    trace!("found {} WAL records and a base image for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
+                    trace!("found {} WAL records and a base image for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
                 } else {
-                    trace!("found {} WAL records that will init the page for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
+                    trace!("found {} WAL records that will init the page for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
                 }
                 let img = self.walredo_mgr.request_redo(
                     rel,
@@ -1848,36 +1769,163 @@ impl LayeredTimeline {
         self.current_logical_size_gauge
             .set(val as i64 - diff as i64);
     }
+}
 
-    /// If a layer is in the process of being replaced in [`LayerMap`], write
-    /// operations will fail with [`NonWriteableError`]. This may happen due to
-    /// a race: the checkpointer thread freezes a layer just after
-    /// [`Self::get_layer_for_write`] returned it. To handle this error, we try
-    /// again getting the layer and attempt the write.
-    fn perform_write_op<R>(
-        &self,
-        seg: SegmentTag,
-        lsn: Lsn,
-        write_op: impl Fn(&Arc<InMemoryLayer>) -> WriteResult<R>,
-    ) -> anyhow::Result<R> {
-        let mut layer = self.get_layer_for_write(seg, lsn)?;
-        loop {
-            match write_op(&layer) {
-                Ok(r) => return Ok(r),
-                Err(NonWriteableError {}) => {}
-            }
+struct LayeredTimelineWriter<'a> {
+    tl: &'a LayeredTimeline,
+    _write_guard: MutexGuard<'a, ()>,
+}
 
-            info!(
-                "attempted to write to non-writeable layer, retrying {} {}",
-                seg, lsn
+impl Deref for LayeredTimelineWriter<'_> {
+    type Target = dyn Timeline;
+
+    fn deref(&self) -> &Self::Target {
+        self.tl
+    }
+}
+
+impl<'a> TimelineWriter for LayeredTimelineWriter<'a> {
+    fn put_wal_record(&self, lsn: Lsn, rel: RelishTag, blknum: u32, rec: WALRecord) -> Result<()> {
+        if !rel.is_blocky() && blknum != 0 {
+            bail!(
+                "invalid request for block {} for non-blocky relish {}",
+                blknum,
+                rel
             );
-
-            // layer was non-writeable, try again
-            let new_layer = self.get_layer_for_write(seg, lsn)?;
-            // the new layer does not have to be writeable, but it should at least be different
-            assert!(!Arc::ptr_eq(&layer, &new_layer));
-            layer = new_layer;
         }
+        ensure!(lsn.is_aligned(), "unaligned record LSN");
+
+        let seg = SegmentTag::from_blknum(rel, blknum);
+        let layer = self.tl.get_layer_for_write(seg, lsn)?;
+        let delta_size = layer.put_wal_record(lsn, blknum, rec);
+        self.tl
+            .increase_current_logical_size(delta_size * BLCKSZ as u32);
+        Ok(())
+    }
+
+    fn put_page_image(&self, rel: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()> {
+        if !rel.is_blocky() && blknum != 0 {
+            bail!(
+                "invalid request for block {} for non-blocky relish {}",
+                blknum,
+                rel
+            );
+        }
+        ensure!(lsn.is_aligned(), "unaligned record LSN");
+
+        let seg = SegmentTag::from_blknum(rel, blknum);
+
+        let layer = self.tl.get_layer_for_write(seg, lsn)?;
+        let delta_size = layer.put_page_image(blknum, lsn, img);
+
+        self.tl
+            .increase_current_logical_size(delta_size * BLCKSZ as u32);
+        Ok(())
+    }
+
+    fn put_truncation(&self, rel: RelishTag, lsn: Lsn, relsize: u32) -> Result<()> {
+        if !rel.is_blocky() {
+            bail!("invalid truncation for non-blocky relish {}", rel);
+        }
+        ensure!(lsn.is_aligned(), "unaligned record LSN");
+
+        debug!("put_truncation: {} to {} blocks at {}", rel, relsize, lsn);
+
+        let oldsize = self
+            .tl
+            .get_relish_size(rel, self.tl.get_last_record_lsn())?
+            .ok_or_else(|| {
+                anyhow!(
+                    "attempted to truncate non-existent relish {} at {}",
+                    rel,
+                    lsn
+                )
+            })?;
+
+        if oldsize <= relsize {
+            return Ok(());
+        }
+        let old_last_seg = (oldsize - 1) / RELISH_SEG_SIZE;
+
+        let last_remain_seg = if relsize == 0 {
+            0
+        } else {
+            (relsize - 1) / RELISH_SEG_SIZE
+        };
+
+        // Drop segments beyond the last remaining segment.
+        for remove_segno in (last_remain_seg + 1)..=old_last_seg {
+            let seg = SegmentTag {
+                rel,
+                segno: remove_segno,
+            };
+
+            let layer = self.tl.get_layer_for_write(seg, lsn)?;
+            layer.drop_segment(lsn);
+        }
+
+        // Truncate the last remaining segment to the specified size
+        if relsize == 0 || relsize % RELISH_SEG_SIZE != 0 {
+            let seg = SegmentTag {
+                rel,
+                segno: last_remain_seg,
+            };
+            let layer = self.tl.get_layer_for_write(seg, lsn)?;
+            layer.put_truncation(lsn, relsize % RELISH_SEG_SIZE)
+        }
+        self.tl
+            .decrease_current_logical_size((oldsize - relsize) * BLCKSZ as u32);
+        Ok(())
+    }
+
+    fn drop_relish(&self, rel: RelishTag, lsn: Lsn) -> Result<()> {
+        trace!("drop_segment: {} at {}", rel, lsn);
+
+        if rel.is_blocky() {
+            if let Some(oldsize) = self
+                .tl
+                .get_relish_size(rel, self.tl.get_last_record_lsn())?
+            {
+                let old_last_seg = if oldsize == 0 {
+                    0
+                } else {
+                    (oldsize - 1) / RELISH_SEG_SIZE
+                };
+
+                // Drop all segments of the relish
+                for remove_segno in 0..=old_last_seg {
+                    let seg = SegmentTag {
+                        rel,
+                        segno: remove_segno,
+                    };
+                    let layer = self.tl.get_layer_for_write(seg, lsn)?;
+                    layer.drop_segment(lsn);
+                }
+                self.tl
+                    .decrease_current_logical_size(oldsize * BLCKSZ as u32);
+            } else {
+                warn!(
+                    "drop_segment called on non-existent relish {} at {}",
+                    rel, lsn
+                );
+            }
+        } else {
+            // TODO handle TwoPhase relishes
+            let seg = SegmentTag::from_blknum(rel, 0);
+            let layer = self.tl.get_layer_for_write(seg, lsn)?;
+            layer.drop_segment(lsn);
+        }
+
+        Ok(())
+    }
+
+    ///
+    /// Remember the (end of the) last valid WAL record in the timeline.
+    ///
+    fn advance_last_record_lsn(&self, new_lsn: Lsn) {
+        assert!(new_lsn.is_aligned());
+
+        self.tl.last_record_lsn.advance(new_lsn);
     }
 }
 
@@ -1899,6 +1947,15 @@ pub fn dump_layerfile_from_path(path: &Path) -> Result<()> {
     Ok(())
 }
 
+fn metadata_path(
+    conf: &'static PageServerConf,
+    timelineid: ZTimelineId,
+    tenantid: ZTenantId,
+) -> PathBuf {
+    let timeline_dir = conf.timeline_path(&timelineid, &tenantid);
+    timeline_dir.join(METADATA_FILE_NAME)
+}
+
 /// Add a suffix to a layer file's name: .{num}.old
 /// Uses the first available num (starts at 0)
 fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
diff --git a/pageserver/src/layered_repository/delta_layer.rs b/pageserver/src/layered_repository/delta_layer.rs
index ad16a86030..24ed9d6e69 100644
--- a/pageserver/src/layered_repository/delta_layer.rs
+++ b/pageserver/src/layered_repository/delta_layer.rs
@@ -48,7 +48,7 @@ use crate::{ZTenantId, ZTimelineId};
 use anyhow::{bail, ensure, Result};
 use log::*;
 use serde::{Deserialize, Serialize};
-use std::collections::BTreeMap;
+use zenith_utils::vec_map::VecMap;
 // avoid binding to Write (conflicts with std::io::Write)
 // while being able to use std::fmt::Write's methods
 use std::fmt::Write as _;
@@ -141,10 +141,10 @@ pub struct DeltaLayerInner {
 
     /// All versions of all pages in the file are are kept here.
     /// Indexed by block number and LSN.
-    page_version_metas: BTreeMap<(u32, Lsn), BlobRange>,
+    page_version_metas: VecMap<(u32, Lsn), BlobRange>,
 
     /// `relsizes` tracks the size of the relation at different points in time.
-    relsizes: BTreeMap<Lsn, u32>,
+    relsizes: VecMap<Lsn, u32>,
 }
 
 impl Layer for DeltaLayer {
@@ -169,29 +169,7 @@ impl Layer for DeltaLayer {
     }
 
     fn filename(&self) -> PathBuf {
-        PathBuf::from(
-            DeltaFileName {
-                seg: self.seg,
-                start_lsn: self.start_lsn,
-                end_lsn: self.end_lsn,
-                dropped: self.dropped,
-            }
-            .to_string(),
-        )
-    }
-
-    fn path(&self) -> Option<PathBuf> {
-        Some(Self::path_for(
-            &self.path_or_conf,
-            self.timelineid,
-            self.tenantid,
-            &DeltaFileName {
-                seg: self.seg,
-                start_lsn: self.start_lsn,
-                end_lsn: self.end_lsn,
-                dropped: self.dropped,
-            },
-        ))
+        PathBuf::from(self.layer_name().to_string())
     }
 
     /// Look up given page in the cache.
@@ -215,10 +193,12 @@ impl Layer for DeltaLayer {
             // Scan the metadata BTreeMap backwards, starting from the given entry.
             let minkey = (blknum, Lsn(0));
             let maxkey = (blknum, lsn);
-            let mut iter = inner
+            let iter = inner
                 .page_version_metas
-                .range((Included(&minkey), Included(&maxkey)));
-            while let Some(((_blknum, _entry_lsn), blob_range)) = iter.next_back() {
+                .slice_range((Included(&minkey), Included(&maxkey)))
+                .iter()
+                .rev();
+            for ((_blknum, pv_lsn), blob_range) in iter {
                 let pv = PageVersion::des(&read_blob(&page_version_reader, blob_range)?)?;
 
                 if let Some(img) = pv.page_image {
@@ -228,7 +208,7 @@ impl Layer for DeltaLayer {
                     break;
                 } else if let Some(rec) = pv.record {
                     let will_init = rec.will_init;
-                    reconstruct_data.records.push(rec);
+                    reconstruct_data.records.push((*pv_lsn, rec));
                     if will_init {
                         // This WAL record initializes the page, so no need to go further back
                         need_image = false;
@@ -262,15 +242,15 @@ impl Layer for DeltaLayer {
 
         // Scan the BTreeMap backwards, starting from the given entry.
         let inner = self.load()?;
-        let mut iter = inner.relsizes.range((Included(&Lsn(0)), Included(&lsn)));
+        let slice = inner
+            .relsizes
+            .slice_range((Included(&Lsn(0)), Included(&lsn)));
 
-        let result;
-        if let Some((_entry_lsn, entry)) = iter.next_back() {
-            result = *entry;
+        if let Some((_entry_lsn, entry)) = slice.last() {
+            Ok(*entry)
         } else {
-            bail!("could not find seg size in delta layer");
+            Err(anyhow::anyhow!("could not find seg size in delta layer"))
         }
-        Ok(result)
     }
 
     /// Does this segment exist at given LSN?
@@ -290,17 +270,15 @@ impl Layer for DeltaLayer {
     ///
     fn unload(&self) -> Result<()> {
         let mut inner = self.inner.lock().unwrap();
-        inner.page_version_metas = BTreeMap::new();
-        inner.relsizes = BTreeMap::new();
+        inner.page_version_metas = VecMap::default();
+        inner.relsizes = VecMap::default();
         inner.loaded = false;
         Ok(())
     }
 
     fn delete(&self) -> Result<()> {
         // delete underlying file
-        if let Some(path) = self.path() {
-            fs::remove_file(path)?;
-        }
+        fs::remove_file(self.path())?;
         Ok(())
     }
 
@@ -317,13 +295,13 @@ impl Layer for DeltaLayer {
 
         println!("--- relsizes ---");
         let inner = self.load()?;
-        for (k, v) in inner.relsizes.iter() {
+        for (k, v) in inner.relsizes.as_slice() {
             println!("  {}: {}", k, v);
         }
         println!("--- page versions ---");
         let (_path, book) = self.open_book()?;
         let chapter = book.chapter_reader(PAGE_VERSIONS_CHAPTER)?;
-        for ((blk, lsn), blob_range) in inner.page_version_metas.iter() {
+        for ((blk, lsn), blob_range) in inner.page_version_metas.as_slice() {
             let mut desc = String::new();
 
             let buf = read_blob(&chapter, blob_range)?;
@@ -380,8 +358,8 @@ impl DeltaLayer {
         start_lsn: Lsn,
         end_lsn: Lsn,
         dropped: bool,
-        page_versions: impl Iterator<Item = (&'a (u32, Lsn), &'a PageVersion)>,
-        relsizes: BTreeMap<Lsn, u32>,
+        page_versions: impl Iterator<Item = (u32, Lsn, &'a PageVersion)>,
+        relsizes: VecMap<Lsn, u32>,
     ) -> Result<DeltaLayer> {
         if seg.rel.is_blocky() {
             assert!(!relsizes.is_empty());
@@ -397,16 +375,14 @@ impl DeltaLayer {
             dropped,
             inner: Mutex::new(DeltaLayerInner {
                 loaded: true,
-                page_version_metas: BTreeMap::new(),
+                page_version_metas: VecMap::default(),
                 relsizes,
             }),
         };
         let mut inner = delta_layer.inner.lock().unwrap();
 
         // Write the in-memory btreemaps into a file
-        let path = delta_layer
-            .path()
-            .expect("DeltaLayer is supposed to have a layer path on disk");
+        let path = delta_layer.path();
 
         // Note: This overwrites any existing file. There shouldn't be any.
         // FIXME: throw an error instead?
@@ -416,26 +392,27 @@ impl DeltaLayer {
 
         let mut page_version_writer = BlobWriter::new(book, PAGE_VERSIONS_CHAPTER);
 
-        for (key, page_version) in page_versions {
+        for (blknum, lsn, page_version) in page_versions {
             let buf = PageVersion::ser(page_version)?;
             let blob_range = page_version_writer.write_blob(&buf)?;
 
-            let old = inner.page_version_metas.insert(*key, blob_range);
-
-            assert!(old.is_none());
+            inner
+                .page_version_metas
+                .append((blknum, lsn), blob_range)
+                .unwrap();
         }
 
         let book = page_version_writer.close()?;
 
         // Write out page versions
         let mut chapter = book.new_chapter(PAGE_VERSION_METAS_CHAPTER);
-        let buf = BTreeMap::ser(&inner.page_version_metas)?;
+        let buf = VecMap::ser(&inner.page_version_metas)?;
         chapter.write_all(&buf)?;
         let book = chapter.close()?;
 
         // and relsizes to separate chapter
         let mut chapter = book.new_chapter(REL_SIZES_CHAPTER);
-        let buf = BTreeMap::ser(&inner.relsizes)?;
+        let buf = VecMap::ser(&inner.relsizes)?;
         chapter.write_all(&buf)?;
         let book = chapter.close()?;
 
@@ -469,12 +446,7 @@ impl DeltaLayer {
             &self.path_or_conf,
             self.timelineid,
             self.tenantid,
-            &DeltaFileName {
-                seg: self.seg,
-                start_lsn: self.start_lsn,
-                end_lsn: self.end_lsn,
-                dropped: self.dropped,
-            },
+            &self.layer_name(),
         );
 
         let file = File::open(&path)?;
@@ -522,10 +494,10 @@ impl DeltaLayer {
         }
 
         let chapter = book.read_chapter(PAGE_VERSION_METAS_CHAPTER)?;
-        let page_version_metas = BTreeMap::des(&chapter)?;
+        let page_version_metas = VecMap::des(&chapter)?;
 
         let chapter = book.read_chapter(REL_SIZES_CHAPTER)?;
-        let relsizes = BTreeMap::des(&chapter)?;
+        let relsizes = VecMap::des(&chapter)?;
 
         debug!("loaded from {}", &path.display());
 
@@ -555,8 +527,8 @@ impl DeltaLayer {
             dropped: filename.dropped,
             inner: Mutex::new(DeltaLayerInner {
                 loaded: false,
-                page_version_metas: BTreeMap::new(),
-                relsizes: BTreeMap::new(),
+                page_version_metas: VecMap::default(),
+                relsizes: VecMap::default(),
             }),
         }
     }
@@ -578,9 +550,28 @@ impl DeltaLayer {
             dropped: summary.dropped,
             inner: Mutex::new(DeltaLayerInner {
                 loaded: false,
-                page_version_metas: BTreeMap::new(),
-                relsizes: BTreeMap::new(),
+                page_version_metas: VecMap::default(),
+                relsizes: VecMap::default(),
             }),
         })
     }
+
+    fn layer_name(&self) -> DeltaFileName {
+        DeltaFileName {
+            seg: self.seg,
+            start_lsn: self.start_lsn,
+            end_lsn: self.end_lsn,
+            dropped: self.dropped,
+        }
+    }
+
+    /// Path to the layer file in pageserver workdir.
+    pub fn path(&self) -> PathBuf {
+        Self::path_for(
+            &self.path_or_conf,
+            self.timelineid,
+            self.tenantid,
+            &self.layer_name(),
+        )
+    }
 }
diff --git a/pageserver/src/layered_repository/filename.rs b/pageserver/src/layered_repository/filename.rs
index 50bfe2977e..afa106f939 100644
--- a/pageserver/src/layered_repository/filename.rs
+++ b/pageserver/src/layered_repository/filename.rs
@@ -13,6 +13,8 @@ use anyhow::Result;
 use log::*;
 use zenith_utils::lsn::Lsn;
 
+use super::METADATA_FILE_NAME;
+
 // Note: LayeredTimeline::load_layer_map() relies on this sort order
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub struct DeltaFileName {
@@ -35,7 +37,7 @@ impl DeltaFileName {
     /// Parse a string as a delta file name. Returns None if the filename does not
     /// match the expected pattern.
     ///
-    pub fn from_str(fname: &str) -> Option<Self> {
+    pub fn parse_str(fname: &str) -> Option<Self> {
         let rel;
         let mut parts;
         if let Some(rest) = fname.strip_prefix("rel_") {
@@ -168,7 +170,7 @@ impl ImageFileName {
     /// Parse a string as an image file name. Returns None if the filename does not
     /// match the expected pattern.
     ///
-    pub fn from_str(fname: &str) -> Option<Self> {
+    pub fn parse_str(fname: &str) -> Option<Self> {
         let rel;
         let mut parts;
         if let Some(rest) = fname.strip_prefix("rel_") {
@@ -286,11 +288,11 @@ pub fn list_files(
         let fname = direntry?.file_name();
         let fname = fname.to_str().unwrap();
 
-        if let Some(deltafilename) = DeltaFileName::from_str(fname) {
+        if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
             deltafiles.push(deltafilename);
-        } else if let Some(imgfilename) = ImageFileName::from_str(fname) {
+        } else if let Some(imgfilename) = ImageFileName::parse_str(fname) {
             imgfiles.push(imgfilename);
-        } else if fname == "metadata" || fname == "ancestor" || fname.ends_with(".old") {
+        } else if fname == METADATA_FILE_NAME || fname == "ancestor" || fname.ends_with(".old") {
             // ignore these
         } else {
             warn!("unrecognized filename in timeline dir: {}", fname);
diff --git a/pageserver/src/layered_repository/image_layer.rs b/pageserver/src/layered_repository/image_layer.rs
index a9487a02d4..744f793558 100644
--- a/pageserver/src/layered_repository/image_layer.rs
+++ b/pageserver/src/layered_repository/image_layer.rs
@@ -114,25 +114,7 @@ pub struct ImageLayerInner {
 
 impl Layer for ImageLayer {
     fn filename(&self) -> PathBuf {
-        PathBuf::from(
-            ImageFileName {
-                seg: self.seg,
-                lsn: self.lsn,
-            }
-            .to_string(),
-        )
-    }
-
-    fn path(&self) -> Option<PathBuf> {
-        Some(Self::path_for(
-            &self.path_or_conf,
-            self.timelineid,
-            self.tenantid,
-            &ImageFileName {
-                seg: self.seg,
-                lsn: self.lsn,
-            },
-        ))
+        PathBuf::from(self.layer_name().to_string())
     }
 
     fn get_timeline_id(&self) -> ZTimelineId {
@@ -222,9 +204,7 @@ impl Layer for ImageLayer {
 
     fn delete(&self) -> Result<()> {
         // delete underlying file
-        if let Some(path) = self.path() {
-            fs::remove_file(path)?;
-        }
+        fs::remove_file(self.path())?;
         Ok(())
     }
 
@@ -300,9 +280,7 @@ impl ImageLayer {
         let inner = layer.inner.lock().unwrap();
 
         // Write the images into a file
-        let path = layer
-            .path()
-            .expect("ImageLayer is supposed to have a layer path on disk");
+        let path = layer.path();
         // Note: This overwrites any existing file. There shouldn't be any.
         // FIXME: throw an error instead?
         let file = File::create(&path)?;
@@ -340,7 +318,7 @@ impl ImageLayer {
         let writer = book.close()?;
         writer.get_ref().sync_all()?;
 
-        trace!("saved {}", &path.display());
+        trace!("saved {}", path.display());
 
         drop(inner);
 
@@ -445,15 +423,7 @@ impl ImageLayer {
     }
 
     fn open_book(&self) -> Result<(PathBuf, Book<File>)> {
-        let path = Self::path_for(
-            &self.path_or_conf,
-            self.timelineid,
-            self.tenantid,
-            &ImageFileName {
-                seg: self.seg,
-                lsn: self.lsn,
-            },
-        );
+        let path = self.path();
 
         let file = File::open(&path)?;
         let book = Book::new(file)?;
@@ -500,4 +470,21 @@ impl ImageLayer {
             }),
         })
     }
+
+    fn layer_name(&self) -> ImageFileName {
+        ImageFileName {
+            seg: self.seg,
+            lsn: self.lsn,
+        }
+    }
+
+    /// Path to the layer file in pageserver workdir.
+    pub fn path(&self) -> PathBuf {
+        Self::path_for(
+            &self.path_or_conf,
+            self.timelineid,
+            self.tenantid,
+            &self.layer_name(),
+        )
+    }
 }
diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs
index f96b5e71d1..474eef09c4 100644
--- a/pageserver/src/layered_repository/inmemory_layer.rs
+++ b/pageserver/src/layered_repository/inmemory_layer.rs
@@ -15,15 +15,14 @@ use crate::{ZTenantId, ZTimelineId};
 use anyhow::{bail, ensure, Result};
 use bytes::Bytes;
 use log::*;
-use std::cmp::min;
-use std::collections::BTreeMap;
-use std::ops::Bound::Included;
 use std::path::PathBuf;
 use std::sync::{Arc, RwLock};
+use zenith_utils::vec_map::VecMap;
 
-use zenith_utils::accum::Accum;
 use zenith_utils::lsn::Lsn;
 
+use super::page_versions::PageVersions;
+
 pub struct InMemoryLayer {
     conf: &'static PageServerConf,
     tenantid: ZTenantId,
@@ -36,9 +35,6 @@ pub struct InMemoryLayer {
     ///
     start_lsn: Lsn,
 
-    /// Frozen in-memory layers have an inclusive end LSN.
-    end_lsn: Option<Lsn>,
-
     /// LSN of the oldest page version stored in this layer
     oldest_pending_lsn: Lsn,
 
@@ -51,14 +47,19 @@ pub struct InMemoryLayer {
 }
 
 pub struct InMemoryLayerInner {
+    /// Frozen in-memory layers have an exclusive end LSN.
+    /// Writes are only allowed when this is None
+    end_lsn: Option<Lsn>,
+
     /// If this relation was dropped, remember when that happened.
-    drop_lsn: Option<Lsn>,
+    /// This is only a flag: the drop LSN itself is recorded in `end_lsn`.
+    dropped: bool,
 
     ///
     /// All versions of all pages in the layer are are kept here.
     /// Indexed by block number and LSN.
     ///
-    page_versions: BTreeMap<(u32, Lsn), PageVersion>,
+    page_versions: PageVersions,
 
     ///
     /// `segsizes` tracks the size of the segment at different points in time.
@@ -67,28 +68,20 @@ pub struct InMemoryLayerInner {
     /// so that determining the size never depends on the predecessor layer. For
     /// a non-blocky rel, 'segsizes' is not used and is always empty.
     ///
-    segsizes: BTreeMap<Lsn, u32>,
-
-    /// Writes are only allowed when true.
-    /// Set to false when this layer is in the process of being replaced.
-    writeable: bool,
+    segsizes: VecMap<Lsn, u32>,
 }
 
 impl InMemoryLayerInner {
-    fn check_writeable(&self) -> WriteResult<()> {
-        if self.writeable {
-            Ok(())
-        } else {
-            Err(NonWriteableError)
-        }
+    fn assert_writeable(&self) {
+        assert!(self.end_lsn.is_none());
     }
 
     fn get_seg_size(&self, lsn: Lsn) -> u32 {
         // Scan the BTreeMap backwards, starting from the given entry.
-        let mut iter = self.segsizes.range((Included(&Lsn(0)), Included(&lsn)));
+        let slice = self.segsizes.slice_range(..=lsn);
 
         // We make sure there is always at least one entry
-        if let Some((_entry_lsn, entry)) = iter.next_back() {
+        if let Some((_entry_lsn, entry)) = slice.last() {
             *entry
         } else {
             panic!("could not find seg size in in-memory layer");
@@ -103,30 +96,23 @@ impl Layer for InMemoryLayer {
         let inner = self.inner.read().unwrap();
 
         let end_lsn;
-        let dropped;
-        if let Some(drop_lsn) = inner.drop_lsn {
+        if let Some(drop_lsn) = inner.end_lsn {
             end_lsn = drop_lsn;
-            dropped = true;
         } else {
             end_lsn = Lsn(u64::MAX);
-            dropped = false;
         }
 
         let delta_filename = DeltaFileName {
             seg: self.seg,
             start_lsn: self.start_lsn,
             end_lsn,
-            dropped,
+            dropped: inner.dropped,
         }
         .to_string();
 
         PathBuf::from(format!("inmem-{}", delta_filename))
     }
 
-    fn path(&self) -> Option<PathBuf> {
-        None
-    }
-
     fn get_timeline_id(&self) -> ZTimelineId {
         self.timelineid
     }
@@ -140,14 +126,10 @@ impl Layer for InMemoryLayer {
     }
 
     fn get_end_lsn(&self) -> Lsn {
-        if let Some(end_lsn) = self.end_lsn {
-            return Lsn(end_lsn.0 + 1);
-        }
-
         let inner = self.inner.read().unwrap();
 
-        if let Some(drop_lsn) = inner.drop_lsn {
-            drop_lsn
+        if let Some(end_lsn) = inner.end_lsn {
+            end_lsn
         } else {
             Lsn(u64::MAX)
         }
@@ -155,7 +137,7 @@ impl Layer for InMemoryLayer {
 
     fn is_dropped(&self) -> bool {
         let inner = self.inner.read().unwrap();
-        inner.drop_lsn.is_some()
+        inner.dropped
     }
 
     /// Look up given page in the cache.
@@ -172,19 +154,19 @@ impl Layer for InMemoryLayer {
         {
             let inner = self.inner.read().unwrap();
 
-            // Scan the BTreeMap backwards, starting from reconstruct_data.lsn.
-            let minkey = (blknum, Lsn(0));
-            let maxkey = (blknum, lsn);
-            let mut iter = inner
+            // Scan the page versions backwards, starting from `lsn`.
+            let iter = inner
                 .page_versions
-                .range((Included(&minkey), Included(&maxkey)));
-            while let Some(((_blknum, _entry_lsn), entry)) = iter.next_back() {
+                .get_block_lsn_range(blknum, ..=lsn)
+                .iter()
+                .rev();
+            for (entry_lsn, entry) in iter {
                 if let Some(img) = &entry.page_image {
                     reconstruct_data.page_img = Some(img.clone());
                     need_image = false;
                     break;
                 } else if let Some(rec) = &entry.record {
-                    reconstruct_data.records.push(rec.clone());
+                    reconstruct_data.records.push((*entry_lsn, rec.clone()));
                     if rec.will_init {
                         // This WAL record initializes the page, so no need to go further back
                         need_image = false;
@@ -233,8 +215,8 @@ impl Layer for InMemoryLayer {
         assert!(lsn >= self.start_lsn);
 
         // Is the requested LSN after the segment was dropped?
-        if let Some(drop_lsn) = inner.drop_lsn {
-            if lsn >= drop_lsn {
+        if let Some(end_lsn) = inner.end_lsn {
+            if lsn >= end_lsn {
                 return Ok(false);
             }
         }
@@ -265,27 +247,27 @@ impl Layer for InMemoryLayer {
         let inner = self.inner.read().unwrap();
 
         let end_str = inner
-            .drop_lsn
+            .end_lsn
             .as_ref()
-            .map(|drop_lsn| drop_lsn.to_string())
+            .map(Lsn::to_string)
             .unwrap_or_default();
 
         println!(
-            "----- in-memory layer for tli {} seg {} {}-{} ----",
-            self.timelineid, self.seg, self.start_lsn, end_str
+            "----- in-memory layer for tli {} seg {} {}-{} {} ----",
+            self.timelineid, self.seg, self.start_lsn, end_str, inner.dropped,
         );
 
-        for (k, v) in inner.segsizes.iter() {
+        for (k, v) in inner.segsizes.as_slice() {
             println!("segsizes {}: {}", k, v);
         }
 
-        for (k, v) in inner.page_versions.iter() {
+        for (blknum, lsn, pv) in inner.page_versions.ordered_page_version_iter(None) {
             println!(
                 "blk {} at {}: {}/{}\n",
-                k.0,
-                k.1,
-                v.page_image.is_some(),
-                v.record.is_some()
+                blknum,
+                lsn,
+                pv.page_image.is_some(),
+                pv.record.is_some()
             );
         }
 
@@ -293,26 +275,19 @@ impl Layer for InMemoryLayer {
     }
 }
 
-/// Write failed because the layer is in process of being replaced.
-/// See [`LayeredTimeline::perform_write_op`] for how to handle this error.
-#[derive(Debug)]
-pub struct NonWriteableError;
+/// The on-disk layers produced by writing an in-memory layer's data to disk.
+pub struct LayersOnDisk {
+    pub delta_layers: Vec<DeltaLayer>,
+    pub image_layers: Vec<ImageLayer>,
+}
 
-pub type WriteResult<T> = std::result::Result<T, NonWriteableError>;
-
-/// Helper struct to cleanup `InMemoryLayer::freeze` return signature.
-pub struct FreezeLayers {
-    /// Replacement layer for the layer which freeze was called on.
-    pub frozen: Arc<InMemoryLayer>,
-    /// New open layer containing leftover data.
-    pub open: Option<Arc<InMemoryLayer>>,
+impl LayersOnDisk {
+    pub fn is_empty(&self) -> bool {
+        self.delta_layers.is_empty() && self.image_layers.is_empty()
+    }
 }
 
 impl InMemoryLayer {
-    fn assert_not_frozen(&self) {
-        assert!(self.end_lsn.is_none());
-    }
-
     /// Return the oldest page version that's stored in this layer
     pub fn get_oldest_pending_lsn(&self) -> Lsn {
         self.oldest_pending_lsn
@@ -337,9 +312,9 @@ impl InMemoryLayer {
         );
 
         // The segment is initially empty, so initialize 'segsizes' with 0.
-        let mut segsizes = BTreeMap::new();
+        let mut segsizes = VecMap::default();
         if seg.rel.is_blocky() {
-            segsizes.insert(start_lsn, 0);
+            segsizes.append(start_lsn, 0).unwrap();
         }
 
         Ok(InMemoryLayer {
@@ -348,14 +323,13 @@ impl InMemoryLayer {
             tenantid,
             seg,
             start_lsn,
-            end_lsn: None,
             oldest_pending_lsn,
             incremental: false,
             inner: RwLock::new(InMemoryLayerInner {
-                drop_lsn: None,
-                page_versions: BTreeMap::new(),
+                end_lsn: None,
+                dropped: false,
+                page_versions: PageVersions::default(),
                 segsizes,
-                writeable: true,
             }),
         })
     }
@@ -363,10 +337,10 @@ impl InMemoryLayer {
     // Write operations
 
     /// Remember new page version, as a WAL record over previous version
-    pub fn put_wal_record(&self, blknum: u32, rec: WALRecord) -> WriteResult<u32> {
+    pub fn put_wal_record(&self, lsn: Lsn, blknum: u32, rec: WALRecord) -> u32 {
         self.put_page_version(
             blknum,
-            rec.lsn,
+            lsn,
             PageVersion {
                 page_image: None,
                 record: Some(rec),
@@ -375,7 +349,7 @@ impl InMemoryLayer {
     }
 
     /// Remember new page version, as a full page image
-    pub fn put_page_image(&self, blknum: u32, lsn: Lsn, img: Bytes) -> WriteResult<u32> {
+    pub fn put_page_image(&self, blknum: u32, lsn: Lsn, img: Bytes) -> u32 {
         self.put_page_version(
             blknum,
             lsn,
@@ -388,8 +362,7 @@ impl InMemoryLayer {
 
     /// Common subroutine of the public put_wal_record() and put_page_image() functions.
     /// Adds the page version to the in-memory tree
-    pub fn put_page_version(&self, blknum: u32, lsn: Lsn, pv: PageVersion) -> WriteResult<u32> {
-        self.assert_not_frozen();
+    pub fn put_page_version(&self, blknum: u32, lsn: Lsn, pv: PageVersion) -> u32 {
         assert!(self.seg.blknum_in_seg(blknum));
 
         trace!(
@@ -401,9 +374,9 @@ impl InMemoryLayer {
         );
         let mut inner = self.inner.write().unwrap();
 
-        inner.check_writeable()?;
+        inner.assert_writeable();
 
-        let old = inner.page_versions.insert((blknum, lsn), pv);
+        let old = inner.page_versions.append_or_update_last(blknum, lsn, pv);
 
         if old.is_some() {
             // We already had an entry for this LSN. That's odd..
@@ -448,7 +421,9 @@ impl InMemoryLayer {
                         gapblknum,
                         blknum
                     );
-                    let old = inner.page_versions.insert((gapblknum, lsn), zeropv);
+                    let old = inner
+                        .page_versions
+                        .append_or_update_last(gapblknum, lsn, zeropv);
                     // We already had an entry for this LSN. That's odd..
 
                     if old.is_some() {
@@ -459,53 +434,47 @@ impl InMemoryLayer {
                     }
                 }
 
-                inner.segsizes.insert(lsn, newsize);
-                return Ok(newsize - oldsize);
+                inner.segsizes.append_or_update_last(lsn, newsize).unwrap();
+                return newsize - oldsize;
             }
         }
-        Ok(0)
+
+        0
     }
 
     /// Remember that the relation was truncated at given LSN
-    pub fn put_truncation(&self, lsn: Lsn, segsize: u32) -> WriteResult<()> {
+    pub fn put_truncation(&self, lsn: Lsn, segsize: u32) {
         assert!(
             self.seg.rel.is_blocky(),
             "put_truncation() called on a non-blocky rel"
         );
-        self.assert_not_frozen();
 
         let mut inner = self.inner.write().unwrap();
-        inner.check_writeable()?;
+        inner.assert_writeable();
 
         // check that this we truncate to a smaller size than segment was before the truncation
         let oldsize = inner.get_seg_size(lsn);
         assert!(segsize < oldsize);
 
-        let old = inner.segsizes.insert(lsn, segsize);
+        let old = inner.segsizes.append_or_update_last(lsn, segsize).unwrap();
 
         if old.is_some() {
             // We already had an entry for this LSN. That's odd..
             warn!("Inserting truncation, but had an entry for the LSN already");
         }
-
-        Ok(())
     }
 
     /// Remember that the segment was dropped at given LSN
-    pub fn drop_segment(&self, lsn: Lsn) -> WriteResult<()> {
-        self.assert_not_frozen();
-
+    pub fn drop_segment(&self, lsn: Lsn) {
         let mut inner = self.inner.write().unwrap();
 
-        inner.check_writeable()?;
-
-        assert!(inner.drop_lsn.is_none());
-        inner.drop_lsn = Some(lsn);
-        inner.writeable = false;
+        assert!(inner.end_lsn.is_none());
+        assert!(!inner.dropped);
+        inner.dropped = true;
+        assert!(self.start_lsn < lsn);
+        inner.end_lsn = Some(lsn);
 
         trace!("dropped segment {} at {}", self.seg, lsn);
-
-        Ok(())
     }
 
     ///
@@ -533,10 +502,10 @@ impl InMemoryLayer {
         );
 
         // Copy the segment size at the start LSN from the predecessor layer.
-        let mut segsizes = BTreeMap::new();
+        let mut segsizes = VecMap::default();
         if seg.rel.is_blocky() {
             let size = src.get_seg_size(start_lsn)?;
-            segsizes.insert(start_lsn, size);
+            segsizes.append(start_lsn, size).unwrap();
         }
 
         Ok(InMemoryLayer {
@@ -545,124 +514,43 @@ impl InMemoryLayer {
             tenantid,
             seg,
             start_lsn,
-            end_lsn: None,
             oldest_pending_lsn,
             incremental: true,
             inner: RwLock::new(InMemoryLayerInner {
-                drop_lsn: None,
-                page_versions: BTreeMap::new(),
+                end_lsn: None,
+                dropped: false,
+                page_versions: PageVersions::default(),
                 segsizes,
-                writeable: true,
             }),
         })
     }
 
     pub fn is_writeable(&self) -> bool {
         let inner = self.inner.read().unwrap();
-        inner.writeable
+        inner.end_lsn.is_none()
     }
 
-    /// Splits `self` into two InMemoryLayers: `frozen` and `open`.
-    /// All data up to and including `cutoff_lsn`
-    /// is copied to `frozen`, while the remaining data is copied to `open`.
-    /// After completion, self is non-writeable, but not frozen.
-    pub fn freeze(self: Arc<Self>, cutoff_lsn: Lsn) -> Result<FreezeLayers> {
-        info!(
-            "freezing in memory layer {} on timeline {} at {} (oldest {})",
-            self.filename().display(),
-            self.timelineid,
-            cutoff_lsn,
-            self.oldest_pending_lsn
-        );
+    /// Make the layer non-writeable. Only call once.
+    /// Records the end_lsn for non-dropped layers.
+    /// `end_lsn` is inclusive
+    pub fn freeze(&self, end_lsn: Lsn) {
+        let mut inner = self.inner.write().unwrap();
 
-        self.assert_not_frozen();
-
-        let self_ref = self.clone();
-        let mut inner = self_ref.inner.write().unwrap();
-        // Dropped layers don't need any special freeze actions,
-        // they are marked as non-writeable at drop and just
-        // written out to disk by checkpointer.
-        if inner.drop_lsn.is_some() {
-            assert!(!inner.writeable);
-            info!(
-                "freezing in memory layer for {} on timeline {} is dropped at {}",
-                self.seg,
-                self.timelineid,
-                inner.drop_lsn.unwrap()
-            );
-
-            // There should be no newer layer that refers this non-writeable layer,
-            // because layer that is created after dropped one represents a new rel.
-            return Ok(FreezeLayers {
-                frozen: self,
-                open: None,
-            });
-        }
-        assert!(inner.writeable);
-        inner.writeable = false;
-
-        // Divide all the page versions into old and new
-        // at the 'cutoff_lsn' point.
-        let mut before_segsizes = BTreeMap::new();
-        let mut after_segsizes = BTreeMap::new();
-        let mut after_oldest_lsn: Accum<Lsn> = Accum(None);
-        for (lsn, size) in inner.segsizes.iter() {
-            if *lsn > cutoff_lsn {
-                after_segsizes.insert(*lsn, *size);
-                after_oldest_lsn.accum(min, *lsn);
-            } else {
-                before_segsizes.insert(*lsn, *size);
-            }
-        }
-
-        let mut before_page_versions = BTreeMap::new();
-        let mut after_page_versions = BTreeMap::new();
-        for ((blknum, lsn), pv) in inner.page_versions.iter() {
-            if *lsn > cutoff_lsn {
-                after_page_versions.insert((*blknum, *lsn), pv.clone());
-                after_oldest_lsn.accum(min, *lsn);
-            } else {
-                before_page_versions.insert((*blknum, *lsn), pv.clone());
-            }
-        }
-
-        let frozen = Arc::new(InMemoryLayer {
-            conf: self.conf,
-            tenantid: self.tenantid,
-            timelineid: self.timelineid,
-            seg: self.seg,
-            start_lsn: self.start_lsn,
-            end_lsn: Some(cutoff_lsn),
-            oldest_pending_lsn: self.start_lsn,
-            incremental: self.incremental,
-            inner: RwLock::new(InMemoryLayerInner {
-                drop_lsn: inner.drop_lsn,
-                page_versions: before_page_versions,
-                segsizes: before_segsizes,
-                writeable: false,
-            }),
-        });
-
-        let open = if !after_segsizes.is_empty() || !after_page_versions.is_empty() {
-            let mut new_open = Self::create_successor_layer(
-                self.conf,
-                frozen.clone(),
-                self.timelineid,
-                self.tenantid,
-                cutoff_lsn + 1,
-                after_oldest_lsn.0.unwrap(),
-            )?;
-
-            let new_inner = new_open.inner.get_mut().unwrap();
-            new_inner.page_versions.append(&mut after_page_versions);
-            new_inner.segsizes.append(&mut after_segsizes);
-
-            Some(Arc::new(new_open))
+        if inner.end_lsn.is_some() {
+            assert!(inner.dropped);
         } else {
-            None
-        };
+            assert!(!inner.dropped);
+            assert!(self.start_lsn < end_lsn + 1);
+            inner.end_lsn = Some(Lsn(end_lsn.0 + 1));
 
-        Ok(FreezeLayers { frozen, open })
+            if let Some((lsn, _)) = inner.segsizes.as_slice().last() {
+                assert!(lsn <= &end_lsn, "{:?} {:?}", lsn, end_lsn);
+            }
+
+            for (_blk, lsn, _pv) in inner.page_versions.ordered_page_version_iter(None) {
+                assert!(lsn <= end_lsn);
+            }
+        }
     }
 
     /// Write the this frozen in-memory layer to disk.
@@ -673,16 +561,15 @@ impl InMemoryLayer {
     /// WAL records between start and end LSN. (The delta layer is not needed
     /// when a new relish is created with a single LSN, so that the start and
     /// end LSN are the same.)
-    pub fn write_to_disk(&self, timeline: &LayeredTimeline) -> Result<Vec<Arc<dyn Layer>>> {
+    pub fn write_to_disk(&self, timeline: &LayeredTimeline) -> Result<LayersOnDisk> {
         trace!(
-            "write_to_disk {} end_lsn is {} get_end_lsn is {}",
+            "write_to_disk {} get_end_lsn is {}",
             self.filename().display(),
-            self.end_lsn.unwrap_or(Lsn(0)),
             self.get_end_lsn()
         );
 
         // Grab the lock in read-mode. We hold it over the I/O, but because this
-        // layer is not writeable anymore, no one should be trying to aquire the
+        // layer is not writeable anymore, no one should be trying to acquire the
         // write lock on it, so we shouldn't block anyone. There's one exception
         // though: another thread might have grabbed a reference to this layer
         // in `get_layer_for_write' just before the checkpointer called
@@ -691,46 +578,45 @@ impl InMemoryLayer {
         // would have to wait until we release it. That race condition is very
         // rare though, so we just accept the potential latency hit for now.
         let inner = self.inner.read().unwrap();
-        assert!(!inner.writeable);
+        let end_lsn_exclusive = inner.end_lsn.unwrap();
 
-        if let Some(drop_lsn) = inner.drop_lsn {
+        if inner.dropped {
             let delta_layer = DeltaLayer::create(
                 self.conf,
                 self.timelineid,
                 self.tenantid,
                 self.seg,
                 self.start_lsn,
-                drop_lsn,
+                end_lsn_exclusive,
                 true,
-                inner.page_versions.iter(),
+                inner.page_versions.ordered_page_version_iter(None),
                 inner.segsizes.clone(),
             )?;
             trace!(
                 "freeze: created delta layer for dropped segment {} {}-{}",
                 self.seg,
                 self.start_lsn,
-                drop_lsn
+                end_lsn_exclusive
             );
-            return Ok(vec![Arc::new(delta_layer)]);
+            return Ok(LayersOnDisk {
+                delta_layers: vec![delta_layer],
+                image_layers: Vec::new(),
+            });
         }
 
-        let end_lsn = self.end_lsn.unwrap();
+        // `inner.end_lsn` is exclusive; subtract 1 to get the last included LSN.
+        // We want to make an ImageLayer for the last included LSN,
+        // so the DeltaLayer should exclude that LSN.
+        let end_lsn_inclusive = Lsn(end_lsn_exclusive.0 - 1);
 
-        let mut before_segsizes = BTreeMap::new();
-        for (lsn, size) in inner.segsizes.iter() {
-            if *lsn <= end_lsn {
-                before_segsizes.insert(*lsn, *size);
-            }
-        }
-        let mut before_page_versions = inner.page_versions.iter().filter(|tup| {
-            let ((_blknum, lsn), _pv) = tup;
+        let mut page_versions = inner
+            .page_versions
+            .ordered_page_version_iter(Some(end_lsn_inclusive));
 
-            *lsn < end_lsn
-        });
+        let mut delta_layers = Vec::new();
 
-        let mut frozen_layers: Vec<Arc<dyn Layer>> = Vec::new();
-
-        if self.start_lsn != end_lsn {
+        if self.start_lsn != end_lsn_inclusive {
+            let (segsizes, _) = inner.segsizes.split_at(&end_lsn_exclusive);
             // Write the page versions before the cutoff to disk.
             let delta_layer = DeltaLayer::create(
                 self.conf,
@@ -738,29 +624,36 @@ impl InMemoryLayer {
                 self.tenantid,
                 self.seg,
                 self.start_lsn,
-                end_lsn,
+                end_lsn_inclusive,
                 false,
-                before_page_versions,
-                before_segsizes,
+                page_versions,
+                segsizes,
             )?;
-            frozen_layers.push(Arc::new(delta_layer));
+            delta_layers.push(delta_layer);
             trace!(
                 "freeze: created delta layer {} {}-{}",
                 self.seg,
                 self.start_lsn,
-                end_lsn
+                end_lsn_inclusive
             );
         } else {
-            assert!(before_page_versions.next().is_none());
+            assert!(page_versions.next().is_none());
         }
 
         drop(inner);
 
         // Write a new base image layer at the cutoff point
-        let image_layer = ImageLayer::create_from_src(self.conf, timeline, self, end_lsn)?;
-        frozen_layers.push(Arc::new(image_layer));
-        trace!("freeze: created image layer {} at {}", self.seg, end_lsn);
+        let image_layer =
+            ImageLayer::create_from_src(self.conf, timeline, self, end_lsn_inclusive)?;
+        trace!(
+            "freeze: created image layer {} at {}",
+            self.seg,
+            end_lsn_inclusive
+        );
 
-        Ok(frozen_layers)
+        Ok(LayersOnDisk {
+            delta_layers,
+            image_layers: vec![image_layer],
+        })
     }
 }
diff --git a/pageserver/src/layered_repository/page_versions.rs b/pageserver/src/layered_repository/page_versions.rs
new file mode 100644
index 0000000000..90321f96cd
--- /dev/null
+++ b/pageserver/src/layered_repository/page_versions.rs
@@ -0,0 +1,150 @@
+use std::{collections::HashMap, ops::RangeBounds, slice};
+
+use zenith_utils::{lsn::Lsn, vec_map::VecMap};
+
+use super::storage_layer::PageVersion;
+
+const EMPTY_SLICE: &[(Lsn, PageVersion)] = &[];
+
+#[derive(Debug, Default)]
+pub struct PageVersions(HashMap<u32, VecMap<Lsn, PageVersion>>);
+
+impl PageVersions {
+    pub fn append_or_update_last(
+        &mut self,
+        blknum: u32,
+        lsn: Lsn,
+        page_version: PageVersion,
+    ) -> Option<PageVersion> {
+        let map = self.0.entry(blknum).or_insert_with(VecMap::default);
+        map.append_or_update_last(lsn, page_version).unwrap()
+    }
+
+    /// Get all [`PageVersion`]s in a block
+    pub fn get_block_slice(&self, blknum: u32) -> &[(Lsn, PageVersion)] {
+        self.0
+            .get(&blknum)
+            .map(VecMap::as_slice)
+            .unwrap_or(EMPTY_SLICE)
+    }
+
+    /// Get a range of [`PageVersion`]s in a block
+    pub fn get_block_lsn_range<R: RangeBounds<Lsn>>(
+        &self,
+        blknum: u32,
+        range: R,
+    ) -> &[(Lsn, PageVersion)] {
+        self.0
+            .get(&blknum)
+            .map(|vec_map| vec_map.slice_range(range))
+            .unwrap_or(EMPTY_SLICE)
+    }
+
+    /// Iterate through [`PageVersion`]s in (block, lsn) order.
+    /// If `cutoff_lsn` is set, only yield versions with `lsn < cutoff_lsn`.
+    pub fn ordered_page_version_iter(&self, cutoff_lsn: Option<Lsn>) -> OrderedPageVersionIter<'_> {
+        let mut ordered_blocks: Vec<u32> = self.0.keys().cloned().collect();
+        ordered_blocks.sort_unstable();
+
+        let slice = ordered_blocks
+            .first()
+            .map(|&blknum| self.get_block_slice(blknum))
+            .unwrap_or(EMPTY_SLICE);
+
+        OrderedPageVersionIter {
+            page_versions: self,
+            ordered_blocks,
+            cur_block_idx: 0,
+            cutoff_lsn,
+            cur_slice_iter: slice.iter(),
+        }
+    }
+}
+
+pub struct OrderedPageVersionIter<'a> {
+    page_versions: &'a PageVersions,
+
+    ordered_blocks: Vec<u32>,
+    cur_block_idx: usize,
+
+    cutoff_lsn: Option<Lsn>,
+
+    cur_slice_iter: slice::Iter<'a, (Lsn, PageVersion)>,
+}
+
+impl OrderedPageVersionIter<'_> {
+    fn is_lsn_before_cutoff(&self, lsn: &Lsn) -> bool {
+        if let Some(cutoff_lsn) = self.cutoff_lsn.as_ref() {
+            lsn < cutoff_lsn
+        } else {
+            true
+        }
+    }
+}
+
+impl<'a> Iterator for OrderedPageVersionIter<'a> {
+    type Item = (u32, Lsn, &'a PageVersion);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if let Some((lsn, page_version)) = self.cur_slice_iter.next() {
+                if self.is_lsn_before_cutoff(lsn) {
+                    let blknum = self.ordered_blocks[self.cur_block_idx];
+                    return Some((blknum, *lsn, page_version));
+                }
+            }
+
+            let next_block_idx = self.cur_block_idx + 1;
+            let blknum: u32 = *self.ordered_blocks.get(next_block_idx)?;
+            self.cur_block_idx = next_block_idx;
+            self.cur_slice_iter = self.page_versions.get_block_slice(blknum).iter();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const EMPTY_PAGE_VERSION: PageVersion = PageVersion {
+        page_image: None,
+        record: None,
+    };
+
+    #[test]
+    fn test_ordered_iter() {
+        let mut page_versions = PageVersions::default();
+        const BLOCKS: u32 = 1000;
+        const LSNS: u64 = 50;
+
+        for blknum in 0..BLOCKS {
+            for lsn in 0..LSNS {
+                let old = page_versions.append_or_update_last(blknum, Lsn(lsn), EMPTY_PAGE_VERSION);
+                assert!(old.is_none());
+            }
+        }
+
+        let mut iter = page_versions.ordered_page_version_iter(None);
+        for blknum in 0..BLOCKS {
+            for lsn in 0..LSNS {
+                let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
+                assert_eq!(actual_blknum, blknum);
+                assert_eq!(Lsn(lsn), actual_lsn);
+            }
+        }
+        assert!(iter.next().is_none());
+        assert!(iter.next().is_none()); // should be robust against excessive next() calls
+
+        const CUTOFF_LSN: Lsn = Lsn(30);
+        let mut iter = page_versions.ordered_page_version_iter(Some(CUTOFF_LSN));
+        for blknum in 0..BLOCKS {
+            for lsn in 0..CUTOFF_LSN.0 {
+                let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
+                assert_eq!(actual_blknum, blknum);
+                assert_eq!(Lsn(lsn), actual_lsn);
+            }
+        }
+        assert!(iter.next().is_none());
+        assert!(iter.next().is_none()); // should be robust against excessive next() calls
+    }
+}
diff --git a/pageserver/src/layered_repository/storage_layer.rs b/pageserver/src/layered_repository/storage_layer.rs
index a107d63b40..0a86fe407d 100644
--- a/pageserver/src/layered_repository/storage_layer.rs
+++ b/pageserver/src/layered_repository/storage_layer.rs
@@ -78,7 +78,7 @@ pub struct PageVersion {
 /// 'records' contains the records to apply over the base image.
 ///
 pub struct PageReconstructData {
-    pub records: Vec<WALRecord>,
+    pub records: Vec<(Lsn, WALRecord)>,
     pub page_img: Option<Bytes>,
 }
 
@@ -123,10 +123,6 @@ pub trait Layer: Send + Sync {
     /// Is the segment represented by this layer dropped by PostgreSQL?
     fn is_dropped(&self) -> bool;
 
-    /// Gets the physical location of the layer on disk.
-    /// Some layers, such as in-memory, might not have the location.
-    fn path(&self) -> Option<PathBuf>;
-
     /// Filename used to store this layer on disk. (Even in-memory layers
     /// implement this, to print a handy unique identifier for the layer for
     /// log messages, even though they're never not on disk.)
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index d592a83993..be849ce35f 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -13,7 +13,6 @@
 use anyhow::{anyhow, bail, ensure, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
-use log::*;
 use regex::Regex;
 use std::net::TcpListener;
 use std::str;
@@ -21,10 +20,12 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::thread;
 use std::{io, net::TcpStream};
+use tracing::*;
 use zenith_metrics::{register_histogram_vec, HistogramVec};
 use zenith_utils::auth::{self, JwtAuth};
 use zenith_utils::auth::{Claims, Scope};
 use zenith_utils::lsn::Lsn;
+use zenith_utils::postgres_backend::is_socket_read_timed_out;
 use zenith_utils::postgres_backend::PostgresBackend;
 use zenith_utils::postgres_backend::{self, AuthType};
 use zenith_utils::pq_proto::{
@@ -187,17 +188,32 @@ pub fn thread_main(
     listener: TcpListener,
     auth_type: AuthType,
 ) -> anyhow::Result<()> {
-    loop {
+    let mut join_handles = Vec::new();
+
+    while !tenant_mgr::shutdown_requested() {
         let (socket, peer_addr) = listener.accept()?;
         debug!("accepted connection from {}", peer_addr);
         socket.set_nodelay(true).unwrap();
         let local_auth = auth.clone();
-        thread::spawn(move || {
-            if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
-                error!("page server thread exiting with error: {:#}", err);
-            }
-        });
+
+        let handle = thread::Builder::new()
+            .name("serving Page Service thread".into())
+            .spawn(move || {
+                if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
+                    error!(%err, "page server thread exited with error");
+                }
+            })
+            .unwrap();
+
+        join_handles.push(handle);
     }
+
+    debug!("page_service loop terminated. wait for connections to cancel");
+    for handle in join_handles.into_iter() {
+        handle.join().unwrap();
+    }
+
+    Ok(())
 }
 
 fn page_service_conn_main(
@@ -216,7 +232,7 @@ fn page_service_conn_main(
     }
 
     let mut conn_handler = PageServerHandler::new(conf, auth);
-    let pgbackend = PostgresBackend::new(socket, auth_type, None)?;
+    let pgbackend = PostgresBackend::new(socket, auth_type, None, true)?;
     pgbackend.run(&mut conn_handler)
 }
 
@@ -260,50 +276,66 @@ impl PageServerHandler {
         timelineid: ZTimelineId,
         tenantid: ZTenantId,
     ) -> anyhow::Result<()> {
+        let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
+
         // Check that the timeline exists
         let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
         /* switch client to COPYBOTH */
         pgb.write_message(&BeMessage::CopyBothResponse)?;
 
-        while let Some(message) = pgb.read_message()? {
-            trace!("query({:?}): {:?}", timelineid, message);
+        while !tenant_mgr::shutdown_requested() {
+            match pgb.read_message() {
+                Ok(message) => {
+                    if let Some(message) = message {
+                        trace!("query: {:?}", message);
 
-            let copy_data_bytes = match message {
-                FeMessage::CopyData(bytes) => bytes,
-                _ => continue,
-            };
+                        let copy_data_bytes = match message {
+                            FeMessage::CopyData(bytes) => bytes,
+                            _ => continue,
+                        };
 
-            let zenith_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;
+                        let zenith_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;
 
-            let response = match zenith_fe_msg {
-                PagestreamFeMessage::Exists(req) => SMGR_QUERY_TIME
-                    .with_label_values(&["get_rel_exists"])
-                    .observe_closure_duration(|| {
-                        self.handle_get_rel_exists_request(&*timeline, &req)
-                    }),
-                PagestreamFeMessage::Nblocks(req) => SMGR_QUERY_TIME
-                    .with_label_values(&["get_rel_size"])
-                    .observe_closure_duration(|| self.handle_get_nblocks_request(&*timeline, &req)),
-                PagestreamFeMessage::GetPage(req) => SMGR_QUERY_TIME
-                    .with_label_values(&["get_page_at_lsn"])
-                    .observe_closure_duration(|| {
-                        self.handle_get_page_at_lsn_request(&*timeline, &req)
-                    }),
-            };
+                        let response = match zenith_fe_msg {
+                            PagestreamFeMessage::Exists(req) => SMGR_QUERY_TIME
+                                .with_label_values(&["get_rel_exists"])
+                                .observe_closure_duration(|| {
+                                    self.handle_get_rel_exists_request(&*timeline, &req)
+                                }),
+                            PagestreamFeMessage::Nblocks(req) => SMGR_QUERY_TIME
+                                .with_label_values(&["get_rel_size"])
+                                .observe_closure_duration(|| {
+                                    self.handle_get_nblocks_request(&*timeline, &req)
+                                }),
+                            PagestreamFeMessage::GetPage(req) => SMGR_QUERY_TIME
+                                .with_label_values(&["get_page_at_lsn"])
+                                .observe_closure_duration(|| {
+                                    self.handle_get_page_at_lsn_request(&*timeline, &req)
+                                }),
+                        };
 
-            let response = response.unwrap_or_else(|e| {
-                // print the all details to the log with {:#}, but for the client the
-                // error message is enough
-                error!("error reading relation or page version: {:#}", e);
-                PagestreamBeMessage::Error(PagestreamErrorResponse {
-                    message: e.to_string(),
-                })
-            });
+                        let response = response.unwrap_or_else(|e| {
+                            // print all the details to the log with {:#}, but for the client the
+                            // error message is enough
+                            error!("error reading relation or page version: {:#}", e);
+                            PagestreamBeMessage::Error(PagestreamErrorResponse {
+                                message: e.to_string(),
+                            })
+                        });
 
-            pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
+                        pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
+                    } else {
+                        break;
+                    }
+                }
+                Err(e) => {
+                    if !is_socket_read_timed_out(&e) {
+                        return Err(e);
+                    }
+                }
+            }
         }
-
         Ok(())
     }
 
@@ -363,6 +395,8 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamExistsRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
+
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -378,6 +412,7 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamNblocksRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -397,6 +432,8 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamGetPageRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
+            .entered();
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -414,17 +451,20 @@ impl PageServerHandler {
         lsn: Option<Lsn>,
         tenantid: ZTenantId,
     ) -> anyhow::Result<()> {
+        let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
+        let _enter = span.enter();
+
         // check that the timeline exists
         let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
-        /* switch client to COPYOUT */
+        // switch client to COPYOUT
         pgb.write_message(&BeMessage::CopyOutResponse)?;
-        info!("sent CopyOut");
 
         /* Send a tarball of the latest layer on the timeline */
         {
             let mut writer = CopyDataSink { pgb };
             let mut basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
+            span.record("lsn", &basebackup.lsn.to_string().as_str());
             basebackup.send_tarball()?;
         }
         pgb.write_message(&BeMessage::CopyDone)?;
@@ -529,11 +569,6 @@ impl postgres_backend::Handler for PageServerHandler {
                 None
             };
 
-            info!(
-                "got basebackup command. tenantid=\"{}\" timelineid=\"{}\" lsn=\"{:#?}\"",
-                tenantid, timelineid, lsn
-            );
-
             // Check that the timeline exists
             self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
             pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
@@ -551,6 +586,9 @@ impl postgres_backend::Handler for PageServerHandler {
 
             self.check_permission(Some(tenantid))?;
 
+            let _enter =
+                info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
+
             // Check that the timeline exists
             tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
@@ -573,6 +611,9 @@ impl postgres_backend::Handler for PageServerHandler {
 
             self.check_permission(Some(tenantid))?;
 
+            let _enter =
+                info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
+
             let branch =
                 branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
             let branch = serde_json::to_vec(&branch)?;
diff --git a/pageserver/src/relish_storage.rs b/pageserver/src/relish_storage.rs
index a687abe489..885ca9581f 100644
--- a/pageserver/src/relish_storage.rs
+++ b/pageserver/src/relish_storage.rs
@@ -12,14 +12,12 @@ mod rust_s3;
 /// local page server layer files with external storage.
 mod synced_storage;
 
-use std::path::Path;
-use std::thread;
+use std::{path::Path, thread};
 
 use anyhow::Context;
 
-use self::local_fs::LocalFs;
 pub use self::synced_storage::schedule_timeline_upload;
-use crate::relish_storage::rust_s3::RustS3;
+use self::{local_fs::LocalFs, rust_s3::RustS3};
 use crate::{PageServerConf, RelishStorageKind};
 
 pub fn run_storage_sync_thread(
@@ -57,15 +55,21 @@ pub trait RelishStorage: Send + Sync {
 
     async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>>;
 
-    async fn download_relish(
+    async fn download_relish<W: 'static + std::io::Write + Send>(
         &self,
         from: &Self::RelishStoragePath,
-        to: &Path,
-    ) -> anyhow::Result<()>;
+        // The rust_s3 `get_object_stream` method requires `std::io::BufWriter` for some reason, not the async counterpart.
+        // That forces us to consume and return the writer to satisfy the blocking operation async wrapper requirements.
+        to: std::io::BufWriter<W>,
+    ) -> anyhow::Result<std::io::BufWriter<W>>;
 
     async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()>;
 
-    async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()>;
+    async fn upload_relish<R: tokio::io::AsyncRead + std::marker::Unpin + Send>(
+        &self,
+        from: &mut tokio::io::BufReader<R>,
+        to: &Self::RelishStoragePath,
+    ) -> anyhow::Result<()>;
 }
 
 fn strip_workspace_prefix<'a>(
diff --git a/pageserver/src/relish_storage/local_fs.rs b/pageserver/src/relish_storage/local_fs.rs
index 78ee858a5b..49d656d5a6 100644
--- a/pageserver/src/relish_storage/local_fs.rs
+++ b/pageserver/src/relish_storage/local_fs.rs
@@ -9,11 +9,13 @@
 
 use std::{
     future::Future,
+    io::Write,
     path::{Path, PathBuf},
     pin::Pin,
 };
 
 use anyhow::{bail, Context};
+use tokio::{fs, io};
 
 use super::{strip_workspace_prefix, RelishStorage};
 
@@ -64,16 +66,33 @@ impl RelishStorage for LocalFs {
         Ok(get_all_files(&self.root).await?.into_iter().collect())
     }
 
-    async fn download_relish(
+    async fn download_relish<W: 'static + std::io::Write + Send>(
         &self,
         from: &Self::RelishStoragePath,
-        to: &Path,
-    ) -> anyhow::Result<()> {
+        mut to: std::io::BufWriter<W>,
+    ) -> anyhow::Result<std::io::BufWriter<W>> {
         let file_path = self.resolve_in_storage(from)?;
         if file_path.exists() && file_path.is_file() {
-            create_target_directory(to).await?;
-            tokio::fs::copy(file_path, to).await?;
-            Ok(())
+            let updated_buffer = tokio::task::spawn_blocking(move || {
+                let mut source = std::io::BufReader::new(
+                    std::fs::OpenOptions::new()
+                        .read(true)
+                        .open(&file_path)
+                        .with_context(|| {
+                            format!(
+                                "Failed to open source file '{}' to use in the download",
+                                file_path.display()
+                            )
+                        })?,
+                );
+                std::io::copy(&mut source, &mut to)
+                    .context("Failed to download the relish file")?;
+                to.flush().context("Failed to flush the download buffer")?;
+                Ok::<_, anyhow::Error>(to)
+            })
+            .await
+            .context("Failed to spawn a blocking task")??;
+            Ok(updated_buffer)
         } else {
             bail!(
                 "File '{}' either does not exist or is not a file",
@@ -94,18 +113,33 @@ impl RelishStorage for LocalFs {
         }
     }
 
-    async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()> {
+    async fn upload_relish<R: io::AsyncRead + std::marker::Unpin + Send>(
+        &self,
+        from: &mut io::BufReader<R>,
+        to: &Self::RelishStoragePath,
+    ) -> anyhow::Result<()> {
         let target_file_path = self.resolve_in_storage(to)?;
         create_target_directory(&target_file_path).await?;
+        // Truncate on open: the target may already exist (re-upload), and without it
+        // a shorter upload would leave stale bytes from the previous, longer file.
+        let mut destination = io::BufWriter::new(
+            fs::OpenOptions::new()
+                .write(true)
+                .create(true)
+                .truncate(true)
+                .open(&target_file_path)
+                .await
+                .with_context(|| {
+                    format!(
+                        "Failed to open target fs destination at '{}'",
+                        target_file_path.display()
+                    )
+                })?,
+        );
 
-        tokio::fs::copy(&from, &target_file_path)
+        io::copy_buf(from, &mut destination)
             .await
-            .with_context(|| {
-                format!(
-                    "Failed to upload relish '{}' to local storage",
-                    from.display(),
-                )
-            })?;
+            .context("Failed to upload relish to local storage")?;
         Ok(())
     }
 }
diff --git a/pageserver/src/relish_storage/rust_s3.rs b/pageserver/src/relish_storage/rust_s3.rs
index e98bf8949f..5dddaa36ca 100644
--- a/pageserver/src/relish_storage/rust_s3.rs
+++ b/pageserver/src/relish_storage/rust_s3.rs
@@ -1,13 +1,15 @@
 //! A wrapper around AWS S3 client library `rust_s3` to be used a relish storage.
 
+use std::io::Write;
 use std::path::Path;
 
 use anyhow::Context;
 use s3::{bucket::Bucket, creds::Credentials, region::Region};
 
-use crate::{relish_storage::strip_workspace_prefix, S3Config};
-
-use super::RelishStorage;
+use crate::{
+    relish_storage::{strip_workspace_prefix, RelishStorage},
+    S3Config,
+};
 
 const S3_FILE_SEPARATOR: char = '/';
 
@@ -82,18 +84,14 @@ impl RelishStorage for RustS3 {
             .collect())
     }
 
-    async fn download_relish(
+    async fn download_relish<W: 'static + std::io::Write + Send>(
         &self,
         from: &Self::RelishStoragePath,
-        to: &Path,
-    ) -> anyhow::Result<()> {
-        let mut target_file = std::fs::OpenOptions::new()
-            .write(true)
-            .open(to)
-            .with_context(|| format!("Failed to open target s3 destination at {}", to.display()))?;
+        mut to: std::io::BufWriter<W>,
+    ) -> anyhow::Result<std::io::BufWriter<W>> {
         let code = self
             .bucket
-            .get_object_stream(from.key(), &mut target_file)
+            .get_object_stream(from.key(), &mut to)
             .await
             .with_context(|| format!("Failed to download s3 object with key {}", from.key()))?;
         if code != 200 {
@@ -102,7 +100,12 @@ impl RelishStorage for RustS3 {
                 code
             ))
         } else {
-            Ok(())
+            tokio::task::spawn_blocking(move || {
+                to.flush().context("Failed to flush the download buffer")?;
+                Ok::<_, anyhow::Error>(to)
+            })
+            .await
+            .context("Failed to join the download buffer flush task")?
         }
     }
 
 
@@ -112,9 +115,9 @@ impl RelishStorage for RustS3 {
             .delete_object(path.key())
             .await
             .with_context(|| format!("Failed to delete s3 object with key {}", path.key()))?;
-        if code != 200 {
+        if code != 204 {
             Err(anyhow::format_err!(
-                "Received non-200 exit code during deleting object with key '{}', code: {}",
+                "Received non-204 exit code during deleting object with key '{}', code: {}",
                 path.key(),
                 code
             ))
@@ -123,12 +126,14 @@ impl RelishStorage for RustS3 {
         }
     }
 
-    async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()> {
-        let mut local_file = tokio::fs::OpenOptions::new().read(true).open(from).await?;
-
+    async fn upload_relish<R: tokio::io::AsyncRead + std::marker::Unpin + Send>(
+        &self,
+        from: &mut tokio::io::BufReader<R>,
+        to: &Self::RelishStoragePath,
+    ) -> anyhow::Result<()> {
         let code = self
             .bucket
-            .put_object_stream(&mut local_file, to.key())
+            .put_object_stream(from, to.key())
             .await
             .with_context(|| format!("Failed to create s3 object with key {}", to.key()))?;
         if code != 200 {
diff --git a/pageserver/src/relish_storage/synced_storage.rs b/pageserver/src/relish_storage/synced_storage.rs
index f51e976a83..e9ac20ff8c 100644
--- a/pageserver/src/relish_storage/synced_storage.rs
+++ b/pageserver/src/relish_storage/synced_storage.rs
@@ -1,6 +1,7 @@
 use std::time::Duration;
 use std::{collections::BinaryHeap, sync::Mutex, thread};
 
+use crate::tenant_mgr;
 use crate::{relish_storage::RelishStorage, PageServerConf};
 
 lazy_static::lazy_static! {
@@ -31,22 +32,26 @@ pub fn run_storage_sync_thread<
 
     let handle = thread::Builder::new()
         .name("Queue based relish storage sync".to_string())
-        .spawn(move || loop {
-            let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
-            log::debug!("Upload queue length: {}", queue_accessor.len());
-            let next_task = queue_accessor.pop();
-            drop(queue_accessor);
-            match next_task {
-                Some(task) => runtime.block_on(async {
-                    // suppress warnings
-                    let _ = (config, task, &relish_storage, max_concurrent_sync);
-                    todo!("omitted for brevity")
-                }),
-                None => {
-                    thread::sleep(Duration::from_secs(1));
-                    continue;
+        .spawn(move || {
+            while !tenant_mgr::shutdown_requested() {
+                let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
+                log::debug!("Upload queue length: {}", queue_accessor.len());
+                let next_task = queue_accessor.pop();
+                drop(queue_accessor);
+                match next_task {
+                    Some(task) => runtime.block_on(async {
+                        // suppress warnings
+                        let _ = (config, task, &relish_storage, max_concurrent_sync);
+                        todo!("omitted for brevity")
+                    }),
+                    None => {
+                        thread::sleep(Duration::from_secs(1));
+                        continue;
+                    }
                 }
             }
+            log::debug!("Queue based relish storage sync thread shut down");
+            Ok(())
         })?;
     Ok(Some(handle))
 }
diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs
index fa6a3e83e0..73c6f370d6 100644
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use serde::{Deserialize, Serialize};
 use std::collections::HashSet;
-use std::ops::AddAssign;
+use std::ops::{AddAssign, Deref};
 use std::sync::Arc;
 use std::time::Duration;
 use zenith_utils::lsn::{Lsn, RecordLsn};
@@ -13,6 +13,8 @@ use zenith_utils::zid::ZTimelineId;
 /// A repository corresponds to one .zenith directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 pub trait Repository: Send + Sync {
+    fn shutdown(&self) -> Result<()>;
+
     /// Get Timeline handle for given zenith timeline ID.
     fn get_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
 
@@ -117,32 +119,15 @@ pub trait Timeline: Send + Sync {
     /// Get a list of all existing non-relational objects
     fn list_nonrels(&self, lsn: Lsn) -> Result<HashSet<RelishTag>>;
 
+    /// Get the LSN where this branch was created
+    fn get_ancestor_lsn(&self) -> Lsn;
+
     //------------------------------------------------------------------------------
     // Public PUT functions, to update the repository with new page versions.
     //
     // These are called by the WAL receiver to digest WAL records.
     //------------------------------------------------------------------------------
 
-    /// Put a new page version that can be constructed from a WAL record
-    ///
-    /// This will implicitly extend the relation, if the page is beyond the
-    /// current end-of-file.
-    fn put_wal_record(&self, tag: RelishTag, blknum: u32, rec: WALRecord) -> Result<()>;
-
-    /// Like put_wal_record, but with ready-made image of the page.
-    fn put_page_image(&self, tag: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()>;
-
-    /// Truncate relation
-    fn put_truncation(&self, rel: RelishTag, lsn: Lsn, nblocks: u32) -> Result<()>;
-
-    /// This method is used for marking dropped relations and truncated SLRU files and aborted two phase records
-    fn drop_relish(&self, tag: RelishTag, lsn: Lsn) -> Result<()>;
-
-    /// Track end of the latest digested WAL record.
-    ///
-    /// Advance requires aligned LSN as an argument and would wake wait_lsn() callers.
-    /// Previous last record LSN is stored alongside the latest and can be read.
-    fn advance_last_record_lsn(&self, lsn: Lsn);
     /// Atomically get both last and prev.
     fn get_last_record_rlsn(&self) -> RecordLsn;
     /// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
@@ -151,6 +136,9 @@ pub trait Timeline: Send + Sync {
     fn get_start_lsn(&self) -> Lsn;
     fn get_disk_consistent_lsn(&self) -> Lsn;
 
+    /// Mutate the timeline with a [`TimelineWriter`].
+    fn writer<'a>(&'a self) -> Box<dyn TimelineWriter + 'a>;
+
     ///
     /// Flush to disk all data that was written with the put_* functions
     ///
@@ -169,9 +157,35 @@ pub trait Timeline: Send + Sync {
     fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize>;
 }
 
+/// Various functions to mutate the timeline.
+// TODO Currently, Deref is used to allow easy access to read methods from this trait.
+// This is probably considered a bad practice in Rust and should be fixed eventually,
+// but will cause large code changes.
+pub trait TimelineWriter: Deref<Target = dyn Timeline> {
+    /// Put a new page version that can be constructed from a WAL record
+    ///
+    /// This will implicitly extend the relation, if the page is beyond the
+    /// current end-of-file.
+    fn put_wal_record(&self, lsn: Lsn, tag: RelishTag, blknum: u32, rec: WALRecord) -> Result<()>;
+
+    /// Like put_wal_record, but with ready-made image of the page.
+    fn put_page_image(&self, tag: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()>;
+
+    /// Truncate relation
+    fn put_truncation(&self, rel: RelishTag, lsn: Lsn, nblocks: u32) -> Result<()>;
+
+    /// This method is used for marking dropped relations and truncated SLRU files and aborted two phase records
+    fn drop_relish(&self, tag: RelishTag, lsn: Lsn) -> Result<()>;
+
+    /// Track end of the latest digested WAL record.
+    ///
+    /// Advance requires aligned LSN as an argument and would wake wait_lsn() callers.
+    /// Previous last record LSN is stored alongside the latest and can be read.
+    fn advance_last_record_lsn(&self, lsn: Lsn);
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct WALRecord {
-    pub lsn: Lsn, // LSN at the *end* of the record
     pub will_init: bool,
     pub rec: Bytes,
     // Remember the offset of main_data in rec,
@@ -182,22 +196,19 @@ pub struct WALRecord {
 
 impl WALRecord {
     pub fn pack(&self, buf: &mut BytesMut) {
-        buf.put_u64(self.lsn.0);
         buf.put_u8(self.will_init as u8);
         buf.put_u32(self.main_data_offset);
         buf.put_u32(self.rec.len() as u32);
         buf.put_slice(&self.rec[..]);
     }
     pub fn unpack(buf: &mut Bytes) -> WALRecord {
-        let lsn = Lsn::from(buf.get_u64());
         let will_init = buf.get_u8() != 0;
         let main_data_offset = buf.get_u32();
-        let mut dst = vec![0u8; buf.get_u32() as usize];
-        buf.copy_to_slice(&mut dst);
+        let rec_len = buf.get_u32() as usize;
+        let rec = buf.split_to(rec_len);
         WALRecord {
-            lsn,
             will_init,
-            rec: Bytes::from(dst),
+            rec,
             main_data_offset,
         }
     }
@@ -210,7 +221,7 @@ impl WALRecord {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::layered_repository::LayeredRepository;
+    use crate::layered_repository::{LayeredRepository, METADATA_FILE_NAME};
     use crate::walredo::{WalRedoError, WalRedoManager};
     use crate::PageServerConf;
     use hex_literal::hex;
@@ -307,14 +318,15 @@ mod tests {
 
         // Create timeline to work on
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
 
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
-        tline.put_page_image(TESTREL_A, 1, Lsn(0x40), TEST_IMG("foo blk 1 at 4"))?;
-        tline.put_page_image(TESTREL_A, 2, Lsn(0x50), TEST_IMG("foo blk 2 at 5"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
+        writer.put_page_image(TESTREL_A, 1, Lsn(0x40), TEST_IMG("foo blk 1 at 4"))?;
+        writer.put_page_image(TESTREL_A, 2, Lsn(0x50), TEST_IMG("foo blk 2 at 5"))?;
 
-        tline.advance_last_record_lsn(Lsn(0x50));
+        writer.advance_last_record_lsn(Lsn(0x50));
 
         assert_current_logical_size(&tline, Lsn(0x50));
 
@@ -360,8 +372,8 @@ mod tests {
         );
 
         // Truncate last block
-        tline.put_truncation(TESTREL_A, Lsn(0x60), 2)?;
-        tline.advance_last_record_lsn(Lsn(0x60));
+        writer.put_truncation(TESTREL_A, Lsn(0x60), 2)?;
+        writer.advance_last_record_lsn(Lsn(0x60));
         assert_current_logical_size(&tline, Lsn(0x60));
 
         // Check reported size and contents after truncation
@@ -383,13 +395,13 @@ mod tests {
         );
 
         // Truncate to zero length
-        tline.put_truncation(TESTREL_A, Lsn(0x68), 0)?;
-        tline.advance_last_record_lsn(Lsn(0x68));
+        writer.put_truncation(TESTREL_A, Lsn(0x68), 0)?;
+        writer.advance_last_record_lsn(Lsn(0x68));
         assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x68))?.unwrap(), 0);
 
         // Extend from 0 to 2 blocks, leaving a gap
-        tline.put_page_image(TESTREL_A, 1, Lsn(0x70), TEST_IMG("foo blk 1"))?;
-        tline.advance_last_record_lsn(Lsn(0x70));
+        writer.put_page_image(TESTREL_A, 1, Lsn(0x70), TEST_IMG("foo blk 1"))?;
+        writer.advance_last_record_lsn(Lsn(0x70));
         assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x70))?.unwrap(), 2);
         assert_eq!(tline.get_page_at_lsn(TESTREL_A, 0, Lsn(0x70))?, ZERO_PAGE);
         assert_eq!(
@@ -424,25 +436,26 @@ mod tests {
 
         // Create timeline to work on
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
 
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
-        tline.advance_last_record_lsn(Lsn(0x20));
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
+        writer.advance_last_record_lsn(Lsn(0x20));
 
         // Check that rel exists and size is correct
         assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x20))?, true);
         assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x20))?.unwrap(), 1);
 
         // Drop relish
-        tline.drop_relish(TESTREL_A, Lsn(0x30))?;
-        tline.advance_last_record_lsn(Lsn(0x30));
+        writer.drop_relish(TESTREL_A, Lsn(0x30))?;
+        writer.advance_last_record_lsn(Lsn(0x30));
 
         // Check that rel is not visible anymore
         assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x30))?, false);
         assert!(tline.get_relish_size(TESTREL_A, Lsn(0x30))?.is_none());
 
         // Extend it again
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
-        tline.advance_last_record_lsn(Lsn(0x40));
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
+        writer.advance_last_record_lsn(Lsn(0x40));
 
         // Check that rel exists and size is correct
         assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x40))?, true);
@@ -460,6 +473,7 @@ mod tests {
 
         // Create timeline to work on
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
 
         //from storage_layer.rs
         const RELISH_SEG_SIZE: u32 = 10 * 1024 * 1024 / 8192;
@@ -469,10 +483,10 @@ mod tests {
         for blkno in 0..relsize {
             let lsn = Lsn(0x20);
             let data = format!("foo blk {} at {}", blkno, lsn);
-            tline.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
+            writer.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
         }
 
-        tline.advance_last_record_lsn(Lsn(0x20));
+        writer.advance_last_record_lsn(Lsn(0x20));
 
         // The relation was created at LSN 2, not visible at LSN 1 yet.
         assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x10))?, false);
@@ -496,8 +510,8 @@ mod tests {
 
         // Truncate relation so that second segment was dropped
         // - only leave one page
-        tline.put_truncation(TESTREL_A, Lsn(0x60), 1)?;
-        tline.advance_last_record_lsn(Lsn(0x60));
+        writer.put_truncation(TESTREL_A, Lsn(0x60), 1)?;
+        writer.advance_last_record_lsn(Lsn(0x60));
 
         // Check reported size and contents after truncation
         assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x60))?.unwrap(), 1);
@@ -530,9 +544,9 @@ mod tests {
         for blkno in 0..relsize {
             let lsn = Lsn(0x80);
             let data = format!("foo blk {} at {}", blkno, lsn);
-            tline.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
+            writer.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
         }
-        tline.advance_last_record_lsn(Lsn(0x80));
+        writer.advance_last_record_lsn(Lsn(0x80));
 
         assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x80))?, true);
         assert_eq!(
@@ -558,14 +572,15 @@ mod tests {
     fn test_large_rel() -> Result<()> {
         let repo = RepoHarness::create("test_large_rel")?.load();
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
 
         let mut lsn = 0x10;
         for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
             let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
             lsn += 0x10;
-            tline.put_page_image(TESTREL_A, blknum as u32, Lsn(lsn), img)?;
+            writer.put_page_image(TESTREL_A, blknum as u32, Lsn(lsn), img)?;
         }
-        tline.advance_last_record_lsn(Lsn(lsn));
+        writer.advance_last_record_lsn(Lsn(lsn));
 
         assert_current_logical_size(&tline, Lsn(lsn));
 
@@ -576,8 +591,8 @@ mod tests {
 
         // Truncate one block
         lsn += 0x10;
-        tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE)?;
-        tline.advance_last_record_lsn(Lsn(lsn));
+        writer.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE)?;
+        writer.advance_last_record_lsn(Lsn(lsn));
         assert_eq!(
             tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
             pg_constants::RELSEG_SIZE
@@ -586,8 +601,8 @@ mod tests {
 
         // Truncate another block
         lsn += 0x10;
-        tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE - 1)?;
-        tline.advance_last_record_lsn(Lsn(lsn));
+        writer.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE - 1)?;
+        writer.advance_last_record_lsn(Lsn(lsn));
         assert_eq!(
             tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
             pg_constants::RELSEG_SIZE - 1
@@ -599,8 +614,8 @@ mod tests {
         let mut size: i32 = 3000;
         while size >= 0 {
             lsn += 0x10;
-            tline.put_truncation(TESTREL_A, Lsn(lsn), size as u32)?;
-            tline.advance_last_record_lsn(Lsn(lsn));
+            writer.put_truncation(TESTREL_A, Lsn(lsn), size as u32)?;
+            writer.advance_last_record_lsn(Lsn(lsn));
             assert_eq!(
                 tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
                 size as u32
@@ -620,16 +635,17 @@ mod tests {
     fn test_list_rels_drop() -> Result<()> {
         let repo = RepoHarness::create("test_list_rels_drop")?.load();
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
         const TESTDB: u32 = 111;
 
         // Import initial dummy checkpoint record, otherwise the get_timeline() call
         // after branching fails below
-        tline.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
+        writer.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
 
         // Create a relation on the timeline
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
 
-        tline.advance_last_record_lsn(Lsn(0x30));
+        writer.advance_last_record_lsn(Lsn(0x30));
 
         // Check that list_rels() lists it after LSN 2, but no before it
         assert!(!tline.list_rels(0, TESTDB, Lsn(0x10))?.contains(&TESTREL_A));
@@ -639,14 +655,17 @@ mod tests {
         // Create a branch, check that the relation is visible there
         repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
         let newtline = repo.get_timeline(NEW_TIMELINE_ID)?;
+        let new_writer = newtline.writer();
 
         assert!(newtline
             .list_rels(0, TESTDB, Lsn(0x30))?
             .contains(&TESTREL_A));
 
         // Drop it on the branch
-        newtline.drop_relish(TESTREL_A, Lsn(0x40))?;
-        newtline.advance_last_record_lsn(Lsn(0x40));
+        new_writer.drop_relish(TESTREL_A, Lsn(0x40))?;
+        new_writer.advance_last_record_lsn(Lsn(0x40));
+
+        drop(new_writer);
 
         // Check that it's no longer listed on the branch after the point where it was dropped
         assert!(newtline
@@ -674,28 +693,30 @@ mod tests {
     fn test_branch() -> Result<()> {
         let repo = RepoHarness::create("test_branch")?.load();
         let tline = repo.create_empty_timeline(TIMELINE_ID)?;
+        let writer = tline.writer();
 
         // Import initial dummy checkpoint record, otherwise the get_timeline() call
         // after branching fails below
-        tline.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
+        writer.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
 
         // Create a relation on the timeline
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
-        tline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
+        writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
 
         // Create another relation
-        tline.put_page_image(TESTREL_B, 0, Lsn(0x20), TEST_IMG("foobar blk 0 at 2"))?;
+        writer.put_page_image(TESTREL_B, 0, Lsn(0x20), TEST_IMG("foobar blk 0 at 2"))?;
 
-        tline.advance_last_record_lsn(Lsn(0x40));
+        writer.advance_last_record_lsn(Lsn(0x40));
         assert_current_logical_size(&tline, Lsn(0x40));
 
         // Branch the history, modify relation differently on the new timeline
         repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
         let newtline = repo.get_timeline(NEW_TIMELINE_ID)?;
+        let new_writer = newtline.writer();
 
-        newtline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("bar blk 0 at 4"))?;
-        newtline.advance_last_record_lsn(Lsn(0x40));
+        new_writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("bar blk 0 at 4"))?;
+        new_writer.advance_last_record_lsn(Lsn(0x40));
 
         // Check page contents on both branches
         assert_eq!(
@@ -729,7 +750,7 @@ mod tests {
         repo.create_empty_timeline(TIMELINE_ID)?;
         drop(repo);
 
-        let metadata_path = harness.timeline_path(&TIMELINE_ID).join("metadata");
+        let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
 
         assert!(metadata_path.is_file());
 
@@ -811,7 +832,7 @@ mod tests {
             blknum: u32,
             lsn: Lsn,
             base_img: Option<Bytes>,
-            records: Vec<WALRecord>,
+            records: Vec<(Lsn, WALRecord)>,
         ) -> Result<Bytes, WalRedoError> {
             let s = format!(
                 "redo for {} blk {} to get to {}, with {} and {} records",
diff --git a/pageserver/src/restore_local_repo.rs b/pageserver/src/restore_local_repo.rs
index dfe3edd7ac..8afa2676e2 100644
--- a/pageserver/src/restore_local_repo.rs
+++ b/pageserver/src/restore_local_repo.rs
@@ -2,17 +2,17 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! zenith Timeline.
 //!
-use log::*;
 use postgres_ffi::nonrelfile_utils::clogpage_precedes;
 use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
 use std::cmp::min;
 use std::fs;
 use std::fs::File;
-use std::io::Read;
-use std::path::Path;
+use std::io::{Read, Seek, SeekFrom};
+use std::path::{Path, PathBuf};
 
-use anyhow::{bail, Result};
+use anyhow::{anyhow, bail, Result};
 use bytes::{Buf, Bytes};
+use tracing::*;
 
 use crate::relish::*;
 use crate::repository::*;
@@ -34,9 +34,11 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
 ///
 pub fn import_timeline_from_postgres_datadir(
     path: &Path,
-    timeline: &dyn Timeline,
+    writer: &dyn TimelineWriter,
     lsn: Lsn,
 ) -> Result<()> {
+    let mut pg_control: Option<ControlFileData> = None;
+
     // Scan 'global'
     for direntry in fs::read_dir(path.join("global"))? {
         let direntry = direntry?;
@@ -44,10 +46,10 @@ pub fn import_timeline_from_postgres_datadir(
             None => continue,
 
             Some("pg_control") => {
-                import_control_file(timeline, lsn, &direntry.path())?;
+                pg_control = Some(import_control_file(writer, lsn, &direntry.path())?);
             }
             Some("pg_filenode.map") => import_nonrel_file(
-                timeline,
+                writer,
                 lsn,
                 RelishTag::FileNodeMap {
                     spcnode: pg_constants::GLOBALTABLESPACE_OID,
@@ -59,7 +61,7 @@ pub fn import_timeline_from_postgres_datadir(
             // Load any relation files into the page server
             _ => import_relfile(
                 &direntry.path(),
-                timeline,
+                writer,
                 lsn,
                 pg_constants::GLOBALTABLESPACE_OID,
                 0,
@@ -86,7 +88,7 @@ pub fn import_timeline_from_postgres_datadir(
 
                 Some("PG_VERSION") => continue,
                 Some("pg_filenode.map") => import_nonrel_file(
-                    timeline,
+                    writer,
                     lsn,
                     RelishTag::FileNodeMap {
                         spcnode: pg_constants::DEFAULTTABLESPACE_OID,
@@ -98,7 +100,7 @@ pub fn import_timeline_from_postgres_datadir(
                 // Load any relation files into the page server
                 _ => import_relfile(
                     &direntry.path(),
-                    timeline,
+                    writer,
                     lsn,
                     pg_constants::DEFAULTTABLESPACE_OID,
                     dboid,
@@ -108,24 +110,36 @@ pub fn import_timeline_from_postgres_datadir(
     }
     for entry in fs::read_dir(path.join("pg_xact"))? {
         let entry = entry?;
-        import_slru_file(timeline, lsn, SlruKind::Clog, &entry.path())?;
+        import_slru_file(writer, lsn, SlruKind::Clog, &entry.path())?;
     }
     for entry in fs::read_dir(path.join("pg_multixact").join("members"))? {
         let entry = entry?;
-        import_slru_file(timeline, lsn, SlruKind::MultiXactMembers, &entry.path())?;
+        import_slru_file(writer, lsn, SlruKind::MultiXactMembers, &entry.path())?;
     }
     for entry in fs::read_dir(path.join("pg_multixact").join("offsets"))? {
         let entry = entry?;
-        import_slru_file(timeline, lsn, SlruKind::MultiXactOffsets, &entry.path())?;
+        import_slru_file(writer, lsn, SlruKind::MultiXactOffsets, &entry.path())?;
     }
     for entry in fs::read_dir(path.join("pg_twophase"))? {
         let entry = entry?;
         let xid = u32::from_str_radix(entry.path().to_str().unwrap(), 16)?;
-        import_nonrel_file(timeline, lsn, RelishTag::TwoPhase { xid }, &entry.path())?;
+        import_nonrel_file(writer, lsn, RelishTag::TwoPhase { xid }, &entry.path())?;
     }
     // TODO: Scan pg_tblspc
 
-    timeline.advance_last_record_lsn(lsn);
+    writer.advance_last_record_lsn(lsn);
+
+    // Import WAL. This is needed even when starting from a shutdown checkpoint, because
+    // this reads the checkpoint record itself, advancing the tip of the timeline to
+    // *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'
+    let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?;
+    import_wal(
+        &path.join("pg_wal"),
+        writer,
+        Lsn(pg_control.checkPointCopy.redo),
+        lsn,
+        &mut pg_control.checkPointCopy.clone(),
+    )?;
 
     Ok(())
 }
@@ -133,12 +147,13 @@ pub fn import_timeline_from_postgres_datadir(
 // subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
 fn import_relfile(
     path: &Path,
-    timeline: &dyn Timeline,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     spcoid: Oid,
     dboid: Oid,
 ) -> Result<()> {
     // Does it look like a relation file?
+    trace!("importing rel file {}", path.display());
 
     let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
     if let Err(e) = p {
@@ -166,14 +181,14 @@ fn import_relfile(
             }
 
             // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
+            Err(err) => match err.kind() {
                 std::io::ErrorKind::UnexpectedEof => {
                     // reached EOF. That's expected.
                     // FIXME: maybe check that we read the full length of the file?
                     break;
                 }
                 _ => {
-                    bail!("error reading file {}: {:#}", path.display(), e);
+                    bail!("error reading file {}: {:#}", path.display(), err);
                 }
             },
         };
@@ -190,7 +205,7 @@ fn import_relfile(
 /// are just slurped into the repository as one blob.
 ///
 fn import_nonrel_file(
-    timeline: &dyn Timeline,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     tag: RelishTag,
     path: &Path,
@@ -200,7 +215,7 @@ fn import_nonrel_file(
     // read the whole file
     file.read_to_end(&mut buffer)?;
 
-    info!("importing non-rel file {}", path.display());
+    trace!("importing non-rel file {}", path.display());
 
     timeline.put_page_image(tag, 0, lsn, Bytes::copy_from_slice(&buffer[..]))?;
     Ok(())
@@ -211,13 +226,17 @@ fn import_nonrel_file(
 ///
 /// The control file is imported as is, but we also extract the checkpoint record
 /// from it and store it separated.
-fn import_control_file(timeline: &dyn Timeline, lsn: Lsn, path: &Path) -> Result<()> {
+fn import_control_file(
+    timeline: &dyn TimelineWriter,
+    lsn: Lsn,
+    path: &Path,
+) -> Result<ControlFileData> {
     let mut file = File::open(path)?;
     let mut buffer = Vec::new();
     // read the whole file
     file.read_to_end(&mut buffer)?;
 
-    info!("importing control file {}", path.display());
+    trace!("importing control file {}", path.display());
 
     // Import it as ControlFile
     timeline.put_page_image(
@@ -232,19 +251,24 @@ fn import_control_file(timeline: &dyn Timeline, lsn: Lsn, path: &Path) -> Result
     let checkpoint_bytes = pg_control.checkPointCopy.encode();
     timeline.put_page_image(RelishTag::Checkpoint, 0, lsn, checkpoint_bytes)?;
 
-    Ok(())
+    Ok(pg_control)
 }
 
 ///
 /// Import an SLRU segment file
 ///
-fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Path) -> Result<()> {
+fn import_slru_file(
+    timeline: &dyn TimelineWriter,
+    lsn: Lsn,
+    slru: SlruKind,
+    path: &Path,
+) -> Result<()> {
     // Does it look like an SLRU file?
     let mut file = File::open(path)?;
     let mut buf: [u8; 8192] = [0u8; 8192];
     let segno = u32::from_str_radix(path.file_name().unwrap().to_str().unwrap(), 16)?;
 
-    info!("importing slru file {}", path.display());
+    trace!("importing slru file {}", path.display());
 
     let mut rpageno = 0;
     loop {
@@ -260,14 +284,14 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
             }
 
             // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
+            Err(err) => match err.kind() {
                 std::io::ErrorKind::UnexpectedEof => {
                     // reached EOF. That's expected.
                     // FIXME: maybe check that we read the full length of the file?
                     break;
                 }
                 _ => {
-                    bail!("error reading file {}: {:#}", path.display(), e);
+                    bail!("error reading file {}: {:#}", path.display(), err);
                 }
             },
         };
@@ -279,18 +303,119 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
     Ok(())
 }
 
+/// Scan PostgreSQL WAL files in given directory and load all records between
+/// 'startpoint' and 'endpoint' into the repository.
+///
+/// Segments are read one at a time, starting with the one containing 'startpoint'
+/// (tried first under its plain name, then with a ".partial" suffix). 'checkpoint' is
+/// passed to save_decoded_record() and re-stored whenever a record modified it.
+/// Fails if a needed segment cannot be opened or a record cannot be decoded.
+fn import_wal(
+    walpath: &Path,
+    timeline: &dyn TimelineWriter,
+    startpoint: Lsn,
+    endpoint: Lsn,
+    checkpoint: &mut CheckPoint,
+) -> Result<()> {
+    let mut waldecoder = WalStreamDecoder::new(startpoint);
+
+    let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE);
+    let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
+    let mut last_lsn = startpoint;
+
+    while last_lsn <= endpoint {
+        // FIXME: assume postgresql tli 1 for now
+        let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
+        let mut buf = Vec::new();
+
+        // The segment may still be stored under its ".partial" name.
+        let mut path = walpath.join(&filename);
+        if !path.exists() {
+            path = walpath.join(filename + ".partial");
+        }
+
+        // Slurp the WAL file. In the first segment, start reading at 'startpoint'.
+        let mut file = File::open(&path)?;
+        if offset > 0 {
+            file.seek(SeekFrom::Start(offset as u64))?;
+        }
+
+        let nread = file.read_to_end(&mut buf)?;
+        if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize {
+            // Maybe allow this for .partial files?
+            error!("read only {} bytes from WAL file", nread);
+        }
+
+        waldecoder.feed_bytes(&buf);
+
+        let mut nrecords = 0;
+        while last_lsn <= endpoint {
+            // 'None' means the decoder needs more input to finish the next record
+            // (e.g. it continues in the next segment). Stop polling and go read the
+            // next segment, instead of spinning here forever.
+            let (lsn, recdata) = match waldecoder.poll_decode()? {
+                Some(record) => record,
+                None => break,
+            };
+
+            let mut checkpoint_modified = false;
+
+            let decoded = decode_wal_record(recdata.clone());
+            save_decoded_record(
+                checkpoint,
+                &mut checkpoint_modified,
+                timeline,
+                &decoded,
+                recdata,
+                lsn,
+            )?;
+            last_lsn = lsn;
+
+            if checkpoint_modified {
+                let checkpoint_bytes = checkpoint.encode();
+                timeline.put_page_image(RelishTag::Checkpoint, 0, last_lsn, checkpoint_bytes)?;
+            }
+
+            // Now that this record has been fully handled, including updating the
+            // checkpoint data, let the repository know that it is up-to-date to this LSN
+            timeline.advance_last_record_lsn(last_lsn);
+            nrecords += 1;
+
+            trace!("imported record at {} (end {})", lsn, endpoint);
+        }
+
+        debug!("imported {} records up to {}", nrecords, last_lsn);
+
+        segno += 1;
+        offset = 0;
+    }
+
+    if last_lsn != startpoint {
+        debug!("reached end of WAL at {}, updating checkpoint info", last_lsn);
+
+        timeline.advance_last_record_lsn(last_lsn);
+    } else {
+        info!("no WAL to import at {}", last_lsn);
+    }
+
+    Ok(())
+}
+
 ///
 /// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
 /// relations/pages that the record affects.
 ///
 pub fn save_decoded_record(
     checkpoint: &mut CheckPoint,
-    timeline: &dyn Timeline,
+    checkpoint_modified: &mut bool,
+    timeline: &dyn TimelineWriter,
     decoded: &DecodedWALRecord,
     recdata: Bytes,
     lsn: Lsn,
 ) -> Result<()> {
-    checkpoint.update_next_xid(decoded.xl_xid);
+    if checkpoint.update_next_xid(decoded.xl_xid) {
+        *checkpoint_modified = true;
+    }
 
     // Iterate through all the blocks that the record modifies, and
     // "put" a separate copy of the record for each block.
@@ -303,13 +428,12 @@ pub fn save_decoded_record(
         });
 
         let rec = WALRecord {
-            lsn,
             will_init: blk.will_init || blk.apply_image,
             rec: recdata.clone(),
             main_data_offset: decoded.main_data_offset as u32,
         };
 
-        timeline.put_wal_record(tag, blk.blkno, rec)?;
+        timeline.put_wal_record(lsn, tag, blk.blkno, rec)?;
     }
 
     let mut buf = decoded.record.clone();
@@ -374,7 +498,7 @@ pub fn save_decoded_record(
         } else {
             assert!(info == pg_constants::CLOG_TRUNCATE);
             let xlrec = XlClogTruncate::decode(&mut buf);
-            save_clog_truncate_record(checkpoint, timeline, lsn, &xlrec)?;
+            save_clog_truncate_record(checkpoint, checkpoint_modified, timeline, lsn, &xlrec)?;
         }
     } else if decoded.xl_rmid == pg_constants::RM_XACT_ID {
         let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK;
@@ -443,10 +567,17 @@ pub fn save_decoded_record(
             )?;
         } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
             let xlrec = XlMultiXactCreate::decode(&mut buf);
-            save_multixact_create_record(checkpoint, timeline, lsn, &xlrec, decoded)?;
+            save_multixact_create_record(
+                checkpoint,
+                checkpoint_modified,
+                timeline,
+                lsn,
+                &xlrec,
+                decoded,
+            )?;
         } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {
             let xlrec = XlMultiXactTruncate::decode(&mut buf);
-            save_multixact_truncate_record(checkpoint, timeline, lsn, &xlrec)?;
+            save_multixact_truncate_record(checkpoint, checkpoint_modified, timeline, lsn, &xlrec)?;
         }
     } else if decoded.xl_rmid == pg_constants::RM_RELMAP_ID {
         let xlrec = XlRelmapUpdate::decode(&mut buf);
@@ -455,7 +586,10 @@ pub fn save_decoded_record(
         let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
         if info == pg_constants::XLOG_NEXTOID {
             let next_oid = buf.get_u32_le();
-            checkpoint.nextOid = next_oid;
+            if checkpoint.nextOid != next_oid {
+                checkpoint.nextOid = next_oid;
+                *checkpoint_modified = true;
+            }
         } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
             || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
         {
@@ -471,6 +605,7 @@ pub fn save_decoded_record(
             );
             if (checkpoint.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
                 checkpoint.oldestXid = xlog_checkpoint.oldestXid;
+                *checkpoint_modified = true;
             }
         }
     }
@@ -478,7 +613,11 @@ pub fn save_decoded_record(
 }
 
 /// Subroutine of save_decoded_record(), to handle an XLOG_DBASE_CREATE record.
-fn save_xlog_dbase_create(timeline: &dyn Timeline, lsn: Lsn, rec: &XlCreateDatabase) -> Result<()> {
+fn save_xlog_dbase_create(
+    timeline: &dyn TimelineWriter,
+    lsn: Lsn,
+    rec: &XlCreateDatabase,
+) -> Result<()> {
     let db_id = rec.db_id;
     let tablespace_id = rec.tablespace_id;
     let src_db_id = rec.src_db_id;
@@ -555,7 +694,11 @@ fn save_xlog_dbase_create(timeline: &dyn Timeline, lsn: Lsn, rec: &XlCreateDatab
 /// Subroutine of save_decoded_record(), to handle an XLOG_SMGR_TRUNCATE record.
 ///
 /// This is the same logic as in PostgreSQL's smgr_redo() function.
-fn save_xlog_smgr_truncate(timeline: &dyn Timeline, lsn: Lsn, rec: &XlSmgrTruncate) -> Result<()> {
+fn save_xlog_smgr_truncate(
+    timeline: &dyn TimelineWriter,
+    lsn: Lsn,
+    rec: &XlSmgrTruncate,
+) -> Result<()> {
     let spcnode = rec.rnode.spcnode;
     let dbnode = rec.rnode.dbnode;
     let relnode = rec.rnode.relnode;
@@ -617,7 +760,7 @@ fn save_xlog_smgr_truncate(timeline: &dyn Timeline, lsn: Lsn, rec: &XlSmgrTrunca
 /// Subroutine of save_decoded_record(), to handle an XLOG_XACT_* records.
 ///
 fn save_xact_record(
-    timeline: &dyn Timeline,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     parsed: &XlXactParsedRecord,
     decoded: &DecodedWALRecord,
@@ -628,12 +771,12 @@ fn save_xact_record(
     let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
     let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
     let rec = WALRecord {
-        lsn,
         will_init: false,
         rec: decoded.record.clone(),
         main_data_offset: decoded.main_data_offset as u32,
     };
     timeline.put_wal_record(
+        lsn,
         RelishTag::Slru {
             slru: SlruKind::Clog,
             segno,
@@ -649,6 +792,7 @@ fn save_xact_record(
             let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
             let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
             timeline.put_wal_record(
+                lsn,
                 RelishTag::Slru {
                     slru: SlruKind::Clog,
                     segno,
@@ -674,7 +818,8 @@ fn save_xact_record(
 
 fn save_clog_truncate_record(
     checkpoint: &mut CheckPoint,
-    timeline: &dyn Timeline,
+    checkpoint_modified: &mut bool,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     xlrec: &XlClogTruncate,
 ) -> Result<()> {
@@ -692,6 +837,7 @@ fn save_clog_truncate_record(
     // TODO Figure out if there will be any issues with replica.
     checkpoint.oldestXid = xlrec.oldest_xid;
     checkpoint.oldestXidDB = xlrec.oldest_xid_db;
+    *checkpoint_modified = true;
 
     // TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
 
@@ -734,13 +880,13 @@ fn save_clog_truncate_record(
 
 fn save_multixact_create_record(
     checkpoint: &mut CheckPoint,
-    timeline: &dyn Timeline,
+    checkpoint_modified: &mut bool,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     xlrec: &XlMultiXactCreate,
     decoded: &DecodedWALRecord,
 ) -> Result<()> {
     let rec = WALRecord {
-        lsn,
         will_init: false,
         rec: decoded.record.clone(),
         main_data_offset: decoded.main_data_offset as u32,
@@ -749,6 +895,7 @@ fn save_multixact_create_record(
     let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
     let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
     timeline.put_wal_record(
+        lsn,
         RelishTag::Slru {
             slru: SlruKind::MultiXactOffsets,
             segno,
@@ -768,6 +915,7 @@ fn save_multixact_create_record(
         let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
         let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
         timeline.put_wal_record(
+            lsn,
             RelishTag::Slru {
                 slru: SlruKind::MultiXactMembers,
                 segno,
@@ -790,9 +938,11 @@ fn save_multixact_create_record(
     }
     if xlrec.mid >= checkpoint.nextMulti {
         checkpoint.nextMulti = xlrec.mid + 1;
+        *checkpoint_modified = true;
     }
     if xlrec.moff + xlrec.nmembers > checkpoint.nextMultiOffset {
         checkpoint.nextMultiOffset = xlrec.moff + xlrec.nmembers;
+        *checkpoint_modified = true;
     }
     let max_mbr_xid = xlrec.members.iter().fold(0u32, |acc, mbr| {
         if mbr.xid.wrapping_sub(acc) as i32 > 0 {
@@ -802,18 +952,22 @@ fn save_multixact_create_record(
         }
     });
 
-    checkpoint.update_next_xid(max_mbr_xid);
+    if checkpoint.update_next_xid(max_mbr_xid) {
+        *checkpoint_modified = true;
+    }
     Ok(())
 }
 
 fn save_multixact_truncate_record(
     checkpoint: &mut CheckPoint,
-    timeline: &dyn Timeline,
+    checkpoint_modified: &mut bool,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     xlrec: &XlMultiXactTruncate,
 ) -> Result<()> {
     checkpoint.oldestMulti = xlrec.end_trunc_off;
     checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
+    *checkpoint_modified = true;
 
     // PerformMembersTruncation
     let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
@@ -847,7 +1001,7 @@ fn save_multixact_truncate_record(
 }
 
 fn save_relmap_page(
-    timeline: &dyn Timeline,
+    timeline: &dyn TimelineWriter,
     lsn: Lsn,
     xlrec: &XlRelmapUpdate,
     decoded: &DecodedWALRecord,
diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs
index 4eb46ba71a..be3a36fda4 100644
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -8,35 +8,92 @@ use crate::walredo::PostgresRedoManager;
 use crate::PageServerConf;
 use anyhow::{anyhow, bail, Context, Result};
 use lazy_static::lazy_static;
-use log::info;
-use std::collections::hash_map::Entry;
+use log::{debug, info};
 use std::collections::HashMap;
+use std::fmt;
 use std::fs;
 use std::str::FromStr;
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex, MutexGuard};
+use std::thread::JoinHandle;
 use zenith_utils::zid::{ZTenantId, ZTimelineId};
 
 lazy_static! {
-    static ref REPOSITORY: Mutex<HashMap<ZTenantId, Arc<dyn Repository>>> =
-        Mutex::new(HashMap::new());
+    static ref TENANTS: Mutex<HashMap<ZTenantId, Tenant>> = Mutex::new(HashMap::new());
 }
 
-fn access_repository() -> MutexGuard<'static, HashMap<ZTenantId, Arc<dyn Repository>>> {
-    REPOSITORY.lock().unwrap()
+struct Tenant {
+    state: TenantState,
+    repo: Option<Arc<dyn Repository>>, // None until the repository has been set up
 }
 
-pub fn init(conf: &'static PageServerConf) {
-    let mut m = access_repository();
-    for dir_entry in fs::read_dir(conf.tenants_path()).unwrap() {
-        let tenantid =
-            ZTenantId::from_str(dir_entry.unwrap().file_name().to_str().unwrap()).unwrap();
-        let repo = init_repo(conf, tenantid);
-        info!("initialized storage for tenant: {}", &tenantid);
-        m.insert(tenantid, repo);
+// Lifecycle state of a tenant, stored in the `state` field of `Tenant`.
+#[derive(Debug)]
+enum TenantState {
+    // This tenant only exists in cloud storage. It cannot be accessed.
+    CloudOnly,
+    // This tenant exists in cloud storage, and we are currently downloading it to local disk.
+    // It cannot be accessed yet, not until it's been fully downloaded to local disk.
+    Downloading,
+    // All data for this tenant is complete on local disk, but we haven't loaded the Repository,
+    // Timeline and Layer structs into memory yet, so it cannot be accessed yet.
+    //Ready,
+    // This tenant exists on local disk, and the layer map has been loaded into memory.
+    // The local disk might have some newer files that don't exist in cloud storage yet.
+    Active,
+    // Same on-disk and in-memory situation as Active, but the tenant can no longer be
+    // accessed for any purpose other than graceful shutdown.
+    //Stopping,
+}
+
+impl fmt::Display for TenantState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            TenantState::CloudOnly => "CloudOnly",
+            TenantState::Downloading => "Downloading",
+            TenantState::Active => "Active",
+        })
     }
 }
 
-fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc<LayeredRepository> {
+fn access_tenants() -> MutexGuard<'static, HashMap<ZTenantId, Tenant>> {
+    TENANTS.lock().unwrap() // panics if the mutex is poisoned
+}
+
+struct TenantHandleEntry {
+    checkpointer_handle: Option<JoinHandle<()>>, // taken and joined by stop_tenant_threads()
+    gc_handle: Option<JoinHandle<()>>, // taken and joined by stop_tenant_threads()
+}
+
+// Logically these handles belong to Repository,
+// but it's just simpler to store them separately
+lazy_static! {
+    static ref TENANT_HANDLES: Mutex<HashMap<ZTenantId, TenantHandleEntry>> =
+        Mutex::new(HashMap::new());
+}
+
+static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
+
+// Registers every tenant found in the tenants directory and initializes its repository.
+pub fn init(conf: &'static PageServerConf) {
+    for dir_entry in fs::read_dir(conf.tenants_path()).unwrap() {
+        let entry_name = dir_entry.unwrap().file_name();
+        let tenantid = ZTenantId::from_str(entry_name.to_str().unwrap()).unwrap();
+
+        // Register a placeholder first; init_repo() looks the tenant up in the map
+        // and fills in the repository. The lock guard is a temporary, so the map is
+        // unlocked again before init_repo() locks it itself.
+        let placeholder = Tenant {
+            state: TenantState::CloudOnly,
+            repo: None,
+        };
+        access_tenants().insert(tenantid, placeholder);
+        init_repo(conf, tenantid);
+        info!("initialized storage for tenant: {}", &tenantid);
+    }
+}
+
+fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
     // Set up a WAL redo manager, for applying WAL records.
     let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);
 
@@ -47,9 +104,22 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc<Layered
         tenant_id,
         true,
     ));
-    LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
-    LayeredRepository::launch_gc_thread(conf, repo.clone());
-    repo
+
+    let checkpointer_handle = LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
+    let gc_handle = LayeredRepository::launch_gc_thread(conf, repo.clone());
+
+    let mut handles = TENANT_HANDLES.lock().unwrap();
+    let h = TenantHandleEntry {
+        checkpointer_handle: Some(checkpointer_handle),
+        gc_handle: Some(gc_handle),
+    };
+
+    handles.insert(tenant_id, h);
+
+    let mut m = access_tenants();
+    let tenant = m.get_mut(&tenant_id).unwrap();
+    tenant.repo = Some(repo);
+    tenant.state = TenantState::Active;
 }
 
 // TODO kb Currently unused function, will later be used when the relish storage downloads a new layer.
@@ -64,15 +134,23 @@ pub fn register_relish_download(
         tenant_id,
         timeline_id
     );
-    match access_repository().entry(tenant_id) {
-        Entry::Occupied(o) => init_timeline(o.get().as_ref(), timeline_id),
-        Entry::Vacant(v) => {
-            log::info!("New repo initialized");
-            let new_repo = init_repo(conf, tenant_id);
-            init_timeline(new_repo.as_ref(), timeline_id);
-            v.insert(new_repo);
+
+    {
+        let mut m = access_tenants();
+        let mut tenant = m.get_mut(&tenant_id).unwrap();
+        tenant.state = TenantState::Downloading;
+        match &tenant.repo {
+            Some(repo) => init_timeline(repo.as_ref(), timeline_id),
+            None => {
+                log::info!("Initialize new repo");
+            }
         }
     }
+
+    // init repo updates Tenant state
+    init_repo(conf, tenant_id);
+    let new_repo = get_repository_for_tenant(tenant_id).unwrap();
+    init_timeline(new_repo.as_ref(), timeline_id);
 }
 
 fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) {
@@ -82,29 +160,73 @@ fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) {
     }
 }
 
+// True once SHUTDOWN_REQUESTED has been set; check this in the thread loops to know when to exit
+pub fn shutdown_requested() -> bool {
+    SHUTDOWN_REQUESTED.load(Ordering::Relaxed)
+}
+
+// Joins the checkpointer and gc threads registered for the tenant, if any.
+pub fn stop_tenant_threads(tenantid: ZTenantId) {
+    if let Some(entry) = TENANT_HANDLES.lock().unwrap().get_mut(&tenantid) {
+        entry.checkpointer_handle.take().map(|thread| thread.join());
+        debug!("checkpointer for tenant {} has stopped", tenantid);
+        entry.gc_handle.take().map(|thread| thread.join());
+        debug!("gc for tenant {} has stopped", tenantid);
+    }
+}
+
+// Sets the global shutdown flag, then for every known tenant joins its
+// background threads and calls shutdown() on its repository.
+pub fn shutdown_all_tenants() -> Result<()> {
+    SHUTDOWN_REQUESTED.store(true, Ordering::Relaxed);
+    for tenantid in list_tenantids()? {
+        stop_tenant_threads(tenantid);
+        let repo = get_repository_for_tenant(tenantid)?;
+        debug!("shutdown tenant {}", tenantid);
+        repo.shutdown()?;
+    }
+
+    Ok(())
+}
+
 pub fn create_repository_for_tenant(
     conf: &'static PageServerConf,
     tenantid: ZTenantId,
 ) -> Result<()> {
-    let mut m = access_repository();
-
-    // First check that the tenant doesn't exist already
-    if m.get(&tenantid).is_some() {
-        bail!("tenant {} already exists", tenantid);
+    {
+        let mut m = access_tenants();
+        // First check that the tenant doesn't exist already
+        if m.get(&tenantid).is_some() {
+            bail!("tenant {} already exists", tenantid);
+        }
+        let tenant = Tenant {
+            state: TenantState::CloudOnly,
+            repo: None,
+        };
+        m.insert(tenantid, tenant);
     }
+    // NOTE(review): if create_repo() fails, the placeholder inserted above is left in the map.
     let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenantid));
     let repo = branches::create_repo(conf, tenantid, wal_redo_manager)?;
 
-    m.insert(tenantid, repo);
+    let mut m = access_tenants();
+    let tenant = m.get_mut(&tenantid).unwrap();
+    tenant.repo = Some(repo);
+    tenant.state = TenantState::Active;
 
     Ok(())
 }
 
 pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Repository>> {
-    access_repository()
+    let m = access_tenants();
+    let tenant = m
         .get(&tenantid)
-        .map(Arc::clone)
-        .ok_or_else(|| anyhow!("repository not found for tenant name {}", tenantid))
+        .ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid))?;
+    // Unknown tenant: error returned above, no panic. Known tenant without a repo: error below.
+    match &tenant.repo {
+        Some(repo) => Ok(Arc::clone(repo)),
+        None => anyhow::bail!("Repository for tenant {} is not yet valid", tenantid),
+    }
 }
 
 pub fn get_timeline_for_tenant(
@@ -115,3 +237,13 @@ pub fn get_timeline_for_tenant(
         .get_timeline(timelineid)
         .with_context(|| format!("cannot fetch timeline {}", timelineid))
 }
+
+// Returns the ids of all tenants currently present in the TENANTS map.
+fn list_tenantids() -> Result<Vec<ZTenantId>> {
+    let tenants = access_tenants();
+    let mut tenantids = Vec::with_capacity(tenants.len());
+    for tenantid in tenants.keys() {
+        tenantids.push(*tenantid);
+    }
+    Ok(tenantids)
+}
diff --git a/pageserver/src/waldecoder.rs b/pageserver/src/waldecoder.rs
index cb94b9248b..b1e8e3b54f 100644
--- a/pageserver/src/waldecoder.rs
+++ b/pageserver/src/waldecoder.rs
@@ -72,6 +72,10 @@ impl WalStreamDecoder {
     ///     Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
     ///
     pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
+        let recordbuf;
+
+        // Run state machine that validates page headers, and reassembles records
+        // that cross page boundaries.
         loop {
             // parse and verify page boundaries as we go
             if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
@@ -120,29 +124,41 @@ impl WalStreamDecoder {
                 self.lsn += self.padlen as u64;
                 self.padlen = 0;
             } else if self.contlen == 0 {
-                // need to have at least the xl_tot_len field
+                assert!(self.recordbuf.is_empty());
 
+                // need to have at least the xl_tot_len field
                 if self.inputbuf.remaining() < 4 {
                     return Ok(None);
                 }
 
-                // read xl_tot_len FIXME: assumes little-endian
+                // peek xl_tot_len at the beginning of the record.
+                // FIXME: assumes little-endian
                 self.startlsn = self.lsn;
-                let xl_tot_len = self.inputbuf.get_u32_le();
+                let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
                 if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
                     return Err(WalDecodeError {
                         msg: format!("invalid xl_tot_len {}", xl_tot_len),
                         lsn: self.lsn,
                     });
                 }
-                self.lsn += 4;
 
-                self.recordbuf.clear();
-                self.recordbuf.reserve(xl_tot_len as usize);
-                self.recordbuf.put_u32_le(xl_tot_len);
-
-                self.contlen = xl_tot_len - 4;
-                continue;
+                // Fast path for the common case that the whole record fits on the page.
+                let pageleft = self.lsn.remaining_in_block() as u32;
+                if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
+                    // Take the record from the 'inputbuf', and validate it.
+                    recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
+                    self.lsn += xl_tot_len as u64;
+                    break;
+                } else {
+                    // Need to assemble the record from pieces. Remember the size of the
+                    // record, and loop back. On next iteration, we will reach the 'else'
+                    // branch below, and copy the part of the record that was on this page
+                    // to 'recordbuf'.  Subsequent iterations will skip page headers, and
+                    // append the continuations from the next pages to 'recordbuf'.
+                    self.recordbuf.reserve(xl_tot_len as usize);
+                    self.contlen = xl_tot_len;
+                    continue;
+                }
             } else {
                 // we're continuing a record, possibly from previous page.
                 let pageleft = self.lsn.remaining_in_block() as u32;
@@ -159,47 +175,42 @@ impl WalStreamDecoder {
                 self.contlen -= n as u32;
 
                 if self.contlen == 0 {
-                    let recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new());
-
-                    let recordbuf = recordbuf.freeze();
-                    let mut buf = recordbuf.clone();
-
-                    let xlogrec = XLogRecord::from_bytes(&mut buf);
-
-                    // XLOG_SWITCH records are special. If we see one, we need to skip
-                    // to the next WAL segment.
-                    if xlogrec.is_xlog_switch_record() {
-                        trace!("saw xlog switch record at {}", self.lsn);
-                        self.padlen =
-                            self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
-                    } else {
-                        // Pad to an 8-byte boundary
-                        self.padlen = self.lsn.calc_padding(8u32) as u32;
-                    }
-
-                    let mut crc = crc32c_append(0, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);
-                    crc = crc32c_append(crc, &recordbuf[0..XLOG_RECORD_CRC_OFFS]);
-                    if crc != xlogrec.xl_crc {
-                        return Err(WalDecodeError {
-                            msg: "WAL record crc mismatch".into(),
-                            lsn: self.lsn,
-                        });
-                    }
-
-                    // Always align resulting LSN on 0x8 boundary -- that is important for getPage()
-                    // and WalReceiver integration. Since this code is used both for WalReceiver and
-                    // initial WAL import let's force alignment right here.
-                    let result = (self.lsn.align(), recordbuf);
-                    return Ok(Some(result));
+                    // The record is now complete.
+                    recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
+                    break;
                 }
                 continue;
             }
         }
-        // check record boundaries
 
-        // deal with continuation records
+        // We now have a record in the 'recordbuf' local variable.
+        let xlogrec = XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]);
 
-        // deal with xlog_switch records
+        let mut crc = 0;
+        crc = crc32c_append(crc, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);
+        crc = crc32c_append(crc, &recordbuf[0..XLOG_RECORD_CRC_OFFS]);
+        if crc != xlogrec.xl_crc {
+            return Err(WalDecodeError {
+                msg: "WAL record crc mismatch".into(),
+                lsn: self.lsn,
+            });
+        }
+
+        // XLOG_SWITCH records are special. If we see one, we need to skip
+        // to the next WAL segment.
+        if xlogrec.is_xlog_switch_record() {
+            trace!("saw xlog switch record at {}", self.lsn);
+            self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
+        } else {
+            // Pad to an 8-byte boundary
+            self.padlen = self.lsn.calc_padding(8u32) as u32;
+        }
+
+        // Always align resulting LSN on 0x8 boundary -- that is important for getPage()
+        // and WalReceiver integration. Since this code is used both for WalReceiver and
+        // initial WAL import let's force alignment right here.
+        let result = (self.lsn.align(), recordbuf);
+        Ok(Some(result))
     }
 }
 
diff --git a/pageserver/src/walreceiver.rs b/pageserver/src/walreceiver.rs
index d7bdfd6f2e..65b3fa5cf6 100644
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -12,7 +12,6 @@ use crate::waldecoder::*;
 use crate::PageServerConf;
 use anyhow::{bail, Error, Result};
 use lazy_static::lazy_static;
-use log::*;
 use postgres::fallible_iterator::FallibleIterator;
 use postgres::replication::ReplicationIter;
 use postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
@@ -25,8 +24,10 @@ use std::str::FromStr;
 use std::sync::Mutex;
 use std::thread;
 use std::thread::sleep;
+use std::thread::JoinHandle;
 use std::thread_local;
 use std::time::{Duration, SystemTime};
+use tracing::*;
 use zenith_utils::lsn::Lsn;
 use zenith_utils::zid::ZTenantId;
 use zenith_utils::zid::ZTimelineId;
@@ -36,6 +37,7 @@ use zenith_utils::zid::ZTimelineId;
 //
 struct WalReceiverEntry {
     wal_producer_connstr: String,
+    wal_receiver_handle: Option<JoinHandle<()>>,
 }
 
 lazy_static! {
@@ -50,6 +52,19 @@ thread_local! {
     pub(crate) static IS_WAL_RECEIVER: Cell<bool> = Cell::new(false);
 }
 
+// Drop the walreceiver's JoinHandle; the join is commented out, so this doesn't wait yet.
+// The thread itself stops when pageserver shutdown is requested.
+// In future we can make this more granular and send shutdown signals
+// per tenant/timeline to cancel inactive walreceivers.
+// TODO deal with blocking pg connections
+pub fn stop_wal_receiver(timelineid: ZTimelineId) {
+    let mut receivers = WAL_RECEIVERS.lock().unwrap();
+    if let Some(r) = receivers.get_mut(&timelineid) {
+        r.wal_receiver_handle.take();
+        // r.wal_receiver_handle.take().map(JoinHandle::join);
+    }
+}
+
 // Launch a new WAL receiver, or tell one that's running about change in connection string
 pub fn launch_wal_receiver(
     conf: &'static PageServerConf,
@@ -64,19 +79,19 @@ pub fn launch_wal_receiver(
             receiver.wal_producer_connstr = wal_producer_connstr.into();
         }
         None => {
-            let receiver = WalReceiverEntry {
-                wal_producer_connstr: wal_producer_connstr.into(),
-            };
-            receivers.insert(timelineid, receiver);
-
-            // Also launch a new thread to handle this connection
-            let _walreceiver_thread = thread::Builder::new()
+            let wal_receiver_handle = thread::Builder::new()
                 .name("WAL receiver thread".into())
                 .spawn(move || {
                     IS_WAL_RECEIVER.with(|c| c.set(true));
                     thread_main(conf, timelineid, tenantid);
                 })
                 .unwrap();
+
+            let receiver = WalReceiverEntry {
+                wal_producer_connstr: wal_producer_connstr.into(),
+                wal_receiver_handle: Some(wal_receiver_handle),
+            };
+            receivers.insert(timelineid, receiver);
         }
     };
 }
@@ -96,16 +111,14 @@ fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
 // This is the entry point for the WAL receiver thread.
 //
 fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
-    info!(
-        "WAL receiver thread started for timeline : '{}'",
-        timelineid
-    );
+    let _enter = info_span!("WAL receiver", timeline = %timelineid, tenant = %tenantid).entered();
+    info!("WAL receiver thread started");
 
     //
     // Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
     // and start streaming WAL from it. If the connection is lost, keep retrying.
     //
-    loop {
+    while !tenant_mgr::shutdown_requested() {
         // Look up the current WAL producer address
         let wal_producer_connstr = get_wal_producer_connstr(timelineid);
 
@@ -119,6 +132,7 @@ fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid:
             sleep(Duration::from_secs(1));
         }
     }
+    debug!("WAL streaming shut down");
 }
 
 fn walreceiver_main(
@@ -169,8 +183,8 @@ fn walreceiver_main(
     startpoint += startpoint.calc_padding(8u32);
 
     info!(
-        "last_record_lsn {} starting replication from {} for timeline {}, server is at {}...",
-        last_rec_lsn, startpoint, timelineid, end_of_wal
+        "last_record_lsn {} starting replication from {}, server is at {}...",
+        last_rec_lsn, startpoint, end_of_wal
     );
 
     let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
@@ -198,27 +212,32 @@ fn walreceiver_main(
                 waldecoder.feed_bytes(data);
 
                 while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                    // Save old checkpoint value to compare with it after decoding WAL record
-                    let old_checkpoint_bytes = checkpoint.encode();
-                    let decoded = decode_wal_record(recdata.clone());
+                    let _enter = info_span!("processing record", lsn = %lsn).entered();
 
                     // It is important to deal with the aligned records as lsn in getPage@LSN is
                     // aligned and can be several bytes bigger. Without this alignment we are
                     // at risk of hittind a deadlock.
                     assert!(lsn.is_aligned());
 
+                    let writer = timeline.writer();
+
+                    let mut checkpoint_modified = false;
+
+                    let decoded = decode_wal_record(recdata.clone());
                     restore_local_repo::save_decoded_record(
                         &mut checkpoint,
-                        &*timeline,
+                        &mut checkpoint_modified,
+                        writer.as_ref(),
                         &decoded,
                         recdata,
                         lsn,
                     )?;
 
-                    let new_checkpoint_bytes = checkpoint.encode();
                     // Check if checkpoint data was updated by save_decoded_record
-                    if new_checkpoint_bytes != old_checkpoint_bytes {
-                        timeline.put_page_image(
+                    if checkpoint_modified {
+                        let new_checkpoint_bytes = checkpoint.encode();
+
+                        writer.put_page_image(
                             RelishTag::Checkpoint,
                             0,
                             lsn,
@@ -228,7 +247,7 @@ fn walreceiver_main(
 
                     // Now that this record has been fully handled, including updating the
                     // checkpoint data, let the repository know that it is up-to-date to this LSN
-                    timeline.advance_last_record_lsn(lsn);
+                    writer.advance_last_record_lsn(lsn);
                     last_rec_lsn = lsn;
                 }
 
@@ -275,6 +294,11 @@ fn walreceiver_main(
             const NO_REPLY: u8 = 0;
             physical_stream.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)?;
         }
+
+        if tenant_mgr::shutdown_requested() {
+            debug!("stop walreceiver because pageserver shutdown is requested");
+            break;
+        }
     }
     Ok(())
 }
diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs
index f233fceb3e..8cd696e8f3 100644
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -82,7 +82,7 @@ pub trait WalRedoManager: Send + Sync {
         blknum: u32,
         lsn: Lsn,
         base_img: Option<Bytes>,
-        records: Vec<WALRecord>,
+        records: Vec<(Lsn, WALRecord)>,
     ) -> Result<Bytes, WalRedoError>;
 }
 
@@ -99,7 +99,7 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
         _blknum: u32,
         _lsn: Lsn,
         _base_img: Option<Bytes>,
-        _records: Vec<WALRecord>,
+        _records: Vec<(Lsn, WALRecord)>,
     ) -> Result<Bytes, WalRedoError> {
         Err(WalRedoError::InvalidState)
     }
@@ -150,7 +150,7 @@ struct WalRedoRequest {
     lsn: Lsn,
 
     base_img: Option<Bytes>,
-    records: Vec<WALRecord>,
+    records: Vec<(Lsn, WALRecord)>,
 }
 
 /// An error happened in WAL redo
@@ -179,7 +179,7 @@ impl WalRedoManager for PostgresRedoManager {
         blknum: u32,
         lsn: Lsn,
         base_img: Option<Bytes>,
-        records: Vec<WALRecord>,
+        records: Vec<(Lsn, WALRecord)>,
     ) -> Result<Bytes, WalRedoError> {
         let start_time;
         let lock_time;
@@ -277,7 +277,7 @@ impl PostgresRedoManager {
                 page.extend_from_slice(&ZERO_PAGE);
             }
             // Apply all collected WAL records
-            for record in records {
+            for (_lsn, record) in records {
                 let mut buf = record.rec.clone();
 
                 WAL_REDO_RECORD_COUNTER.inc();
@@ -544,7 +544,7 @@ impl PostgresRedoProcess {
         &mut self,
         tag: BufferTag,
         base_img: Option<Bytes>,
-        records: &[WALRecord],
+        records: &[(Lsn, WALRecord)],
     ) -> Result<Bytes, std::io::Error> {
         let stdout = &mut self.stdout;
         // Buffer the writes to avoid a lot of small syscalls.
@@ -565,22 +565,16 @@ impl PostgresRedoProcess {
                 stdin.write_all(&build_begin_redo_for_block_msg(tag)),
             )
             .await??;
-            if base_img.is_some() {
-                timeout(
-                    TIMEOUT,
-                    stdin.write_all(&build_push_page_msg(tag, base_img.unwrap())),
-                )
-                .await??;
+            if let Some(img) = base_img {
+                timeout(TIMEOUT, stdin.write_all(&build_push_page_msg(tag, &img))).await??;
             }
 
             // Send WAL records.
-            for rec in records.iter() {
-                let r = rec.clone();
-
+            for (lsn, rec) in records.iter() {
                 WAL_REDO_RECORD_COUNTER.inc();
 
                 stdin
-                    .write_all(&build_apply_record_msg(r.lsn, r.rec))
+                    .write_all(&build_apply_record_msg(*lsn, &rec.rec))
                     .await?;
 
                 //debug!("sent WAL record to wal redo postgres process ({:X}/{:X}",
@@ -617,58 +611,41 @@ impl PostgresRedoProcess {
 // process. See vendor/postgres/src/backend/tcop/zenith_wal_redo.c for
 // explanation of the protocol.
 
-fn build_begin_redo_for_block_msg(tag: BufferTag) -> Bytes {
+fn build_begin_redo_for_block_msg(tag: BufferTag) -> Vec<u8> {
     let len = 4 + 1 + 4 * 4;
-    let mut buf = BytesMut::with_capacity(1 + len);
+    let mut buf = Vec::with_capacity(1 + len);
 
     buf.put_u8(b'B');
     buf.put_u32(len as u32);
 
-    // FIXME: this is a temporary hack that should go away when we refactor
-    // the postgres protocol serialization + handlers.
-    //
-    // BytesMut is a dynamic growable buffer, used a lot in tokio code but
-    // not in the std library. To write to a BytesMut from a serde serializer,
-    // we need to either:
-    // - pre-allocate the required buffer space. This is annoying because we
-    //   shouldn't care what the exact serialized size is-- that's the
-    //   serializer's job.
-    // - Or, we need to create a temporary "writer" (which implements the
-    //   `Write` trait). It's a bit awkward, because the writer consumes the
-    //   underlying BytesMut, and we need to extract it later with
-    //   `into_inner`.
-    let mut writer = buf.writer();
-    tag.ser_into(&mut writer)
+    tag.ser_into(&mut buf)
         .expect("serialize BufferTag should always succeed");
-    let buf = writer.into_inner();
 
     debug_assert!(buf.len() == 1 + len);
 
-    buf.freeze()
+    buf
 }
 
-fn build_push_page_msg(tag: BufferTag, base_img: Bytes) -> Bytes {
+fn build_push_page_msg(tag: BufferTag, base_img: &[u8]) -> Vec<u8> {
     assert!(base_img.len() == 8192);
 
     let len = 4 + 1 + 4 * 4 + base_img.len();
-    let mut buf = BytesMut::with_capacity(1 + len);
+    let mut buf = Vec::with_capacity(1 + len);
 
     buf.put_u8(b'P');
     buf.put_u32(len as u32);
-    let mut writer = buf.writer();
-    tag.ser_into(&mut writer)
+    tag.ser_into(&mut buf)
         .expect("serialize BufferTag should always succeed");
-    let mut buf = writer.into_inner();
     buf.put(base_img);
 
     debug_assert!(buf.len() == 1 + len);
 
-    buf.freeze()
+    buf
 }
 
-fn build_apply_record_msg(endlsn: Lsn, rec: Bytes) -> Bytes {
+fn build_apply_record_msg(endlsn: Lsn, rec: &[u8]) -> Vec<u8> {
     let len = 4 + 8 + rec.len();
-    let mut buf = BytesMut::with_capacity(1 + len);
+    let mut buf: Vec<u8> = Vec::with_capacity(1 + len);
 
     buf.put_u8(b'A');
     buf.put_u32(len as u32);
@@ -677,21 +654,19 @@ fn build_apply_record_msg(endlsn: Lsn, rec: Bytes) -> Bytes {
 
     debug_assert!(buf.len() == 1 + len);
 
-    buf.freeze()
+    buf
 }
 
-fn build_get_page_msg(tag: BufferTag) -> Bytes {
+fn build_get_page_msg(tag: BufferTag) -> Vec<u8> {
     let len = 4 + 1 + 4 * 4;
-    let mut buf = BytesMut::with_capacity(1 + len);
+    let mut buf = Vec::with_capacity(1 + len);
 
     buf.put_u8(b'G');
     buf.put_u32(len as u32);
-    let mut writer = buf.writer();
-    tag.ser_into(&mut writer)
+    tag.ser_into(&mut buf)
         .expect("serialize BufferTag should always succeed");
-    let buf = writer.into_inner();
 
     debug_assert!(buf.len() == 1 + len);
 
-    buf.freeze()
+    buf
 }
diff --git a/postgres_ffi/src/xlog_utils.rs b/postgres_ffi/src/xlog_utils.rs
index c4caa18b32..7f88de4c85 100644
--- a/postgres_ffi/src/xlog_utils.rs
+++ b/postgres_ffi/src/xlog_utils.rs
@@ -9,7 +9,6 @@
 
 use crate::pg_constants;
 use crate::CheckPoint;
-use crate::ControlFileData;
 use crate::FullTransactionId;
 use crate::XLogLongPageHeaderData;
 use crate::XLogPageHeaderData;
@@ -18,8 +17,8 @@ use crate::XLOG_PAGE_MAGIC;
 
 use anyhow::{bail, Result};
 use byteorder::{ByteOrder, LittleEndian};
+use bytes::BytesMut;
 use bytes::{Buf, Bytes};
-use bytes::{BufMut, BytesMut};
 use crc32c::*;
 use log::*;
 use std::cmp::max;
@@ -329,7 +328,12 @@ pub fn main() {
 }
 
 impl XLogRecord {
-    pub fn from_bytes(buf: &mut Bytes) -> XLogRecord {
+    pub fn from_slice(buf: &[u8]) -> XLogRecord {
+        use zenith_utils::bin_ser::LeSer;
+        XLogRecord::des(buf).unwrap()
+    }
+
+    pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogRecord {
         use zenith_utils::bin_ser::LeSer;
         XLogRecord::des_from(&mut buf.reader()).unwrap()
     }
@@ -377,10 +381,12 @@ impl CheckPoint {
         Ok(CheckPoint::des(buf)?)
     }
 
-    // Update next XID based on provided new_xid and stored epoch.
-    // Next XID should be greater than new_xid.
-    // Also take in account 32-bit wrap-around.
-    pub fn update_next_xid(&mut self, xid: u32) {
+    /// Update next XID based on provided new_xid and stored epoch.
+    /// Next XID should be greater than new_xid. This handles 32-bit
+    /// XID wraparound correctly.
+    ///
+    /// Returns 'true' if the XID was updated.
+    pub fn update_next_xid(&mut self, xid: u32) -> bool {
         let xid = xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
         let full_xid = self.nextXid.value;
         let new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
@@ -391,35 +397,37 @@ impl CheckPoint {
                 // wrap-around
                 epoch += 1;
             }
-            self.nextXid = FullTransactionId {
-                value: (epoch << 32) | new_xid as u64,
-            };
+            let nextXid = (epoch << 32) | new_xid as u64;
+
+            if nextXid != self.nextXid.value {
+                self.nextXid = FullTransactionId { value: nextXid };
+                return true;
+            }
         }
+        false
     }
 }
 
 //
-// Generate new WAL segment with single XLOG_CHECKPOINT_SHUTDOWN record.
+// Generate new, empty WAL segment.
 // We need this segment to start compute node.
-// In order to minimize changes in Postgres core, we prefer to
-// provide WAL segment from which is can extract checkpoint record in standard way,
-// rather then implement some alternative mechanism.
 //
-pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
+pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
     let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);
 
+    let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE);
     let hdr = XLogLongPageHeaderData {
         std: {
             XLogPageHeaderData {
                 xlp_magic: XLOG_PAGE_MAGIC as u16,
                 xlp_info: pg_constants::XLP_LONG_HEADER,
                 xlp_tli: 1, // FIXME: always use Postgres timeline 1
-                xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64,
+                xlp_pageaddr: pageaddr,
                 xlp_rem_len: 0,
                 ..Default::default() // Put 0 in padding fields.
             }
         },
-        xlp_sysid: pg_control.system_identifier,
+        xlp_sysid: system_id,
         xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32,
         xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
     };
@@ -427,36 +435,6 @@ pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
     let hdr_bytes = hdr.encode();
     seg_buf.extend_from_slice(&hdr_bytes);
 
-    let rec_hdr = XLogRecord {
-        xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD
-            + SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT
-            + SIZEOF_CHECKPOINT) as u32,
-        xl_xid: 0, //0 is for InvalidTransactionId
-        xl_prev: 0,
-        xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN,
-        xl_rmid: pg_constants::RM_XLOG_ID,
-        xl_crc: 0,
-        ..Default::default() // Put 0 in padding fields.
-    };
-
-    let mut rec_shord_hdr_bytes = BytesMut::new();
-    rec_shord_hdr_bytes.put_u8(pg_constants::XLR_BLOCK_ID_DATA_SHORT);
-    rec_shord_hdr_bytes.put_u8(SIZEOF_CHECKPOINT as u8);
-
-    let rec_bytes = rec_hdr.encode();
-    let checkpoint_bytes = pg_control.checkPointCopy.encode();
-
-    //calculate record checksum
-    let mut crc = 0;
-    crc = crc32c_append(crc, &rec_shord_hdr_bytes[..]);
-    crc = crc32c_append(crc, &checkpoint_bytes[..]);
-    crc = crc32c_append(crc, &rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
-
-    seg_buf.extend_from_slice(&rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
-    seg_buf.put_u32_le(crc);
-    seg_buf.extend_from_slice(&rec_shord_hdr_bytes);
-    seg_buf.extend_from_slice(&checkpoint_bytes);
-
     //zero out the rest of the file
     seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
     seg_buf.freeze()
diff --git a/proxy/src/mgmt.rs b/proxy/src/mgmt.rs
index 2b3259f8ec..1f33b68a1c 100644
--- a/proxy/src/mgmt.rs
+++ b/proxy/src/mgmt.rs
@@ -34,7 +34,7 @@ pub fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow:
 
 pub fn mgmt_conn_main(state: &'static ProxyState, socket: TcpStream) -> anyhow::Result<()> {
     let mut conn_handler = MgmtHandler { state };
-    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;
+    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, true)?;
     pgbackend.run(&mut conn_handler)
 }
 
diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs
index f246d4470a..61a742cf38 100644
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -64,6 +64,7 @@ pub fn proxy_conn_main(
             socket,
             postgres_backend::AuthType::MD5,
             state.conf.ssl_config.clone(),
+            false,
         )?,
         md5_salt: [0u8; 4],
         psql_session_id: "".into(),
diff --git a/test_runner/Pipfile b/test_runner/Pipfile
index f5ff0d7e2b..a98acc5718 100644
--- a/test_runner/Pipfile
+++ b/test_runner/Pipfile
@@ -11,11 +11,14 @@ pyjwt = {extras = ["crypto"], version = "*"}
 requests = "*"
 pytest-xdist = "*"
 asyncpg = "*"
+cached-property = "*"
 
 [dev-packages]
-yapf = "*"
 flake8 = "*"
 mypy = "*"
+# Behavior may change slightly between versions. These are run continuously,
+# so we pin exact versions to avoid surprising breaks. Update if comfortable.
+yapf = "==0.31.0"
 
 [requires]
 # we need at least 3.6, but pipenv doesn't allow to say this directly
diff --git a/test_runner/Pipfile.lock b/test_runner/Pipfile.lock
index 3c68c0ff3a..75fc17ffad 100644
--- a/test_runner/Pipfile.lock
+++ b/test_runner/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3cdc048691824d0b93912b6b78a0aa01dc98f278212c1badb0cc2edbd2103c3a"
+            "sha256": "3645ae8d2dcf55bd2a54963c44cfeedf577f3b289d1077365214a80a7f36e643"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -43,94 +43,108 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==21.2.0"
         },
+        "cached-property": {
+            "hashes": [
+                "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130",
+                "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"
+            ],
+            "index": "pypi",
+            "version": "==1.5.2"
+        },
         "certifi": {
             "hashes": [
-                "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
-                "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
+                "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
+                "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
             ],
-            "version": "==2021.5.30"
+            "version": "==2021.10.8"
         },
         "cffi": {
             "hashes": [
-                "sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d",
-                "sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771",
-                "sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872",
-                "sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c",
-                "sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc",
-                "sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762",
-                "sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202",
-                "sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5",
-                "sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548",
-                "sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a",
-                "sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f",
-                "sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20",
-                "sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218",
-                "sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c",
-                "sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e",
-                "sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56",
-                "sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224",
-                "sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a",
-                "sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2",
-                "sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a",
-                "sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819",
-                "sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346",
-                "sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b",
-                "sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e",
-                "sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534",
-                "sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb",
-                "sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0",
-                "sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156",
-                "sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd",
-                "sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87",
-                "sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc",
-                "sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195",
-                "sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33",
-                "sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f",
-                "sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d",
-                "sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd",
-                "sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728",
-                "sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7",
-                "sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca",
-                "sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99",
-                "sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf",
-                "sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e",
-                "sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c",
-                "sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5",
-                "sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69"
+                "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
+                "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
+                "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
+                "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
+                "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
+                "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
+                "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
+                "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
+                "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
+                "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
+                "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
+                "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
+                "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
+                "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
+                "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
+                "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
+                "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
+                "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
+                "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
+                "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
+                "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
+                "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
+                "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
+                "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
+                "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
+                "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
+                "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
+                "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
+                "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
+                "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
+                "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
+                "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
+                "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
+                "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
+                "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
+                "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
+                "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
+                "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
+                "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
+                "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
+                "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
+                "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
+                "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
+                "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
+                "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
+                "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
+                "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
+                "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
+                "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
+                "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
             ],
-            "version": "==1.14.6"
+            "version": "==1.15.0"
         },
         "charset-normalizer": {
             "hashes": [
-                "sha256:5d209c0a931f215cee683b6445e2d77677e7e75e159f78def0db09d68fafcaa6",
-                "sha256:5ec46d183433dcbd0ab716f2d7f29d8dee50505b3fdb40c6b985c7c4f5a3591f"
+                "sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
+                "sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
             ],
             "markers": "python_version >= '3'",
-            "version": "==2.0.6"
+            "version": "==2.0.7"
         },
         "cryptography": {
             "hashes": [
-                "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e",
-                "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b",
-                "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7",
-                "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085",
-                "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc",
-                "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d",
-                "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a",
-                "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498",
-                "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89",
-                "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9",
-                "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c",
-                "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7",
-                "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb",
-                "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14",
-                "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af",
-                "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e",
-                "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5",
-                "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06",
-                "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"
+                "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
+                "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
+                "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
+                "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
+                "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
+                "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
+                "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
+                "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
+                "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
+                "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
+                "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
+                "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
+                "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
+                "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
+                "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
+                "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
+                "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
+                "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
+                "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
+                "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
             ],
-            "version": "==3.4.8"
+            "version": "==35.0.0"
         },
         "execnet": {
             "hashes": [
@@ -142,11 +156,11 @@
         },
         "idna": {
             "hashes": [
-                "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
-                "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
+                "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
+                "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
             ],
             "markers": "python_version >= '3'",
-            "version": "==3.2"
+            "version": "==3.3"
         },
         "iniconfig": {
             "hashes": [
@@ -207,11 +221,11 @@
                 "crypto"
             ],
             "hashes": [
-                "sha256:934d73fbba91b0483d3857d1aff50e96b2a892384ee2c17417ed3203f173fca1",
-                "sha256:fba44e7898bbca160a2b2b501f492824fc8382485d3a6f11ba5d0c1937ce6130"
+                "sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41",
+                "sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"
             ],
             "index": "pypi",
-            "version": "==2.1.0"
+            "version": "==2.3.0"
         },
         "pyparsing": {
             "hashes": [
@@ -272,21 +286,21 @@
         },
         "urllib3": {
             "hashes": [
-                "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
-                "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
+                "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
+                "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
-            "version": "==1.26.6"
+            "version": "==1.26.7"
         }
     },
     "develop": {
         "flake8": {
             "hashes": [
-                "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b",
-                "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"
+                "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d",
+                "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"
             ],
             "index": "pypi",
-            "version": "==3.9.2"
+            "version": "==4.0.1"
         },
         "mccabe": {
             "hashes": [
@@ -333,19 +347,19 @@
         },
         "pycodestyle": {
             "hashes": [
-                "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068",
-                "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"
+                "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20",
+                "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"
             ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.7.0"
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==2.8.0"
         },
         "pyflakes": {
             "hashes": [
-                "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3",
-                "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"
+                "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c",
+                "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.3.1"
+            "version": "==2.4.0"
         },
         "toml": {
             "hashes": [
diff --git a/test_runner/README.md b/test_runner/README.md
index 62a95350aa..cdbf7e988d 100644
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -53,8 +53,8 @@ Useful environment variables:
 should go.
 `TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
 
-Let stdout and stderr go to the terminal instead of capturing them:
-`pytest -s ...`
+Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
+`pytest -s --log-cli-level=INFO ...`
 (Note many tests capture subprocess outputs separately, so this may not
 show much.)
 
@@ -95,11 +95,13 @@ Python destructors, e.g. `__del__()` aren't recommended for cleanup.
 
 ### Code quality
 
+We enforce code formatting via yapf:
+
+1. Install `yapf` and other tools (`flake8`, `mypy`) with `pipenv install --dev`.
+1. Reformat all your code by running `pipenv run yapf -ri .` in the `test_runner/` directory.
+
 Before submitting a patch, please consider:
 
 * Writing a couple of docstrings to clarify the reasoning behind a new test.
 * Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any.
-* Formatting the code with `yapf -r -i .` (TODO: implement an opt-in pre-commit hook for that).
 * (Optional) Typechecking the code with `mypy .`. Currently this mostly affects `fixtures/zenith_fixtures.py`.
-
-The tools can be installed with `pipenv install --dev`.
diff --git a/test_runner/batch_others/test_auth.py b/test_runner/batch_others/test_auth.py
index 614883d4b8..9fe7567902 100644
--- a/test_runner/batch_others/test_auth.py
+++ b/test_runner/batch_others/test_auth.py
@@ -1,4 +1,3 @@
-
 from contextlib import closing
 from typing import Iterator
 from uuid import uuid4
@@ -6,7 +5,6 @@ import psycopg2
 from fixtures.zenith_fixtures import PortDistributor, Postgres, ZenithCli, ZenithPageserver, PgBin
 import pytest
 
-
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
@@ -35,7 +33,9 @@ def test_pageserver_auth(pageserver_auth_enabled: ZenithPageserver):
     ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token)
 
     # fail to create tenant using tenant token
-    with pytest.raises(psycopg2.DatabaseError, match='Attempt to access management api with tenant scope. Permission denied'):
+    with pytest.raises(
+            psycopg2.DatabaseError,
+            match='Attempt to access management api with tenant scope. Permission denied'):
         ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token)
 
 
@@ -60,14 +60,14 @@ def test_compute_auth_to_pageserver(
         wa_factory.start_n_new(3, management_token)
 
     with Postgres(
-        zenith_cli=zenith_cli,
-        repo_dir=repo_dir,
-        pg_bin=pg_bin,
-        tenant_id=ps.initial_tenant,
-        port=port_distributor.get_port(),
+            zenith_cli=zenith_cli,
+            repo_dir=repo_dir,
+            pg_bin=pg_bin,
+            tenant_id=ps.initial_tenant,
+            port=port_distributor.get_port(),
     ).create_start(
-        branch,
-        wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
+            branch,
+            wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
     ) as pg:
         with closing(pg.connect()) as conn:
             with conn.cursor() as cur:
diff --git a/test_runner/batch_others/test_branch_behind.py b/test_runner/batch_others/test_branch_behind.py
index 9189017050..887671bf99 100644
--- a/test_runner/batch_others/test_branch_behind.py
+++ b/test_runner/batch_others/test_branch_behind.py
@@ -1,6 +1,6 @@
 import subprocess
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
-
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -13,7 +13,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
     zenith_cli.run(["branch", "test_branch_behind", "empty"])
 
     pgmain = postgres.create_start('test_branch_behind')
-    print("postgres is running on 'test_branch_behind' branch")
+    log.info("postgres is running on 'test_branch_behind' branch")
 
     main_pg_conn = pgmain.connect()
     main_cur = main_pg_conn.cursor()
@@ -27,7 +27,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
     ''')
     main_cur.execute('SELECT pg_current_wal_insert_lsn()')
     lsn_a = main_cur.fetchone()[0]
-    print('LSN after 100 rows: ' + lsn_a)
+    log.info(f'LSN after 100 rows: {lsn_a}')
 
     # Insert some more rows. (This generates enough WAL to fill a few segments.)
     main_cur.execute('''
@@ -37,7 +37,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
     ''')
     main_cur.execute('SELECT pg_current_wal_insert_lsn()')
     lsn_b = main_cur.fetchone()[0]
-    print('LSN after 200100 rows: ' + lsn_b)
+    log.info(f'LSN after 200100 rows: {lsn_b}')
 
     # Branch at the point where only 100 rows were inserted
     zenith_cli.run(["branch", "test_branch_behind_hundred", "test_branch_behind@" + lsn_a])
@@ -52,7 +52,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
 
     main_cur.execute('SELECT pg_current_wal_insert_lsn()')
     lsn_c = main_cur.fetchone()[0]
-    print('LSN after 400100 rows: ' + lsn_c)
+    log.info(f'LSN after 400100 rows: {lsn_c}')
 
     # Branch at the point where only 200100 rows were inserted
     zenith_cli.run(["branch", "test_branch_behind_more", "test_branch_behind@" + lsn_b])
@@ -86,7 +86,10 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
     assert cur.fetchone() == (1, )
 
     # branch at pre-initdb lsn
+    #
+    # FIXME: This works currently, but probably shouldn't be allowed
     try:
         zenith_cli.run(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
+        # FIXME: assert false, "branch with invalid LSN should have failed"
     except subprocess.CalledProcessError:
-        print("Branch creation with pre-initdb LSN failed (as expected)")
+        log.info("Branch creation with pre-initdb LSN failed (as expected)")
diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py
index e9233986e4..a70e14d9a9 100644
--- a/test_runner/batch_others/test_clog_truncate.py
+++ b/test_runner/batch_others/test_clog_truncate.py
@@ -4,6 +4,7 @@ import os
 from contextlib import closing
 
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -17,14 +18,17 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
 
     # set agressive autovacuum to make sure that truncation will happen
     config = [
-        'autovacuum_max_workers=10', 'autovacuum_vacuum_threshold=0',
-        'autovacuum_vacuum_insert_threshold=0', 'autovacuum_vacuum_cost_delay=0',
-        'autovacuum_vacuum_cost_limit=10000', 'autovacuum_naptime =1s',
+        'autovacuum_max_workers=10',
+        'autovacuum_vacuum_threshold=0',
+        'autovacuum_vacuum_insert_threshold=0',
+        'autovacuum_vacuum_cost_delay=0',
+        'autovacuum_vacuum_cost_limit=10000',
+        'autovacuum_naptime =1s',
         'autovacuum_freeze_max_age=100000'
     ]
 
     pg = postgres.create_start('test_clog_truncate', config_lines=config)
-    print('postgres is running on test_clog_truncate branch')
+    log.info('postgres is running on test_clog_truncate branch')
 
     # Install extension containing function needed for test
     pg.safe_psql('CREATE EXTENSION zenith_test_utils')
@@ -33,22 +37,22 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
             cur.execute('select test_consume_xids(1000*1000*10);')
-            print('xids consumed')
+            log.info('xids consumed')
 
             # call a checkpoint to trigger TruncateSubtrans
             cur.execute('CHECKPOINT;')
 
             # ensure WAL flush
             cur.execute('select txid_current()')
-            print(cur.fetchone())
+            log.info(cur.fetchone())
 
     # wait for autovacuum to truncate the pg_xact
     # XXX Is it worth to add a timeout here?
     pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000')
-    print("pg_xact_0000_path = " + pg_xact_0000_path)
+    log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
 
     while os.path.isfile(pg_xact_0000_path):
-        print("file exists. wait for truncation. " "pg_xact_0000_path = " + pg_xact_0000_path)
+        log.info(f"file exists. wait for truncation. pg_xact_0000_path = {pg_xact_0000_path}")
         time.sleep(5)
 
     # checkpoint to advance latest lsn
@@ -59,14 +63,14 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
             lsn_after_truncation = cur.fetchone()[0]
 
     # create new branch after clog truncation and start a compute node on it
-    print('create branch at lsn_after_truncation ' + lsn_after_truncation)
+    log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
     zenith_cli.run(
         ["branch", "test_clog_truncate_new", "test_clog_truncate@" + lsn_after_truncation])
 
     pg2 = postgres.create_start('test_clog_truncate_new')
-    print('postgres is running on test_clog_truncate_new branch')
+    log.info('postgres is running on test_clog_truncate_new branch')
 
     # check that new node doesn't contain truncated segment
     pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000')
-    print("pg_xact_0000_path_new = " + pg_xact_0000_path_new)
+    log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
     assert os.path.isfile(pg_xact_0000_path_new) is False
diff --git a/test_runner/batch_others/test_config.py b/test_runner/batch_others/test_config.py
index d8cc798839..d7c59c4e77 100644
--- a/test_runner/batch_others/test_config.py
+++ b/test_runner/batch_others/test_config.py
@@ -1,6 +1,7 @@
 from contextlib import closing
 
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -14,7 +15,7 @@ def test_config(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFact
 
     # change config
     pg = postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
-    print('postgres is running on test_config branch')
+    log.info('postgres is running on test_config branch')
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py
index cbe89a77cb..5fe103496d 100644
--- a/test_runner/batch_others/test_createdropdb.py
+++ b/test_runner/batch_others/test_createdropdb.py
@@ -3,6 +3,7 @@ import pathlib
 
 from contextlib import closing
 from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory, ZenithCli, check_restored_datadir_content
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -19,7 +20,7 @@ def test_createdb(
     zenith_cli.run(["branch", "test_createdb", "empty"])
 
     pg = postgres.create_start('test_createdb')
-    print("postgres is running on 'test_createdb' branch")
+    log.info("postgres is running on 'test_createdb' branch")
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
@@ -40,6 +41,7 @@ def test_createdb(
     for db in (pg, pg2):
         db.connect(dbname='foodb').close()
 
+
 #
 # Test DROP DATABASE
 #
@@ -48,12 +50,12 @@ def test_dropdb(
     pageserver: ZenithPageserver,
     postgres: PostgresFactory,
     pg_bin,
-    test_output_dir
+    test_output_dir,
 ):
     zenith_cli.run(["branch", "test_dropdb", "empty"])
 
     pg = postgres.create_start('test_dropdb')
-    print("postgres is running on 'test_dropdb' branch")
+    log.info("postgres is running on 'test_dropdb' branch")
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
@@ -65,7 +67,6 @@ def test_dropdb(
             cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';")
             dboid = cur.fetchone()[0]
 
-
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
             cur.execute('DROP DATABASE foodb')
@@ -75,7 +76,6 @@ def test_dropdb(
             cur.execute('SELECT pg_current_wal_insert_lsn()')
             lsn_after_drop = cur.fetchone()[0]
 
-
     # Create two branches before and after database drop.
     zenith_cli.run(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop])
     pg_before = postgres.create_start('test_before_dropdb')
@@ -88,13 +88,13 @@ def test_dropdb(
 
     # Test that database subdir exists on the branch before drop
     dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
-    print(dbpath)
+    log.info(dbpath)
 
     assert os.path.isdir(dbpath) == True
 
     # Test that database subdir doesn't exist on the branch after drop
     dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
-    print(dbpath)
+    log.info(dbpath)
 
     assert os.path.isdir(dbpath) == False
 
diff --git a/test_runner/batch_others/test_createuser.py b/test_runner/batch_others/test_createuser.py
index f44df91c3c..57cc610f55 100644
--- a/test_runner/batch_others/test_createuser.py
+++ b/test_runner/batch_others/test_createuser.py
@@ -1,6 +1,7 @@
 from contextlib import closing
 
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -12,7 +13,7 @@ def test_createuser(zenith_cli, pageserver: ZenithPageserver, postgres: Postgres
     zenith_cli.run(["branch", "test_createuser", "empty"])
 
     pg = postgres.create_start('test_createuser')
-    print("postgres is running on 'test_createuser' branch")
+    log.info("postgres is running on 'test_createuser' branch")
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/batch_others/test_multixact.py
index aaa9e7f58d..78504b95ed 100644
--- a/test_runner/batch_others/test_multixact.py
+++ b/test_runner/batch_others/test_multixact.py
@@ -1,4 +1,5 @@
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -9,13 +10,17 @@ pytest_plugins = ("fixtures.zenith_fixtures")
 # it only checks next_multixact_id field in restored pg_control,
 # since we don't have functions to check multixact internals.
 #
-def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
-                    pg_bin, zenith_cli, base_dir, test_output_dir):
+def test_multixact(pageserver: ZenithPageserver,
+                   postgres: PostgresFactory,
+                   pg_bin,
+                   zenith_cli,
+                   base_dir,
+                   test_output_dir):
     # Create a branch for us
     zenith_cli.run(["branch", "test_multixact", "empty"])
     pg = postgres.create_start('test_multixact')
 
-    print("postgres is running on 'test_multixact' branch")
+    log.info("postgres is running on 'test_multixact' branch")
     pg_conn = pg.connect()
     cur = pg_conn.cursor()
 
@@ -55,7 +60,7 @@ def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
     zenith_cli.run(["branch", "test_multixact_new", "test_multixact@" + lsn])
     pg_new = postgres.create_start('test_multixact_new')
 
-    print("postgres is running on 'test_multixact_new' branch")
+    log.info("postgres is running on 'test_multixact_new' branch")
     pg_new_conn = pg_new.connect()
     cur_new = pg_new_conn.cursor()
 
diff --git a/test_runner/batch_others/test_old_request_lsn.py b/test_runner/batch_others/test_old_request_lsn.py
index bb28bdd83f..6cc5c01b83 100644
--- a/test_runner/batch_others/test_old_request_lsn.py
+++ b/test_runner/batch_others/test_old_request_lsn.py
@@ -1,9 +1,11 @@
 from contextlib import closing
 
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
+
 #
 # Test where Postgres generates a lot of WAL, and it's garbage collected away, but
 # no pages are evicted so that Postgres uses an old LSN in a GetPage request.
@@ -14,11 +16,14 @@ pytest_plugins = ("fixtures.zenith_fixtures")
 # just a hint that the page hasn't been modified since that LSN, and the page
 # server should return the latest page version regardless of the LSN.
 #
-def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
+def test_old_request_lsn(zenith_cli,
+                         pageserver: ZenithPageserver,
+                         postgres: PostgresFactory,
+                         pg_bin):
     # Create a branch for us
     zenith_cli.run(["branch", "test_old_request_lsn", "empty"])
     pg = postgres.create_start('test_old_request_lsn')
-    print('postgres is running on test_old_request_lsn branch')
+    log.info('postgres is running on test_old_request_lsn branch')
 
     pg_conn = pg.connect()
     cur = pg_conn.cursor()
@@ -46,20 +51,20 @@ def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
         from pg_settings where name = 'shared_buffers'
     ''')
     row = cur.fetchone()
-    print(f'shared_buffers is {row[0]}, table size {row[1]}');
+    log.info(f'shared_buffers is {row[0]}, table size {row[1]}')
     assert int(row[0]) < int(row[1])
 
-    cur.execute('VACUUM foo');
+    cur.execute('VACUUM foo')
 
     # Make a lot of updates on a single row, generating a lot of WAL. Trigger
     # garbage collections so that the page server will remove old page versions.
     for i in range(10):
         pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
         for j in range(100):
-            cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;');
+            cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;')
 
     # All (or at least most of) the updates should've been on the same page, so
     # that we haven't had to evict any dirty pages for a long time. Now run
     # a query that sends GetPage@LSN requests with the old LSN.
-    cur.execute("SELECT COUNT(*), SUM(val) FROM foo");
+    cur.execute("SELECT COUNT(*), SUM(val) FROM foo")
     assert cur.fetchone() == (100000, 101000)
diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py
index 8d0f92a263..95b0172e4c 100644
--- a/test_runner/batch_others/test_pageserver_api.py
+++ b/test_runner/batch_others/test_pageserver_api.py
@@ -63,7 +63,8 @@ def test_tenant_list_psql(pageserver: ZenithPageserver, zenith_cli):
     cur = conn.cursor()
 
     # check same tenant cannot be created twice
-    with pytest.raises(psycopg2.DatabaseError, match=f'tenant {pageserver.initial_tenant} already exists'):
+    with pytest.raises(psycopg2.DatabaseError,
+                       match=f'tenant {pageserver.initial_tenant} already exists'):
         cur.execute(f'tenant_create {pageserver.initial_tenant}')
 
     # create one more tenant
@@ -102,5 +103,6 @@ def test_pageserver_http_api_client(pageserver: ZenithPageserver):
 
 
 def test_pageserver_http_api_client_auth_enabled(pageserver_auth_enabled: ZenithPageserver):
-    client = pageserver_auth_enabled.http_client(auth_token=pageserver_auth_enabled.auth_keys.generate_management_token())
+    client = pageserver_auth_enabled.http_client(
+        auth_token=pageserver_auth_enabled.auth_keys.generate_management_token())
     check_client(client, pageserver_auth_enabled.initial_tenant)
diff --git a/test_runner/batch_others/test_pageserver_restart.py b/test_runner/batch_others/test_pageserver_restart.py
index 18b17a4efb..5b4943aa27 100644
--- a/test_runner/batch_others/test_pageserver_restart.py
+++ b/test_runner/batch_others/test_pageserver_restart.py
@@ -5,20 +5,24 @@ import time
 from contextlib import closing
 from multiprocessing import Process, Value
 from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
+
 # Check that dead minority doesn't prevent the commits: execute insert n_inserts
 # times, with fault_probability chance of getting a wal acceptor down or up
 # along the way. 2 of 3 are always alive, so the work keeps going.
-def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
+def test_pageserver_restart(zenith_cli,
+                            pageserver: ZenithPageserver,
+                            postgres: PostgresFactory,
+                            wa_factory: WalAcceptorFactory):
 
     # One safekeeper is enough for this test.
     wa_factory.start_n_new(1)
 
     zenith_cli.run(["branch", "test_pageserver_restart", "empty"])
-    pg = postgres.create_start('test_pageserver_restart',
-                               wal_acceptors=wa_factory.get_connstrs())
+    pg = postgres.create_start('test_pageserver_restart', wal_acceptors=wa_factory.get_connstrs())
 
     pg_conn = pg.connect()
     cur = pg_conn.cursor()
@@ -40,14 +44,14 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres:
         from pg_settings where name = 'shared_buffers'
     ''')
     row = cur.fetchone()
-    print("shared_buffers is {}, table size {}", row[0], row[1]);
+    log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
     assert int(row[0]) < int(row[1])
 
     # Stop and restart pageserver. This is a more or less graceful shutdown, although
     # the page server doesn't currently have a shutdown routine so there's no difference
     # between stopping and crashing.
-    pageserver.stop();
-    pageserver.start();
+    pageserver.stop()
+    pageserver.start()
 
     # Stopping the pageserver breaks the connection from the postgres backend to
     # the page server, and causes the next query on the connection to fail. Start a new
@@ -61,6 +65,5 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres:
     assert cur.fetchone() == (100000, )
 
     # Stop the page server by force, and restart it
-    pageserver.stop();
-    pageserver.start();
-
+    pageserver.stop()
+    pageserver.start()
diff --git a/test_runner/batch_others/test_pgbench.py b/test_runner/batch_others/test_pgbench.py
index a5423cf3d7..46633daa34 100644
--- a/test_runner/batch_others/test_pgbench.py
+++ b/test_runner/batch_others/test_pgbench.py
@@ -1,4 +1,5 @@
 from fixtures.zenith_fixtures import PostgresFactory
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -8,7 +9,7 @@ def test_pgbench(postgres: PostgresFactory, pg_bin, zenith_cli):
     zenith_cli.run(["branch", "test_pgbench", "empty"])
 
     pg = postgres.create_start('test_pgbench')
-    print("postgres is running on 'test_pgbench' branch")
+    log.info("postgres is running on 'test_pgbench' branch")
 
     connstr = pg.connstr()
 
diff --git a/test_runner/batch_others/test_readonly_node.py b/test_runner/batch_others/test_readonly_node.py
new file mode 100644
index 0000000000..cc6c11caad
--- /dev/null
+++ b/test_runner/batch_others/test_readonly_node.py
@@ -0,0 +1,98 @@
+import subprocess
+from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+
+#
+# Create read-only compute nodes, anchored at historical points in time.
+#
+# This is very similar to the 'test_branch_behind' test, but instead of
+# creating branches, creates read-only nodes.
+#
+# The test records the WAL insert LSN after each of three bulk inserts,
+# starts read-only nodes anchored at the first two LSNs, and checks that
+# each node sees exactly the rows that existed at its LSN.
+#
+def test_readonly_node(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
+    zenith_cli.run(["branch", "test_readonly_node", "empty"])
+
+    pgmain = postgres.create_start('test_readonly_node')
+    print("postgres is running on 'test_readonly_node' branch")
+
+    main_pg_conn = pgmain.connect()
+    main_cur = main_pg_conn.cursor()
+
+    # Create table, and insert the first 100 rows
+    main_cur.execute('CREATE TABLE foo (t text)')
+    main_cur.execute('''
+        INSERT INTO foo
+            SELECT 'long string to consume some space' || g
+            FROM generate_series(1, 100) g
+    ''')
+    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
+    lsn_a = main_cur.fetchone()[0]
+    print('LSN after 100 rows: ' + lsn_a)
+
+    # Insert some more rows. (This generates enough WAL to fill a few segments.)
+    main_cur.execute('''
+        INSERT INTO foo
+            SELECT 'long string to consume some space' || g
+            FROM generate_series(1, 200000) g
+    ''')
+    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
+    lsn_b = main_cur.fetchone()[0]
+    print('LSN after 200100 rows: ' + lsn_b)
+
+    # Insert many more rows. This generates enough WAL to fill a few segments.
+    main_cur.execute('''
+        INSERT INTO foo
+            SELECT 'long string to consume some space' || g
+            FROM generate_series(1, 200000) g
+    ''')
+
+    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
+    lsn_c = main_cur.fetchone()[0]
+    print('LSN after 400100 rows: ' + lsn_c)
+
+    # Create first read-only node at the point where only 100 rows were inserted
+    pg_hundred = postgres.create_start("test_readonly_node_hundred",
+                                       branch=f'test_readonly_node@{lsn_a}')
+
+    # And another at the point where 200100 rows were inserted
+    pg_more = postgres.create_start("test_readonly_node_more", branch=f'test_readonly_node@{lsn_b}')
+
+    # On the 'hundred' node, we should see only 100 rows
+    hundred_pg_conn = pg_hundred.connect()
+    hundred_cur = hundred_pg_conn.cursor()
+    hundred_cur.execute('SELECT count(*) FROM foo')
+    assert hundred_cur.fetchone() == (100, )
+
+    # On the 'more' node, we should see 200100 rows
+    more_pg_conn = pg_more.connect()
+    more_cur = more_pg_conn.cursor()
+    more_cur.execute('SELECT count(*) FROM foo')
+    assert more_cur.fetchone() == (200100, )
+
+    # All the rows are visible on the main branch
+    main_cur.execute('SELECT count(*) FROM foo')
+    assert main_cur.fetchone() == (400100, )
+
+    # Check creating a node at segment boundary
+    pg = postgres.create_start("test_branch_segment_boundary",
+                               branch="test_readonly_node@0/3000000")
+    cur = pg.connect().cursor()
+    cur.execute('SELECT 1')
+    assert cur.fetchone() == (1, )
+
+    # Create node at pre-initdb lsn. Startup is expected to fail.
+    #
+    # The assertion lives in the 'else' clause so that it only runs when the
+    # command succeeded, and so that the resulting AssertionError is not
+    # swallowed by the 'except Exception' handler.
+    try:
+        zenith_cli.run(["pg", "start", "test_branch_preinitdb", "test_readonly_node@0/42"])
+    except Exception:
+        print("Node creation with pre-initdb LSN failed (as expected)")
+    else:
+        assert False, "compute node startup with invalid LSN should have failed"
diff --git a/test_runner/batch_others/test_restart_compute.py b/test_runner/batch_others/test_restart_compute.py
index 193b675e23..5d47d32aac 100644
--- a/test_runner/batch_others/test_restart_compute.py
+++ b/test_runner/batch_others/test_restart_compute.py
@@ -2,6 +2,7 @@ import pytest
 
 from contextlib import closing
 from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -11,13 +12,13 @@ pytest_plugins = ("fixtures.zenith_fixtures")
 #
 @pytest.mark.parametrize('with_wal_acceptors', [False, True])
 def test_restart_compute(
-        zenith_cli,
-        pageserver: ZenithPageserver,
-        postgres: PostgresFactory,
-        pg_bin,
-        wa_factory,
-        with_wal_acceptors: bool,
-    ):
+    zenith_cli,
+    pageserver: ZenithPageserver,
+    postgres: PostgresFactory,
+    pg_bin,
+    wa_factory,
+    with_wal_acceptors: bool,
+):
     wal_acceptor_connstrs = None
     zenith_cli.run(["branch", "test_restart_compute", "empty"])
 
@@ -25,9 +26,8 @@ def test_restart_compute(
         wa_factory.start_n_new(3)
         wal_acceptor_connstrs = wa_factory.get_connstrs()
 
-    pg = postgres.create_start('test_restart_compute',
-                               wal_acceptors=wal_acceptor_connstrs)
-    print("postgres is running on 'test_restart_compute' branch")
+    pg = postgres.create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
+    log.info("postgres is running on 'test_restart_compute' branch")
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
@@ -36,12 +36,10 @@ def test_restart_compute(
             cur.execute('SELECT sum(key) FROM t')
             r = cur.fetchone()
             assert r == (5000050000, )
-            print("res = ", r)
+            log.info(f"res = {r}")
 
     # Remove data directory and restart
-    pg.stop_and_destroy().create_start('test_restart_compute',
-                                       wal_acceptors=wal_acceptor_connstrs)
-
+    pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
@@ -49,7 +47,7 @@ def test_restart_compute(
             cur.execute('SELECT sum(key) FROM t')
             r = cur.fetchone()
             assert r == (5000050000, )
-            print("res = ", r)
+            log.info(f"res = {r}")
 
             # Insert another row
             cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
@@ -57,11 +55,10 @@ def test_restart_compute(
 
             r = cur.fetchone()
             assert r == (100001, )
-            print("res = ", r)
+            log.info(f"res = {r}")
 
     # Again remove data directory and restart
-    pg.stop_and_destroy().create_start('test_restart_compute',
-                                       wal_acceptors=wal_acceptor_connstrs)
+    pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
 
     # That select causes lots of FPI's and increases probability of wakeepers
     # lagging behind after query completion
@@ -72,11 +69,10 @@ def test_restart_compute(
 
             r = cur.fetchone()
             assert r == (100001, )
-            print("res = ", r)
+            log.info(f"res = {r}")
 
     # And again remove data directory and restart
-    pg.stop_and_destroy().create_start('test_restart_compute',
-                                       wal_acceptors=wal_acceptor_connstrs)
+    pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
 
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
@@ -85,4 +81,4 @@ def test_restart_compute(
 
             r = cur.fetchone()
             assert r == (100001, )
-            print("res = ", r)
+            log.info(f"res = {r}")
diff --git a/test_runner/batch_others/test_snapfiles_gc.py b/test_runner/batch_others/test_snapfiles_gc.py
index e01bf7f179..a799b34aa6 100644
--- a/test_runner/batch_others/test_snapfiles_gc.py
+++ b/test_runner/batch_others/test_snapfiles_gc.py
@@ -1,13 +1,19 @@
 from contextlib import closing
 import psycopg2.extras
-import time;
+import time
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
+
 def print_gc_result(row):
-    print("GC duration {elapsed} ms".format_map(row));
-    print("  REL    total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row))
-    print("  NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row))
+    log.info("GC duration {elapsed} ms".format_map(row))
+    log.info(
+        "  REL    total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}"
+        .format_map(row))
+    log.info(
+        "  NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}"
+        .format_map(row))
 
 
 #
@@ -23,7 +29,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
     with closing(pg.connect()) as conn:
         with conn.cursor() as cur:
             with closing(pageserver.connect()) as psconn:
-                with psconn.cursor(cursor_factory = psycopg2.extras.DictCursor) as pscur:
+                with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
 
                     # Get the timeline ID of our branch. We need it for the 'do_gc' command
                     cur.execute("SHOW zenith.zenith_timeline")
@@ -33,9 +39,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
                     cur.execute("CREATE TABLE foo(x integer)")
                     cur.execute("INSERT INTO foo VALUES (1)")
 
-                    cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass");
-                    row = cur.fetchone();
-                    print("relfilenode is {}", row[0]);
+                    cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass")
+                    row = cur.fetchone()
+                    log.info(f"relfilenode is {row[0]}")
 
                     # Run GC, to clear out any garbage left behind in the catalogs by
                     # the CREATE TABLE command. We want to have a clean slate with no garbage
@@ -50,22 +56,23 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
                     # update to confuse our numbers either.
                     cur.execute("DELETE FROM foo")
 
-                    print("Running GC before test")
+                    log.info("Running GC before test")
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
                     # remember the number of files
-                    layer_relfiles_remain = row['layer_relfiles_total'] - row['layer_relfiles_removed']
+                    layer_relfiles_remain = (row['layer_relfiles_total'] -
+                                             row['layer_relfiles_removed'])
                     assert layer_relfiles_remain > 0
 
                     # Insert a row and run GC. Checkpoint should freeze the layer
                     # so that there is only the most recent image layer left for the rel,
                     # removing the old image and delta layer.
-                    print("Inserting one row and running GC")
+                    log.info("Inserting one row and running GC")
                     cur.execute("INSERT INTO foo VALUES (1)")
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
                     assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
                     assert row['layer_relfiles_removed'] == 2
                     assert row['layer_relfiles_dropped'] == 0
@@ -73,34 +80,34 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
                     # Insert two more rows and run GC.
                     # This should create new image and delta layer file with the new contents, and
                     # then remove the old one image and the just-created delta layer.
-                    print("Inserting two more rows and running GC")
+                    log.info("Inserting two more rows and running GC")
                     cur.execute("INSERT INTO foo VALUES (2)")
                     cur.execute("INSERT INTO foo VALUES (3)")
 
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
                     assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
                     assert row['layer_relfiles_removed'] == 2
                     assert row['layer_relfiles_dropped'] == 0
 
                     # Do it again. Should again create two new layer files and remove old ones.
-                    print("Inserting two more rows and running GC")
+                    log.info("Inserting two more rows and running GC")
                     cur.execute("INSERT INTO foo VALUES (2)")
                     cur.execute("INSERT INTO foo VALUES (3)")
 
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
                     assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
                     assert row['layer_relfiles_removed'] == 2
                     assert row['layer_relfiles_dropped'] == 0
 
                     # Run GC again, with no changes in the database. Should not remove anything.
-                    print("Run GC again, with nothing to do")
+                    log.info("Run GC again, with nothing to do")
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
                     assert row['layer_relfiles_total'] == layer_relfiles_remain
                     assert row['layer_relfiles_removed'] == 0
                     assert row['layer_relfiles_dropped'] == 0
@@ -108,12 +115,12 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
                     #
                     # Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage
                     #
-                    print("Drop table and run GC again");
+                    log.info("Drop table and run GC again")
                     cur.execute("DROP TABLE foo")
 
                     pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
                     row = pscur.fetchone()
-                    print_gc_result(row);
+                    print_gc_result(row)
 
                     # We still cannot remove the latest layers
                     # because they serve as tombstones for earlier layers.
diff --git a/test_runner/batch_others/test_tenants.py b/test_runner/batch_others/test_tenants.py
index ee6bb0bfd3..d646f10666 100644
--- a/test_runner/batch_others/test_tenants.py
+++ b/test_runner/batch_others/test_tenants.py
@@ -21,18 +21,30 @@ def test_tenants_normal_work(
     tenant_1 = tenant_factory.create()
     tenant_2 = tenant_factory.create()
 
-    zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_1}"])
-    zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_2}"])
+    zenith_cli.run([
+        "branch",
+        f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
+        "main",
+        f"--tenantid={tenant_1}"
+    ])
+    zenith_cli.run([
+        "branch",
+        f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
+        "main",
+        f"--tenantid={tenant_2}"
+    ])
     if with_wal_acceptors:
         wa_factory.start_n_new(3)
 
     pg_tenant1 = postgres.create_start(
         f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
+        None,  # branch name, None means same as node name
         tenant_1,
         wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
     )
     pg_tenant2 = postgres.create_start(
         f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
+        None,  # branch name, None means same as node name
         tenant_2,
         wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
     )
@@ -45,4 +57,4 @@ def test_tenants_normal_work(
                 cur.execute("CREATE TABLE t(key int primary key, value text)")
                 cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
                 cur.execute("SELECT sum(key) FROM t")
-                assert cur.fetchone() == (5000050000,)
+                assert cur.fetchone() == (5000050000, )
diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py
index 45b0c98d40..819edc26b4 100644
--- a/test_runner/batch_others/test_timeline_size.py
+++ b/test_runner/batch_others/test_timeline_size.py
@@ -2,11 +2,10 @@ from contextlib import closing
 from uuid import UUID
 import psycopg2.extras
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 
-def test_timeline_size(
-    zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin
-):
+def test_timeline_size(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
     # Branch at the point where only 100 rows were inserted
     zenith_cli.run(["branch", "test_timeline_size", "empty"])
 
@@ -15,7 +14,7 @@ def test_timeline_size(
     assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
 
     pgmain = postgres.create_start("test_timeline_size")
-    print("postgres is running on 'test_timeline_size' branch")
+    log.info("postgres is running on 'test_timeline_size' branch")
 
     with closing(pgmain.connect()) as conn:
         with conn.cursor() as cur:
@@ -23,13 +22,11 @@ def test_timeline_size(
 
             # Create table, and insert the first 100 rows
             cur.execute("CREATE TABLE foo (t text)")
-            cur.execute(
-                """
+            cur.execute("""
                 INSERT INTO foo
                     SELECT 'long string to consume some space' || g
                     FROM generate_series(1, 10) g
-            """
-            )
+            """)
 
             res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size")
             assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/batch_others/test_twophase.py
index d818f04da4..bc6ee076c1 100644
--- a/test_runner/batch_others/test_twophase.py
+++ b/test_runner/batch_others/test_twophase.py
@@ -1,7 +1,7 @@
 import os
 
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, PgBin
-
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
@@ -9,11 +9,14 @@ pytest_plugins = ("fixtures.zenith_fixtures")
 #
 # Test branching, when a transaction is in prepared state
 #
-def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin: PgBin):
+def test_twophase(zenith_cli,
+                  pageserver: ZenithPageserver,
+                  postgres: PostgresFactory,
+                  pg_bin: PgBin):
     zenith_cli.run(["branch", "test_twophase", "empty"])
 
     pg = postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
-    print("postgres is running on 'test_twophase' branch")
+    log.info("postgres is running on 'test_twophase' branch")
 
     conn = pg.connect()
     cur = conn.cursor()
@@ -45,7 +48,7 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
     cur.execute('CHECKPOINT')
 
     twophase_files = os.listdir(pg.pg_twophase_dir_path())
-    print(twophase_files)
+    log.info(twophase_files)
     assert len(twophase_files) == 4
 
     cur.execute("COMMIT PREPARED 'insert_three'")
@@ -53,7 +56,7 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
     cur.execute('CHECKPOINT')
 
     twophase_files = os.listdir(pg.pg_twophase_dir_path())
-    print(twophase_files)
+    log.info(twophase_files)
     assert len(twophase_files) == 2
 
     # Create a branch with the transaction in prepared state
@@ -67,7 +70,7 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
 
     # Check that we restored only needed twophase files
     twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
-    print(twophase_files2)
+    log.info(twophase_files2)
     assert twophase_files2.sort() == twophase_files.sort()
 
     conn2 = pg2.connect()
@@ -79,8 +82,8 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
     cur2.execute("ROLLBACK PREPARED 'insert_two'")
 
     cur2.execute('SELECT * FROM foo')
-    assert cur2.fetchall() == [('one',), ('three',)]
+    assert cur2.fetchall() == [('one', ), ('three', )]
 
     # Only one committed insert is visible on the original branch
     cur.execute('SELECT * FROM foo')
-    assert cur.fetchall() == [('three',)]
+    assert cur.fetchall() == [('three', )]
diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/batch_others/test_vm_bits.py
index 92509fcbbb..6f19940f2f 100644
--- a/test_runner/batch_others/test_vm_bits.py
+++ b/test_runner/batch_others/test_vm_bits.py
@@ -1,17 +1,23 @@
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
+
 #
 # Test that the VM bit is cleared correctly at a HEAP_DELETE and
 # HEAP_UPDATE record.
 #
-def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, base_dir):
+def test_vm_bit_clear(pageserver: ZenithPageserver,
+                      postgres: PostgresFactory,
+                      pg_bin,
+                      zenith_cli,
+                      base_dir):
     # Create a branch for us
     zenith_cli.run(["branch", "test_vm_bit_clear", "empty"])
     pg = postgres.create_start('test_vm_bit_clear')
 
-    print("postgres is running on 'test_vm_bit_clear' branch")
+    log.info("postgres is running on 'test_vm_bit_clear' branch")
     pg_conn = pg.connect()
     cur = pg_conn.cursor()
 
@@ -48,13 +54,12 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
     ''')
 
     cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
-    assert(cur.fetchall() == []);
+    assert (cur.fetchall() == [])
     cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
-    assert(cur.fetchall() == []);
+    assert (cur.fetchall() == [])
 
     cur.close()
 
-
     # Check the same thing on the branch that we created right after the DELETE
     #
     # As of this writing, the code in smgrwrite() creates a full-page image whenever
@@ -63,7 +68,7 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
     # server at the right point-in-time avoids that full-page image.
     pg_new = postgres.create_start('test_vm_bit_clear_new')
 
-    print("postgres is running on 'test_vm_bit_clear_new' branch")
+    log.info("postgres is running on 'test_vm_bit_clear_new' branch")
     pg_new_conn = pg_new.connect()
     cur_new = pg_new_conn.cursor()
 
@@ -74,6 +79,6 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
     ''')
 
     cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
-    assert(cur_new.fetchall() == []);
+    assert (cur_new.fetchall() == [])
     cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
-    assert(cur_new.fetchall() == []);
+    assert (cur_new.fetchall() == [])
diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py
index b5577f28d0..3eaadc78a6 100644
--- a/test_runner/batch_others/test_wal_acceptor.py
+++ b/test_runner/batch_others/test_wal_acceptor.py
@@ -9,13 +9,17 @@ from contextlib import closing
 from multiprocessing import Process, Value
 from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory, PgBin
 from fixtures.utils import lsn_to_hex, mkdir_if_needed
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
 # basic test, write something in setup with wal acceptors, ensure that commits
 # succeed and data is written
-def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
+def test_normal_work(zenith_cli,
+                     pageserver: ZenithPageserver,
+                     postgres: PostgresFactory,
+                     wa_factory):
     zenith_cli.run(["branch", "test_wal_acceptors_normal_work", "empty"])
     wa_factory.start_n_new(3)
     pg = postgres.create_start('test_wal_acceptors_normal_work',
@@ -33,7 +37,10 @@ def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: Postgre
 
 # Run page server and multiple acceptors, and multiple compute nodes running
 # against different timelines.
-def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
+def test_many_timelines(zenith_cli,
+                        pageserver: ZenithPageserver,
+                        postgres: PostgresFactory,
+                        wa_factory):
     n_timelines = 2
 
     wa_factory.start_n_new(3)
@@ -65,7 +72,10 @@ def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: Post
 # Check that dead minority doesn't prevent the commits: execute insert n_inserts
 # times, with fault_probability chance of getting a wal acceptor down or up
 # along the way. 2 of 3 are always alive, so the work keeps going.
-def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
+def test_restarts(zenith_cli,
+                  pageserver: ZenithPageserver,
+                  postgres: PostgresFactory,
+                  wa_factory: WalAcceptorFactory):
     fault_probability = 0.01
     n_inserts = 1000
     n_acceptors = 3
@@ -176,7 +186,11 @@ def stop_value():
 
 
 # do inserts while concurrently getting up/down subsets of acceptors
-def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory, stop_value):
+def test_race_conditions(zenith_cli,
+                         pageserver: ZenithPageserver,
+                         postgres: PostgresFactory,
+                         wa_factory,
+                         stop_value):
 
     wa_factory.start_n_new(3)
 
@@ -203,6 +217,7 @@ def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
     stop_value.value = 1
     proc.join()
 
+
 class ProposerPostgres:
     """Object for running safekeepers sync with walproposer"""
     def __init__(self, pgdata_dir: str, pg_bin: PgBin, timeline_id: str, tenant_id: str):
@@ -284,10 +299,37 @@ def test_sync_safekeepers(repo_dir: str, pg_bin: PgBin, wa_factory: WalAcceptorF
         )
         lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
         lsn_after_append.append(lsn_hex)
-        print(f"safekeeper[{i}] lsn after append: {lsn_hex}")
+        log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")
 
     # run sync safekeepers
     lsn_after_sync = pg.sync_safekeepers()
-    print(f"lsn after sync = {lsn_after_sync}")
+    log.info(f"lsn after sync = {lsn_after_sync}")
 
     assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
+
+
+def test_timeline_status(zenith_cli, pageserver, postgres, wa_factory: WalAcceptorFactory):
+    wa_factory.start_n_new(1)
+
+    zenith_cli.run(["branch", "test_timeline_status", "empty"])
+    pg = postgres.create_start('test_timeline_status', wal_acceptors=wa_factory.get_connstrs())
+
+    wa = wa_factory.instances[0]
+    wa_http_cli = wa.http_client()
+    wa_http_cli.check_status()
+
+    # learn zenith timeline from compute
+    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
+    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]
+
+    # fetch something sensible from status
+    epoch = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
+
+    pg.safe_psql("create table t(i int)")
+
+    # ensure epoch goes up after reboot
+    pg.stop().start()
+    pg.safe_psql("insert into t values(10)")
+
+    epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
+    assert epoch_after_reboot > epoch
diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/batch_others/test_wal_acceptor_async.py
index b1647a8544..a5d4191375 100644
--- a/test_runner/batch_others/test_wal_acceptor_async.py
+++ b/test_runner/batch_others/test_wal_acceptor_async.py
@@ -3,9 +3,10 @@ import asyncpg
 import random
 
 from fixtures.zenith_fixtures import WalAcceptor, WalAcceptorFactory, ZenithPageserver, PostgresFactory, Postgres
+from fixtures.log_helper import getLogger
 from typing import List
-from fixtures.utils import debug_print
 
+log = getLogger('root.wal_acceptor_async')
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
@@ -18,13 +19,16 @@ class BankClient(object):
     async def initdb(self):
         await self.conn.execute('DROP TABLE IF EXISTS bank_accs')
         await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)')
-        await self.conn.execute('''
+        await self.conn.execute(
+            '''
             INSERT INTO bank_accs
             SELECT *, $1 FROM generate_series(0, $2)
-        ''', self.init_amount, self.n_accounts - 1)
+        ''',
+            self.init_amount,
+            self.n_accounts - 1)
         await self.conn.execute('DROP TABLE IF EXISTS bank_log')
         await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)')
-        
+
         # TODO: Remove when https://github.com/zenithdb/zenith/issues/644 is fixed
         await self.conn.execute('ALTER TABLE bank_accs SET (autovacuum_enabled = false)')
         await self.conn.execute('ALTER TABLE bank_log SET (autovacuum_enabled = false)')
@@ -33,6 +37,7 @@ class BankClient(object):
         row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs')
         assert row['sum'] == self.n_accounts * self.init_amount
 
+
 async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
     # avoid deadlocks by sorting uids
     if from_uid > to_uid:
@@ -41,16 +46,22 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
     async with conn.transaction():
         await conn.execute(
             'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2',
-            amount, to_uid,
+            amount,
+            to_uid,
         )
         await conn.execute(
             'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2',
-            amount, from_uid,
+            amount,
+            from_uid,
         )
-        await conn.execute('INSERT INTO bank_log VALUES ($1, $2, $3)',
-            from_uid, to_uid, amount,
+        await conn.execute(
+            'INSERT INTO bank_log VALUES ($1, $2, $3)',
+            from_uid,
+            to_uid,
+            amount,
         )
 
+
 class WorkerStats(object):
     def __init__(self, n_workers):
         self.counters = [0] * n_workers
@@ -63,18 +74,18 @@ class WorkerStats(object):
         self.counters[worker_id] += 1
 
     def check_progress(self):
-        debug_print("Workers progress: {}".format(self.counters))
+        log.debug("Workers progress: {}".format(self.counters))
 
         # every worker should finish at least one tx
         assert all(cnt > 0 for cnt in self.counters)
 
         progress = sum(self.counters)
-        print('All workers made {} transactions'.format(progress))
+        log.info('All workers made {} transactions'.format(progress))
 
 
 async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
     pg_conn = await pg.connect_async()
-    debug_print('Started worker {}'.format(worker_id))
+    log.debug('Started worker {}'.format(worker_id))
 
     while stats.running:
         from_uid = random.randint(0, n_accounts - 1)
@@ -84,9 +95,9 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou
         await bank_transfer(pg_conn, from_uid, to_uid, amount)
         stats.inc_progress(worker_id)
 
-        debug_print('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))
+        log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))
 
-    debug_print('Finished worker {}'.format(worker_id))
+    log.debug('Finished worker {}'.format(worker_id))
 
     await pg_conn.close()
 
@@ -113,7 +124,6 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
         worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
         workers.append(asyncio.create_task(worker))
 
-
     for it in range(iterations):
         victim = acceptors[it % len(acceptors)]
         victim.stop()
@@ -121,10 +131,7 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
         # Wait till previous victim recovers so it is ready for the next
         # iteration by making any writing xact.
         conn = await pg.connect_async()
-        await conn.execute(
-            'UPDATE bank_accs SET amount = amount WHERE uid = 1',
-            timeout=120
-        )
+        await conn.execute('UPDATE bank_accs SET amount = amount WHERE uid = 1', timeout=120)
         await conn.close()
 
         stats.reset()
@@ -134,7 +141,7 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
 
         victim.start()
 
-    print('Iterations are finished, exiting coroutines...')
+    log.info('Iterations are finished, exiting coroutines...')
     stats.running = False
     # await all workers
     await asyncio.gather(*workers)
@@ -144,7 +151,9 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
 
 
 # restart acceptors one by one, while executing and validating bank transactions
-def test_restarts_under_load(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory,
+def test_restarts_under_load(zenith_cli,
+                             pageserver: ZenithPageserver,
+                             postgres: PostgresFactory,
                              wa_factory: WalAcceptorFactory):
 
     wa_factory.start_n_new(3)
diff --git a/test_runner/batch_others/test_zenith_cli.py b/test_runner/batch_others/test_zenith_cli.py
index be9e2b07fd..7379cf2981 100644
--- a/test_runner/batch_others/test_zenith_cli.py
+++ b/test_runner/batch_others/test_zenith_cli.py
@@ -23,8 +23,11 @@ def helper_compare_branch_list(page_server_cur, zenith_cli, initial_tenant: str)
 
     res = zenith_cli.run(["branch", f"--tenantid={initial_tenant}"])
     res.check_returncode()
-    branches_cli_with_tenant_arg = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
-    branches_cli_with_tenant_arg = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
+    tenant_arg_branches = sorted(
+        map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
+    branches_cli_with_tenant_arg = [
+        b for b in tenant_arg_branches if b.startswith('test_cli_') or b in ('empty', 'main')
+    ]  # filtered from the --tenantid output so the comparison below is meaningful
 
     assert branches_api == branches_cli == branches_cli_with_tenant_arg
 
@@ -54,6 +57,7 @@ def test_cli_branch_list(pageserver: ZenithPageserver, zenith_cli):
     assert 'test_cli_branch_list_main' in branches_cli
     assert 'test_cli_branch_list_nested' in branches_cli
 
+
 def helper_compare_tenant_list(page_server_cur, zenith_cli: ZenithCli):
     page_server_cur.execute(f'tenant_list')
     tenants_api = sorted(json.loads(page_server_cur.fetchone()[0]))
diff --git a/test_runner/batch_pg_regress/test_isolation.py b/test_runner/batch_pg_regress/test_isolation.py
index ae654401cc..0f215337be 100644
--- a/test_runner/batch_pg_regress/test_isolation.py
+++ b/test_runner/batch_pg_regress/test_isolation.py
@@ -6,8 +6,14 @@ from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
-def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
-                   base_dir, capsys):
+def test_isolation(pageserver: ZenithPageserver,
+                   postgres: PostgresFactory,
+                   pg_bin,
+                   zenith_cli,
+                   test_output_dir,
+                   pg_distrib_dir,
+                   base_dir,
+                   capsys):
 
     # Create a branch for us
     zenith_cli.run(["branch", "test_isolation", "empty"])
diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py
index 6f61b77ebc..2fd7fee314 100644
--- a/test_runner/batch_pg_regress/test_pg_regress.py
+++ b/test_runner/batch_pg_regress/test_pg_regress.py
@@ -6,8 +6,14 @@ from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_re
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
-def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
-                    base_dir, capsys):
+def test_pg_regress(pageserver: ZenithPageserver,
+                    postgres: PostgresFactory,
+                    pg_bin,
+                    zenith_cli,
+                    test_output_dir,
+                    pg_distrib_dir,
+                    base_dir,
+                    capsys):
 
     # Create a branch for us
     zenith_cli.run(["branch", "test_pg_regress", "empty"])
diff --git a/test_runner/batch_pg_regress/test_zenith_regress.py b/test_runner/batch_pg_regress/test_zenith_regress.py
index 09f5f83933..ca1422388e 100644
--- a/test_runner/batch_pg_regress/test_zenith_regress.py
+++ b/test_runner/batch_pg_regress/test_zenith_regress.py
@@ -2,12 +2,19 @@ import os
 
 from fixtures.utils import mkdir_if_needed
 from fixtures.zenith_fixtures import PageserverPort, PostgresFactory, check_restored_datadir_content
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures")
 
 
-def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
-                        base_dir, capsys, pageserver_port: PageserverPort):
+def test_zenith_regress(postgres: PostgresFactory,
+                        pg_bin,
+                        zenith_cli,
+                        test_output_dir,
+                        pg_distrib_dir,
+                        base_dir,
+                        capsys,
+                        pageserver_port: PageserverPort):
 
     # Create a branch for us
     zenith_cli.run(["branch", "test_zenith_regress", "empty"])
@@ -38,7 +45,7 @@ def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_outp
         '--inputdir={}'.format(src_path),
     ]
 
-    print(pg_regress_command)
+    log.info(pg_regress_command)
     env = {
         'PGPORT': str(pg.port),
         'PGUSER': pg.username,
diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py
index 328ebcc1f8..f41d66674d 100644
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -1,5 +1,3 @@
-from pprint import pprint
-
 import os
 import re
 import timeit
@@ -26,7 +24,6 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast
 from typing_extensions import Literal
 
 from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture)
-
 """
 This file contains fixtures for micro-benchmarks.
 
@@ -57,7 +54,6 @@ in the test initialization, or measure disk usage after the test query.
 
 """
 
-
 # All the results are collected in this list, as a tuple:
 # (test_name: str, metric_name: str, metric_value: float, unit: str)
 #
@@ -67,6 +63,7 @@ in the test initialization, or measure disk usage after the test query.
 global zenbenchmark_results
 zenbenchmark_results = []
 
+
 class ZenithBenchmarkResults:
     """ An object for recording benchmark results. """
     def __init__(self):
@@ -79,6 +76,7 @@ class ZenithBenchmarkResults:
 
         self.results.append((test_name, metric_name, metric_value, unit))
 
+
 # Session scope fixture that initializes the results object
 @pytest.fixture(autouse=True, scope='session')
 def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
@@ -90,6 +88,7 @@ def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
 
     yield zenbenchmark_results
 
+
 class ZenithBenchmarker:
     """
     An object for recording benchmark results. This is created for each test
@@ -105,7 +104,6 @@ class ZenithBenchmarker:
         """
         self.results.record(self.request.node.name, metric_name, metric_value, unit)
 
-
     @contextmanager
     def record_duration(self, metric_name):
         """
@@ -136,7 +134,8 @@ class ZenithBenchmarker:
         # The metric should be an integer, as it's a number of bytes. But in general
         # all prometheus metrics are floats. So to be pedantic, read it as a float
         # and round to integer.
-        matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$', all_metrics,
+        matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$',
+                            all_metrics,
                             re.MULTILINE)
         return int(round(float(matches.group(1))))
 
@@ -147,8 +146,7 @@ class ZenithBenchmarker:
         # Fetch all the exposed prometheus metrics from page server
         all_metrics = pageserver.http_client().get_metrics()
         # See comment in get_io_writes()
-        matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics,
-                            re.MULTILINE)
+        matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE)
         return int(round(float(matches.group(1))))
 
     def get_timeline_size(self, repo_dir: str, tenantid: str, timelineid: str):
@@ -173,7 +171,11 @@ class ZenithBenchmarker:
         yield
         after = self.get_io_writes(pageserver)
 
-        self.results.record(self.request.node.name, metric_name, round((after - before) / (1024 * 1024)), 'MB')
+        self.results.record(self.request.node.name,
+                            metric_name,
+                            round((after - before) / (1024 * 1024)),
+                            'MB')
+
 
 @pytest.fixture(scope='function')
 def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
@@ -187,9 +189,7 @@ def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
 
 # Hook to print the results at the end
 @pytest.hookimpl(hookwrapper=True)
-def pytest_terminal_summary(
-    terminalreporter: TerminalReporter, exitstatus: int, config: Config
-):
+def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config):
     yield
 
     global zenbenchmark_results
diff --git a/test_runner/fixtures/log_helper.py b/test_runner/fixtures/log_helper.py
new file mode 100644
index 0000000000..cab7462a51
--- /dev/null
+++ b/test_runner/fixtures/log_helper.py
@@ -0,0 +1,45 @@
+import logging
+import logging.config
+"""
+This file configures the logging used by the Python tests.
+Logs are automatically captured and shown in their
+own section after all tests are executed.
+
+To see logs for all (even successful) tests, run
+pytest with the following command:
+- `pipenv run pytest -n8 -rA`
+
+Other log settings can be configured in the pytest.ini file.
+Add `log_cli = true` there to watch the
+logs in real time.
+
+To get more info about logging with pytest, see
+https://docs.pytest.org/en/6.2.x/logging.html
+"""
+
+# Default log levels only; the log format is specified in the pytest.ini file.
+# NOTE(review): dictConfig defaults "disable_existing_loggers" to True - confirm that is intended.
+LOGGING = {
+    "version": 1,
+    "loggers": {
+        "root": {
+            "level": "INFO"
+        },
+        "root.wal_acceptor_async": {
+            "level": "INFO"  # this logger emits a lot of logs on DEBUG level
+        }
+    }
+}
+
+
+def getLogger(name='root') -> logging.Logger:
+    """Return the logger called `name` (default 'root') via logging.getLogger().
+
+    Tests should obtain their logger through this helper. """
+    return logging.getLogger(name)
+
+
+# Default logger for tests: the logger named 'root', imported elsewhere as `log`.
+log = getLogger()
+
+logging.config.dictConfig(LOGGING)
diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py
index 92bd25ed24..dbb1809a2b 100644
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -2,6 +2,7 @@ import os
 import subprocess
 
 from typing import Any, List
+from fixtures.log_helper import log
 
 
 def get_self_dir() -> str:
@@ -39,7 +40,7 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
 
     with open(stdout_filename, 'w') as stdout_f:
         with open(stderr_filename, 'w') as stderr_f:
-            print('(capturing output to "{}.stdout")'.format(base))
+            log.info('(capturing output to "{}.stdout")'.format(base))
             subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
 
     return basepath
@@ -58,13 +59,6 @@ def global_counter() -> int:
     _global_counter += 1
     return _global_counter
 
-def debug_print(*args, **kwargs) -> None:
-    """ Print to the console if TEST_DEBUG_PRINT is set in env.
-    
-    All parameters are passed to print().
-    """
-    if os.environ.get('TEST_DEBUG_PRINT') is not None:
-        print(*args, **kwargs)
 
 def lsn_to_hex(num: int) -> str:
     """ Convert lsn from int to standard hex notation. """
diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py
index d29d278cdd..868f14ab29 100644
--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from functools import cached_property
+from cached_property import cached_property
 import asyncpg
 import os
 import pathlib
@@ -13,9 +13,8 @@ import signal
 import subprocess
 import time
 import filecmp
-import difflib
 
-from contextlib import closing
+from contextlib import closing, suppress
 from pathlib import Path
 from dataclasses import dataclass
 
@@ -27,6 +26,7 @@ from typing_extensions import Literal
 import requests
 
 from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture)
+from fixtures.log_helper import log
 """
 This file contains pytest fixtures. A fixture is a test resource that can be
 summoned by placing its name in the test's arguments.
@@ -54,17 +54,18 @@ DEFAULT_POSTGRES_DIR = 'tmp_install'
 BASE_PORT = 15000
 WORKER_PORT_NUM = 100
 
+
 def pytest_configure(config):
     """
     Ensure that no unwanted daemons are running before we start testing.
     Check that we do not owerflow available ports range.
     """
     numprocesses = config.getoption('numprocesses')
-    if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports
-         raise Exception('Too many workers configured. Cannot distrubute ports for services.')
+    if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768:  # do not use ephemeral ports
+        raise Exception('Too many workers configured. Cannot distrubute ports for services.')
 
     # does not use -c as it is not supported on macOS
-    cmd = ['pgrep', 'pageserver|postgres|wal_acceptor']
+    cmd = ['pgrep', 'pageserver|postgres|safekeeper']
     result = subprocess.run(cmd, stdout=subprocess.DEVNULL)
     if result.returncode == 0:
         # returncode of 0 means it found something.
@@ -72,7 +73,7 @@ def pytest_configure(config):
         # result of the test.
         # NOTE this shows as an internal pytest error, there might be a better way
         raise Exception(
-            'Found interfering processes running. Stop all Zenith pageservers, nodes, WALs, as well as stand-alone Postgres.'
+            'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
         )
 
 
@@ -105,7 +106,11 @@ class PgProtocol:
         self.port = port
         self.username = username or "zenith_admin"
 
-    def connstr(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> str:
+    def connstr(self,
+                *,
+                dbname: str = 'postgres',
+                username: Optional[str] = None,
+                password: Optional[str] = None) -> str:
         """
         Build a libpq connection string for the Postgres instance.
         """
@@ -117,7 +122,12 @@ class PgProtocol:
         return f'{res} password={password}'
 
     # autocommit=True here by default because that's what we need most of the time
-    def connect(self, *, autocommit=True, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> PgConnection:
+    def connect(self,
+                *,
+                autocommit=True,
+                dbname: str = 'postgres',
+                username: Optional[str] = None,
+                password: Optional[str] = None) -> PgConnection:
         """
         Connect to the node.
         Returns psycopg2's connection object.
@@ -133,7 +143,11 @@ class PgProtocol:
         conn.autocommit = autocommit
         return conn
 
-    async def connect_async(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> asyncpg.Connection:
+    async def connect_async(self,
+                            *,
+                            dbname: str = 'postgres',
+                            username: Optional[str] = None,
+                            password: Optional[str] = None) -> asyncpg.Connection:
         """
         Connect to the node from async python.
         Returns asyncpg's connection object.
@@ -188,22 +202,22 @@ class ZenithCli:
 
         >>> result = zenith_cli.run(...)
         >>> assert result.stderr == ""
-        >>> print(result.stdout)
+        >>> log.info(result.stdout)
         """
 
         assert type(arguments) == list
 
         args = [self.bin_zenith] + arguments
-        print('Running command "{}"'.format(' '.join(args)))
+        log.info('Running command "{}"'.format(' '.join(args)))
 
         # Interceipt CalledProcessError and print more info
         try:
             res = subprocess.run(args,
-                                env=self.env,
-                                check=True,
-                                universal_newlines=True,
-                                stdout=subprocess.PIPE,
-                                stderr=subprocess.PIPE)
+                                 env=self.env,
+                                 check=True,
+                                 universal_newlines=True,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE)
         except subprocess.CalledProcessError as exc:
             # this way command output will be in recorded and shown in CI in failure message
             msg = f"""\
@@ -211,7 +225,7 @@ class ZenithCli:
               stdout: {exc.stdout}
               stderr: {exc.stderr}
             """
-            print(msg)
+            log.info(msg)
 
             raise Exception(msg) from exc
 
@@ -241,21 +255,17 @@ class ZenithPageserverHttpClient(requests.Session):
         return res.json()
 
     def branch_create(self, tenant_id: uuid.UUID, name: str, start_point: str) -> Dict:
-        res = self.post(
-            f"http://localhost:{self.port}/v1/branch",
-            json={
-                'tenant_id': tenant_id.hex,
-                'name': name,
-                'start_point': start_point,
-            }
-        )
+        res = self.post(f"http://localhost:{self.port}/v1/branch",
+                        json={
+                            'tenant_id': tenant_id.hex,
+                            'name': name,
+                            'start_point': start_point,
+                        })
         res.raise_for_status()
         return res.json()
 
     def branch_detail(self, tenant_id: uuid.UUID, name: str) -> Dict:
-        res = self.get(
-            f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}",
-        )
+        res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}", )
         res.raise_for_status()
         return res.json()
 
@@ -297,7 +307,11 @@ class AuthKeys:
         return token
 
     def generate_tenant_token(self, tenant_id):
-        token = jwt.encode({"scope": "tenant", "tenant_id": tenant_id}, self.priv, algorithm="RS256")
+        token = jwt.encode({
+            "scope": "tenant", "tenant_id": tenant_id
+        },
+                           self.priv,
+                           algorithm="RS256")
 
         if isinstance(token, bytes):
             token = token.decode()
@@ -322,6 +336,7 @@ def worker_base_port(worker_seq_no: int):
     # so workers have disjoint set of ports for services
     return BASE_PORT + worker_seq_no * WORKER_PORT_NUM
 
+
 class PortDistributor:
     def __init__(self, base_port: int, port_number: int) -> None:
         self.iterator = iter(range(base_port, base_port + port_number))
@@ -330,13 +345,15 @@ class PortDistributor:
         try:
             return next(self.iterator)
         except StopIteration:
-            raise RuntimeError('port range configured for test is exhausted, consider enlarging the range')
+            raise RuntimeError(
+                'port range configured for test is exhausted, consider enlarging the range')
 
 
 @zenfixture
 def port_distributor(worker_base_port):
     return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM)
 
+
 @dataclass
 class PageserverPort:
     pg: int
@@ -351,14 +368,18 @@ class ZenithPageserver(PgProtocol):
         self.running = False
         self.initial_tenant = None
         self.repo_dir = repo_dir
-        self.service_port = port # do not shadow PgProtocol.port which is just int
+        self.service_port = port  # do not shadow PgProtocol.port which is just int
 
     def init(self, enable_auth: bool = False) -> 'ZenithPageserver':
         """
         Initialize the repository, i.e. run "zenith init".
         Returns self.
         """
-        cmd = ['init', f'--pageserver-pg-port={self.service_port.pg}', f'--pageserver-http-port={self.service_port.http}']
+        cmd = [
+            'init',
+            f'--pageserver-pg-port={self.service_port.pg}',
+            f'--pageserver-http-port={self.service_port.http}'
+        ]
         if enable_auth:
             cmd.append('--enable-auth')
         self.zenith_cli.run(cmd)
@@ -375,6 +396,7 @@ class ZenithPageserver(PgProtocol):
         Start the page server.
         Returns self.
         """
+        assert self.running == False
 
         self.zenith_cli.run(['start'])
         self.running = True
@@ -382,14 +404,18 @@ class ZenithPageserver(PgProtocol):
         self.initial_tenant = self.zenith_cli.run(['tenant', 'list']).stdout.strip()
         return self
 
-    def stop(self) -> 'ZenithPageserver':
+    def stop(self, immediate=False) -> 'ZenithPageserver':
         """
         Stop the page server.
         Returns self.
         """
+        cmd = ['stop']
+        if immediate:
+            cmd.append('immediate')
 
+        log.info(f"Stopping pageserver with {cmd}")
         if self.running:
-            self.zenith_cli.run(['stop'])
+            self.zenith_cli.run(cmd)
             self.running = False
 
         return self
@@ -398,7 +424,7 @@ class ZenithPageserver(PgProtocol):
         return self
 
     def __exit__(self, exc_type, exc, tb):
-        self.stop()
+        self.stop(True)
 
     @cached_property
     def auth_keys(self) -> AuthKeys:
@@ -413,18 +439,17 @@ class ZenithPageserver(PgProtocol):
         )
 
 
-
-
 @zenfixture
 def pageserver_port(port_distributor: PortDistributor) -> PageserverPort:
     pg = port_distributor.get_port()
     http = port_distributor.get_port()
-    print(f"pageserver_port: pg={pg} http={http}")
+    log.info(f"pageserver_port: pg={pg} http={http}")
     return PageserverPort(pg=pg, http=http)
 
 
 @zenfixture
-def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]:
+def pageserver(zenith_cli: ZenithCli, repo_dir: str,
+               pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]:
     """
     The 'pageserver' fixture provides a Page Server that's up and running.
 
@@ -436,15 +461,17 @@ def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: Pageserver
     By convention, the test branches are named after the tests. For example,
     test called 'test_foo' would create and use branches with the 'test_foo' prefix.
     """
-    ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init().start()
+    ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir,
+                          port=pageserver_port).init().start()
     # For convenience in tests, create a branch from the freshly-initialized cluster.
     zenith_cli.run(["branch", "empty", "main"])
 
     yield ps
 
     # After the yield comes any cleanup code we need.
-    print('Starting pageserver cleanup')
-    ps.stop()
+    log.info('Starting pageserver cleanup')
+    ps.stop(True)
+
 
 class PgBin:
     """ A helper class for executing postgres binaries """
@@ -481,7 +508,7 @@ class PgBin:
         """
 
         self._fixpath(command)
-        print('Running command "{}"'.format(' '.join(command)))
+        log.info('Running command "{}"'.format(' '.join(command)))
         env = self._build_env(env)
         subprocess.run(command, env=env, cwd=cwd, check=True)
 
@@ -498,7 +525,7 @@ class PgBin:
         """
 
         self._fixpath(command)
-        print('Running command "{}"'.format(' '.join(command)))
+        log.info('Running command "{}"'.format(' '.join(command)))
         env = self._build_env(env)
         return subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True, **kwargs)
 
@@ -507,9 +534,11 @@ class PgBin:
 def pg_bin(test_output_dir: str, pg_distrib_dir: str) -> PgBin:
     return PgBin(test_output_dir, pg_distrib_dir)
 
+
 @pytest.fixture
 def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort):
-    with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init(enable_auth=True).start() as ps:
+    with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir,
+                          port=pageserver_port).init(enable_auth=True).start() as ps:
         # For convenience in tests, create a branch from the freshly-initialized cluster.
         zenith_cli.run(["branch", "empty", "main"])
         yield ps
@@ -517,21 +546,27 @@ def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_por
 
 class Postgres(PgProtocol):
     """ An object representing a running postgres daemon. """
-    def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, tenant_id: str, port: int):
+    def __init__(self,
+                 zenith_cli: ZenithCli,
+                 repo_dir: str,
+                 pg_bin: PgBin,
+                 tenant_id: str,
+                 port: int):
         super().__init__(host='localhost', port=port)
 
         self.zenith_cli = zenith_cli
         self.running = False
         self.repo_dir = repo_dir
-        self.branch: Optional[str] = None  # dubious, see asserts below
-        self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
+        self.node_name: Optional[str] = None  # dubious, see asserts below
+        self.pgdata_dir: Optional[str] = None  # Path to computenode PGDATA
         self.tenant_id = tenant_id
         self.pg_bin = pg_bin
-        # path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<branch_name>/postgresql.conf
+        # path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
 
     def create(
         self,
-        branch: str,
+        node_name: str,
+        branch: Optional[str] = None,
         wal_acceptors: Optional[str] = None,
         config_lines: Optional[List[str]] = None,
     ) -> 'Postgres':
@@ -545,9 +580,19 @@ class Postgres(PgProtocol):
         if not config_lines:
             config_lines = []
 
-        self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}', f'--port={self.port}'])
-        self.branch = branch
-        path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch
+        if branch is None:
+            branch = node_name
+
+        self.zenith_cli.run([
+            'pg',
+            'create',
+            f'--tenantid={self.tenant_id}',
+            f'--port={self.port}',
+            node_name,
+            branch
+        ])
+        self.node_name = node_name
+        path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
         self.pgdata_dir = os.path.join(self.repo_dir, path)
 
         if wal_acceptors is not None:
@@ -564,20 +609,21 @@ class Postgres(PgProtocol):
         Returns self.
         """
 
-        assert self.branch is not None
+        assert self.node_name is not None
 
-        print(f"Starting postgres on branch {self.branch}")
+        log.info(f"Starting postgres node {self.node_name}")
 
-        run_result = self.zenith_cli.run(['pg', 'start', self.branch, f'--tenantid={self.tenant_id}', f'--port={self.port}'])
+        run_result = self.zenith_cli.run(
+            ['pg', 'start', f'--tenantid={self.tenant_id}', f'--port={self.port}', self.node_name])
         self.running = True
 
-        print(f"stdout: {run_result.stdout}")
+        log.info(f"stdout: {run_result.stdout}")
 
         return self
 
     def pg_data_dir_path(self) -> str:
         """ Path to data directory """
-        path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch
+        path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
         return os.path.join(self.repo_dir, path)
 
     def pg_xact_dir_path(self) -> str:
@@ -634,8 +680,8 @@ class Postgres(PgProtocol):
         """
 
         if self.running:
-            assert self.branch is not None
-            self.zenith_cli.run(['pg', 'stop', self.branch, f'--tenantid={self.tenant_id}'])
+            assert self.node_name is not None
+            self.zenith_cli.run(['pg', 'stop', self.node_name, f'--tenantid={self.tenant_id}'])
             self.running = False
 
         return self
@@ -646,15 +692,17 @@ class Postgres(PgProtocol):
         Returns self.
         """
 
-        assert self.branch is not None
+        assert self.node_name is not None
         assert self.tenant_id is not None
-        self.zenith_cli.run(['pg', 'stop', '--destroy', self.branch, f'--tenantid={self.tenant_id}'])
+        self.zenith_cli.run(
+            ['pg', 'stop', '--destroy', self.node_name, f'--tenantid={self.tenant_id}'])
 
         return self
 
     def create_start(
         self,
-        branch: str,
+        node_name: str,
+        branch: Optional[str] = None,
         wal_acceptors: Optional[str] = None,
         config_lines: Optional[List[str]] = None,
     ) -> 'Postgres':
@@ -665,6 +713,7 @@ class Postgres(PgProtocol):
         """
 
         self.create(
+            node_name=node_name,
             branch=branch,
             wal_acceptors=wal_acceptors,
             config_lines=config_lines,
@@ -678,9 +727,15 @@ class Postgres(PgProtocol):
     def __exit__(self, exc_type, exc, tb):
         self.stop()
 
+
 class PostgresFactory:
     """ An object representing multiple running postgres daemons. """
-    def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, initial_tenant: str, port_distributor: PortDistributor):
+    def __init__(self,
+                 zenith_cli: ZenithCli,
+                 repo_dir: str,
+                 pg_bin: PgBin,
+                 initial_tenant: str,
+                 port_distributor: PortDistributor):
         self.zenith_cli = zenith_cli
         self.repo_dir = repo_dir
         self.num_instances = 0
@@ -689,13 +744,13 @@ class PostgresFactory:
         self.port_distributor = port_distributor
         self.pg_bin = pg_bin
 
-    def create_start(
-        self,
-        branch: str = "main",
-        tenant_id: Optional[str] = None,
-        wal_acceptors: Optional[str] = None,
-        config_lines: Optional[List[str]] = None
-    ) -> Postgres:
+    def create_start(self,
+                     node_name: str = "main",
+                     branch: Optional[str] = None,
+                     tenant_id: Optional[str] = None,
+                     wal_acceptors: Optional[str] = None,
+                     config_lines: Optional[List[str]] = None) -> Postgres:
+
         pg = Postgres(
             zenith_cli=self.zenith_cli,
             repo_dir=self.repo_dir,
@@ -707,18 +762,18 @@ class PostgresFactory:
         self.instances.append(pg)
 
         return pg.create_start(
+            node_name=node_name,
             branch=branch,
             wal_acceptors=wal_acceptors,
             config_lines=config_lines,
         )
 
-    def create(
-        self,
-        branch: str = "main",
-        tenant_id: Optional[str] = None,
-        wal_acceptors: Optional[str] = None,
-        config_lines: Optional[List[str]] = None
-    ) -> Postgres:
+    def create(self,
+               node_name: str = "main",
+               branch: Optional[str] = None,
+               tenant_id: Optional[str] = None,
+               wal_acceptors: Optional[str] = None,
+               config_lines: Optional[List[str]] = None) -> Postgres:
 
         pg = Postgres(
             zenith_cli=self.zenith_cli,
@@ -732,18 +787,17 @@ class PostgresFactory:
         self.instances.append(pg)
 
         return pg.create(
+            node_name=node_name,
             branch=branch,
             wal_acceptors=wal_acceptors,
             config_lines=config_lines,
         )
 
-    def config(
-        self,
-        branch: str = "main",
-        tenant_id: Optional[str] = None,
-        wal_acceptors: Optional[str] = None,
-        config_lines: Optional[List[str]] = None
-    ) -> Postgres:
+    def config(self,
+               node_name: str = "main",
+               tenant_id: Optional[str] = None,
+               wal_acceptors: Optional[str] = None,
+               config_lines: Optional[List[str]] = None) -> Postgres:
 
         pg = Postgres(
             zenith_cli=self.zenith_cli,
@@ -757,7 +811,7 @@ class PostgresFactory:
         self.instances.append(pg)
 
         return pg.config(
-            branch=branch,
+            node_name=node_name,
             wal_acceptors=wal_acceptors,
             config_lines=config_lines,
         )
@@ -768,13 +822,18 @@ class PostgresFactory:
 
         return self
 
+
 @zenfixture
 def initial_tenant(pageserver: ZenithPageserver):
     return pageserver.initial_tenant
 
 
 @zenfixture
-def postgres(zenith_cli: ZenithCli, initial_tenant: str, repo_dir: str, pg_bin: PgBin, port_distributor: PortDistributor) -> Iterator[PostgresFactory]:
+def postgres(zenith_cli: ZenithCli,
+             initial_tenant: str,
+             repo_dir: str,
+             pg_bin: PgBin,
+             port_distributor: PortDistributor) -> Iterator[PostgresFactory]:
     pgfactory = PostgresFactory(
         zenith_cli=zenith_cli,
         repo_dir=repo_dir,
@@ -786,53 +845,69 @@ def postgres(zenith_cli: ZenithCli, initial_tenant: str, repo_dir: str, pg_bin:
     yield pgfactory
 
     # After the yield comes any cleanup code we need.
-    print('Starting postgres cleanup')
+    log.info('Starting postgres cleanup')
     pgfactory.stop_all()
 
+
 def read_pid(path: Path):
     """ Read content of file into number """
     return int(path.read_text())
 
 
+@dataclass
+class WalAcceptorPort:
+    pg: int
+    http: int
+
+
 @dataclass
 class WalAcceptor:
     """ An object representing a running wal acceptor daemon. """
     wa_bin_path: Path
     data_dir: Path
-    port: int
-    num: int # identifier for logging
+    port: WalAcceptorPort
+    num: int  # identifier for logging
     pageserver_port: int
     auth_token: Optional[str] = None
 
     def start(self) -> 'WalAcceptor':
         # create data directory if not exists
         self.data_dir.mkdir(parents=True, exist_ok=True)
-        self.pidfile.unlink(missing_ok=True)
+        with suppress(FileNotFoundError):
+            self.pidfile.unlink()
 
         cmd = [str(self.wa_bin_path)]
         cmd.extend(["-D", str(self.data_dir)])
-        cmd.extend(["-l", f"localhost:{self.port}"])
+        cmd.extend(["--listen-pg", f"localhost:{self.port.pg}"])
+        cmd.extend(["--listen-http", f"localhost:{self.port.http}"])
         cmd.append("--daemonize")
         cmd.append("--no-sync")
         # Tell page server it can receive WAL from this WAL safekeeper
         cmd.extend(["--pageserver", f"localhost:{self.pageserver_port}"])
         cmd.extend(["--recall", "1 second"])
-        print('Running command "{}"'.format(' '.join(cmd)))
+        log.info('Running command "{}"'.format(' '.join(cmd)))
         env = {'PAGESERVER_AUTH_TOKEN': self.auth_token} if self.auth_token else None
         subprocess.run(cmd, check=True, env=env)
 
-        # wait for wal acceptor start by checkking that pid is readable
-        for _ in range(3):
-            pid = self.get_pid()
-            if pid is not None:
-                return self
-            time.sleep(0.5)
-
-        raise RuntimeError("cannot get wal acceptor pid")
+        # wait for wal acceptor start by checking its status
+        started_at = time.time()
+        while True:
+            try:
+                http_cli = self.http_client()
+                http_cli.check_status()
+            except Exception as e:
+                elapsed = time.time() - started_at
+                if elapsed > 3:
+                    raise RuntimeError(
+                        f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}")
+                time.sleep(0.5)
+            else:
+                break  # success
+        return self
 
     @property
     def pidfile(self) -> Path:
-        return self.data_dir / "wal_acceptor.pid"
+        return self.data_dir / "safekeeper.pid"
 
     def get_pid(self) -> Optional[int]:
         if not self.pidfile.exists():
@@ -846,20 +921,21 @@ class WalAcceptor:
         return pid
 
     def stop(self) -> 'WalAcceptor':
-        print('Stopping wal acceptor {}'.format(self.num))
+        log.info('Stopping wal acceptor {}'.format(self.num))
         pid = self.get_pid()
         if pid is None:
-            print("Wal acceptor {} is not running".format(self.num))
+            log.info("Wal acceptor {} is not running".format(self.num))
             return self
 
         try:
             os.kill(pid, signal.SIGTERM)
         except Exception:
             # TODO: cleanup pid file on exit in wal acceptor
-            pass # pidfile might be obsolete
+            pass  # pidfile might be obsolete
         return self
 
-    def append_logical_message(self, tenant_id: str, timeline_id: str, request: Dict[str, Any]) -> Dict[str, Any]:
+    def append_logical_message(self, tenant_id: str, timeline_id: str,
+                               request: Dict[str, Any]) -> Dict[str, Any]:
         """
         Send JSON_CTRL query to append LogicalMessage to WAL and modify 
         safekeeper state. It will construct LogicalMessage from provided
@@ -868,23 +944,31 @@ class WalAcceptor:
 
         # "replication=0" hacks psycopg not to send additional queries
         # on startup, see https://github.com/psycopg/psycopg2/pull/482
-        connstr = f"host=localhost port={self.port} replication=0 options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
+        connstr = f"host=localhost port={self.port.pg} replication=0 options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
 
         with closing(psycopg2.connect(connstr)) as conn:
             # server doesn't support transactions
             conn.autocommit = True
             with conn.cursor() as cur:
                 request_json = json.dumps(request)
-                print(f"JSON_CTRL request on port {self.port}: {request_json}")
+                log.info(f"JSON_CTRL request on port {self.port.pg}: {request_json}")
                 cur.execute("JSON_CTRL " + request_json)
                 all = cur.fetchall()
-                print(f"JSON_CTRL response: {all[0][0]}")
+                log.info(f"JSON_CTRL response: {all[0][0]}")
                 return json.loads(all[0][0])
 
+    def http_client(self):
+        return WalAcceptorHttpClient(port=self.port.http)
+
+
 class WalAcceptorFactory:
     """ An object representing multiple running wal acceptors. """
-    def __init__(self, zenith_binpath: Path, data_dir: Path, pageserver_port: int, port_distributor: PortDistributor):
-        self.wa_bin_path = zenith_binpath / 'wal_acceptor'
+    def __init__(self,
+                 zenith_binpath: Path,
+                 data_dir: Path,
+                 pageserver_port: int,
+                 port_distributor: PortDistributor):
+        self.wa_bin_path = zenith_binpath / 'safekeeper'
         self.data_dir = data_dir
         self.instances: List[WalAcceptor] = []
         self.port_distributor = port_distributor
@@ -898,7 +982,10 @@ class WalAcceptorFactory:
         wa = WalAcceptor(
             wa_bin_path=self.wa_bin_path,
             data_dir=self.data_dir / "wal_acceptor_{}".format(wa_num),
-            port=self.port_distributor.get_port(),
+            port=WalAcceptorPort(
+                pg=self.port_distributor.get_port(),
+                http=self.port_distributor.get_port(),
+            ),
             num=wa_num,
             pageserver_port=self.pageserver_port,
             auth_token=auth_token,
@@ -922,11 +1009,14 @@ class WalAcceptorFactory:
 
     def get_connstrs(self) -> str:
         """ Get list of wal acceptor endpoints suitable for wal_acceptors GUC  """
-        return ','.join(["localhost:{}".format(wa.port) for wa in self.instances])
+        return ','.join(["localhost:{}".format(wa.port.pg) for wa in self.instances])
 
 
 @zenfixture
-def wa_factory(zenith_binpath: str, repo_dir: str, pageserver_port: PageserverPort, port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]:
+def wa_factory(zenith_binpath: str,
+               repo_dir: str,
+               pageserver_port: PageserverPort,
+               port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]:
     """ Gives WalAcceptorFactory providing wal acceptors. """
     wafactory = WalAcceptorFactory(
         zenith_binpath=Path(zenith_binpath),
@@ -936,16 +1026,36 @@ def wa_factory(zenith_binpath: str, repo_dir: str, pageserver_port: PageserverPo
     )
     yield wafactory
     # After the yield comes any cleanup code we need.
-    print('Starting wal acceptors cleanup')
+    log.info('Starting wal acceptors cleanup')
     wafactory.stop_all()
 
 
+@dataclass
+class PageserverTimelineStatus:
+    acceptor_epoch: int
+
+
+class WalAcceptorHttpClient(requests.Session):
+    def __init__(self, port: int) -> None:
+        super().__init__()
+        self.port = port
+
+    def check_status(self):
+        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
+
+    def timeline_status(self, tenant_id: str, timeline_id: str) -> PageserverTimelineStatus:
+        res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id}/{timeline_id}")
+        res.raise_for_status()
+        resj = res.json()
+        return PageserverTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch'])
+
+
 @zenfixture
 def base_dir() -> str:
     """ find the base directory (currently this is the git root) """
 
     base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..'))
-    print('\nbase_dir is', base_dir)
+    log.info(f'base_dir is {base_dir}')
     return base_dir
 
 
@@ -974,7 +1084,7 @@ def test_output_dir(request: Any, top_output_dir: str) -> str:
         test_name = 'shared'
 
     test_output_dir = os.path.join(top_output_dir, test_name)
-    print('test_output_dir is', test_output_dir)
+    log.info(f'test_output_dir is {test_output_dir}')
     shutil.rmtree(test_output_dir, ignore_errors=True)
     mkdir_if_needed(test_output_dir)
     return test_output_dir
@@ -1016,7 +1126,7 @@ def pg_distrib_dir(base_dir: str) -> str:
         pg_dir = env_postgres_bin
     else:
         pg_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
-    print('postgres dir is', pg_dir)
+    log.info(f'postgres dir is {pg_dir}')
     if not os.path.exists(os.path.join(pg_dir, 'bin/postgres')):
         raise Exception('postgres not found at "{}"'.format(pg_dir))
     return pg_dir
@@ -1038,6 +1148,7 @@ class TenantFactory:
 def tenant_factory(zenith_cli: ZenithCli):
     return TenantFactory(zenith_cli)
 
+
 #
 # Test helpers
 #
@@ -1048,18 +1159,29 @@ def list_files_to_compare(pgdata_dir: str):
             rel_dir = os.path.relpath(root, pgdata_dir)
             # Skip some dirs and files we don't want to compare
             skip_dirs = ['pg_wal', 'pg_stat', 'pg_stat_tmp', 'pg_subtrans', 'pg_logical']
-            skip_files = ['pg_internal.init', 'pg.log', 'zenith.signal', 'postgresql.conf',
-                        'postmaster.opts', 'postmaster.pid', 'pg_control']
+            skip_files = [
+                'pg_internal.init',
+                'pg.log',
+                'zenith.signal',
+                'postgresql.conf',
+                'postmaster.opts',
+                'postmaster.pid',
+                'pg_control'
+            ]
             if rel_dir not in skip_dirs and filename not in skip_files:
                 rel_file = os.path.join(rel_dir, filename)
                 pgdata_files.append(rel_file)
 
     pgdata_files.sort()
-    print(pgdata_files)
+    log.info(pgdata_files)
     return pgdata_files
 
+
 # pg is the existing and running compute node, that we want to compare with a basebackup
-def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str, pg: Postgres, pageserver_pg_port: int):
+def check_restored_datadir_content(zenith_cli: ZenithCli,
+                                   test_output_dir: str,
+                                   pg: Postgres,
+                                   pageserver_pg_port: int):
 
     # Get the timeline ID of our branch. We need it for the 'basebackup' command
     with closing(pg.connect()) as conn:
@@ -1071,7 +1193,7 @@ def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str,
     pg.stop()
 
     # Take a basebackup from pageserver
-    restored_dir_path = os.path.join(test_output_dir, f"{pg.branch}_restored_datadir")
+    restored_dir_path = os.path.join(test_output_dir, f"{pg.node_name}_restored_datadir")
     mkdir_if_needed(restored_dir_path)
 
     psql_path = os.path.join(pg.pg_bin.pg_bin_path, 'psql')
@@ -1101,9 +1223,7 @@ def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str,
                                                 restored_dir_path,
                                                 pgdata_files,
                                                 shallow=False)
-    print('filecmp result mismatch and error lists:')
-    print(mismatch)
-    print(error)
+    log.info(f'filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}')
 
     for f in mismatch:
 
diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py
index 95f1ea5e4a..cf6fa03703 100644
--- a/test_runner/performance/test_bulk_insert.py
+++ b/test_runner/performance/test_bulk_insert.py
@@ -1,9 +1,11 @@
 import os
 from contextlib import closing
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 
+
 #
 # Run bulk INSERT test.
 #
@@ -14,16 +16,21 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 3. Disk space used
 # 4. Peak memory usage
 #
-def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
+def test_bulk_insert(postgres: PostgresFactory,
+                     pageserver: ZenithPageserver,
+                     pg_bin,
+                     zenith_cli,
+                     zenbenchmark,
+                     repo_dir: str):
     # Create a branch for us
     zenith_cli.run(["branch", "test_bulk_insert", "empty"])
 
     pg = postgres.create_start('test_bulk_insert')
-    print("postgres is running on 'test_bulk_insert' branch")
+    log.info("postgres is running on 'test_bulk_insert' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect();
+    psconn = pageserver.connect()
     pscur = psconn.cursor()
 
     # Get the timeline ID of our branch. We need it for the 'do_gc' command
@@ -47,5 +54,7 @@ def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg
             zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
 
             # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
-            zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
+            timeline_size = zenbenchmark.get_timeline_size(repo_dir,
+                                                           pageserver.initial_tenant,
+                                                           timeline)
+            zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py
index e1de1dd014..1e2a17c2c9 100644
--- a/test_runner/performance/test_bulk_tenant_create.py
+++ b/test_runner/performance/test_bulk_tenant_create.py
@@ -37,7 +37,9 @@ def test_bulk_tenant_create(
 
         tenant = tenant_factory.create()
         zenith_cli.run([
-            "branch", f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", "main",
+            "branch",
+            f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
+            "main",
             f"--tenantid={tenant}"
         ])
 
@@ -46,6 +48,7 @@ def test_bulk_tenant_create(
 
         pg_tenant = postgres.create_start(
             f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
+            None,  # branch name, None means same as node name
             tenant,
             wal_acceptors=wa_factory.get_connstrs() if use_wal_acceptors == 'with_wa' else None,
         )
diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py
new file mode 100644
index 0000000000..5a80978cf0
--- /dev/null
+++ b/test_runner/performance/test_gist_build.py
@@ -0,0 +1,61 @@
+import os
+from contextlib import closing
+from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
+
+pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+
+
+#
+# Test buffering GiST build. It WAL-logs the whole relation, in 32-page chunks.
+# As of this writing, we duplicate those giant WAL records for each page,
+# which makes the delta layer about 32x larger than it needs to be.
+#
+def test_gist_buffering_build(postgres: PostgresFactory,
+                              pageserver: ZenithPageserver,
+                              pg_bin,
+                              zenith_cli,
+                              zenbenchmark,
+                              repo_dir: str):
+    # Create a branch for us
+    zenith_cli.run(["branch", "test_gist_buffering_build", "empty"])
+
+    pg = postgres.create_start('test_gist_buffering_build')
+    log.info("postgres is running on 'test_gist_buffering_build' branch")
+
+    # Open a connection directly to the page server that we'll use to force
+    # flushing the layers to disk
+    psconn = pageserver.connect()
+    pscur = psconn.cursor()
+
+    # Get the timeline ID of our branch. We need it for the 'do_gc' command
+    with closing(pg.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute("SHOW zenith.zenith_timeline")
+            timeline = cur.fetchone()[0]
+
+            # Create test table.
+            cur.execute("create table gist_point_tbl(id int4, p point)")
+            cur.execute(
+                "insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;"
+            )
+
+            # Build the index.
+            with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+                with zenbenchmark.record_duration('build'):
+                    cur.execute(
+                        "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
+                    )
+
+                    # Flush the layers from memory to disk. This is included in the reported
+                    # time and I/O
+                    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 1000000")
+
+            # Record peak memory usage
+            zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
+
+            # Report disk space used by the repository
+            timeline_size = zenbenchmark.get_timeline_size(repo_dir,
+                                                           pageserver.initial_tenant,
+                                                           timeline)
+            zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py
index 18db78f12a..388ac4314c 100644
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -1,9 +1,11 @@
 import os
 from contextlib import closing
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 
+
 #
 # Run a very short pgbench test.
 #
@@ -13,16 +15,21 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 2. Time to run 5000 pgbench transactions
 # 3. Disk space used
 #
-def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
+def test_pgbench(postgres: PostgresFactory,
+                 pageserver: ZenithPageserver,
+                 pg_bin,
+                 zenith_cli,
+                 zenbenchmark,
+                 repo_dir: str):
     # Create a branch for us
     zenith_cli.run(["branch", "test_pgbench_perf", "empty"])
 
     pg = postgres.create_start('test_pgbench_perf')
-    print("postgres is running on 'test_pgbench_perf' branch")
+    log.info("postgres is running on 'test_pgbench_perf' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect();
+    psconn = pageserver.connect()
     pscur = psconn.cursor()
 
     # Get the timeline ID of our branch. We need it for the 'do_gc' command
@@ -52,4 +59,4 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin
 
     # Report disk space used by the repository
     timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
-    zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
+    zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py
index 09310c702b..1a1cc7bf21 100644
--- a/test_runner/performance/test_write_amplification.py
+++ b/test_runner/performance/test_write_amplification.py
@@ -13,19 +13,26 @@
 import os
 from contextlib import closing
 from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 
-def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
+
+def test_write_amplification(postgres: PostgresFactory,
+                             pageserver: ZenithPageserver,
+                             pg_bin,
+                             zenith_cli,
+                             zenbenchmark,
+                             repo_dir: str):
     # Create a branch for us
     zenith_cli.run(["branch", "test_write_amplification", "empty"])
 
     pg = postgres.create_start('test_write_amplification')
-    print("postgres is running on 'test_write_amplification' branch")
+    log.info("postgres is running on 'test_write_amplification' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect();
+    psconn = pageserver.connect()
     pscur = psconn.cursor()
 
     with closing(pg.connect()) as conn:
@@ -70,5 +77,7 @@ def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPagese
                         pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
 
             # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
-            zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
+            timeline_size = zenbenchmark.get_timeline_size(repo_dir,
+                                                           pageserver.initial_tenant,
+                                                           timeline)
+            zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
diff --git a/test_runner/pytest.ini b/test_runner/pytest.ini
index 78b5304f78..e6c7013559 100644
--- a/test_runner/pytest.ini
+++ b/test_runner/pytest.ini
@@ -1,2 +1,4 @@
 [pytest]
 minversion = 6.0
+log_format = %(asctime)s.%(msecs)03d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
+log_date_format = %Y-%m-%d %H:%M:%S
diff --git a/test_runner/setup.cfg b/test_runner/setup.cfg
index 578cb28efc..cff4c7f86e 100644
--- a/test_runner/setup.cfg
+++ b/test_runner/setup.cfg
@@ -10,6 +10,7 @@ max-line-length = 100
 [yapf]
 based_on_style = pep8
 column_limit = 100
+split_all_top_level_comma_separated_values = true
 
 [mypy]
 # some tests don't typecheck when this flag is set
diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py
index da715d7387..66bfe1192c 100644
--- a/test_runner/test_broken.py
+++ b/test_runner/test_broken.py
@@ -1,6 +1,8 @@
 import pytest
 import os
 
+from fixtures.log_helper import log
+
 pytest_plugins = ("fixtures.zenith_fixtures")
 """
 Use this test to see what happens when tests fail.
@@ -22,7 +24,7 @@ def test_broken(zenith_cli, pageserver, postgres, pg_bin):
     zenith_cli.run(["branch", "test_broken", "empty"])
 
     postgres.create_start("test_broken")
-    print('postgres is running')
+    log.info('postgres is running')
 
-    print('THIS NEXT COMMAND WILL FAIL:')
+    log.info('THIS NEXT COMMAND WILL FAIL:')
     pg_bin.run('pgbench -i_am_a_broken_test'.split())
diff --git a/vendor/postgres b/vendor/postgres
index 93b1dd0055..6b58de66ec 160000
--- a/vendor/postgres
+++ b/vendor/postgres
@@ -1 +1 @@
-Subproject commit 93b1dd005527f3c82aec2dbf3b220aba8c9eab2c
+Subproject commit 6b58de66ec08e5dd8747353b3c33e696e5bfde81
diff --git a/walkeeper/Cargo.toml b/walkeeper/Cargo.toml
index 16790ca214..2e2e435236 100644
--- a/walkeeper/Cargo.toml
+++ b/walkeeper/Cargo.toml
@@ -11,6 +11,8 @@ regex = "1.4.5"
 bincode = "1.3"
 bytes = "1.0.1"
 byteorder = "1.4.3"
+hyper = "0.14"
+routerify = "2"
 fs2 = "0.4.3"
 lazy_static = "1.4.0"
 serde_json = "1"
@@ -28,9 +30,11 @@ humantime = "2.1.0"
 walkdir = "2"
 serde = { version = "1.0", features = ["derive"] }
 hex = "0.4.3"
+const_format = "0.2.21"
 
 # FIXME: 'pageserver' is needed for ZTimelineId. Refactor
 pageserver = { path = "../pageserver" }
 postgres_ffi = { path = "../postgres_ffi" }
 workspace_hack = { path = "../workspace_hack" }
+zenith_metrics = { path = "../zenith_metrics" }
 zenith_utils = { path = "../zenith_utils" }
diff --git a/walkeeper/README b/walkeeper/README
index 6c5a69e926..05325bafd9 100644
--- a/walkeeper/README
+++ b/walkeeper/README
@@ -89,12 +89,12 @@ A: Page Server is a single server which can be lost. As our primary
 
 Q: What if the compute node evicts a page, needs it back, but the page is yet
    to reach the Page Server?
-A: If the compute node has evicted a page, all changes from that page are
-   already committed, i.e. they are saved on majority of WAL safekeepers. These
-   WAL records will eventually reach the Page Server. The Page Server notes
-   that the compute note requests pages with a very recent LSN and will not
-   respond to the compute node until it a corresponding WAL is received from WAL
-   safekeepers.
+A: If the compute node has evicted a page, changes to it have been WAL-logged
+   (that's why it is called Write-Ahead Logging; there are some exceptions like
+   index builds, but these are exceptions). These WAL records will eventually
+   reach the Page Server. The Page Server notes that the compute node requests
+   pages with a very recent LSN and will not respond to the compute node until
+   the corresponding WAL is received from WAL safekeepers.
 
 Q: How long may Page Server wait for?
 A: Not too long, hopefully. If a page is evicted, it probably was not used for
diff --git a/walkeeper/src/bin/wal_acceptor.rs b/walkeeper/src/bin/safekeeper.rs
similarity index 64%
rename from walkeeper/src/bin/wal_acceptor.rs
rename to walkeeper/src/bin/safekeeper.rs
index d8a0ab6737..7ce8765789 100644
--- a/walkeeper/src/bin/wal_acceptor.rs
+++ b/walkeeper/src/bin/safekeeper.rs
@@ -1,35 +1,48 @@
 //
-// Main entry point for the wal_acceptor executable
+// Main entry point for the safekeeper executable
 //
 use anyhow::Result;
 use clap::{App, Arg};
+use const_format::formatcp;
 use daemonize::Daemonize;
 use log::*;
 use std::env;
+use std::net::TcpListener;
 use std::path::{Path, PathBuf};
 use std::thread;
+use zenith_utils::http::endpoint;
 use zenith_utils::logging;
 
+use walkeeper::defaults::{DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR};
+use walkeeper::http;
 use walkeeper::s3_offload;
 use walkeeper::wal_service;
 use walkeeper::WalAcceptorConf;
 
 fn main() -> Result<()> {
-    let arg_matches = App::new("Zenith wal_acceptor")
+    zenith_metrics::set_common_metrics_prefix("safekeeper");
+    let arg_matches = App::new("Zenith safekeeper")
         .about("Store WAL stream to local file system and push it to WAL receivers")
         .arg(
             Arg::with_name("datadir")
                 .short("D")
                 .long("dir")
                 .takes_value(true)
-                .help("Path to the WAL acceptor data directory"),
+                .help("Path to the safekeeper data directory"),
         )
         .arg(
-            Arg::with_name("listen")
+            Arg::with_name("listen-pg")
                 .short("l")
-                .long("listen")
+                .long("listen-pg")
+                .alias("listen") // for compatibility
                 .takes_value(true)
-                .help("listen for incoming connections on ip:port (default: 127.0.0.1:5454)"),
+                .help(formatcp!("listen for incoming WAL data connections on ip:port (default: {DEFAULT_PG_LISTEN_ADDR})")),
+        )
+        .arg(
+            Arg::with_name("listen-http")
+                .long("listen-http")
+                .takes_value(true)
+                .help(formatcp!("http endpoint address for metrics on ip:port (default: {DEFAULT_HTTP_LISTEN_ADDR})")),
         )
         .arg(
             Arg::with_name("pageserver")
@@ -70,7 +83,8 @@ fn main() -> Result<()> {
         daemonize: false,
         no_sync: false,
         pageserver_addr: None,
-        listen_addr: "localhost:5454".to_string(),
+        listen_pg_addr: DEFAULT_PG_LISTEN_ADDR.to_string(),
+        listen_http_addr: DEFAULT_HTTP_LISTEN_ADDR.to_string(),
         ttl: None,
         recall_period: None,
         pageserver_auth_token: env::var("PAGESERVER_AUTH_TOKEN").ok(),
@@ -91,8 +105,12 @@ fn main() -> Result<()> {
         conf.daemonize = true;
     }
 
-    if let Some(addr) = arg_matches.value_of("listen") {
-        conf.listen_addr = addr.to_owned();
+    if let Some(addr) = arg_matches.value_of("listen-pg") {
+        conf.listen_pg_addr = addr.to_owned();
+    }
+
+    if let Some(addr) = arg_matches.value_of("listen-http") {
+        conf.listen_http_addr = addr.to_owned();
     }
 
     if let Some(addr) = arg_matches.value_of("pageserver") {
@@ -111,8 +129,19 @@ fn main() -> Result<()> {
 }
 
 fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
-    let log_filename = conf.data_dir.join("wal_acceptor.log");
-    let (_scope_guard, log_file) = logging::init(log_filename, conf.daemonize)?;
+    let log_filename = conf.data_dir.join("safekeeper.log");
+    let log_file = logging::init(log_filename, conf.daemonize)?;
+
+    let http_listener = TcpListener::bind(conf.listen_http_addr.clone()).map_err(|e| {
+        error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
+        e
+    })?;
+
+    info!("Starting safekeeper on {}", conf.listen_pg_addr);
+    let pg_listener = TcpListener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
+        error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
+        e
+    })?;
 
     if conf.daemonize {
         info!("daemonizing...");
@@ -123,7 +152,7 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
         let stderr = log_file;
 
         let daemonize = Daemonize::new()
-            .pid_file("wal_acceptor.pid")
+            .pid_file("safekeeper.pid")
             .working_directory(Path::new("."))
             .stdout(stdout)
             .stderr(stderr);
@@ -136,6 +165,17 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
 
     let mut threads = Vec::new();
 
+    let conf_cloned = conf.clone();
+    let http_endpoint_thread = thread::Builder::new()
+        .name("http_endpoint_thread".into())
+        .spawn(|| {
+            // TODO authentication
+            let router = http::make_router(conf_cloned);
+            endpoint::serve_thread_main(router, http_listener).unwrap();
+        })
+        .unwrap();
+    threads.push(http_endpoint_thread);
+
     if conf.ttl.is_some() {
         let s3_conf = conf.clone();
         let s3_offload_thread = thread::Builder::new()
@@ -152,7 +192,7 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
         .name("WAL acceptor thread".into())
         .spawn(|| {
             // thread code
-            let thread_result = wal_service::thread_main(conf);
+            let thread_result = wal_service::thread_main(conf, pg_listener);
             if let Err(e) = thread_result {
                 info!("wal_service thread terminated: {}", e);
             }
diff --git a/walkeeper/src/http/mod.rs b/walkeeper/src/http/mod.rs
new file mode 100644
index 0000000000..c82d1c0362
--- /dev/null
+++ b/walkeeper/src/http/mod.rs
@@ -0,0 +1,2 @@
+pub mod routes;
+pub use routes::make_router;
diff --git a/walkeeper/src/http/routes.rs b/walkeeper/src/http/routes.rs
new file mode 100644
index 0000000000..8ab405508e
--- /dev/null
+++ b/walkeeper/src/http/routes.rs
@@ -0,0 +1,88 @@
+use hyper::{Body, Request, Response, StatusCode};
+use routerify::ext::RequestExt;
+use routerify::RouterBuilder;
+use serde::Serialize;
+use serde::Serializer;
+use std::fmt::Display;
+use std::sync::Arc;
+use zenith_utils::lsn::Lsn;
+
+use crate::safekeeper::AcceptorState;
+use crate::timeline::CreateControlFile;
+use crate::timeline::GlobalTimelines;
+use crate::WalAcceptorConf;
+use zenith_utils::http::endpoint;
+use zenith_utils::http::error::ApiError;
+use zenith_utils::http::json::json_response;
+use zenith_utils::http::request::parse_request_param;
+use zenith_utils::zid::{ZTenantId, ZTimelineId};
+
+/// Healthcheck handler: ignores the request and answers `200 OK` with an empty string.
+async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
+    Ok(json_response(StatusCode::OK, "")?)
+}
+
+/// Borrow the `WalAcceptorConf` stored as router data (an `Arc<WalAcceptorConf>`).
+/// Panics with "unknown state type" when no such data is attached to `request`.
+fn get_conf(request: &Request<Body>) -> &WalAcceptorConf {
+    let shared = request.data::<Arc<WalAcceptorConf>>();
+    shared.expect("unknown state type").as_ref()
+}
+
+/// `serialize_with` helper: serializes any `Display` value as a string.
+///
+/// Goes through `Serializer::collect_str`, serde's entry point for `Display`
+/// values, rather than building a temporary `String` with `format!` first.
+fn display_serialize<S: Serializer, F: Display>(z: &F, s: S) -> Result<S::Ok, S::Error> {
+    s.collect_str(z)
+}
+
+/// Info about a safekeeper timeline ready for reporting; ids and LSNs serialize as strings.
+#[derive(Debug, Serialize)]
+struct TimelineStatus {
+    #[serde(serialize_with = "display_serialize")]
+    tenant_id: ZTenantId,
+    #[serde(serialize_with = "display_serialize")]
+    timeline_id: ZTimelineId,
+    acceptor_state: AcceptorState,
+    #[serde(serialize_with = "display_serialize")]
+    commit_lsn: Lsn,
+    #[serde(serialize_with = "display_serialize")]
+    truncate_lsn: Lsn,
+}
+
+/// Report info about the timeline named by the `tenant_id` and `timeline_id` path params.
+async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
+    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+
+    let tli = GlobalTimelines::get(
+        get_conf(&request),
+        tenant_id,
+        timeline_id,
+        CreateControlFile::False,
+    )
+    .map_err(ApiError::from_err)?;
+    let sk_state = tli.get_info();
+
+    let status = TimelineStatus {
+        tenant_id,
+        timeline_id,
+        acceptor_state: sk_state.acceptor_state,
+        commit_lsn: sk_state.commit_lsn,
+        truncate_lsn: sk_state.truncate_lsn,
+    };
+    Ok(json_response(StatusCode::OK, status)?)
+}
+
+/// Safekeeper http router: a status route and a per-timeline info route under `/v1`.
+pub fn make_router(conf: WalAcceptorConf) -> RouterBuilder<hyper::Body, ApiError> {
+    // The config is handed to the handlers as router data (read back in `get_conf`).
+    endpoint::make_router()
+        .data(Arc::new(conf))
+        .get("/v1/status", status_handler)
+        .get(
+            "/v1/timeline/:tenant_id/:timeline_id",
+            timeline_status_handler,
+        )
+}
diff --git a/walkeeper/src/lib.rs b/walkeeper/src/lib.rs
index fb04459c47..4406823076 100644
--- a/walkeeper/src/lib.rs
+++ b/walkeeper/src/lib.rs
@@ -2,6 +2,7 @@
 use std::path::PathBuf;
 use std::time::Duration;
 
+pub mod http;
 pub mod json_ctrl;
 pub mod receive_wal;
 pub mod replication;
@@ -11,12 +12,23 @@ pub mod send_wal;
 pub mod timeline;
 pub mod wal_service;
 
+pub mod defaults {
+    use const_format::formatcp;
+
+    pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
+    pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+
+    pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;
+    pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
+}
+
 #[derive(Debug, Clone)]
 pub struct WalAcceptorConf {
     pub data_dir: PathBuf,
     pub daemonize: bool,
     pub no_sync: bool,
-    pub listen_addr: String,
+    pub listen_pg_addr: String,
+    pub listen_http_addr: String,
     pub pageserver_addr: Option<String>,
     // TODO (create issue) this is temporary, until protocol between PG<->SK<->PS rework
     pub pageserver_auth_token: Option<String>,
diff --git a/walkeeper/src/receive_wal.rs b/walkeeper/src/receive_wal.rs
index 4596344b76..527c8d891c 100644
--- a/walkeeper/src/receive_wal.rs
+++ b/walkeeper/src/receive_wal.rs
@@ -42,7 +42,7 @@ fn request_callback(conf: WalAcceptorConf, timelineid: ZTimelineId, tenantid: ZT
     );
 
     // use Config parsing because SockAddr parsing doesnt allow to use host names instead of ip addresses
-    let me_connstr = format!("postgresql://no_user@{}/no_db", conf.listen_addr);
+    let me_connstr = format!("postgresql://no_user@{}/no_db", conf.listen_pg_addr);
     let me_conf: Config = me_connstr.parse().unwrap();
     let (host, port) = connection_host_port(&me_conf);
     let callme = format!(
diff --git a/walkeeper/src/safekeeper.rs b/walkeeper/src/safekeeper.rs
index 95f0e9e0c2..49e5945c95 100644
--- a/walkeeper/src/safekeeper.rs
+++ b/walkeeper/src/safekeeper.rs
@@ -15,8 +15,11 @@ use std::cmp::min;
 use std::io;
 use std::io::Read;
 
+use lazy_static::lazy_static;
+
 use crate::replication::HotStandbyFeedback;
 use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
+use zenith_metrics::{register_gauge_vec, Gauge, GaugeVec};
 use zenith_utils::bin_ser::LeSer;
 use zenith_utils::lsn::Lsn;
 use zenith_utils::pq_proto::SystemId;
@@ -281,6 +284,45 @@ pub trait Storage {
     fn write_wal(&mut self, server: &ServerInfo, startpos: Lsn, buf: &[u8]) -> Result<()>;
 }
 
+lazy_static! {
+    // The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
+    // i64 is faster than f64, so update to u64 when available.
+    static ref FLUSH_LSN_GAUGE: GaugeVec = register_gauge_vec!(
+        "safekeeper_flush_lsn",
+        "Current flush_lsn, grouped by timeline",
+        &["ztli"]
+    )
+    .expect("Failed to register safekeeper_flush_lsn gauge vec");
+    static ref COMMIT_LSN_GAUGE: GaugeVec = register_gauge_vec!(
+        "safekeeper_commit_lsn",
+        "Current commit_lsn (not necessarily persisted to disk), grouped by timeline",
+        &["ztli"]
+    )
+    .expect("Failed to register safekeeper_commit_lsn gauge vec");
+}
+
+struct SafeKeeperMetrics {
+    flush_lsn: Gauge,
+    commit_lsn: Gauge,
+}
+
+impl SafeKeeperMetrics {
+    fn new(ztli: ZTimelineId) -> SafeKeeperMetrics {
+        let ztli_str = format!("{}", ztli);
+        SafeKeeperMetrics {
+            flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&[&ztli_str]),
+            commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&[&ztli_str]),
+        }
+    }
+
+    fn new_noname() -> SafeKeeperMetrics {
+        SafeKeeperMetrics {
+            flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&["n/a"]),
+            commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&["n/a"]),
+        }
+    }
+}
+
 /// SafeKeeper which consumes events (messages from compute) and provides
 /// replies.
 pub struct SafeKeeper<ST: Storage> {
@@ -288,6 +330,8 @@ pub struct SafeKeeper<ST: Storage> {
     /// Established by reading wal.
     pub flush_lsn: Lsn,
     pub tli: u32,
+    // Cached metrics so we don't have to recompute labels on each update.
+    metrics: SafeKeeperMetrics,
     /// not-yet-flushed pairs of same named fields in s.*
     pub commit_lsn: Lsn,
     pub truncate_lsn: Lsn,
@@ -306,6 +350,7 @@ where
         SafeKeeper {
             flush_lsn,
             tli,
+            metrics: SafeKeeperMetrics::new_noname(),
             commit_lsn: state.commit_lsn,
             truncate_lsn: state.truncate_lsn,
             storage,
@@ -357,6 +402,8 @@ where
         self.s.server.wal_seg_size = msg.wal_seg_size;
         self.storage.persist(&self.s, true)?;
 
+        self.metrics = SafeKeeperMetrics::new(self.s.server.ztli);
+
         info!(
             "processed greeting from proposer {:?}, sending term {:?}",
             msg.proposer_id, self.s.acceptor_state.term
@@ -481,6 +528,7 @@ where
         }
         if last_rec_lsn > self.flush_lsn {
             self.flush_lsn = last_rec_lsn;
+            self.metrics.flush_lsn.set(u64::from(self.flush_lsn) as f64);
         }
 
         // Advance commit_lsn taking into account what we have locally. xxx this
@@ -498,6 +546,9 @@ where
             sync_control_file |=
                 commit_lsn >= msg.h.epoch_start_lsn && self.s.commit_lsn < msg.h.epoch_start_lsn;
             self.commit_lsn = commit_lsn;
+            self.metrics
+                .commit_lsn
+                .set(u64::from(self.commit_lsn) as f64);
         }
 
         self.truncate_lsn = msg.h.truncate_lsn;
diff --git a/walkeeper/src/send_wal.rs b/walkeeper/src/send_wal.rs
index e81b6c5eac..fcd8595e15 100644
--- a/walkeeper/src/send_wal.rs
+++ b/walkeeper/src/send_wal.rs
@@ -13,14 +13,13 @@ use std::str::FromStr;
 use std::sync::Arc;
 use zenith_utils::postgres_backend;
 use zenith_utils::postgres_backend::PostgresBackend;
-use zenith_utils::pq_proto::{BeMessage, FeStartupMessage, RowDescriptor};
+use zenith_utils::pq_proto::{BeMessage, FeStartupMessage, RowDescriptor, INT4_OID, TEXT_OID};
 use zenith_utils::zid::{ZTenantId, ZTimelineId};
 
 use crate::timeline::CreateControlFile;
 
 /// Handler for streaming WAL from acceptor
 pub struct SendWalHandler {
-    /// wal acceptor configuration
     pub conf: WalAcceptorConf,
     /// assigned application name
     pub appname: Option<String>,
@@ -72,19 +71,16 @@ impl postgres_backend::Handler for SendWalHandler {
         }
         if query_string.starts_with(b"IDENTIFY_SYSTEM") {
             self.handle_identify_system(pgb)?;
-            Ok(())
         } else if query_string.starts_with(b"START_REPLICATION") {
             ReplicationConn::new(pgb).run(self, pgb, &query_string)?;
-            Ok(())
         } else if query_string.starts_with(b"START_WAL_PUSH") {
             ReceiveWalConn::new(pgb)?.run(self)?;
-            Ok(())
         } else if query_string.starts_with(b"JSON_CTRL") {
             handle_json_ctrl(self, pgb, &query_string)?;
-            Ok(())
         } else {
             bail!("Unexpected command {:?}", query_string);
         }
+        Ok(())
     }
 }
 
@@ -114,25 +110,25 @@ impl SendWalHandler {
         pgb.write_message_noflush(&BeMessage::RowDescription(&[
             RowDescriptor {
                 name: b"systemid",
-                typoid: 25,
+                typoid: TEXT_OID,
                 typlen: -1,
                 ..Default::default()
             },
             RowDescriptor {
                 name: b"timeline",
-                typoid: 23,
+                typoid: INT4_OID,
                 typlen: 4,
                 ..Default::default()
             },
             RowDescriptor {
                 name: b"xlogpos",
-                typoid: 25,
+                typoid: TEXT_OID,
                 typlen: -1,
                 ..Default::default()
             },
             RowDescriptor {
                 name: b"dbname",
-                typoid: 25,
+                typoid: TEXT_OID,
                 typlen: -1,
                 ..Default::default()
             },
diff --git a/walkeeper/src/timeline.rs b/walkeeper/src/timeline.rs
index 42e8afabb8..b30c061c9c 100644
--- a/walkeeper/src/timeline.rs
+++ b/walkeeper/src/timeline.rs
@@ -155,7 +155,7 @@ impl SharedState {
         }
         match opts.open(&control_file_path) {
             Ok(mut file) => {
-                // Lock file to prevent two or more active wal_acceptors
+                // Lock file to prevent two or more active safekeepers
                 match file.try_lock_exclusive() {
                     Ok(()) => {}
                     Err(e) => {
@@ -340,7 +340,7 @@ lazy_static! {
 }
 
 /// A zero-sized struct used to manage access to the global timelines map.
-struct GlobalTimelines;
+pub struct GlobalTimelines;
 
 impl GlobalTimelines {
     /// Get a timeline with control file loaded from the global TIMELINES map.
diff --git a/walkeeper/src/wal_service.rs b/walkeeper/src/wal_service.rs
index c77078560c..4a294e9c95 100644
--- a/walkeeper/src/wal_service.rs
+++ b/walkeeper/src/wal_service.rs
@@ -12,13 +12,7 @@ use crate::WalAcceptorConf;
 use zenith_utils::postgres_backend::{AuthType, PostgresBackend};
 
 /// Accept incoming TCP connections and spawn them into a background thread.
-pub fn thread_main(conf: WalAcceptorConf) -> Result<()> {
-    info!("Starting wal acceptor on {}", conf.listen_addr);
-    let listener = TcpListener::bind(conf.listen_addr.clone()).map_err(|e| {
-        error!("failed to bind to address {}: {}", conf.listen_addr, e);
-        e
-    })?;
-
+pub fn thread_main(conf: WalAcceptorConf, listener: TcpListener) -> Result<()> {
     loop {
         match listener.accept() {
             Ok((socket, peer_addr)) => {
@@ -41,8 +35,8 @@ fn handle_socket(socket: TcpStream, conf: WalAcceptorConf) -> Result<()> {
     socket.set_nodelay(true)?;
 
     let mut conn_handler = SendWalHandler::new(conf);
-    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;
-    // libpq replication protocol between wal_acceptor and replicas/pagers
+    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, false)?;
+    // libpq replication protocol between safekeeper and replicas/pagers
     pgbackend.run(&mut conn_handler)?;
 
     Ok(())
diff --git a/zenith/src/main.rs b/zenith/src/main.rs
index 1c04e803e6..e79d42377e 100644
--- a/zenith/src/main.rs
+++ b/zenith/src/main.rs
@@ -32,12 +32,16 @@ struct BranchTreeEl {
 //   * Providing CLI api to the pageserver
 //   * TODO: export/import to/from usual postgres
 fn main() -> Result<()> {
-    let timeline_arg = Arg::with_name("timeline")
-        .short("n")
+    let node_arg = Arg::with_name("node")
         .index(1)
-        .help("Timeline name")
+        .help("Node name")
         .required(true);
 
+    let timeline_arg = Arg::with_name("timeline")
+        .index(2)
+        .help("Branch name or a point-in time specification")
+        .required(false);
+
     let tenantid_arg = Arg::with_name("tenantid")
         .long("tenantid")
         .help("Tenant id. Represented as a hexadecimal string 32 symbols length")
@@ -88,7 +92,12 @@ fn main() -> Result<()> {
         )
         .subcommand(SubCommand::with_name("status"))
         .subcommand(SubCommand::with_name("start").about("Start local pageserver"))
-        .subcommand(SubCommand::with_name("stop").about("Stop local pageserver"))
+        .subcommand(SubCommand::with_name("stop").about("Stop local pageserver")
+                    .arg(Arg::with_name("immediate")
+                    .help("Don't flush repository data at shutdown")
+                    .required(false)
+                    )
+        )
         .subcommand(SubCommand::with_name("restart").about("Restart local pageserver"))
         .subcommand(
             SubCommand::with_name("pg")
@@ -97,7 +106,10 @@ fn main() -> Result<()> {
                 .subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone()))
                 .subcommand(SubCommand::with_name("create")
                     .about("Create a postgres compute node")
-                    .arg(timeline_arg.clone()).arg(tenantid_arg.clone()).arg(port_arg.clone())
+                    .arg(node_arg.clone())
+                    .arg(timeline_arg.clone())
+                    .arg(tenantid_arg.clone())
+                    .arg(port_arg.clone())
                     .arg(
                         Arg::with_name("config-only")
                             .help("Don't do basebackup, create compute node with only config files")
@@ -106,13 +118,13 @@ fn main() -> Result<()> {
                     ))
                 .subcommand(SubCommand::with_name("start")
                     .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
-                    .arg(
-                        timeline_arg.clone()
-                    ).arg(
-                        tenantid_arg.clone()
-                    ).arg(port_arg.clone()))
+                    .arg(node_arg.clone())
+                    .arg(timeline_arg.clone())
+                    .arg(tenantid_arg.clone())
+                    .arg(port_arg.clone()))
                 .subcommand(
                     SubCommand::with_name("stop")
+                        .arg(node_arg.clone())
                         .arg(timeline_arg.clone())
                         .arg(tenantid_arg.clone())
                         .arg(
@@ -196,10 +208,12 @@ fn main() -> Result<()> {
             }
         }
 
-        ("stop", Some(_sub_m)) => {
+        ("stop", Some(stop_match)) => {
             let pageserver = PageServerNode::from_env(&env);
 
-            if let Err(e) = pageserver.stop() {
+            let immediate = stop_match.is_present("immediate");
+
+            if let Err(e) = pageserver.stop(immediate) {
                 eprintln!("pageserver stop failed: {}", e);
                 exit(1);
             }
@@ -208,7 +222,8 @@ fn main() -> Result<()> {
         ("restart", Some(_sub_m)) => {
             let pageserver = PageServerNode::from_env(&env);
 
-            if let Err(e) = pageserver.stop() {
+            //TODO what shutdown strategy should we use here?
+            if let Err(e) = pageserver.stop(false) {
                 eprintln!("pageserver stop failed: {}", e);
                 exit(1);
             }
@@ -422,25 +437,32 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
             let tenantid: ZTenantId = list_match
                 .value_of("tenantid")
                 .map_or(Ok(env.tenantid), |value| value.parse())?;
+
             let branch_infos = get_branch_infos(env, &tenantid).unwrap_or_else(|e| {
                 eprintln!("Failed to load branch info: {}", e);
                 HashMap::new()
             });
 
-            println!("BRANCH\tADDRESS\t\tLSN\t\tSTATUS");
-            for ((_, timeline_name), node) in cplane
+            println!("NODE\tADDRESS\t\tBRANCH\tLSN\t\tSTATUS");
+            for ((_, node_name), node) in cplane
                 .nodes
                 .iter()
                 .filter(|((node_tenantid, _), _)| node_tenantid == &tenantid)
             {
+                // FIXME: This shows the LSN at the end of the timeline. It's not the
+                // right thing to do for read-only nodes that might be anchored at an
+                // older point in time, or following but lagging behind the primary.
+                let lsn_str = branch_infos
+                    .get(&node.timelineid)
+                    .map(|bi| bi.latest_valid_lsn.to_string())
+                    .unwrap_or_else(|| "?".to_string());
+
                 println!(
-                    "{}\t{}\t{}\t{}",
-                    timeline_name,
+                    "{}\t{}\t{}\t{}\t{}",
+                    node_name,
                     node.address,
-                    branch_infos
-                        .get(&node.timelineid)
-                        .map(|bi| bi.latest_valid_lsn.to_string())
-                        .unwrap_or_else(|| "?".to_string()),
+                    node.timelineid, // FIXME: resolve human-friendly branch name
+                    lsn_str,
                     node.status(),
                 );
             }
@@ -449,26 +471,28 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
             let tenantid: ZTenantId = create_match
                 .value_of("tenantid")
                 .map_or(Ok(env.tenantid), |value| value.parse())?;
-            let timeline_name = create_match.value_of("timeline").unwrap_or("main");
+            let node_name = create_match.value_of("node").unwrap_or("main");
+            let timeline_name = create_match.value_of("timeline").unwrap_or(node_name);
 
             let port: Option<u16> = match create_match.value_of("port") {
                 Some(p) => Some(p.parse()?),
                 None => None,
             };
-            cplane.new_node(tenantid, timeline_name, port)?;
+            cplane.new_node(tenantid, node_name, timeline_name, port)?;
         }
         ("start", Some(start_match)) => {
             let tenantid: ZTenantId = start_match
                 .value_of("tenantid")
                 .map_or(Ok(env.tenantid), |value| value.parse())?;
-            let timeline_name = start_match.value_of("timeline").unwrap_or("main");
+            let node_name = start_match.value_of("node").unwrap_or("main");
+            let timeline_name = start_match.value_of("timeline");
 
             let port: Option<u16> = match start_match.value_of("port") {
                 Some(p) => Some(p.parse()?),
                 None => None,
             };
 
-            let node = cplane.nodes.get(&(tenantid, timeline_name.to_owned()));
+            let node = cplane.nodes.get(&(tenantid, node_name.to_owned()));
 
             let auth_token = if matches!(env.auth_type, AuthType::ZenithJWT) {
                 let claims = Claims::new(Some(tenantid), Scope::Tenant);
@@ -477,12 +501,11 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                 None
             };
 
-            println!(
-                "Starting {} postgres on timeline {}...",
-                if node.is_some() { "existing" } else { "new" },
-                timeline_name
-            );
             if let Some(node) = node {
+                if timeline_name.is_some() {
+                    println!("timeline name ignored because node exists already");
+                }
+                println!("Starting existing postgres {}...", node_name);
                 node.start(&auth_token)?;
             } else {
                 // when used with custom port this results in non obvious behaviour
@@ -490,12 +513,17 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                 // start --port X
                 // stop
                 // start <-- will also use port X even without explicit port argument
-                let node = cplane.new_node(tenantid, timeline_name, port)?;
+                let timeline_name = timeline_name.unwrap_or(node_name);
+                println!(
+                    "Starting new postgres {} on {}...",
+                    node_name, timeline_name
+                );
+                let node = cplane.new_node(tenantid, node_name, timeline_name, port)?;
                 node.start(&auth_token)?;
             }
         }
         ("stop", Some(stop_match)) => {
-            let timeline_name = stop_match.value_of("timeline").unwrap_or("main");
+            let node_name = stop_match.value_of("node").unwrap_or("main");
             let destroy = stop_match.is_present("destroy");
             let tenantid: ZTenantId = stop_match
                 .value_of("tenantid")
@@ -503,8 +531,8 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
 
             let node = cplane
                 .nodes
-                .get(&(tenantid, timeline_name.to_owned()))
-                .ok_or_else(|| anyhow!("postgres {} is not found", timeline_name))?;
+                .get(&(tenantid, node_name.to_owned()))
+                .ok_or_else(|| anyhow!("postgres {} is not found", node_name))?;
             node.stop(destroy)?;
         }
 
diff --git a/zenith_metrics/src/lib.rs b/zenith_metrics/src/lib.rs
index e3c3c81ee7..59a8a31c9e 100644
--- a/zenith_metrics/src/lib.rs
+++ b/zenith_metrics/src/lib.rs
@@ -5,6 +5,8 @@
 use lazy_static::lazy_static;
 use once_cell::race::OnceBox;
 pub use prometheus::{exponential_buckets, linear_buckets};
+pub use prometheus::{register_gauge, Gauge};
+pub use prometheus::{register_gauge_vec, GaugeVec};
 pub use prometheus::{register_histogram, Histogram};
 pub use prometheus::{register_histogram_vec, HistogramVec};
 pub use prometheus::{register_int_counter, IntCounter};
@@ -44,7 +46,7 @@ pub fn set_common_metrics_prefix(prefix: &'static str) {
 }
 
 /// Prepends a prefix to a common metric name so they are distinguished between
-/// different services, see https://github.com/zenithdb/zenith/pull/681
+/// different services, see <https://github.com/zenithdb/zenith/pull/681>
 /// A call to set_common_metrics_prefix() is necessary prior to calling this.
 pub fn new_common_metric_name(unprefixed_metric_name: &str) -> String {
     // Not unwrap() because metrics may be initialized after multiple threads have been started.
diff --git a/zenith_utils/Cargo.toml b/zenith_utils/Cargo.toml
index 22c1c9bab6..6571fae042 100644
--- a/zenith_utils/Cargo.toml
+++ b/zenith_utils/Cargo.toml
@@ -18,12 +18,9 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 thiserror = "1.0"
 tokio = "1.11"
-
-slog-async = "2.6.0"
-slog-stdlog = "4.1.0"
-slog-scope = "4.4.0"
-slog-term = "2.8.0"
-slog = "2.7.0"
+tracing = "0.1"
+tracing-log = "0.1"
+tracing-subscriber = "0.2"
 
 zenith_metrics = { path = "../zenith_metrics" }
 workspace_hack = { path = "../workspace_hack" }
diff --git a/zenith_utils/src/http/endpoint.rs b/zenith_utils/src/http/endpoint.rs
index 3c5b53b77a..30e7bfc921 100644
--- a/zenith_utils/src/http/endpoint.rs
+++ b/zenith_utils/src/http/endpoint.rs
@@ -12,8 +12,17 @@ use std::net::TcpListener;
 use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
 use zenith_metrics::{Encoder, TextEncoder};
 
+use std::sync::Mutex;
+use tokio::sync::oneshot::Sender;
+
 use super::error::ApiError;
 
+lazy_static! {
+    /// Channel used to send shutdown signal - wrapped in an Option to allow
+    /// it to be taken by value (since oneshot channels consume themselves on send)
+    static ref SHUTDOWN_SENDER: Mutex<Option<Sender<()>>> = Mutex::new(None);
+}
+
 lazy_static! {
     static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
         new_common_metric_name("serve_metrics_count"),
@@ -143,11 +152,18 @@ pub fn check_permission(req: &Request<Body>, tenantid: Option<ZTenantId>) -> Res
     }
 }
 
+/// Signal the server run by `serve_thread_main` to shut down; no-op if no sender is stored.
+pub fn shutdown() {
+    if let Some(tx) = SHUTDOWN_SENDER.lock().unwrap().take() {
+        let _ = tx.send(());
+    }
+}
+
 pub fn serve_thread_main(
     router_builder: RouterBuilder<hyper::Body, ApiError>,
     listener: TcpListener,
 ) -> anyhow::Result<()> {
-    log::info!("Starting a http endoint at {}", listener.local_addr()?);
+    log::info!("Starting a http endpoint at {}", listener.local_addr()?);
 
     // Create a Service from the router above to handle incoming requests.
     let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
@@ -159,7 +175,14 @@ pub fn serve_thread_main(
 
     let _guard = runtime.enter();
 
-    let server = Server::from_tcp(listener)?.serve(service);
+    let (send, recv) = tokio::sync::oneshot::channel::<()>();
+    *SHUTDOWN_SENDER.lock().unwrap() = Some(send);
+
+    let server = Server::from_tcp(listener)?
+        .serve(service)
+        .with_graceful_shutdown(async {
+            recv.await.ok();
+        });
 
     runtime.block_on(server)?;
 
diff --git a/zenith_utils/src/http/mod.rs b/zenith_utils/src/http/mod.rs
index b6740ad543..16b7e87721 100644
--- a/zenith_utils/src/http/mod.rs
+++ b/zenith_utils/src/http/mod.rs
@@ -1,3 +1,4 @@
 pub mod endpoint;
 pub mod error;
 pub mod json;
+pub mod request;
diff --git a/zenith_utils/src/http/request.rs b/zenith_utils/src/http/request.rs
new file mode 100644
index 0000000000..3bc8993c26
--- /dev/null
+++ b/zenith_utils/src/http/request.rs
@@ -0,0 +1,33 @@
+use std::str::FromStr;
+
+use super::error::ApiError;
+use hyper::{Body, Request};
+use routerify::ext::RequestExt;
+
+/// Return path parameter `param_name` of `request`, or `ApiError::BadRequest`
+/// if the request carries no parameter with that name.
+pub fn get_request_param<'a>(
+    request: &'a Request<Body>,
+    param_name: &str,
+) -> Result<&'a str, ApiError> {
+    match request.param(param_name) {
+        Some(arg) => Ok(arg),
+        None => Err(ApiError::BadRequest(format!(
+            "no {} specified in path param",
+            param_name
+        ))),
+    }
+}
+
+/// Fetch path parameter `param_name` of `request` and parse it into `T`.
+///
+/// Missing and unparsable parameters are both reported as `ApiError::BadRequest`.
+pub fn parse_request_param<T: FromStr>(
+    request: &Request<Body>,
+    param_name: &str,
+) -> Result<T, ApiError> {
+    let raw = get_request_param(request, param_name)?;
+    // `T::Err` has no `Display` bound here, so the parse error itself is dropped.
+    raw.parse()
+        .map_err(|_| ApiError::BadRequest(format!("failed to parse {}", param_name)))
+}
diff --git a/zenith_utils/src/lib.rs b/zenith_utils/src/lib.rs
index ca26be5df2..96b3cf5066 100644
--- a/zenith_utils/src/lib.rs
+++ b/zenith_utils/src/lib.rs
@@ -8,6 +8,9 @@ pub mod lsn;
 /// SeqWait allows waiting for a future sequence number to arrive
 pub mod seqwait;
 
+/// append only ordered map implemented with a Vec
+pub mod vec_map;
+
 // Async version of SeqWait. Currently unused.
 // pub mod seqwait_async;
 
diff --git a/zenith_utils/src/logging.rs b/zenith_utils/src/logging.rs
index c6ed35cbf4..53dbfc305d 100644
--- a/zenith_utils/src/logging.rs
+++ b/zenith_utils/src/logging.rs
@@ -1,4 +1,3 @@
-use slog::{Drain, Level};
 use std::{
     fs::{File, OpenOptions},
     path::Path,
@@ -6,10 +5,12 @@ use std::{
 
 use anyhow::{Context, Result};
 
-pub fn init(
-    log_filename: impl AsRef<Path>,
-    daemonize: bool,
-) -> Result<(slog_scope::GlobalLoggerGuard, File)> {
+use tracing::subscriber::set_global_default;
+use tracing_log::LogTracer;
+use tracing_subscriber::fmt;
+use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry};
+
+pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
     // Don't open the same file for output multiple times;
     // the different fds could overwrite each other's output.
     let log_file = OpenOptions::new()
@@ -18,30 +19,38 @@ pub fn init(
         .open(&log_filename)
         .with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;
 
+    let default_filter_str = "info";
+
+    // We fall back to printing all spans at info-level or above if
+    // the RUST_LOG environment variable is not set.
+    let env_filter =
+        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_filter_str));
+
     // we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
     // if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
     // for example to be in line with docker log command which expects logs comimg from stdout
-    let guard = if daemonize {
-        let decorator = slog_term::PlainSyncDecorator::new(log_file.try_clone()?);
-        let drain = slog_term::FullFormat::new(decorator)
-            .build()
-            .filter_level(Level::Info)
-            .fuse();
-        let logger = slog::Logger::root(drain, slog::o!());
-        slog_scope::set_global_logger(logger)
+    //
+    // TODO: perhaps use a more human-readable format when !daemonize
+    if daemonize {
+        // A failed clone is an ordinary I/O error: return it instead of panicking.
+        let writer_file = log_file.try_clone()?;
+        let fmt_layer = fmt::layer()
+            .pretty()
+            .with_target(false) // don't include event targets
+            .with_ansi(false) // don't use colors in log file
+            .with_writer(move || writer_file.try_clone().unwrap());
+        let subscriber = Registry::default().with(env_filter).with(fmt_layer);
+
+        set_global_default(subscriber).expect("Failed to set subscriber");
     } else {
-        let decorator = slog_term::TermDecorator::new().build();
-        let drain = slog_term::FullFormat::new(decorator)
-            .build()
-            .filter_level(Level::Info)
-            .fuse();
-        let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
-        let logger = slog::Logger::root(drain, slog::o!());
-        slog_scope::set_global_logger(logger)
-    };
+        let fmt_layer = fmt::layer().with_target(false); // don't include event targets
+        let subscriber = Registry::default().with(env_filter).with(fmt_layer);
 
-    // initialise forwarding of std log calls
-    slog_stdlog::init()?;
+        set_global_default(subscriber).expect("Failed to set subscriber");
+    }
 
-    Ok((guard, log_file))
+    // Redirect all `log`'s events to our subscriber
+    LogTracer::init().expect("Failed to set logger");
+
+    Ok(log_file)
 }
diff --git a/zenith_utils/src/postgres_backend.rs b/zenith_utils/src/postgres_backend.rs
index b2e0a1a525..02eb330f3b 100644
--- a/zenith_utils/src/postgres_backend.rs
+++ b/zenith_utils/src/postgres_backend.rs
@@ -13,7 +13,11 @@ use serde::{Deserialize, Serialize};
 use std::io::{self, Write};
 use std::net::{Shutdown, SocketAddr, TcpStream};
 use std::str::FromStr;
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
+use std::time::Duration;
+
+static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
 
 pub trait Handler {
     /// Handle single query.
@@ -135,13 +139,32 @@ pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
     query_string
 }
 
+// Helper function for socket read loops
+pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool {
+    for cause in error.chain() {
+        if let Some(io_error) = cause.downcast_ref::<io::Error>() {
+            if io_error.kind() == std::io::ErrorKind::WouldBlock {
+                return true;
+            }
+        }
+    }
+    false
+}
+
 impl PostgresBackend {
     pub fn new(
         socket: TcpStream,
         auth_type: AuthType,
         tls_config: Option<Arc<rustls::ServerConfig>>,
+        set_read_timeout: bool,
     ) -> io::Result<Self> {
         let peer_addr = socket.peer_addr()?;
+        if set_read_timeout {
+            socket
+                .set_read_timeout(Some(Duration::from_secs(5)))
+                .unwrap();
+        }
+
         Ok(Self {
             stream: Some(Stream::Bidirectional(BidiStream::from_tcp(socket))),
             buf_out: BytesMut::with_capacity(10 * 1024),
@@ -229,12 +252,26 @@ impl PostgresBackend {
 
         let mut unnamed_query_string = Bytes::new();
 
-        while let Some(msg) = self.read_message()? {
-            trace!("got message {:?}", msg);
+        while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
+            match self.read_message() {
+                Ok(message) => {
+                    if let Some(msg) = message {
+                        trace!("got message {:?}", msg);
 
-            match self.process_message(handler, msg, &mut unnamed_query_string)? {
-                ProcessMsgResult::Continue => continue,
-                ProcessMsgResult::Break => break,
+                        match self.process_message(handler, msg, &mut unnamed_query_string)? {
+                            ProcessMsgResult::Continue => continue,
+                            ProcessMsgResult::Break => break,
+                        }
+                    } else {
+                        break;
+                    }
+                }
+                Err(e) => {
+                    // If it is a timeout error, continue the loop
+                    if !is_socket_read_timed_out(&e) {
+                        return Err(e);
+                    }
+                }
             }
         }
 
@@ -427,3 +464,8 @@ impl PostgresBackend {
         Ok(ProcessMsgResult::Continue)
     }
 }
+
+// Set the flag to inform connections to cancel (old value unused, so plain store)
+pub fn set_pgbackend_shutdown_requested() {
+    PGBACKEND_SHUTDOWN_REQUESTED.store(true, Ordering::Relaxed);
+}
diff --git a/zenith_utils/src/pq_proto.rs b/zenith_utils/src/pq_proto.rs
index 12e08737bf..1941784332 100644
--- a/zenith_utils/src/pq_proto.rs
+++ b/zenith_utils/src/pq_proto.rs
@@ -15,8 +15,9 @@ use std::str;
 pub type Oid = u32;
 pub type SystemId = u64;
 
-pub const TEXT_OID: Oid = 25;
 pub const INT8_OID: Oid = 20;
+pub const INT4_OID: Oid = 23;
+pub const TEXT_OID: Oid = 25;
 
 #[derive(Debug)]
 pub enum FeMessage {
diff --git a/zenith_utils/src/vec_map.rs b/zenith_utils/src/vec_map.rs
new file mode 100644
index 0000000000..4e2c827b47
--- /dev/null
+++ b/zenith_utils/src/vec_map.rs
@@ -0,0 +1,293 @@
+use std::{cmp::Ordering, ops::RangeBounds};
+
+use serde::{Deserialize, Serialize};
+
+/// Ordered map datastructure implemented in a Vec.
+/// Append only - can only add keys that are larger than the
+/// current max key.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct VecMap<K, V>(Vec<(K, V)>);
+
+impl<K, V> Default for VecMap<K, V> {
+    fn default() -> Self {
+        VecMap(Default::default())
+    }
+}
+
+#[derive(Debug)]
+pub struct InvalidKey;
+
+impl<K: Ord, V> VecMap<K, V> {
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    pub fn as_slice(&self) -> &[(K, V)] {
+        self.0.as_slice()
+    }
+
+    /// This function may panic if given a range where the lower bound is
+    /// greater than the upper bound.
+    pub fn slice_range<R: RangeBounds<K>>(&self, range: R) -> &[(K, V)] {
+        use std::ops::Bound::*;
+
+        let binary_search = |k: &K| self.0.binary_search_by_key(&k, extract_key);
+
+        let start_idx = match range.start_bound() {
+            Unbounded => 0,
+            Included(k) => binary_search(k).unwrap_or_else(std::convert::identity),
+            Excluded(k) => match binary_search(k) {
+                Ok(idx) => idx + 1,
+                Err(idx) => idx,
+            },
+        };
+
+        let end_idx = match range.end_bound() {
+            Unbounded => self.0.len(),
+            Included(k) => match binary_search(k) {
+                Ok(idx) => idx + 1,
+                Err(idx) => idx,
+            },
+            Excluded(k) => binary_search(k).unwrap_or_else(std::convert::identity),
+        };
+
+        &self.0[start_idx..end_idx]
+    }
+
+    /// Push a new entry onto the end of the map.
+    /// Keys must be strictly increasing: if `key` is not greater than the last
+    /// stored key, nothing is added and an InvalidKey error is returned.
+    pub fn append(&mut self, key: K, value: V) -> Result<(), InvalidKey> {
+        let in_order = self.0.last().map_or(true, |(max_key, _)| &key > max_key);
+        if !in_order {
+            return Err(InvalidKey);
+        }
+
+        // An empty map accepts any key; otherwise `key` exceeds the last key.
+        self.0.push((key, value));
+        Ok(())
+    }
+
+    /// Update the maximum key value pair or add a new key value pair to the map.
+    /// If `key` is less than the current maximum key no updates or additions
+    /// will occur and InvalidKey error will be returned.
+    pub fn append_or_update_last(&mut self, key: K, mut value: V) -> Result<Option<V>, InvalidKey> {
+        if let Some((last_key, last_value)) = self.0.last_mut() {
+            match key.cmp(last_key) {
+                Ordering::Less => return Err(InvalidKey),
+                Ordering::Equal => {
+                    std::mem::swap(last_value, &mut value);
+                    return Ok(Some(value));
+                }
+                Ordering::Greater => {}
+            }
+        }
+
+        self.0.push((key, value));
+        Ok(None)
+    }
+
+    /// Clone the map into two halves around `cutoff`.
+    ///
+    /// Entries with keys strictly below `cutoff` go to the first map; entries
+    /// with keys at or above `cutoff` go to the second. `self` is not modified.
+    pub fn split_at(&self, cutoff: &K) -> (Self, Self)
+    where
+        K: Clone,
+        V: Clone,
+    {
+        // On a miss, binary search reports the insertion point, which is
+        // exactly the index of the first entry greater than `cutoff`.
+        let pivot = match self.0.binary_search_by_key(&cutoff, extract_key) {
+            Ok(found_at) => found_at,
+            Err(insert_at) => insert_at,
+        };
+        let (below, at_or_above) = self.0.split_at(pivot);
+
+        (VecMap(below.to_vec()), VecMap(at_or_above.to_vec()))
+    }
+
+    /// Move items from `other` to the end of `self`, leaving `other` empty.
+    /// If any key in `other` is less than or equal to any key in `self`,
+    /// `InvalidKey` error will be returned and no mutation will occur.
+    pub fn extend(&mut self, other: &mut Self) -> Result<(), InvalidKey> {
+        let self_last_opt = self.0.last().map(extract_key);
+        // `append` keeps keys ascending, so comparing our largest key with the
+        // smallest (first) key of `other` covers every pair of keys.
+        let other_first_opt = other.0.first().map(extract_key);
+        if let (Some(self_last), Some(other_first)) = (self_last_opt, other_first_opt) {
+            if self_last >= other_first {
+                return Err(InvalidKey);
+            }
+        }
+
+        self.0.append(&mut other.0);
+        Ok(())
+    }
+}
+
+fn extract_key<K, V>(entry: &(K, V)) -> &K {
+    &entry.0
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{collections::BTreeMap, ops::Bound};
+
+    use super::VecMap;
+
+    #[test]
+    fn unbounded_range() {
+        let mut vec = VecMap::default();
+        vec.append(0, ()).unwrap();
+
+        assert_eq!(vec.slice_range(0..0), &[]);
+    }
+
+    #[test]
+    #[should_panic]
+    fn invalid_ordering_range() {
+        let mut vec = VecMap::default();
+        vec.append(0, ()).unwrap();
+
+        #[allow(clippy::reversed_empty_ranges)]
+        vec.slice_range(1..0);
+    }
+
+    #[test]
+    fn range_tests() {
+        let mut vec = VecMap::default();
+        vec.append(0, ()).unwrap();
+        vec.append(2, ()).unwrap();
+        vec.append(4, ()).unwrap();
+
+        // Included lower bound, excluded upper bound.
+        assert_eq!(vec.slice_range(0..0), &[]);
+        assert_eq!(vec.slice_range(0..1), &[(0, ())]);
+        assert_eq!(vec.slice_range(0..2), &[(0, ())]);
+        assert_eq!(vec.slice_range(0..3), &[(0, ()), (2, ())]);
+        // Unbounded lower bound, excluded upper bound.
+        assert_eq!(vec.slice_range(..0), &[]);
+        assert_eq!(vec.slice_range(..1), &[(0, ())]);
+        assert_eq!(vec.slice_range(..2), &[(0, ())]);
+        assert_eq!(vec.slice_range(..3), &[(0, ()), (2, ())]);
+        // Included lower bound, included upper bound.
+        assert_eq!(vec.slice_range(0..=0), &[(0, ())]);
+        assert_eq!(vec.slice_range(0..=1), &[(0, ())]);
+        assert_eq!(vec.slice_range(0..=2), &[(0, ()), (2, ())]);
+        assert_eq!(vec.slice_range(0..=3), &[(0, ()), (2, ())]);
+        // Unbounded lower bound, included upper bound.
+        assert_eq!(vec.slice_range(..=0), &[(0, ())]);
+        assert_eq!(vec.slice_range(..=1), &[(0, ())]);
+        assert_eq!(vec.slice_range(..=2), &[(0, ()), (2, ())]);
+        assert_eq!(vec.slice_range(..=3), &[(0, ()), (2, ())]);
+    }
+
+    struct BoundIter {
+        min: i32,
+        max: i32,
+
+        next: Option<Bound<i32>>,
+    }
+
+    impl BoundIter {
+        fn new(min: i32, max: i32) -> Self {
+            Self {
+                min,
+                max,
+
+                next: Some(Bound::Unbounded),
+            }
+        }
+    }
+
+    impl Iterator for BoundIter {
+        type Item = Bound<i32>;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            let cur = self.next?;
+
+            self.next = match &cur {
+                Bound::Unbounded => Some(Bound::Included(self.min)),
+                Bound::Included(x) => {
+                    if *x >= self.max {
+                        Some(Bound::Excluded(self.min))
+                    } else {
+                        Some(Bound::Included(x + 1))
+                    }
+                }
+                Bound::Excluded(x) => {
+                    if *x >= self.max {
+                        None
+                    } else {
+                        Some(Bound::Excluded(x + 1))
+                    }
+                }
+            };
+
+            Some(cur)
+        }
+    }
+
+    #[test]
+    fn range_exhaustive() {
+        let map: BTreeMap<i32, ()> = (1..=7).step_by(2).map(|x| (x, ())).collect();
+        let mut vec = VecMap::default();
+        for &key in map.keys() {
+            vec.append(key, ()).unwrap();
+        }
+
+        const RANGE_MIN: i32 = 0;
+        const RANGE_MAX: i32 = 8;
+        for lower_bound in BoundIter::new(RANGE_MIN, RANGE_MAX) {
+            let ub_min = match lower_bound {
+                Bound::Unbounded => RANGE_MIN,
+                Bound::Included(x) => x,
+                Bound::Excluded(x) => x + 1,
+            };
+            for upper_bound in BoundIter::new(ub_min, RANGE_MAX) {
+                let map_range: Vec<(i32, ())> = map
+                    .range((lower_bound, upper_bound))
+                    .map(|(&x, _)| (x, ()))
+                    .collect();
+                let vec_slice = vec.slice_range((lower_bound, upper_bound));
+
+                assert_eq!(map_range, vec_slice);
+            }
+        }
+    }
+
+    #[test]
+    fn extend() {
+        let mut left = VecMap::default();
+        left.append(0, ()).unwrap();
+        assert_eq!(left.as_slice(), &[(0, ())]);
+
+        let mut empty = VecMap::default();
+        left.extend(&mut empty).unwrap();
+        assert_eq!(left.as_slice(), &[(0, ())]);
+        assert_eq!(empty.as_slice(), &[]);
+
+        let mut right = VecMap::default();
+        right.append(1, ()).unwrap();
+
+        left.extend(&mut right).unwrap();
+
+        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);
+        assert_eq!(right.as_slice(), &[]);
+
+        let mut zero_map = VecMap::default();
+        zero_map.append(0, ()).unwrap();
+
+        left.extend(&mut zero_map).unwrap_err();
+        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);
+        assert_eq!(zero_map.as_slice(), &[(0, ())]);
+
+        let mut one_map = VecMap::default();
+        one_map.append(1, ()).unwrap();
+
+        left.extend(&mut one_map).unwrap_err();
+        assert_eq!(left.as_slice(), &[(0, ()), (1, ())]);
+        assert_eq!(one_map.as_slice(), &[(1, ())]);
+    }
+}
diff --git a/zenith_utils/tests/ssl_test.rs b/zenith_utils/tests/ssl_test.rs
index ba0f63d6ec..2a597700ae 100644
--- a/zenith_utils/tests/ssl_test.rs
+++ b/zenith_utils/tests/ssl_test.rs
@@ -110,7 +110,7 @@ fn ssl() {
         .unwrap();
     let tls_config = Some(Arc::new(cfg));
 
-    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config).unwrap();
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config, true).unwrap();
     pgb.run(&mut handler).unwrap();
     assert!(handler.got_query);
 
@@ -150,7 +150,7 @@ fn no_ssl() {
 
     let mut handler = TestHandler;
 
-    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, None).unwrap();
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, None, true).unwrap();
     pgb.run(&mut handler).unwrap();
 
     client_jh.join().unwrap();
@@ -214,7 +214,7 @@ fn server_forces_ssl() {
         .unwrap();
     let tls_config = Some(Arc::new(cfg));
 
-    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config).unwrap();
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config, true).unwrap();
     let res = pgb.run(&mut handler).unwrap_err();
     assert_eq!("client did not connect with TLS", format!("{}", res));