Merged with main.

2026-06-02 13:00:37 +00:00 · 2022-06-02 00:31:41 +02:00
parent 6ec80c0015 9ec312ce98
commit 2bfef5514e
114 changed files with 2808 additions and 1124 deletions
--- a/.circleci/ansible/get_binaries.sh
+++ b/.circleci/ansible/get_binaries.sh
@@ -6,7 +6,7 @@ RELEASE=${RELEASE:-false}

 # look at docker hub for latest tag for neon docker image
 if [ "${RELEASE}" = "true" ]; then
-    echo "search latest relase tag"
+    echo "search latest release tag"
    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
@@ -31,7 +31,7 @@ echo "found ${VERSION}"
 rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
 mkdir neon_install

-# retrive binaries from docker image
+# retrieve binaries from docker image
 echo "getting binaries from docker image"
 docker pull --quiet neondatabase/neon:${TAG}
 ID=$(docker create neondatabase/neon:${TAG})
--- a/.circleci/ansible/staging.hosts
+++ b/.circleci/ansible/staging.hosts
@@ -3,7 +3,6 @@
 zenith-us-stage-ps-2 console_region_id=27

 [safekeepers]
-zenith-us-stage-sk-1 console_region_id=27
 zenith-us-stage-sk-4 console_region_id=27
 zenith-us-stage-sk-5 console_region_id=27
 zenith-us-stage-sk-6 console_region_id=27
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -453,9 +453,6 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:latest image and push it to Docker hub
-      # TODO: this should probably also use versioned tag, not just :latest.
-      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
@@ -463,7 +460,10 @@ jobs:
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
+              --tag neondatabase/compute-tools:local \
+              --tag neondatabase/compute-tools:latest \
+              -f Dockerfile.compute-tools .
+            # Only push :latest image
            docker push neondatabase/compute-tools:latest
      - run:
          name: Init postgres submodule
@@ -473,7 +473,9 @@ jobs:
          command: |
            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
+              --tag neondatabase/compute-node:latest vendor/postgres \
+              --build-arg COMPUTE_TOOLS_TAG=local
            docker push neondatabase/compute-node:${DOCKER_TAG}
            docker push neondatabase/compute-node:latest

@@ -510,9 +512,6 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:release image and push it to Docker hub
-      # TODO: this should probably also use versioned tag, not just :latest.
-      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
@@ -520,7 +519,10 @@ jobs:
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
+              --tag neondatabase/compute-tools:release \
+              --tag neondatabase/compute-tools:local \
+              -f Dockerfile.compute-tools .
+            # Only push :release image
            docker push neondatabase/compute-tools:release
      - run:
          name: Init postgres submodule
@@ -530,7 +532,9 @@ jobs:
          command: |
            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
+              --tag neondatabase/compute-node:release vendor/postgres \
+              --build-arg COMPUTE_TOOLS_TAG=local
            docker push neondatabase/compute-node:${DOCKER_TAG}
            docker push neondatabase/compute-node:release

--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -19,7 +19,7 @@ jobs:
  bench:
    # this workflow runs on self hosteed runner
    # it's environment is quite different from usual guthub runner
-    # probably the most important difference is that it doesnt start from clean workspace each time
+    # probably the most important difference is that it doesn't start from clean workspace each time
    # e g if you install system packages they are not cleaned up since you install them directly in host machine
    # not a container or something
    # See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -40,11 +40,11 @@ jobs:
        if: matrix.os == 'ubuntu-latest'
        run: |
          sudo apt update
-          sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
+          sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev

-      - name: Install macOs postgres dependencies
+      - name: Install macOS postgres dependencies
        if: matrix.os == 'macos-latest'
-        run: brew install flex bison
+        run: brew install flex bison openssl

      - name: Set pg revision for caching
        id: pg_ver
@@ -58,10 +58,27 @@ jobs:
            tmp_install/
          key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}

+      - name: Set extra env for macOS
+        if: matrix.os == 'macos-latest'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: make postgres

+      # Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
+      # and the real cause will be inside config.log
+      - name: Print configure logs in case of failure
+        if: failure()
+        continue-on-error: true
+        run: |
+          echo '' && echo '=== config.log ===' && echo ''
+          cat tmp_install/build/config.log
+          echo '' && echo '=== configure.log ===' && echo ''
+          cat tmp_install/build/configure.log
+
      - name: Cache cargo deps
        id: cache_cargo
        uses: actions/cache@v2
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -363,6 +363,16 @@ dependencies = [
 "textwrap 0.14.2",
 ]

+[[package]]
+name = "close_fds"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bc416f33de9d59e79e57560f450d21ff8393adcf1cdfc3e6d8fb93d5f88a2ed"
+dependencies = [
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "cmake"
 version = "0.1.48"
@@ -1789,6 +1799,7 @@ dependencies = [
 "bytes",
 "chrono",
 "clap 3.0.14",
+ "close_fds",
 "const_format",
 "crc32c",
 "crossbeam-utils",
--- a/2
+++ b/2
@@ -25,7 +25,7 @@ COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/inclu
 COPY . .

 # Show build caching stats to check if it was used in the end.
-# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
+# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
    && sudo -E "PATH=$PATH" mold -run cargo build --release \
    && cachepot -s
--- a/Dockerfile.alpine
+++ b/Dockerfile.alpine
@@ -1,95 +0,0 @@
-#
-# Docker image for console integration testing.
-#
-# We may also reuse it in CI to unify installation process and as a general binaries building
-# tool for production servers.
-#
-# Dynamic linking is used for librocksdb and libstdc++ bacause librocksdb-sys calls
-# bindgen with "dynamic" feature flag. This also prevents usage of dockerhub alpine-rust
-# images which are statically linked and have guards against any dlopen. I would rather
-# prefer all static binaries so we may change the way librocksdb-sys builds or wait until
-# we will have our own storage and drop rockdb dependency.
-#
-# Cargo-chef is used to separate dependencies building from main binaries building. This
-# way `docker build` will download and install dependencies only of there are changes to
-# out Cargo.toml files.
-#
-
-
-#
-# build postgres separately -- this layer will be rebuilt only if one of
-# mentioned paths will get any changes
-#
-FROM alpine:3.13 as pg-build
-RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
-                     make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
-WORKDIR zenith
-COPY ./vendor/postgres vendor/postgres
-COPY ./Makefile Makefile
-# Build using clang and lld
-RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
-
-#
-# Calculate cargo dependencies.
-# This will always run, but only generate recipe.json with list of dependencies without
-# installing them.
-#
-FROM alpine:20210212 as cargo-deps-inspect
-RUN apk add --update rust cargo
-RUN cargo install cargo-chef
-WORKDIR zenith
-COPY . .
-RUN cargo chef prepare --recipe-path recipe.json
-
-#
-# Build cargo dependencies.
-# This temp cantainner would be build only if recipe.json was changed.
-#
-FROM alpine:20210212 as deps-build
-RUN apk add --update rust cargo openssl-dev clang build-base
-# rust-rocksdb can be built against system-wide rocksdb -- that saves about
-# 10 minutes during build. Rocksdb apk package is in testing now, but use it
-# anyway. In case of any troubles we can download and build rocksdb here manually
-# (to cache it as a docker layer).
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
-WORKDIR zenith
-COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
-COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
-COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
-RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
-
-#
-# Build zenith binaries
-#
-FROM alpine:20210212 as build
-RUN apk add --update rust cargo openssl-dev clang build-base
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
-WORKDIR zenith
-COPY . .
-# Copy cached dependencies
-COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
-COPY --from=deps-build /zenith/target target
-COPY --from=deps-build /root/.cargo /root/.cargo
-RUN cargo build --release
-
-#
-# Copy binaries to resulting image.
-# build-base hare to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
-# out how to statically link rocksdb or avoid it at all).
-#
-FROM alpine:3.13
-RUN apk add --update openssl build-base libseccomp-dev
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
-COPY --from=build /zenith/target/release/pageserver /usr/local/bin
-COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
-COPY --from=build /zenith/target/release/proxy /usr/local/bin
-COPY --from=pg-build /zenith/tmp_install /usr/local
-COPY docker-entrypoint.sh /docker-entrypoint.sh
-
-RUN addgroup zenith && adduser -h /data -D -G zenith zenith
-VOLUME ["/data"]
-WORKDIR /data
-USER zenith
-EXPOSE 6400
-ENTRYPOINT ["/docker-entrypoint.sh"]
-CMD ["pageserver"]
--- a/12
+++ b/12
@@ -26,7 +26,7 @@ endif
 # macOS with brew-installed openssl requires explicit paths
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Darwin)
-    PG_CONFIGURE_OPTS += --with-includes=/usr/local/opt/openssl/include --with-libraries=/usr/local/opt/openssl/lib
+    PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
 endif

 # Choose whether we should be silent or verbose
@@ -74,16 +74,16 @@ postgres-headers: postgres-configure
 	+@echo "Installing PostgreSQL headers"
 	$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install

-# Compile and install PostgreSQL and contrib/zenith
+# Compile and install PostgreSQL and contrib/neon
 .PHONY: postgres
 postgres: postgres-configure \
 		  postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
 	+@echo "Compiling PostgreSQL"
 	$(MAKE) -C tmp_install/build MAKELEVEL=0 install
-	+@echo "Compiling contrib/zenith"
-	$(MAKE) -C tmp_install/build/contrib/zenith install
-	+@echo "Compiling contrib/zenith_test_utils"
-	$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
+	+@echo "Compiling contrib/neon"
+	$(MAKE) -C tmp_install/build/contrib/neon install
+	+@echo "Compiling contrib/neon_test_utils"
+	$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
 	+@echo "Compiling pg_buffercache"
 	$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect"
--- a/README.md
+++ b/README.md
@@ -5,6 +5,11 @@ Neon is a serverless open source alternative to AWS Aurora Postgres. It separate
 The project used to be called "Zenith". Many of the commands and code comments
 still refer to "zenith", but we are in the process of renaming things.

+## Quick start
+[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc.) or use the online SQL editor.
+
+Alternatively, compile and run the project [locally](#running-local-installation).
+
 ## Architecture overview

 A Neon installation consists of compute nodes and Neon storage engine.
@@ -24,13 +29,18 @@ Pageserver consists of:
 ## Running local installation


-#### building on Ubuntu/ Debian (Linux)
+#### building on Linux
 1. Install build dependencies and other useful packages

-On Ubuntu or Debian this set of packages should be sufficient to build the code:
-```text
+* On Ubuntu or Debian this set of packages should be sufficient to build the code:
+```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
-libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
+libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
+```
+* On Fedora these packages are needed:
+```bash
+dnf install flex bison readline-devel zlib-devel openssl-devel \
+  libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
 ```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -39,16 +49,11 @@ libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 ```

-3. Install PostgreSQL Client
-```
-apt install postgresql-client
-```
-
-4. Build neon and patched postgres
+3. Build neon and patched postgres
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git
 cd neon
-make -j5
+make -j`nproc`
 ```

 #### building on OSX (12.3.1)
@@ -108,7 +113,7 @@ Safekeeper started
 > ./target/debug/neon_local pg start main
 Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
 Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
-Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
+Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

 # check list of running postgres instances
 > ./target/debug/neon_local pg list
@@ -118,7 +123,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=po

 2. Now it is possible to connect to postgres and run some queries:
 ```text
-> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# CREATE TABLE t(key int primary key, value text);
 CREATE TABLE
 postgres=# insert into t values(1,1);
@@ -145,7 +150,7 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
 > ./target/debug/neon_local pg start migration_check --branch-name migration_check
 Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
 Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
-Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
+Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

 # check the new list of running postgres instances
 > ./target/debug/neon_local pg list
@@ -155,7 +160,7 @@ Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=po

 # this new postgres instance will have all the data from 'main' postgres,
 # but all modifications would not affect data in original postgres
-> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# select * from t;
 key | value
 -----+-------
@@ -166,7 +171,7 @@ postgres=# insert into t values(2,2);
 INSERT 0 1

 # check that the new change doesn't affect the 'main' postgres
-> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# select * from t;
 key | value
 -----+-------
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -22,7 +22,7 @@ Also `compute_ctl` spawns two separate service threads:
 Usage example:
 ```sh
 compute_ctl -D /var/db/postgres/compute \
-            -C 'postgresql://zenith_admin@localhost/postgres' \
+            -C 'postgresql://cloud_admin@localhost/postgres' \
            -S /var/db/postgres/specs/current.json \
            -b /usr/local/bin/postgres
 ```
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -21,7 +21,7 @@
 //! Usage example:
 //! ```sh
 //! compute_ctl -D /var/db/postgres/compute \
-//!             -C 'postgresql://zenith_admin@localhost/postgres' \
+//!             -C 'postgresql://cloud_admin@localhost/postgres' \
 //!             -S /var/db/postgres/specs/current.json \
 //!             -b /usr/local/bin/postgres
 //! ```
@@ -116,17 +116,17 @@ fn main() -> Result<()> {
    let pageserver_connstr = spec
        .cluster
        .settings
-        .find("zenith.page_server_connstring")
+        .find("neon.pageserver_connstring")
        .expect("pageserver connstr should be provided");
    let tenant = spec
        .cluster
        .settings
-        .find("zenith.zenith_tenant")
+        .find("neon.tenant_id")
        .expect("tenant id should be provided");
    let timeline = spec
        .cluster
        .settings
-        .find("zenith.zenith_timeline")
+        .find("neon.timeline_id")
        .expect("tenant id should be provided");

    let compute_state = ComputeNode {
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -262,7 +262,30 @@ impl ComputeNode {
            .unwrap_or_else(|| "5432".to_string());
        wait_for_postgres(&mut pg, &port, pgdata_path)?;

-        let mut client = Client::connect(&self.connstr, NoTls)?;
+        // If connection fails,
+        // it may be the old node with `zenith_admin` superuser.
+        //
+        // In this case we need to connect with the old `zenith_admin` name
+        // and create new user. We cannot simply rename connected user,
+        // but we can create a new one and grant it all privileges.
+        let mut client = match Client::connect(&self.connstr, NoTls) {
+            Err(e) => {
+                info!(
+                    "cannot connect to postgres: {}, retrying with `zenith_admin` username",
+                    e
+                );
+                let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
+
+                let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
+                client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
+                client.simple_query("GRANT zenith_admin TO cloud_admin")?;
+                drop(client);
+
+                // reconnect using the connection string with the expected name
+                Client::connect(&self.connstr, NoTls)?
+            }
+            Ok(client) => client,
+        };

        handle_roles(&self.spec, &mut client)?;
        handle_databases(&self.spec, &mut client)?;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -43,7 +43,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                         FROM pg_stat_activity
                         WHERE backend_type = 'client backend'
                            AND pid != pg_backend_pid()
-                            AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
+                            AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
                        &[],
                    );
                let mut last_active = compute.state.read().unwrap().last_active;
--- a/compute_tools/tests/cluster_spec.json
+++ b/compute_tools/tests/cluster_spec.json
@@ -85,7 +85,7 @@
                "vartype": "bool"
            },
            {
-                "name": "wal_acceptors",
+                "name": "safekeepers",
                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
                "vartype": "string"
            },
@@ -150,7 +150,7 @@
                "vartype": "integer"
            },
            {
-                "name": "zenith.zenith_tenant",
+                "name": "neon.tenant_id",
                "value": "b0554b632bd4d547a63b86c3630317e8",
                "vartype": "string"
            },
@@ -160,13 +160,13 @@
                "vartype": "integer"
            },
            {
-                "name": "zenith.zenith_timeline",
+                "name": "neon.timeline_id",
                "value": "2414a61ffc94e428f14b5758fe308e13",
                "vartype": "string"
            },
            {
                "name": "shared_preload_libraries",
-                "value": "zenith",
+                "value": "neon",
                "vartype": "string"
            },
            {
@@ -175,7 +175,7 @@
                "vartype": "string"
            },
            {
-                "name": "zenith.page_server_connstring",
+                "name": "neon.pageserver_connstring",
                "value": "host=127.0.0.1 port=6400",
                "vartype": "string"
            }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -28,7 +28,7 @@ mod pg_helpers_tests {

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),
-            "fsync = off\nwal_level = replica\nhot_standby = on\nwal_acceptors = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nzenith.zenith_tenant = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nzenith.zenith_timeline = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'zenith'\nsynchronous_standby_names = 'walproposer'\nzenith.page_server_connstring = 'host=127.0.0.1 port=6400'"
+            "fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
        );
    }

--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -148,9 +148,9 @@ impl PostgresNode {
        // Read a few options from the config file
        let context = format!("in config file {}", cfg_path_str);
        let port: u16 = conf.parse_field("port", &context)?;
-        let timeline_id: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
-        let tenant_id: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
-        let uses_wal_proposer = conf.get("wal_acceptors").is_some();
+        let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
+        let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
+        let uses_wal_proposer = conf.get("safekeepers").is_some();

        // parse recovery_target_lsn, if any
        let recovery_target_lsn: Option<Lsn> =
@@ -303,11 +303,11 @@ impl PostgresNode {
            // uses only needed variables namely host, port, user, password.
            format!("postgresql://no_user:{}@{}:{}", password, host, port)
        };
-        conf.append("shared_preload_libraries", "zenith");
+        conf.append("shared_preload_libraries", "neon");
        conf.append_line("");
-        conf.append("zenith.page_server_connstring", &pageserver_connstr);
-        conf.append("zenith.zenith_tenant", &self.tenant_id.to_string());
-        conf.append("zenith.zenith_timeline", &self.timeline_id.to_string());
+        conf.append("neon.pageserver_connstring", &pageserver_connstr);
+        conf.append("neon.tenant_id", &self.tenant_id.to_string());
+        conf.append("neon.timeline_id", &self.timeline_id.to_string());
        if let Some(lsn) = self.lsn {
            conf.append("recovery_target_lsn", &lsn.to_string());
        }
@@ -341,7 +341,7 @@ impl PostgresNode {
                .map(|sk| format!("localhost:{}", sk.pg_port))
                .collect::<Vec<String>>()
                .join(",");
-            conf.append("wal_acceptors", &safekeepers);
+            conf.append("safekeepers", &safekeepers);
        } else {
            // We only use setup without safekeepers for tests,
            // and don't care about data durability on pageserver,
@@ -352,7 +352,6 @@ impl PostgresNode {
            // This isn't really a supported configuration, but can be useful for
            // testing.
            conf.append("synchronous_standby_names", "pageserver");
-            conf.append("zenith.callmemaybe_connstring", &self.connstr());
        }

        let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
@@ -499,7 +498,7 @@ impl PostgresNode {
            "host={} port={} user={} dbname={}",
            self.address.ip(),
            self.address.port(),
-            "zenith_admin",
+            "cloud_admin",
            "postgres"
        )
    }
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -77,7 +77,7 @@ pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_pid_file_path = etcd_pid_file_path(env);
    let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
        format!(
-            "Failed to read etcd pid filea at {}",
+            "Failed to read etcd pid file at {}",
            etcd_pid_file_path.display()
        )
    })?);
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -119,16 +119,24 @@ impl EtcdBroker {
    }

    pub fn comma_separated_endpoints(&self) -> String {
-        self.broker_endpoints.iter().map(Url::as_str).fold(
-            String::new(),
-            |mut comma_separated_urls, url| {
+        self.broker_endpoints
+            .iter()
+            .map(|url| {
+                // URL by default adds a '/' path at the end, which is not what etcd CLI wants.
+                let url_string = url.as_str();
+                if url_string.ends_with('/') {
+                    &url_string[0..url_string.len() - 1]
+                } else {
+                    url_string
+                }
+            })
+            .fold(String::new(), |mut comma_separated_urls, url| {
                if !comma_separated_urls.is_empty() {
                    comma_separated_urls.push(',');
                }
                comma_separated_urls.push_str(url);
                comma_separated_urls
-            },
-        )
+            })
    }
 }

--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,6 +1,7 @@
 use std::collections::HashMap;
 use std::io::Write;
 use std::net::TcpStream;
+use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::process::Command;
 use std::time::Duration;
@@ -11,6 +12,7 @@ use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
 use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
+use pageserver::tenant_mgr::TenantInfo;
 use pageserver::timelines::TimelineInfo;
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -26,7 +28,6 @@ use utils::{

 use crate::local_env::LocalEnv;
 use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
-use pageserver::tenant_mgr::TenantInfo;

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -37,6 +38,12 @@ pub enum PageserverHttpError {
    Response(String),
 }

+impl From<anyhow::Error> for PageserverHttpError {
+    fn from(e: anyhow::Error) -> Self {
+        Self::Response(e.to_string())
+    }
+}
+
 type Result<T> = result::Result<T, PageserverHttpError>;

 pub trait ResponseErrorMessageExt: Sized {
@@ -410,6 +417,15 @@ impl PageServerNode {
                    .map(|x| x.parse::<usize>())
                    .transpose()?,
                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                walreceiver_connect_timeout: settings
+                    .get("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
+                max_lsn_wal_lag: settings
+                    .get("max_lsn_wal_lag")
+                    .map(|x| x.parse::<NonZeroU64>())
+                    .transpose()
+                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
            })
            .send()?
            .error_from_body()?
@@ -433,22 +449,41 @@ impl PageServerNode {
                tenant_id,
                checkpoint_distance: settings
                    .get("checkpoint_distance")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
                compaction_target_size: settings
                    .get("compaction_target_size")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'compaction_target_size' as an integer")?,
                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
                compaction_threshold: settings
                    .get("compaction_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
+                    .map(|x| x.parse::<usize>())
+                    .transpose()
+                    .context("Failed to parse 'compaction_threshold' as an integer")?,
                gc_horizon: settings
                    .get("gc_horizon")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'gc_horizon' as an integer")?,
                gc_period: settings.get("gc_period").map(|x| x.to_string()),
                image_creation_threshold: settings
                    .get("image_creation_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
+                    .map(|x| x.parse::<usize>())
+                    .transpose()
+                    .context("Failed to parse 'image_creation_threshold' as non zero integer")?,
                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                walreceiver_connect_timeout: settings
+                    .get("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
+                max_lsn_wal_lag: settings
+                    .get("max_lsn_wal_lag")
+                    .map(|x| x.parse::<NonZeroU64>())
+                    .transpose()
+                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
            })
            .send()?
            .error_from_body()?;
--- a/docs/README.md
+++ b/docs/README.md
@@ -6,7 +6,7 @@
 - [docker.md](docker.md) — Docker images and building pipeline.
 - [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
 - [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
+- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
 - [pageserver/README.md](/pageserver/README.md) — pageserver overview.
 - [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
 - [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -2,7 +2,7 @@

 ### Authentication

-### Backpresssure
+### Backpressure

 Backpressure is used to limit the lag between pageserver and compute node or WAL service.

@@ -115,7 +115,7 @@ Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/RE
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
-* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.
+* `VCL`: the largest LSN for which we can guarantee availability of all prior records.

 Neon pageserver LSNs:
 * `last_record_lsn` - the end of last processed WAL record.
--- a/docs/multitenancy.md
+++ b/docs/multitenancy.md
@@ -6,7 +6,7 @@ Zenith supports multitenancy. One pageserver can serve multiple tenants at once.

 ### Tenants in other commands

-By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct arugment `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
+By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct argument `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.

 Examples for cli:

--- a/docs/rfcs/002-storage.md
+++ b/docs/rfcs/002-storage.md
@@ -111,13 +111,13 @@ Since we are storing page diffs of variable sizes there is no structural depende

 ### **Chunk metadata**

-Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunck should always consult this data when merging SSTables and applying delete markers.
+Chunk metadata is a file that lies in the chunk directory and stores info about current snapshots and PITR regions. Chunk should always consult this data when merging SSTables and applying delete markers.

 ### **Chunk splitting**

 *(NB: following paragraph is about how to avoid page splitting)*

-When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global matadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
+When a chunk hits some soft storage limit (let's say 100Gb) it should be split in half and global metadata about chunk boundaries should be updated. Here I assume that chunk split is a local operation happening on a single node. The process of chunk splitting should look like the following:

 1. Find separation key and spawn two new chunks with [lo, mid) [mid, hi) boundaries.

@@ -166,7 +166,7 @@ Multi-tenant storage makes sense even on a laptop, when you work with different

 Few databases are stored in one chunk, replicated three times

- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we alway may manually move chunks around the cluster.
+- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we always may manually move chunks around the cluster.

 <img width="940" alt="Screenshot_2021-02-22_at_16 49 10" src="https://user-images.githubusercontent.com/284219/108729815-fb071e00-753b-11eb-86e0-be6703e47d82.png">

--- a/docs/rfcs/003-laptop-cli.md
+++ b/docs/rfcs/003-laptop-cli.md
@@ -123,7 +123,7 @@ Show currently attached storages. For example:
 > zenith storage list
 NAME            USED    TYPE                OPTIONS          PATH
 local           5.1G    zenith-local                         /opt/zenith/store/local
-local.compr     20.4G   zenith-local        comression=on    /opt/zenith/store/local.compr
+local.compr     20.4G   zenith-local        compression=on   /opt/zenith/store/local.compr
 zcloud          60G     zenith-remote                        zenith.tech/stas/mystore
 s3tank          80G     S3
 ```
@@ -136,9 +136,9 @@ s3tank          80G     S3

 ## pg

-Manages postgres data directories and can start postgreses with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themself.
+Manages postgres data directories and can start postgres instances with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themselves.

-Pg is a term for a single postgres running on some data. I'm trying to avoid here separation of datadir management and postgres instance management -- both that concepts bundled here together.
+Pg is a term for a single postgres running on some data. I'm trying to avoid separation of datadir management and postgres instance management -- both of those concepts are bundled together here.

 **zenith pg create** [--no-start --snapshot --cow] -s storage-name -n pgdata

--- a/docs/rfcs/005-zenith_local.md
+++ b/docs/rfcs/005-zenith_local.md
@@ -31,7 +31,7 @@ Ideally, just one binary that incorporates all elements we need.

 #### Components:

- **zenith-CLI** - interface for end-users.  Turns commands to REST requests and handles responces to show them in a user-friendly way.  
+- **zenith-CLI** - interface for end-users.  Turns commands to REST requests and handles responses to show them in a user-friendly way.  
 CLI proposal is here https://github.com/libzenith/rfcs/blob/003-laptop-cli.md/003-laptop-cli.md
 WIP code is here: https://github.com/libzenith/postgres/tree/main/pageserver/src/bin/cli

--- a/docs/rfcs/006-laptop-cli-v2-CLI.md
+++ b/docs/rfcs/006-laptop-cli-v2-CLI.md
@@ -25,9 +25,9 @@ To make changes in the catalog you need to run compute nodes
 zenith start /home/pipedpiper/northwind:main -- starts a compute instance
 zenith start zenith://zenith.tech/northwind:main -- starts a compute instance in the cloud
 -- you can start a compute node against any hash or branch
-zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start anothe compute instance (on different port)
+zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start another compute instance (on different port)
 -- you can start a compute node against any hash or branch
-zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start anothe compute instance (on different port)
+zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start another compute instance (on different port)

 -- After running some DML you can run 
 -- zenith status and see how there are two WAL streams one on top of 
--- a/docs/rfcs/006-laptop-cli-v2-repository-structure.md
+++ b/docs/rfcs/006-laptop-cli-v2-repository-structure.md
@@ -121,7 +121,7 @@ repository, launch an instance on the same branch in both clones, and
 later try to push/pull between them? Perhaps create a new timeline
 every time you start up an instance? Then you would detect that the
 timelines have diverged. That would match with the "epoch" concept
-that we have in the WAL safekeepr
+that we have in the WAL safekeeper

 ### zenith checkout/commit

--- a/docs/rfcs/009-snapshot-first-storage-cli.md
+++ b/docs/rfcs/009-snapshot-first-storage-cli.md
@@ -2,9 +2,9 @@ While working on export/import commands, I understood that they fit really well

 We may think about backups as snapshots in a different format (i.e plain pgdata format, basebackup tar format, WAL-G format (if they want to support it) and so on). They use same storage API, the only difference is the code that packs/unpacks files.

-Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postges to zenith.
+Even if zenith aims to maintain durability using its own snapshots, backups will be useful for uploading data from postgres to zenith.

-So here is an attemt to design consistent CLI for diferent usage scenarios:
+So here is an attempt to design consistent CLI for different usage scenarios:

 #### 1. Start empty pageserver.
 That is what we have now.
--- a/docs/rfcs/009-snapshot-first-storage-pitr.md
+++ b/docs/rfcs/009-snapshot-first-storage-pitr.md
@@ -3,7 +3,7 @@
 GetPage@LSN can be called with older LSNs, and the page server needs
 to be able to reconstruct older page versions. That's needed for
 having read-only replicas that lag behind the primary, or that are
-"anchored" at an older LSN, and internally in the page server whne you
+"anchored" at an older LSN, and internally in the page server when you
 branch at an older point in time. How do you do that?

 For now, I'm not considering incremental snapshots at all. I don't
@@ -192,7 +192,7 @@ for a particular relation readily available alongside the snapshot
 files, and you don't need to track what snapshot LSNs exist
 separately.

-(If we wanted to minize the number of files, you could include the
+(If we wanted to minimize the number of files, you could include the
 snapshot @300 and the WAL between 200 and 300 in the same file, but I
 feel it's probably better to keep them separate)

--- a/docs/rfcs/009-snapshot-first-storage.md
+++ b/docs/rfcs/009-snapshot-first-storage.md
@@ -121,7 +121,7 @@ The properties of s3 that we depend on are:
 list objects
 streaming read of entire object
 read byte range from object
-streaming write new object (may use multipart upload for better relialibity)
+streaming write new object (may use multipart upload for better reliability)
 delete object (that should not disrupt an already-started read).

 Uploaded files, restored backups, or s3 buckets controlled by users could contain malicious content. We should always validate that objects contain the content they’re supposed to. Incorrect, Corrupt or malicious-looking contents should cause software (cloud tools, pageserver) to fail gracefully.
--- a/docs/rfcs/010-storage_details.md
+++ b/docs/rfcs/010-storage_details.md
@@ -40,7 +40,7 @@ b) overwrite older pages with the newer pages -- if there is no replica we proba

 I imagine that newly created pages would just be added to the back of PageStore (again in queue-like fashion) and this way there wouldn't be any meaningful ordering inside of that queue. When we are forming a new incremental snapshot we may prohibit any updates to the current set of pages in PageStore (giving up on single page version rule) and cut off that whole set when snapshot creation is complete.

-With option b) we can also treat PageStor as an uncompleted increamental snapshot.
+With option b) we can also treat PageStore as an uncompleted incremental snapshot.

 ### LocalStore

@@ -123,7 +123,7 @@ As far as I understand Bookfile/Aversion addresses versioning and serialization
 As for exact data that should go to snapshots I think it is the following for each snapshot:

 * format version number
-* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknow key are present. If we add something backward compatible to the file we can keep the version number.
+* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end of the file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknown keys are present. If we add something backward compatible to the file we can keep the version number.
 * array of [BuffTag, corresponding offset in file] for pages -- IIUC that is analogous to ToC in Bookfile
 * array of [(BuffTag, LSN), corresponding offset in file] for the WAL records
 * pages, one by one
@@ -131,7 +131,7 @@ As for exact data that should go to snapshots I think it is the following for ea

 It is also important to be able to load metadata quickly since it would be one of the main factors impacting the time of page server start. E.g. if would store/cache about 10TB of data per page server, the size of uncompressed page references would be about 30GB (10TB / ( 8192 bytes page size / ( ~18 bytes per ObjectTag + 8 bytes offset in the file))).

-1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when realtion_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset delatas would be small).
+1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when relation_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset deltas would be small).
 2) It makes sense to keep ToC at the beginning of the file to avoid extra seeks to locate it. Doesn't matter too much with the local files but matters on S3 -- if we are accessing a lot of ~1Gb files with the size of metadata ~ 1Mb then the time to transfer this metadata would be comparable with access latency itself (which is about a half of a second). So by slurping metadata with one read of file header instead of N reads we can improve the speed of page server start by this N factor.

 I think both of that optimizations can be done later, but that is something to keep in mind when we are designing our storage serialization routines.
--- a/docs/rfcs/013-term-history.md
+++ b/docs/rfcs/013-term-history.md
@@ -7,13 +7,13 @@ and e.g. prevents electing two proposers with the same term -- it is actually
 called `term` in the code. The second, called `epoch`, reflects progress of log
 receival and this might lag behind `term`; safekeeper switches to epoch `n` when
 it has received all committed log records from all `< n` terms. This roughly
-correspones to proposed in
+corresponds to proposed in

 https://github.com/zenithdb/rfcs/pull/3/files


 This makes our biggest our difference from Raft. In Raft, every log record is
-stamped with term in which it was generated; while we essentialy store in
+stamped with term in which it was generated; while we essentially store in
 `epoch` only the term of the highest record on this safekeeper -- when we know
 it -- because during recovery generally we don't, and `epoch` is bumped directly
 to the term of the proposer who performs the recovery when it is finished. It is
--- a/docs/rfcs/015-storage-messaging.md
+++ b/docs/rfcs/015-storage-messaging.md
@@ -124,7 +124,7 @@ Each storage node can subscribe to the relevant sets of keys and maintain a loca

 ### Safekeeper address discovery

-During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertize something more useful.
+During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertise something more useful.

 ### Safekeeper behavior

@@ -195,7 +195,7 @@ sequenceDiagram
    PS1->>SK1: start replication
 ```

-#### Behavour of services during typical operations
+#### Behaviour of services during typical operations

 ```mermaid
 sequenceDiagram
@@ -250,7 +250,7 @@ sequenceDiagram
    PS2->>M: Register downloaded timeline
    PS2->>M: Get safekeepers for timeline, subscribe to changes
    PS2->>SK1: Start replication to catch up
-    note over O: PS2 catched up, time to switch compute
+    note over O: PS2 caught up, time to switch compute
    O->>C: Restart compute with new pageserver url in config
    note over C: Wal push is restarted
    loop request pages
--- a/docs/rfcs/README.md
+++ b/docs/rfcs/README.md
@@ -49,7 +49,7 @@ topics.

 RFC lifecycle:

- Should be submitted in a pull request with and full RFC text in a commited markdown file and copy of the Summary and Motivation sections also included in the PR body.
+- Should be submitted in a pull request with the full RFC text in a committed markdown file and a copy of the Summary and Motivation sections also included in the PR body.
 - RFC should be published for review before most of the actual code is written. This isn’t a strict rule, don’t hesitate to experiment and build a POC in parallel with writing an RFC.
 - Add labels to the PR in the same manner as you do Issues. Example TBD
 - Request the review from your peers. Reviewing the RFCs from your peers is a priority, same as reviewing the actual code.
--- a/docs/rfcs/cluster-size-limits.md
+++ b/docs/rfcs/cluster-size-limits.md
@@ -22,8 +22,8 @@ so we don't want to give users access to the functionality that we don't think i

 * pageserver - calculate the size consumed by a timeline and add it to the feedback message.
 * safekeeper - pass feedback message from pageserver to compute.
-* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
-* console - set and update `zenith.max_cluster_size` setting
+* compute - receive feedback message, enforce size limit based on GUC `neon.max_cluster_size`.
+* console - set and update `neon.max_cluster_size` setting

 ## Proposed implementation

@@ -49,7 +49,7 @@ This message is received by the safekeeper and propagated to compute node as a p

 Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.

-And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
+And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > neon.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
 (see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))

 TODO:
@@ -75,5 +75,5 @@ We should warn users if the limit is soon to be reached.
 ### **Security implications**

 We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
-Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
+Malicious users may change the `neon.max_cluster_size`, so we need an extra size limit check.
 To cover this case, we also monitor the compute node size in the console.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -23,7 +23,7 @@ gc_horizon = '67108864'
 max_file_descriptors = '100'

 # initial superuser role name to use when creating a new tenant
-initial_superuser_name = 'zenith_admin'
+initial_superuser_name = 'cloud_admin'

 broker_etcd_prefix = 'neon'
 broker_endpoints = ['some://etcd']
@@ -31,14 +31,14 @@ broker_endpoints = ['some://etcd']
 # [remote_storage]
 ```

-The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user, 
+The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
 see the corresponding section below.
 Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
 Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.

 Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and

- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
+- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'cloud_admin'`

 - or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`

@@ -54,7 +54,7 @@ Note that TOML distinguishes between strings and integers, the former require si

 A list of endpoints (etcd currently) to connect and pull the information from.
 Mandatory, does not have a default, since requires etcd to be started as a separate process,
-and its connection url should be specified separately. 
+and its connection url should be specified separately.

 #### broker_etcd_prefix

@@ -105,17 +105,31 @@ Interval at which garbage collection is triggered. Default is 100 s.

 #### image_creation_threshold

-L0 delta layer threshold for L1 iamge layer creation. Default is 3.
+L0 delta layer threshold for L1 image layer creation. Default is 3.

 #### pitr_interval

 WAL retention duration for PITR branching. Default is 30 days.

+#### walreceiver_connect_timeout
+
+Time to wait to establish the WAL receiver connection before failing.
+
+#### lagging_wal_timeout
+
+Time the pageserver did not get any WAL updates from safekeeper (if any).
+Prevents the pageserver from lagging by preemptively forcing it to switch away from stalled connections.
+
+#### max_lsn_wal_lag
+
+Difference between Lsn values of the latest available WAL on safekeepers: if the currently connected safekeeper starts to lag too long and too much,
+it gets swapped for a different one.
+
 #### initial_superuser_name

 Name of the initial superuser role, passed to initdb when a new tenant
 is initialized. It doesn't affect anything after initialization. The
-default is Note: The default is 'zenith_admin', and the console
+default is 'cloud_admin'. Note: the console
 depends on that, so if you change it, bad things will happen.

 #### page_cache_size
@@ -185,7 +199,7 @@ If no IAM bucket access is used during the remote storage usage, use the `AWS_AC

 ###### General remote storage configuration

-Pagesever allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
+Pageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
 No default values are used for the remote storage configuration parameters.

 Besides, there are parameters common for all types of remote storage that can be configured, those have defaults:
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -10,7 +10,7 @@ Intended to be used in integration tests and in CLI tools for local installation

 `/docs`:

-Documentaion of the Zenith features and concepts.
+Documentation of the Zenith features and concepts.
 Now it is mostly dev documentation.

 `/monitoring`:
@@ -42,13 +42,13 @@ Integration tests, written in Python using the `pytest` framework.

 `/vendor/postgres`:

-PostgreSQL source tree, with the modifications needed for Zenith.
+PostgreSQL source tree, with the modifications needed for Neon.

-`/vendor/postgres/contrib/zenith`:
+`/vendor/postgres/contrib/neon`:

 PostgreSQL extension that implements storage manager API and network communications with remote page server.

-`/vendor/postgres/contrib/zenith_test_utils`:
+`/vendor/postgres/contrib/neon_test_utils`:

 PostgreSQL extension that contains functions needed for testing and debugging.

@@ -92,7 +92,7 @@ A single virtual environment with all dependencies is described in the single `P

 ### Prerequisites
 - Install Python 3.9 (the minimal supported version) or greater.
-    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
+    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
    - If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
      ```bash
      # In Ubuntu
--- a/libs/etcd_broker/src/lib.rs
+++ b/libs/etcd_broker/src/lib.rs
@@ -31,7 +31,7 @@ struct SafekeeperTimeline {

 /// Published data about safekeeper's timeline. Fields made optional for easy migrations.
 #[serde_as]
-#[derive(Debug, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct SkTimelineInfo {
    /// Term of the last entry.
    pub last_log_term: Option<u64>,
@@ -55,7 +55,9 @@ pub struct SkTimelineInfo {
    #[serde(default)]
    pub peer_horizon_lsn: Option<Lsn>,
    #[serde(default)]
-    pub safekeeper_connection_string: Option<String>,
+    pub safekeeper_connstr: Option<String>,
+    #[serde(default)]
+    pub pageserver_connstr: Option<String>,
 }

 #[derive(Debug, thiserror::Error)]
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,6 +3,7 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 use lazy_static::lazy_static;
+pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_gauge, Gauge};
 pub use prometheus::{register_gauge_vec, GaugeVec};
--- a/libs/postgres_ffi/src/waldecoder.rs
+++ b/libs/postgres_ffi/src/waldecoder.rs
@@ -73,7 +73,7 @@ impl WalStreamDecoder {
    /// Returns one of the following:
    ///     Ok((Lsn, Bytes)): a tuple containing the LSN of next record, and the record itself
    ///     Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
-    ///     Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
+    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
    ///
    pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
        let recordbuf;
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -531,7 +531,7 @@ impl CheckPoint {
    ///
    /// Returns 'true' if the XID was updated.
    pub fn update_next_xid(&mut self, xid: u32) -> bool {
-        // nextXid should nw greate than any XID in WAL, so increment provided XID and check for wraparround.
+        // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparound.
        let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
--- a/libs/postgres_ffi/wal_generate/src/lib.rs
+++ b/libs/postgres_ffi/wal_generate/src/lib.rs
@@ -80,7 +80,7 @@ impl Conf {
            .arg(self.datadir.as_os_str())
            .args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
            .args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
-            .args(&["-c", "shared_preload_libraries=zenith"]) // can only be loaded at startup
+            .args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
            // Disable background processes as much as possible
            .args(&["-c", "wal_writer_delay=10s"])
            .args(&["-c", "autovacuum=off"])
@@ -178,7 +178,7 @@ fn generate_internal<C: postgres::GenericClient>(
    client: &mut C,
    f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
 ) -> Result<PgLsn> {
-    client.execute("create extension if not exists zenith_test_utils", &[])?;
+    client.execute("create extension if not exists neon_test_utils", &[])?;

    let wal_segment_size = client.query_one(
        "select cast(setting as bigint) as setting, unit \
--- a/libs/utils/scripts/restore_from_wal.sh
+++ b/libs/utils/scripts/restore_from_wal.sh
@@ -5,7 +5,7 @@ DATA_DIR=$3
 PORT=$4
 SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
 rm -fr $DATA_DIR
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
 echo port=$PORT >> $DATA_DIR/postgresql.conf
 REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
 declare -i WAL_SIZE=$REDO_POS+114
--- a/libs/utils/scripts/restore_from_wal_archive.sh
+++ b/libs/utils/scripts/restore_from_wal_archive.sh
@@ -5,7 +5,7 @@ PORT=$4
 SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
 rm -fr $DATA_DIR /tmp/pg_wals
 mkdir /tmp/pg_wals
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
 echo port=$PORT >> $DATA_DIR/postgresql.conf
 REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
 declare -i WAL_SIZE=$REDO_POS+114
--- a/libs/utils/src/bin_ser.rs
+++ b/libs/utils/src/bin_ser.rs
@@ -71,7 +71,7 @@ impl From<bincode::Error> for SerializeError {
 /// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
 ///
 /// Does not allow trailing bytes in deserialization. If this is desired, you
-/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
+/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
 pub fn be_coder() -> impl Options {
    bincode::DefaultOptions::new()
        .with_big_endian()
@@ -85,7 +85,7 @@ pub fn be_coder() -> impl Options {
 /// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
 ///
 /// Does not allow trailing bytes in deserialization. If this is desired, you
-/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
+/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
 pub fn le_coder() -> impl Options {
    bincode::DefaultOptions::new()
        .with_little_endian()
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -64,7 +64,7 @@ pub mod signals;
 /// One thing to note is that .git is not available in docker (and it is bad to include it there).
 /// So everything becides docker build is covered by git_version crate, and docker uses a `GIT_VERSION` argument to get the value required.
 /// It takes variable from build process env and puts it to the rustc env. And then we can retrieve it here by using env! macro.
-/// Git version received from environment variable used as a fallback in git_version invokation.
+/// Git version received from environment variable used as a fallback in git_version invocation.
 /// And to avoid running buildscript every recompilation, we use rerun-if-env-changed option.
 /// So the build script will be run only when GIT_VERSION envvar has changed.
 ///
--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -336,11 +336,11 @@ impl PostgresBackend {
        let have_tls = self.tls_config.is_some();
        match msg {
            FeMessage::StartupPacket(m) => {
-                trace!("got startup message {:?}", m);
+                trace!("got startup message {m:?}");

                match m {
                    FeStartupPacket::SslRequest => {
-                        info!("SSL requested");
+                        debug!("SSL requested");

                        self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
                        if have_tls {
@@ -349,7 +349,7 @@ impl PostgresBackend {
                        }
                    }
                    FeStartupPacket::GssEncRequest => {
-                        info!("GSS requested");
+                        debug!("GSS requested");
                        self.write_message(&BeMessage::EncryptionResponse(false))?;
                    }
                    FeStartupPacket::StartupMessage { .. } => {
@@ -433,12 +433,7 @@ impl PostgresBackend {
                    // full cause of the error, not just the top-level context + its trace.
                    // We don't want to send that in the ErrorResponse though,
                    // because it's not relevant to the compute node logs.
-                    if query_string.starts_with("callmemaybe") {
-                        // FIXME avoid printing a backtrace for tenant x not found errors until this is properly fixed
-                        error!("query handler for '{}' failed: {}", query_string, e);
-                    } else {
-                        error!("query handler for '{}' failed: {:?}", query_string, e);
-                    }
+                    error!("query handler for '{}' failed: {:?}", query_string, e);
                    self.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?;
                    // TODO: untangle convoluted control flow
                    if e.to_string().contains("failed to run") {
@@ -475,7 +470,7 @@ impl PostgresBackend {
                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
                }
                // NOTE there is no ReadyForQuery message. This handler is used
-                // for basebackup and it uses CopyOut which doesnt require
+                // for basebackup and it uses CopyOut which doesn't require
                // ReadyForQuery message and backend just switches back to
                // processing mode after sending CopyDone or ErrorResponse.
            }
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -468,7 +468,7 @@ impl BeParameterStatusMessage<'static> {
    }
 }

-// One row desciption in RowDescription packet.
+// One row description in RowDescription packet.
 #[derive(Debug)]
 pub struct RowDescriptor<'a> {
    pub name: &'a [u8],
@@ -617,7 +617,7 @@ fn cstr_to_str(b: &Bytes) -> Result<&str> {
 impl<'a> BeMessage<'a> {
    /// Write message to the given buf.
    // Unlike the reading side, we use BytesMut
-    // here as msg len preceeds its body and it is handy to write it down first
+    // here as msg len precedes its body and it is handy to write it down first
    // and then fill the length. With Write we would have to either calc it
    // manually or have one more buffer.
    pub fn write(buf: &mut BytesMut, message: &BeMessage) -> io::Result<()> {
@@ -1051,7 +1051,7 @@ mod tests {
    #[test]
    fn test_zenithfeedback_serialization() {
        let mut zf = ZenithFeedback::empty();
-        // Fill zf wih some values
+        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
@@ -1066,7 +1066,7 @@ mod tests {
    #[test]
    fn test_zenithfeedback_unknown_key() {
        let mut zf = ZenithFeedback::empty();
-        // Fill zf wih some values
+        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
--- a/libs/utils/src/zid.rs
+++ b/libs/utils/src/zid.rs
@@ -193,7 +193,7 @@ pub struct ZTenantId(ZId);
 zid_newtype!(ZTenantId);

 // A pair uniquely identifying Zenith instance.
-#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct ZTenantTimelineId {
    pub tenant_id: ZTenantId,
    pub timeline_id: ZTimelineId,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"

 [features]
 # It is simpler infra-wise to have failpoints enabled by default
-# It shouldn't affect perf in any way because failpoints
+# It shouldn't affect performance in any way because failpoints
 # are not placed in hot code paths
 default = ["failpoints"]
 profiling = ["pprof"]
@@ -60,6 +60,7 @@ metrics = { path = "../libs/metrics" }
 utils = { path = "../libs/utils" }
 remote_storage = { path = "../libs/remote_storage" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
+close_fds = "0.3.2"

 [dev-dependencies]
 hex-literal = "0.3"
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -34,7 +34,7 @@ pub mod defaults {
    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

-    pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
+    pub const DEFAULT_SUPERUSER: &str = "cloud_admin";

    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
@@ -114,7 +114,7 @@ pub struct PageServerConf {
    pub default_tenant_conf: TenantConf,

    /// A prefix to add in etcd brokers before every key.
-    /// Can be used for isolating different pageserver groups withing the same etcd cluster.
+    /// Can be used for isolating different pageserver groups within the same etcd cluster.
    pub broker_etcd_prefix: String,

    /// Etcd broker endpoints to connect to.
@@ -480,6 +480,21 @@ impl PageServerConf {
        if let Some(pitr_interval) = item.get("pitr_interval") {
            t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
        }
+        if let Some(walreceiver_connect_timeout) = item.get("walreceiver_connect_timeout") {
+            t_conf.walreceiver_connect_timeout = Some(parse_toml_duration(
+                "walreceiver_connect_timeout",
+                walreceiver_connect_timeout,
+            )?);
+        }
+        if let Some(lagging_wal_timeout) = item.get("lagging_wal_timeout") {
+            t_conf.lagging_wal_timeout = Some(parse_toml_duration(
+                "lagging_wal_timeout",
+                lagging_wal_timeout,
+            )?);
+        }
+        if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
+            t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
+        }

        Ok(t_conf)
    }
@@ -499,7 +514,7 @@ impl PageServerConf {
            max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
            listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
-            superuser: "zenith_admin".to_string(),
+            superuser: "cloud_admin".to_string(),
            workdir: repo_dir,
            pg_distrib_dir: PathBuf::new(),
            auth_type: AuthType::Trust,
--- a/pageserver/src/http/models.rs
+++ b/pageserver/src/http/models.rs
@@ -1,3 +1,5 @@
+use std::num::NonZeroU64;
+
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use utils::{
@@ -33,6 +35,9 @@ pub struct TenantCreateRequest {
    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
    pub pitr_interval: Option<String>,
+    pub walreceiver_connect_timeout: Option<String>,
+    pub lagging_wal_timeout: Option<String>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 #[serde_as]
@@ -68,6 +73,9 @@ pub struct TenantConfigRequest {
    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
    pub pitr_interval: Option<String>,
+    pub walreceiver_connect_timeout: Option<String>,
+    pub lagging_wal_timeout: Option<String>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 impl TenantConfigRequest {
@@ -82,6 +90,21 @@ impl TenantConfigRequest {
            gc_period: None,
            image_creation_threshold: None,
            pitr_interval: None,
+            walreceiver_connect_timeout: None,
+            lagging_wal_timeout: None,
+            max_lsn_wal_lag: None,
        }
    }
 }
+
+/// A WAL receiver's data stored inside the global `WAL_RECEIVERS`.
+/// We keep one WAL receiver active per timeline.
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct WalReceiverEntry {
+    pub wal_producer_connstr: Option<String>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub last_received_msg_lsn: Option<Lsn>,
+    /// the timestamp (in microseconds) of the last received message
+    pub last_received_msg_ts: Option<u128>,
+}
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -229,23 +229,16 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
    check_permission(&request, Some(tenant_id))?;

    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+    let wal_receiver_entry = crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
+        .instrument(info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id))
+        .await
+        .ok_or_else(|| {
+            ApiError::NotFound(format!(
+                "WAL receiver data not found for tenant {tenant_id} and timeline {timeline_id}"
+            ))
+        })?;

-    let wal_receiver = tokio::task::spawn_blocking(move || {
-        let _enter =
-            info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id).entered();
-
-        crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
-    })
-    .await
-    .map_err(ApiError::from_err)?
-    .ok_or_else(|| {
-        ApiError::NotFound(format!(
-            "WAL receiver not found for tenant {} and timeline {}",
-            tenant_id, timeline_id
-        ))
-    })?;
-
-    json_response(StatusCode::OK, wal_receiver)
+    json_response(StatusCode::OK, &wal_receiver_entry)
 }

 async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -402,6 +395,19 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
    }

+    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
+        tenant_conf.walreceiver_connect_timeout = Some(
+            humantime::parse_duration(&walreceiver_connect_timeout).map_err(ApiError::from_err)?,
+        );
+    }
+    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
+        tenant_conf.lagging_wal_timeout =
+            Some(humantime::parse_duration(&lagging_wal_timeout).map_err(ApiError::from_err)?);
+    }
+    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+    }
+
    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
    tenant_conf.compaction_threshold = request_data.compaction_threshold;
@@ -449,6 +455,18 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
        tenant_conf.pitr_interval =
            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
    }
+    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
+        tenant_conf.walreceiver_connect_timeout = Some(
+            humantime::parse_duration(&walreceiver_connect_timeout).map_err(ApiError::from_err)?,
+        );
+    }
+    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
+        tenant_conf.lagging_wal_timeout =
+            Some(humantime::parse_duration(&lagging_wal_timeout).map_err(ApiError::from_err)?);
+    }
+    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+    }

    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -15,7 +15,7 @@ pub struct KeySpace {
 impl KeySpace {
    ///
    /// Partition a key space into roughly chunks of roughly 'target_size' bytes
-    /// in each patition.
+    /// in each partition.
    ///
    pub fn partition(&self, target_size: u64) -> KeyPartitioning {
        // Assume that each value is 8k in size.
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -25,6 +25,7 @@ use std::collections::{BTreeSet, HashSet};
 use std::fs;
 use std::fs::{File, OpenOptions};
 use std::io::Write;
+use std::num::NonZeroU64;
 use std::ops::{Bound::Included, Deref, Range};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{self, AtomicBool};
@@ -557,6 +558,27 @@ impl LayeredRepository {
            .unwrap_or(self.conf.default_tenant_conf.pitr_interval)
    }

+    pub fn get_wal_receiver_connect_timeout(&self) -> Duration {
+        let tenant_conf = self.tenant_conf.read().unwrap();
+        tenant_conf
+            .walreceiver_connect_timeout
+            .unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout)
+    }
+
+    pub fn get_lagging_wal_timeout(&self) -> Duration {
+        let tenant_conf = self.tenant_conf.read().unwrap();
+        tenant_conf
+            .lagging_wal_timeout
+            .unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout)
+    }
+
+    pub fn get_max_lsn_wal_lag(&self) -> NonZeroU64 {
+        let tenant_conf = self.tenant_conf.read().unwrap();
+        tenant_conf
+            .max_lsn_wal_lag
+            .unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag)
+    }
+
    pub fn update_tenant_config(&self, new_tenant_conf: TenantConfOpt) -> Result<()> {
        let mut tenant_conf = self.tenant_conf.write().unwrap();

@@ -823,7 +845,7 @@ impl LayeredRepository {
        for (timeline_id, timeline_entry) in timelines.iter() {
            timeline_ids.push(*timeline_id);

-            // This is unresolved question for now, how to do gc in presense of remote timelines
+            // This is unresolved question for now, how to do gc in presence of remote timelines
            // especially when this is combined with branching.
            // Somewhat related: https://github.com/zenithdb/zenith/issues/999
            if let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id() {
@@ -1831,7 +1853,7 @@ impl LayeredTimeline {
        // collect any page versions that are no longer needed because
        // of the new image layers we created in step 2.
        //
-        // TODO: This hight level strategy hasn't been implemented yet.
+        // TODO: This high level strategy hasn't been implemented yet.
        // Below are functions compact_level0() and create_image_layers()
        // but they are a bit ad hoc and don't quite work like it's explained
        // above. Rewrite it.
@@ -2268,7 +2290,7 @@ impl LayeredTimeline {
            }

            // 3. Is it needed by a child branch?
-            // NOTE With that wee would keep data that
+            // NOTE With that we would keep data that
            // might be referenced by child branches forever.
            // We can track this in child timeline GC and delete parent layers when
            // they are no longer needed. This might be complicated with long inheritance chains.
--- a/pageserver/src/layered_repository/README.md
+++ b/pageserver/src/layered_repository/README.md
@@ -260,7 +260,7 @@ Whenever a GetPage@LSN request comes in from the compute node, the
 page server needs to reconstruct the requested page, as it was at the
 requested LSN. To do that, the page server first checks the recent
 in-memory layer; if the requested page version is found there, it can
-be returned immediatedly without looking at the files on
+be returned immediately without looking at the files on
 disk. Otherwise the page server needs to locate the layer file that
 contains the requested page version.

--- a/pageserver/src/layered_repository/disk_btree.rs
+++ b/pageserver/src/layered_repository/disk_btree.rs
@@ -7,7 +7,7 @@
 //! - Fixed-width keys
 //! - Fixed-width values (VALUE_SZ)
 //! - The tree is created in a bulk operation. Insert/deletion after creation
-//!   is not suppported
+//!   is not supported
 //! - page-oriented
 //!
 //! TODO:
@@ -498,8 +498,8 @@ where
            return Ok(());
        }

-        // It did not fit. Try to compress, and it it succeeds to make some room
-        // on the node, try appending to it again.
+        // It did not fit. Try to compress, and if that succeeds in making
+        // some room on the node, try appending to it again.
        #[allow(clippy::collapsible_if)]
        if last.compress() {
            if last.push(key, value) {
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -7,7 +7,6 @@
 //     *status* -- show actual info about this pageserver,
 //     *pagestream* -- enter mode where smgr and pageserver talk with their
 //  custom protocol.
-//     *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
 //

 use anyhow::{bail, ensure, Context, Result};
@@ -38,7 +37,6 @@ use crate::repository::Timeline;
 use crate::tenant_mgr;
 use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
-use crate::walreceiver;
 use crate::CheckpointConfig;
 use metrics::{register_histogram_vec, HistogramVec};
 use postgres_ffi::xlog_utils::to_pg_timestamp;
@@ -634,7 +632,7 @@ impl PageServerHandler {
            return Ok(());
        }
        // auth is some, just checked above, when auth is some
-        // then claims are always present because of checks during connetion init
+        // then claims are always present because of checks during connection init
        // so this expect won't trigger
        let claims = self
            .claims
@@ -716,30 +714,6 @@ impl postgres_backend::Handler for PageServerHandler {

            // Check that the timeline exists
            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
-            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
-        } else if query_string.starts_with("callmemaybe ") {
-            // callmemaybe <zenith tenantid as hex string> <zenith timelineid as hex string> <connstr>
-            // TODO lazy static
-            let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) ([[:xdigit:]]+) (.*)$").unwrap();
-            let caps = re
-                .captures(query_string)
-                .with_context(|| format!("invalid callmemaybe: '{}'", query_string))?;
-
-            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
-            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
-            let connstr = caps.get(3).unwrap().as_str().to_owned();
-
-            self.check_permission(Some(tenantid))?;
-
-            let _enter =
-                info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
-
-            // Check that the timeline exists
-            tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
-                .context("Cannot load local timeline")?;
-
-            walreceiver::launch_wal_receiver(self.conf, tenantid, timelineid, &connstr)?;
-
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
            // important because psycopg2 executes "SET datestyle TO 'ISO'"
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -521,7 +521,7 @@ pub struct DatadirModification<'a, R: Repository> {

    lsn: Lsn,

-    // The modifications are not applied directly to the underyling key-value store.
+    // The modifications are not applied directly to the underlying key-value store.
    // The put-functions add the modifications here, and they are flushed to the
    // underlying key-value store by the 'finish' function.
    pending_updates: HashMap<Key, Value>,
--- a/pageserver/src/remote_storage/storage_sync/delete.rs
+++ b/pageserver/src/remote_storage/storage_sync/delete.rs
@@ -1,4 +1,4 @@
-//! Timeline synchrnonization logic to delete a bulk of timeline's remote files from the remote storage.
+//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -19,7 +19,7 @@ use utils::{
 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 /// Key used in the Repository kv-store.
 ///
-/// The Repository treates this as an opaque struct, but see the code in pgdatadir_mapping.rs
+/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
 /// for what we actually store in these fields.
 pub struct Key {
    pub field1: u8,
@@ -210,7 +210,7 @@ pub trait Repository: Send + Sync {
    ) -> Result<()>;

    /// Get Timeline handle for given zenith timeline ID.
-    /// This function is idempotent. It doesnt change internal state in any way.
+    /// This function is idempotent. It doesn't change internal state in any way.
    fn get_timeline(&self, timelineid: ZTimelineId) -> Option<RepositoryTimeline<Self::Timeline>>;

    /// Get Timeline handle for locally available timeline. Load it into memory if it is not loaded.
@@ -345,11 +345,11 @@ pub trait Timeline: Send + Sync {

    /// Look up given page version.
    ///
-    /// NOTE: It is considerd an error to 'get' a key that doesn't exist. The abstraction
+    /// NOTE: It is considered an error to 'get' a key that doesn't exist. The abstraction
    /// above this needs to store suitable metadata to track what data exists with
    /// what keys, in separate metadata entries. If a non-existent key is requested,
-    /// the Repository implementation may incorrectly return a value from an ancestore
-    /// branch, for exampel, or waste a lot of cycles chasing the non-existing key.
+    /// the Repository implementation may incorrectly return a value from an ancestor
+    /// branch, for example, or waste a lot of cycles chasing the non-existing key.
    ///
    fn get(&self, key: Key, lsn: Lsn) -> Result<Bytes>;

@@ -469,6 +469,9 @@ pub mod repo_harness {
                gc_period: Some(tenant_conf.gc_period),
                image_creation_threshold: Some(tenant_conf.image_creation_threshold),
                pitr_interval: Some(tenant_conf.pitr_interval),
+                walreceiver_connect_timeout: Some(tenant_conf.walreceiver_connect_timeout),
+                lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout),
+                max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag),
            }
        }
    }
--- a/pageserver/src/storage_sync.rs
+++ b/pageserver/src/storage_sync.rs
@@ -69,7 +69,7 @@
 //! Yet instead of keeping the `metadata` file remotely, we wrap it with more data in [`IndexPart`], containing the list of remote files.
 //! This file gets read to populate the cache, if the remote timeline data is missing from it and gets updated after every successful download.
 //! This way, we optimize S3 storage access by not running the `S3 list` command that could be expencive and slow: knowing both [`ZTenantId`] and [`ZTimelineId`],
-//! we can always reconstruct the path to the timeline, use this to get the same path on the remote storage and retrive its shard contents, if needed, same as any layer files.
+//! we can always reconstruct the path to the timeline, use this to get the same path on the remote storage and retrieve its shard contents, if needed, same as any layer files.
 //!
 //! By default, pageserver reads the remote storage index data only for timelines located locally, to synchronize those, if needed.
 //! Bulk index data download happens only initially, on pageserver startup. The rest of the remote storage stays unknown to pageserver and loaded on demand only,
@@ -96,7 +96,7 @@
 //! timeline uploads and downloads can happen concurrently, in no particular order due to incremental nature of the timeline layers.
 //! Deletion happens only after a successful upload only, otherwise the compaction output might make the timeline inconsistent until both tasks are fully processed without errors.
 //! Upload and download update the remote data (inmemory index and S3 json index part file) only after every layer is successfully synchronized, while the deletion task
-//! does otherwise: it requires to have the remote data updated first succesfully: blob files will be invisible to pageserver this way.
+//! does otherwise: it requires to have the remote data updated first successfully: blob files will be invisible to pageserver this way.
 //!
 //! During the loop startup, an initial [`RemoteTimelineIndex`] state is constructed via downloading and merging the index data for all timelines,
 //! present locally.
@@ -440,7 +440,7 @@ fn collect_timeline_files(
    //   initial collect will fail because there is no metadata.
    //   We either need to start download if we see empty dir after restart or attach caller should
    //   be aware of that and retry attach if awaits_download for timeline switched from true to false
-    //   but timelinne didnt appear locally.
+    //   but timeline didn't appear locally.
    //   Check what happens with remote index in that case.
    let timeline_metadata_path = match timeline_metadata_path {
        Some(path) => path,
@@ -1007,7 +1007,7 @@ where
    // in local (implicitly, via Lsn values and related memory state) or remote (explicitly via remote layer file paths) metadata.
    // When operating in a system without tasks failing over the error threshold,
    // current batching and task processing systems aim to update the layer set and metadata files (remote and local),
-    // without "loosing" such layer files.
+    // without "losing" such layer files.
    let (upload_result, status_update) = tokio::join!(
        async {
            if let Some(upload_data) = upload_data {
@@ -1162,7 +1162,7 @@ where
                        return Some(TimelineSyncStatusUpdate::Downloaded);
                    }
                    Err(e) => {
-                        error!("Timeline {sync_id} was expected to be in the remote index after a sucessful download, but it's absent: {e:?}");
+                        error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
                    }
                },
                Err(e) => {
@@ -1549,10 +1549,10 @@ fn compare_local_and_remote_timeline(
    let remote_files = remote_entry.stored_files();

    // TODO probably here we need more sophisticated logic,
-    //   if more data is available remotely can we just download whats there?
+    //   if more data is available remotely can we just download what's there?
    //   without trying to upload something. It may be tricky, needs further investigation.
    //   For now looks strange that we can request upload
-    //   and dowload for the same timeline simultaneously.
+    //   and download for the same timeline simultaneously.
    //   (upload needs to be only for previously unsynced files, not whole timeline dir).
    //   If one of the tasks fails they will be reordered in the queue which can lead
    //   to timeline being stuck in evicted state
@@ -1565,7 +1565,7 @@ fn compare_local_and_remote_timeline(
            }),
        ));
        (LocalTimelineInitStatus::NeedsSync, true)
-        // we do not need to manupulate with remote consistent lsn here
+        // we do not need to manipulate the remote consistent lsn here
        // because it will be updated when sync will be completed
    } else {
        (LocalTimelineInitStatus::LocallyComplete, false)
--- a/pageserver/src/storage_sync/delete.rs
+++ b/pageserver/src/storage_sync/delete.rs
@@ -1,4 +1,4 @@
-//! Timeline synchrnonization logic to delete a bulk of timeline's remote files from the remote storage.
+//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
--- a/pageserver/src/storage_sync/download.rs
+++ b/pageserver/src/storage_sync/download.rs
@@ -1,4 +1,4 @@
-//! Timeline synchrnonization logic to fetch the layer files from remote storage into pageserver's local directory.
+//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.

 use std::{collections::HashSet, fmt::Debug, path::Path};

--- a/pageserver/src/storage_sync/index.rs
+++ b/pageserver/src/storage_sync/index.rs
@@ -273,7 +273,7 @@ mod tests {
        };

        let index_part = IndexPart::from_remote_timeline(&timeline_path, remote_timeline.clone())
-            .expect("Correct remote timeline should be convertable to index part");
+            .expect("Correct remote timeline should be convertible to index part");

        assert_eq!(
            index_part.timeline_layers.iter().collect::<BTreeSet<_>>(),
@@ -305,7 +305,7 @@ mod tests {
        );

        let restored_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
-            .expect("Correct index part should be convertable to remote timeline");
+            .expect("Correct index part should be convertible to remote timeline");

        let original_metadata = &remote_timeline.metadata;
        let restored_metadata = &restored_timeline.metadata;
--- a/pageserver/src/storage_sync/upload.rs
+++ b/pageserver/src/storage_sync/upload.rs
@@ -391,7 +391,7 @@ mod tests {
        assert_eq!(
            upload.metadata,
            Some(metadata),
-            "Successful upload should not chage its metadata"
+            "Successful upload should not change its metadata"
        );

        let storage_files = storage.list().await?;
--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -10,6 +10,7 @@
 //!
 use crate::config::PageServerConf;
 use serde::{Deserialize, Serialize};
+use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::time::Duration;
 use utils::zid::ZTenantId;
@@ -34,6 +35,9 @@ pub mod defaults {
    pub const DEFAULT_GC_PERIOD: &str = "100 s";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
+    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
+    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
+    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1_000_000;
 }

 /// Per-tenant configuration options
@@ -68,6 +72,17 @@ pub struct TenantConf {
    // Page versions older than this are garbage collected away.
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Duration,
+    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
+    #[serde(with = "humantime_serde")]
+    pub walreceiver_connect_timeout: Duration,
+    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
+    /// A stalled safekeeper will be changed to a newer one when it appears.
+    #[serde(with = "humantime_serde")]
+    pub lagging_wal_timeout: Duration,
+    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
+    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
+    /// to avoid eager reconnects.
+    pub max_lsn_wal_lag: NonZeroU64,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -85,6 +100,11 @@ pub struct TenantConfOpt {
    pub image_creation_threshold: Option<usize>,
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Option<Duration>,
+    #[serde(with = "humantime_serde")]
+    pub walreceiver_connect_timeout: Option<Duration>,
+    #[serde(with = "humantime_serde")]
+    pub lagging_wal_timeout: Option<Duration>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 impl TenantConfOpt {
@@ -108,6 +128,13 @@ impl TenantConfOpt {
                .image_creation_threshold
                .unwrap_or(global_conf.image_creation_threshold),
            pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),
+            walreceiver_connect_timeout: self
+                .walreceiver_connect_timeout
+                .unwrap_or(global_conf.walreceiver_connect_timeout),
+            lagging_wal_timeout: self
+                .lagging_wal_timeout
+                .unwrap_or(global_conf.lagging_wal_timeout),
+            max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),
        }
    }

@@ -136,6 +163,15 @@ impl TenantConfOpt {
        if let Some(pitr_interval) = other.pitr_interval {
            self.pitr_interval = Some(pitr_interval);
        }
+        if let Some(walreceiver_connect_timeout) = other.walreceiver_connect_timeout {
+            self.walreceiver_connect_timeout = Some(walreceiver_connect_timeout);
+        }
+        if let Some(lagging_wal_timeout) = other.lagging_wal_timeout {
+            self.lagging_wal_timeout = Some(lagging_wal_timeout);
+        }
+        if let Some(max_lsn_wal_lag) = other.max_lsn_wal_lag {
+            self.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+        }
    }
 }

@@ -155,6 +191,14 @@ impl TenantConf {
            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
                .expect("cannot parse default PITR interval"),
+            walreceiver_connect_timeout: humantime::parse_duration(
+                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
+            )
+            .expect("cannot parse default walreceiver connect timeout"),
+            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
+                .expect("cannot parse default walreceiver lagging wal timeout"),
+            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
+                .expect("cannot parse default max walreceiver Lsn wal lag"),
        }
    }

@@ -175,6 +219,16 @@ impl TenantConf {
            gc_period: Duration::from_secs(10),
            image_creation_threshold: defaults::DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: Duration::from_secs(60 * 60),
+            walreceiver_connect_timeout: humantime::parse_duration(
+                defaults::DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
+            )
+            .unwrap(),
+            lagging_wal_timeout: humantime::parse_duration(
+                defaults::DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT,
+            )
+            .unwrap(),
+            max_lsn_wal_lag: NonZeroU64::new(defaults::DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
+                .unwrap(),
        }
    }
 }
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -8,11 +8,10 @@ use crate::repository::{Repository, TimelineSyncStatusUpdate};
 use crate::storage_sync::index::RemoteIndex;
 use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
 use crate::tenant_config::TenantConfOpt;
-use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
-use crate::timelines;
 use crate::timelines::CreateRepo;
 use crate::walredo::PostgresRedoManager;
+use crate::{thread_mgr, timelines, walreceiver};
 use crate::{DatadirTimelineImpl, RepositoryImpl};
 use anyhow::{bail, Context};
 use serde::{Deserialize, Serialize};
@@ -21,23 +20,30 @@ use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::fmt;
 use std::sync::Arc;
+use tokio::sync::mpsc;
 use tracing::*;
 use utils::lsn::Lsn;

-use utils::zid::{ZTenantId, ZTimelineId};
+use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

 mod tenants_state {
+    use anyhow::ensure;
    use std::{
        collections::HashMap,
        sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
    };
+    use tokio::sync::mpsc;
+    use tracing::{debug, error};

    use utils::zid::ZTenantId;

-    use crate::tenant_mgr::Tenant;
+    use crate::tenant_mgr::{LocalTimelineUpdate, Tenant};

    lazy_static::lazy_static! {
        static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>> = RwLock::new(HashMap::new());
+        /// Sends updates about the local timelines (creation and deletion) to the WAL receiver,
+        /// so that it can enable/disable corresponding processes.
+        static ref TIMELINE_UPDATE_SENDER: RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>> = RwLock::new(None);
    }

    pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<ZTenantId, Tenant>> {
@@ -51,6 +57,39 @@ mod tenants_state {
            .write()
            .expect("Failed to write() tenants lock, it got poisoned")
    }
+
+    pub(super) fn set_timeline_update_sender(
+        timeline_updates_sender: mpsc::UnboundedSender<LocalTimelineUpdate>,
+    ) -> anyhow::Result<()> {
+        let mut sender_guard = TIMELINE_UPDATE_SENDER
+            .write()
+            .expect("Failed to write() timeline_update_sender lock, it got poisoned");
+        ensure!(sender_guard.is_none(), "Timeline update sender already set");
+        *sender_guard = Some(timeline_updates_sender);
+        Ok(())
+    }
+
+    pub(super) fn try_send_timeline_update(update: LocalTimelineUpdate) {
+        match TIMELINE_UPDATE_SENDER
+            .read()
+            .expect("Failed to read() timeline_update_sender lock, it got poisoned")
+            .as_ref()
+        {
+            Some(sender) => {
+                if let Err(e) = sender.send(update) {
+                    error!("Failed to send timeline update: {}", e);
+                }
+            }
+            None => debug!("Timeline update sender is not enabled, cannot send update {update:?}"),
+        }
+    }
+
+    pub(super) fn stop_timeline_update_sender() {
+        TIMELINE_UPDATE_SENDER
+            .write()
+            .expect("Failed to write() timeline_update_sender lock, it got poisoned")
+            .take();
+    }
 }

 struct Tenant {
@@ -87,10 +126,10 @@ pub enum TenantState {
 impl fmt::Display for TenantState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
-            TenantState::Active => f.write_str("Active"),
-            TenantState::Idle => f.write_str("Idle"),
-            TenantState::Stopping => f.write_str("Stopping"),
-            TenantState::Broken => f.write_str("Broken"),
+            Self::Active => f.write_str("Active"),
+            Self::Idle => f.write_str("Idle"),
+            Self::Stopping => f.write_str("Stopping"),
+            Self::Broken => f.write_str("Broken"),
        }
    }
 }
@@ -99,6 +138,11 @@ impl fmt::Display for TenantState {
 /// Timelines that are only partially available locally (remote storage has more data than this pageserver)
 /// are scheduled for download and added to the repository once download is completed.
 pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIndex> {
+    let (timeline_updates_sender, timeline_updates_receiver) =
+        mpsc::unbounded_channel::<LocalTimelineUpdate>();
+    tenants_state::set_timeline_update_sender(timeline_updates_sender)?;
+    walreceiver::init_wal_receiver_main_thread(conf, timeline_updates_receiver)?;
+
    let SyncStartupData {
        remote_index,
        local_timeline_init_statuses,
@@ -113,16 +157,27 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
            // loading a tenant is serious, but it's better to complete the startup and
            // serve other tenants, than fail completely.
            error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
-            let mut m = tenants_state::write_tenants();
-            if let Some(tenant) = m.get_mut(&tenant_id) {
-                tenant.state = TenantState::Broken;
-            }
+            set_tenant_state(tenant_id, TenantState::Broken)?;
        }
    }

    Ok(remote_index)
 }

+pub enum LocalTimelineUpdate {
+    Detach(ZTenantTimelineId),
+    Attach(ZTenantTimelineId, Arc<DatadirTimelineImpl>),
+}
+
+impl std::fmt::Debug for LocalTimelineUpdate {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Detach(ttid) => f.debug_tuple("Detach").field(ttid).finish(), // label matches the variant name
+            Self::Attach(ttid, _) => f.debug_tuple("Attach").field(ttid).finish(), // timeline handle is not printed
+        }
+    }
+}
+
 /// Updates tenants' repositories, changing their timelines state in memory.
 pub fn apply_timeline_sync_status_updates(
    conf: &'static PageServerConf,
@@ -160,6 +215,7 @@ pub fn apply_timeline_sync_status_updates(
 /// Shut down all tenants. This runs as part of pageserver shutdown.
 ///
 pub fn shutdown_all_tenants() {
+    tenants_state::stop_timeline_update_sender();
    let mut m = tenants_state::write_tenants();
    let mut tenantids = Vec::new();
    for (tenantid, tenant) in m.iter_mut() {
@@ -173,7 +229,7 @@ pub fn shutdown_all_tenants() {
    }
    drop(m);

-    thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiver), None, None);
+    thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
    thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
    thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);

@@ -247,32 +303,49 @@ pub fn get_tenant_state(tenantid: ZTenantId) -> Option<TenantState> {
    Some(tenants_state::read_tenants().get(&tenantid)?.state)
 }

-///
-/// Change the state of a tenant to Active and launch its compactor and GC
-/// threads. If the tenant was already in Active state or Stopping, does nothing.
-///
-pub fn activate_tenant(tenant_id: ZTenantId) -> anyhow::Result<()> {
+pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow::Result<()> {
    let mut m = tenants_state::write_tenants();
    let tenant = m
        .get_mut(&tenant_id)
        .with_context(|| format!("Tenant not found for id {tenant_id}"))?;
+    let old_state = tenant.state;
+    tenant.state = new_state;
+    drop(m);

-    info!("activating tenant {tenant_id}");
-
-    match tenant.state {
-        // If the tenant is already active, nothing to do.
-        TenantState::Active => {}
-
-        // If it's Idle, launch the compactor and GC threads
-        TenantState::Idle => {
-            thread_mgr::spawn(
+    match (old_state, new_state) {
+        (TenantState::Broken, TenantState::Broken)
+        | (TenantState::Active, TenantState::Active)
+        | (TenantState::Idle, TenantState::Idle)
+        | (TenantState::Stopping, TenantState::Stopping) => {
+            debug!("tenant {tenant_id} already in state {new_state}");
+        }
+        (TenantState::Broken, ignored) => {
+            debug!("Ignoring {ignored} since tenant {tenant_id} is in broken state");
+        }
+        (_, TenantState::Broken) => {
+            debug!("Setting tenant {tenant_id} status to broken");
+        }
+        (TenantState::Stopping, ignored) => {
+            debug!("Ignoring {ignored} since tenant {tenant_id} is in stopping state");
+        }
+        (TenantState::Idle, TenantState::Active) => {
+            info!("activating tenant {tenant_id}");
+            let compactor_spawn_result = thread_mgr::spawn(
                ThreadKind::Compactor,
                Some(tenant_id),
                None,
                "Compactor thread",
                false,
                move || crate::tenant_threads::compact_loop(tenant_id),
-            )?;
+            );
+            if compactor_spawn_result.is_err() {
+                let mut m = tenants_state::write_tenants();
+                m.get_mut(&tenant_id)
+                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
+                    .state = old_state;
+                drop(m);
+            }
+            compactor_spawn_result?;

            let gc_spawn_result = thread_mgr::spawn(
                ThreadKind::GarbageCollector,
@@ -286,21 +359,31 @@ pub fn activate_tenant(tenant_id: ZTenantId) -> anyhow::Result<()> {
            .with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));

            if let Err(e) = &gc_spawn_result {
+                let mut m = tenants_state::write_tenants();
+                m.get_mut(&tenant_id)
+                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
+                    .state = old_state;
+                drop(m);
                error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
                thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
                return gc_spawn_result;
            }
-            tenant.state = TenantState::Active;
        }
-
-        TenantState::Stopping => {
-            // don't re-activate it if it's being stopped
+        (TenantState::Idle, TenantState::Stopping) => {
+            info!("stopping idle tenant {tenant_id}");
        }
-
-        TenantState::Broken => {
-            // cannot activate
+        (TenantState::Active, TenantState::Stopping | TenantState::Idle) => {
+            info!("stopping tenant {tenant_id} threads due to new state {new_state}");
+            thread_mgr::shutdown_threads(
+                Some(ThreadKind::WalReceiverManager),
+                Some(tenant_id),
+                None,
+            );
+            thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
+            thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
        }
    }
+
    Ok(())
 }

@@ -325,15 +408,15 @@ pub fn get_local_timeline_with_load(
        .with_context(|| format!("Tenant {tenant_id} not found"))?;

    if let Some(page_tline) = tenant.local_timelines.get(&timeline_id) {
-        return Ok(Arc::clone(page_tline));
+        Ok(Arc::clone(page_tline))
+    } else {
+        let page_tline = load_local_timeline(&tenant.repo, timeline_id)
+            .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}"))?;
+        tenant
+            .local_timelines
+            .insert(timeline_id, Arc::clone(&page_tline));
+        Ok(page_tline)
    }
-
-    let page_tline = load_local_timeline(&tenant.repo, timeline_id)
-        .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}"))?;
-    tenant
-        .local_timelines
-        .insert(timeline_id, Arc::clone(&page_tline));
-    Ok(page_tline)
 }

 pub fn detach_timeline(
@@ -351,6 +434,9 @@ pub fn detach_timeline(
                .detach_timeline(timeline_id)
                .context("Failed to detach inmem tenant timeline")?;
            tenant.local_timelines.remove(&timeline_id);
+            tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach(
+                ZTenantTimelineId::new(tenant_id, timeline_id),
+            ));
        }
        None => bail!("Tenant {tenant_id} not found in local tenant state"),
    }
@@ -379,6 +465,12 @@ fn load_local_timeline(
        repartition_distance,
    ));
    page_tline.init_logical_size()?;
+
+    tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach(
+        ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
+        Arc::clone(&page_tline),
+    ));
+
    Ok(page_tline)
 }

--- a/pageserver/src/thread_mgr.rs
+++ b/pageserver/src/thread_mgr.rs
@@ -91,8 +91,8 @@ pub enum ThreadKind {
    // associated with one later, after receiving a command from the client.
    PageRequestHandler,

-    // Thread that connects to a safekeeper to fetch WAL for one timeline.
-    WalReceiver,
+    // Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
+    WalReceiverManager,

    // Thread that handles compaction of all timelines for a tenant.
    Compactor,
--- a/pageserver/src/timelines.rs
+++ b/pageserver/src/timelines.rs
@@ -283,8 +283,6 @@ fn bootstrap_timeline<R: Repository>(
    tli: ZTimelineId,
    repo: &R,
 ) -> Result<()> {
-    let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
-
    let initdb_path = conf
        .tenant_path(&tenantid)
        .join(format!("tmp-timeline-{}", tli));
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -336,7 +336,7 @@ impl VirtualFile {
        // library RwLock doesn't allow downgrading without releasing the lock,
        // and that doesn't seem worth the trouble.
        //
-        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implemenation is fair and
+        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
        // may deadlock on subsequent read calls.
        // Simply replacing all `RwLock` in project causes deadlocks, so use it sparingly.
        let result = STORAGE_IO_TIME
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -12,7 +12,7 @@
 //! The zenith Repository can store page versions in two formats: as
 //! page images, or a WAL records. WalIngest::ingest_record() extracts
 //! page images out of some WAL records, but most it stores as WAL
-//! records. If a WAL record modifies multple pages, WalIngest
+//! records. If a WAL record modifies multiple pages, WalIngest
 //! will call Repository::put_wal_record or put_page_image functions
 //! separately for each modified page.
 //!
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
--- a/pageserver/src/walreceiver/connection_handler.rs
+++ b/pageserver/src/walreceiver/connection_handler.rs
@@ -0,0 +1,405 @@
+//! Actual Postgres connection handler to stream WAL to the server.
+//! Runs as a separate, cancellable Tokio task.
+use std::{
+    str::FromStr,
+    sync::Arc,
+    time::{Duration, SystemTime},
+};
+
+use anyhow::{bail, ensure, Context};
+use bytes::BytesMut;
+use fail::fail_point;
+use postgres::{SimpleQueryMessage, SimpleQueryRow};
+use postgres_ffi::waldecoder::WalStreamDecoder;
+use postgres_protocol::message::backend::ReplicationMessage;
+use postgres_types::PgLsn;
+use tokio::{pin, select, sync::watch, time};
+use tokio_postgres::{replication::ReplicationStream, Client};
+use tokio_stream::StreamExt;
+use tracing::{debug, error, info, info_span, trace, warn, Instrument};
+use utils::{
+    lsn::Lsn,
+    pq_proto::ZenithFeedback,
+    zid::{NodeId, ZTenantTimelineId},
+};
+
+use crate::{
+    http::models::WalReceiverEntry,
+    repository::{Repository, Timeline},
+    tenant_mgr,
+    walingest::WalIngest,
+};
+
+#[derive(Debug, Clone)]
+pub enum WalConnectionEvent {
+    Started,
+    NewWal(ZenithFeedback),
+    End(Result<(), String>),
+}
+
+/// A wrapper around standalone Tokio task, to poll its updates or cancel the task.
+#[derive(Debug)]
+pub struct WalReceiverConnection {
+    handle: tokio::task::JoinHandle<()>,
+    cancellation: watch::Sender<()>,
+    events_receiver: watch::Receiver<WalConnectionEvent>,
+}
+
+impl WalReceiverConnection {
+    /// Initializes the connection task, returning a set of handles on top of it.
+    /// The task is started immediately after the creation, fails if no connection is established during the timeout given.
+    pub fn open(
+        id: ZTenantTimelineId,
+        safekeeper_id: NodeId,
+        wal_producer_connstr: String,
+        connect_timeout: Duration,
+    ) -> Self {
+        let (cancellation, mut cancellation_receiver) = watch::channel(());
+        let (events_sender, events_receiver) = watch::channel(WalConnectionEvent::Started);
+
+        let handle = tokio::spawn(
+            async move {
+                let connection_result = handle_walreceiver_connection(
+                    id,
+                    &wal_producer_connstr,
+                    &events_sender,
+                    &mut cancellation_receiver,
+                    connect_timeout,
+                )
+                .await
+                .map_err(|e| {
+                    format!("Walreceiver connection for id {id} failed with error: {e:#}")
+                });
+
+                match &connection_result {
+                    Ok(()) => {
+                        debug!("Walreceiver connection for id {id} ended successfully")
+                    }
+                    Err(e) => warn!("{e}"),
+                }
+                events_sender
+                    .send(WalConnectionEvent::End(connection_result))
+                    .ok();
+            }
+            .instrument(info_span!("safekeeper_handle", sk = %safekeeper_id)),
+        );
+
+        Self {
+            handle,
+            cancellation,
+            events_receiver,
+        }
+    }
+
+    /// Polls for the next WAL receiver event, if there's any available since the last check.
+    /// Blocks if there's no new event available, returns `None` if no new events will ever occur.
+    /// Only the last event is returned, all events received between observations are lost.
+    pub async fn next_event(&mut self) -> Option<WalConnectionEvent> {
+        match self.events_receiver.changed().await {
+            Ok(()) => Some(self.events_receiver.borrow().clone()),
+            Err(_cancellation_error) => None,
+        }
+    }
+
+    /// Gracefully stops the current WAL streaming task: signals cancellation, then waits for the task to finish.
+    pub async fn shutdown(&mut self) -> anyhow::Result<()> {
+        self.cancellation.send(()).ok();
+        let handle = &mut self.handle;
+        handle
+            .await
+            .context("Failed to join on a walreceiver connection task")?;
+        Ok(())
+    }
+}
+
+async fn handle_walreceiver_connection(
+    id: ZTenantTimelineId,
+    wal_producer_connstr: &str,
+    events_sender: &watch::Sender<WalConnectionEvent>,
+    cancellation: &mut watch::Receiver<()>,
+    connect_timeout: Duration,
+) -> anyhow::Result<()> {
+    // Connect to the database in replication mode.
+    info!("connecting to {wal_producer_connstr}");
+    let connect_cfg =
+        format!("{wal_producer_connstr} application_name=pageserver replication=true");
+
+    let (mut replication_client, connection) = time::timeout(
+        connect_timeout,
+        tokio_postgres::connect(&connect_cfg, postgres::NoTls),
+    )
+    .await
+    .context("Timed out while waiting for walreceiver connection to open")?
+    .context("Failed to open walreceiver conection")?;
+    // The connection object performs the actual communication with the database,
+    // so spawn it off to run on its own.
+    let mut connection_cancellation = cancellation.clone();
+    tokio::spawn(
+        async move {
+            info!("connected!");
+            select! {
+                    connection_result = connection => match connection_result{
+                            Ok(()) => info!("Walreceiver db connection closed"),
+                            Err(connection_error) => {
+                                if connection_error.is_closed() {
+                                    info!("Connection closed regularly: {connection_error}")
+                                } else {
+                                    warn!("Connection aborted: {connection_error}")
+                                }
+                            }
+                        },
+
+                    _ = connection_cancellation.changed() => info!("Connection cancelled"),
+            }
+        }
+        .instrument(info_span!("safekeeper_handle_db")),
+    );
+
+    // Immediately increment the gauge, then create a job to decrement it on task exit.
+    // One of the pros of `defer!` is that this will *most probably*
+    // get called, even in presence of panics.
+    let gauge = crate::LIVE_CONNECTIONS_COUNT.with_label_values(&["wal_receiver"]);
+    gauge.inc();
+    scopeguard::defer! {
+        gauge.dec();
+    }
+
+    let identify = identify_system(&mut replication_client).await?;
+    info!("{identify:?}");
+    let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
+    let mut caught_up = false;
+    let ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    } = id;
+
+    let (repo, timeline) = tokio::task::spawn_blocking(move || {
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
+            .with_context(|| format!("no repository found for tenant {tenant_id}"))?;
+        let timeline = tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id)
+            .with_context(|| {
+                format!("local timeline {timeline_id} not found for tenant {tenant_id}")
+            })?;
+        Ok::<_, anyhow::Error>((repo, timeline))
+    })
+    .await
+    .with_context(|| format!("Failed to spawn blocking task to get repository and timeline for tenant {tenant_id} timeline {timeline_id}"))??;
+
+    //
+    // Start streaming the WAL, from where we left off previously.
+    //
+    // If we had previously received WAL up to some point in the middle of a WAL record, we
+    // better start from the end of last full WAL record, not in the middle of one.
+    let mut last_rec_lsn = timeline.get_last_record_lsn();
+    let mut startpoint = last_rec_lsn;
+
+    if startpoint == Lsn(0) {
+        bail!("No previous WAL position");
+    }
+
+    // There might be some padding after the last full record, skip it.
+    startpoint += startpoint.calc_padding(8u32);
+
+    info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, server is at {end_of_wal}...");
+
+    let query = format!("START_REPLICATION PHYSICAL {startpoint}");
+
+    let copy_stream = replication_client.copy_both_simple(&query).await?;
+    let physical_stream = ReplicationStream::new(copy_stream);
+    pin!(physical_stream);
+
+    let mut waldecoder = WalStreamDecoder::new(startpoint);
+
+    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint)?;
+
+    while let Some(replication_message) = {
+        select! {
+            // check for shutdown first
+            biased;
+            _ = cancellation.changed() => {
+                info!("walreceiver interrupted");
+                None
+            }
+            replication_message = physical_stream.next() => replication_message,
+        }
+    } {
+        let replication_message = replication_message?;
+        let status_update = match replication_message {
+            ReplicationMessage::XLogData(xlog_data) => {
+                // Pass the WAL data to the decoder, and see if we can decode
+                // more records as a result.
+                let data = xlog_data.data();
+                let startlsn = Lsn::from(xlog_data.wal_start());
+                let endlsn = startlsn + data.len() as u64;
+
+                trace!("received XLogData between {startlsn} and {endlsn}");
+
+                waldecoder.feed_bytes(data);
+
+                while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
+                    let _enter = info_span!("processing record", lsn = %lsn).entered();
+
+                    // It is important to deal with the aligned records as lsn in getPage@LSN is
+                    // aligned and can be several bytes bigger. Without this alignment we are
+                    // at risk of hitting a deadlock.
+                    ensure!(lsn.is_aligned());
+
+                    walingest.ingest_record(&timeline, recdata, lsn)?;
+
+                    fail_point!("walreceiver-after-ingest");
+
+                    last_rec_lsn = lsn;
+                }
+
+                if !caught_up && endlsn >= end_of_wal {
+                    info!("caught up at LSN {endlsn}");
+                    caught_up = true;
+                }
+
+                let timeline_to_check = Arc::clone(&timeline.tline);
+                tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
+                    .await
+                    .with_context(|| {
+                        format!("Spawned checkpoint check task panicked for timeline {id}")
+                    })?
+                    .with_context(|| {
+                        format!("Failed to check checkpoint distance for timeline {id}")
+                    })?;
+
+                Some(endlsn)
+            }
+
+            ReplicationMessage::PrimaryKeepAlive(keepalive) => {
+                let wal_end = keepalive.wal_end();
+                let timestamp = keepalive.timestamp();
+                let reply_requested = keepalive.reply() != 0;
+
+                trace!("received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})");
+
+                if reply_requested {
+                    Some(last_rec_lsn)
+                } else {
+                    None
+                }
+            }
+
+            _ => None,
+        };
+
+        if let Some(last_lsn) = status_update {
+            let remote_index = repo.get_remote_index();
+            let timeline_remote_consistent_lsn = remote_index
+                .read()
+                .await
+                // here we either do not have this timeline in remote index
+                // or there were no checkpoints for it yet
+                .timeline_entry(&ZTenantTimelineId {
+                    tenant_id,
+                    timeline_id,
+                })
+                .map(|remote_timeline| remote_timeline.metadata.disk_consistent_lsn())
+                // no checkpoint was uploaded
+                .unwrap_or(Lsn(0));
+
+            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
+            let write_lsn = u64::from(last_lsn);
+            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
+            let flush_lsn = u64::from(timeline.tline.get_disk_consistent_lsn());
+            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
+            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
+            let apply_lsn = u64::from(timeline_remote_consistent_lsn);
+            let ts = SystemTime::now();
+
+            // Update the current WAL receiver's data stored inside the global hash table `WAL_RECEIVER_ENTRIES`
+            {
+                super::WAL_RECEIVER_ENTRIES.write().await.insert(
+                    id,
+                    WalReceiverEntry {
+                        wal_producer_connstr: Some(wal_producer_connstr.to_owned()),
+                        last_received_msg_lsn: Some(last_lsn),
+                        last_received_msg_ts: Some(
+                            ts.duration_since(SystemTime::UNIX_EPOCH)
+                                .expect("Received message time should be before UNIX EPOCH!")
+                                .as_micros(),
+                        ),
+                    },
+                );
+            }
+
+            // Send zenith feedback message.
+            // Regular standby_status_update fields are put into this message.
+            let zenith_status_update = ZenithFeedback {
+                current_timeline_size: timeline.get_current_logical_size() as u64,
+                ps_writelsn: write_lsn,
+                ps_flushlsn: flush_lsn,
+                ps_applylsn: apply_lsn,
+                ps_replytime: ts,
+            };
+
+            debug!("zenith_status_update {zenith_status_update:?}");
+
+            let mut data = BytesMut::new();
+            zenith_status_update.serialize(&mut data)?;
+            physical_stream
+                .as_mut()
+                .zenith_status_update(data.len() as u64, &data)
+                .await?;
+            if let Err(e) = events_sender.send(WalConnectionEvent::NewWal(zenith_status_update)) {
+                warn!("Wal connection event listener dropped, aborting the connection: {e}");
+                return Ok(());
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Data returned from the postgres `IDENTIFY_SYSTEM` command
+///
+/// Each field is parsed from the text value of one column of the first reply
+/// row, in column order: `systemid` is column 0, `timeline` column 1,
+/// `xlogpos` column 2 and `dbname` column 3.
+///
+/// See the [postgres docs] for more details.
+///
+/// [postgres docs]: https://www.postgresql.org/docs/current/protocol-replication.html
+#[derive(Debug)]
+// As of nightly 2021-09-11, fields that are only read by the type's `Debug` impl still count as
+// unused. Relevant issue: https://github.com/rust-lang/rust/issues/88900
+#[allow(dead_code)]
+struct IdentifySystem {
+    /// Unique system identifier of the cluster (per the postgres docs).
+    systemid: u64,
+    /// Current timeline ID (per the postgres docs).
+    timeline: u32,
+    /// Current WAL flush location (per the postgres docs).
+    xlogpos: PgLsn,
+    /// Database connected to, if any. `None` whenever the column has no value
+    /// or cannot be parsed; unlike the other fields this is not an error.
+    dbname: Option<String>,
+}
+
+/// There was a problem parsing the response to
+/// a postgres IDENTIFY_SYSTEM command.
+///
+/// Produced when the first reply message is not a data row, or when one of
+/// the required columns has no value or fails to parse into its target type.
+/// The error carries no detail about which of these happened.
+#[derive(Debug, thiserror::Error)]
+#[error("IDENTIFY_SYSTEM parse error")]
+struct IdentifyError;
+
+/// Run the postgres `IDENTIFY_SYSTEM` command
+///
+/// Issues the command as a simple query and builds an [`IdentifySystem`] from
+/// the first message of the reply. Query failures are propagated as-is; a reply
+/// whose first message is not a row, or whose first three columns cannot be
+/// parsed, yields an [`IdentifyError`]. A missing or unparsable `dbname`
+/// (column 3) is tolerated and stored as `None`.
+async fn identify_system(client: &mut Client) -> anyhow::Result<IdentifySystem> {
+    // Take column `idx` of `row` and parse its text into `T`.
+    // Both "no value" and "parse failure" collapse into `IdentifyError`.
+    fn get_parse<T>(row: &SimpleQueryRow, idx: usize) -> Result<T, IdentifyError>
+    where
+        T: FromStr,
+    {
+        match row.get(idx) {
+            Some(text) => text.parse::<T>().map_err(|_| IdentifyError),
+            None => Err(IdentifyError),
+        }
+    }
+
+    let reply = client.simple_query("IDENTIFY_SYSTEM").await?;
+
+    // Only the very first message is inspected; anything but a row is an error.
+    match reply.first() {
+        Some(SimpleQueryMessage::Row(row)) => Ok(IdentifySystem {
+            systemid: get_parse(row, 0)?,
+            timeline: get_parse(row, 1)?,
+            xlogpos: get_parse(row, 2)?,
+            dbname: get_parse(row, 3).ok(),
+        }),
+        _ => Err(IdentifyError.into()),
+    }
+}
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -28,6 +28,7 @@ use std::fs::OpenOptions;
 use std::io::prelude::*;
 use std::io::{Error, ErrorKind};
 use std::os::unix::io::AsRawFd;
+use std::os::unix::prelude::CommandExt;
 use std::path::PathBuf;
 use std::process::Stdio;
 use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command};
@@ -122,7 +123,7 @@ lazy_static! {

 ///
 /// This is the real implementation that uses a Postgres process to
-/// perform WAL replay. Only one thread can use the processs at a time,
+/// perform WAL replay. Only one thread can use the process at a time,
 /// that is controlled by the Mutex. In the future, we might want to
 /// launch a pool of processes to allow concurrent replay of multiple
 /// records.
@@ -134,7 +135,7 @@ pub struct PostgresRedoManager {
    process: Mutex<Option<PostgresRedoProcess>>,
 }

-/// Can this request be served by zenith redo funcitons
+/// Can this request be served by zenith redo functions
 /// or we need to pass it to wal-redo postgres process?
 fn can_apply_in_zenith(rec: &ZenithWalRecord) -> bool {
    // Currently, we don't have bespoken Rust code to replay any
@@ -554,6 +555,40 @@ impl PostgresRedoManager {
    }
 }

+///
+/// Command with ability not to give all file descriptors to child process
+///
+/// Extension trait layered on top of `CommandExt`, so that the call can be
+/// chained with the other builder methods of `Command`.
+///
+trait CloseFileDescriptors: CommandExt {
+    ///
+    /// Close file descriptors (other than stdin, stdout, stderr) in child process
+    ///
+    /// Returns the underlying `Command` to allow further chaining
+    /// (e.g. a following `.spawn()`).
+    ///
+    fn close_fds(&mut self) -> &mut Command;
+}
+
+impl<C: CommandExt> CloseFileDescriptors for C {
+    fn close_fds(&mut self) -> &mut Command {
+        // Hook installed via `pre_exec`. Descriptors 0-2 (stdin, stdout, stderr)
+        // are left alone; everything from 3 upwards is handed to
+        // `set_fds_cloexec_threadsafe`. The empty slice presumably means that
+        // no descriptor is exempted -- confirm against the close_fds crate docs.
+        let mark_fds_cloexec = || -> std::io::Result<()> {
+            close_fds::set_fds_cloexec_threadsafe(3, &[]);
+            Ok(())
+        };
+
+        // SAFETY: Code executed inside pre_exec should have async-signal-safety,
+        // i.e. it must be safe to run inside a signal handler. What exactly that
+        // means is platform dependent; `man signal-safety` gives the linux
+        // definition.
+        //
+        // `set_fds_cloexec_threadsafe` is documented to be async-signal-safe.
+        // Everything else in the hook is re-entrant and performs no syscalls:
+        // it only passes constants around.
+        //
+        // NOTE: a malloc or a mutex lock is easy to trigger indirectly, and
+        // neither is async-signal-safe. Be careful when extending the hook.
+        unsafe { self.pre_exec(mark_fds_cloexec) }
+    }
+}
+
 ///
 /// Handle to the Postgres WAL redo process
 ///
@@ -607,9 +642,10 @@ impl PostgresRedoProcess {
                .open(PathBuf::from(&datadir).join("postgresql.conf"))?;
            config.write_all(b"shared_buffers=128kB\n")?;
            config.write_all(b"fsync=off\n")?;
-            config.write_all(b"shared_preload_libraries=zenith\n")?;
-            config.write_all(b"zenith.wal_redo=on\n")?;
+            config.write_all(b"shared_preload_libraries=neon\n")?;
+            config.write_all(b"neon.wal_redo=on\n")?;
        }
+
        // Start postgres itself
        let mut child = Command::new(conf.pg_bin_dir().join("postgres"))
            .arg("--wal-redo")
@@ -620,6 +656,19 @@ impl PostgresRedoProcess {
            .env("LD_LIBRARY_PATH", conf.pg_lib_dir())
            .env("DYLD_LIBRARY_PATH", conf.pg_lib_dir())
            .env("PGDATA", &datadir)
+            // The redo process is not trusted, so it runs in seccomp mode
+            // (see seccomp in zenith_wal_redo.c). We have to make sure it doesn't
+            // inherit any file descriptors from the pageserver that would allow
+            // an attacker to do bad things.
+            //
+            // The Rust standard library makes sure to mark any file descriptors it
+            // opens as close-on-exec by default, but that's not enough, since we use
+            // libraries that directly call libc open without setting that flag.
+            //
+            // One example is the pidfile of the daemonize library, which doesn't
+            // currently mark file descriptors as close-on-exec. Either way, we
+            // want to be on the safe side and prevent accidental regression.
+            .close_fds()
            .spawn()
            .map_err(|e| {
                Error::new(
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -95,7 +95,7 @@ async fn handle_client(

 /// Establish a (most probably, secure) connection with the client.
 /// For better testing experience, `stream` can be any object satisfying the traits.
-/// It's easier to work with owned `stream` here as we need to updgrade it to TLS;
+/// It's easier to work with owned `stream` here as we need to upgrade it to TLS;
 /// we also take an extra care of propagating only the select handshake errors to client.
 async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    stream: S,
--- a/safekeeper/README.md
+++ b/safekeeper/README.md
@@ -75,7 +75,7 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
 one primary node can be actively streaming WAL to the quorum of
 safekeepers.

-See README_PROTO.md for a more detailed desription of the consensus
+See README_PROTO.md for a more detailed description of the consensus
 protocol. spec/ contains TLA+ specification of it.

 # Q&A
--- a/safekeeper/README_PROTO.md
+++ b/safekeeper/README_PROTO.md
@@ -143,7 +143,7 @@ Restart of PostgreSQL initiates new round of voting and switching new epoch.
 ## Limitations
 Right now message queue is maintained in main memory and is not spilled to the disk.
 It can cause memory overflow in case of presence of lagging safekeepers.
-It is assumed that in case of loosing local data by some safekeepers, it should be recovered using some external mechanism.
+It is assumed that in case of losing local data by some safekeepers, it should be recovered using some external mechanism.


 ## Glossary
@@ -152,8 +152,8 @@ It is assumed that in case of loosing local data by some safekeepers, it should
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
 * `NodeID`: pair (term,UUID)
 * `Pager`: Neon component restoring pages from WAL stream
-* `Replica`: read-only computatio node
-* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.
+* `Replica`: read-only computation node
+* `VCL`: the largest LSN for which we can guarantee availability of all prior records.

 ## Algorithm

--- a/safekeeper/spec/ProposerAcceptorConsensus.tla
+++ b/safekeeper/spec/ProposerAcceptorConsensus.tla
@@ -88,7 +88,7 @@ TypeOk ==
      \* in campaign proposer sends RequestVote and waits for acks;
      \* in leader he is elected
      /\ prop_state[p].state \in {"campaign", "leader"}
-      \* 0..max_term should be actually Nat in the unbouned model, but TLC won't
+      \* 0..max_term should be actually Nat in the unbounded model, but TLC won't
      \* swallow it
      /\ prop_state[p].term \in 0..max_term
      \* votes received
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -16,7 +16,8 @@ use toml_edit::Document;
 use tracing::*;
 use url::{ParseError, Url};

-use safekeeper::control_file::{self};
+use safekeeper::broker;
+use safekeeper::control_file;
 use safekeeper::defaults::{
    DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR, DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
 };
@@ -26,7 +27,6 @@ use safekeeper::timeline::GlobalTimelines;
 use safekeeper::wal_backup;
 use safekeeper::wal_service;
 use safekeeper::SafeKeeperConf;
-use safekeeper::{broker, callmemaybe};
 use utils::{
    http::endpoint, logging, project_git_version, shutdown::exit_now, signals, tcp_listener,
    zid::NodeId,
@@ -100,7 +100,7 @@ fn main() -> anyhow::Result<()> {
            Arg::new("dump-control-file")
                .long("dump-control-file")
                .takes_value(true)
-                .help("Dump control file at path specifed by this argument and exit"),
+                .help("Dump control file at path specified by this argument and exit"),
        )
        .arg(
            Arg::new("id").long("id").takes_value(true).help("safekeeper node id: integer")
@@ -264,11 +264,16 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        }
    }

+    // Register metrics collector for active timelines. It's important to do this
+    // after daemonizing, otherwise process collector will be upset.
+    let registry = metrics::default_registry();
+    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
+    registry.register(Box::new(timeline_collector))?;
+
    let signals = signals::install_shutdown_handlers()?;
    let mut threads = vec![];
-    let (callmemaybe_tx, callmemaybe_rx) = mpsc::unbounded_channel();
    let (wal_backup_launcher_tx, wal_backup_launcher_rx) = mpsc::channel(100);
-    GlobalTimelines::init(callmemaybe_tx, wal_backup_launcher_tx);
+    GlobalTimelines::init(wal_backup_launcher_tx);

    let conf_ = conf.clone();
    threads.push(
@@ -290,29 +295,14 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
    let safekeeper_thread = thread::Builder::new()
        .name("Safekeeper thread".into())
        .spawn(|| {
-            // thread code
-            let thread_result = wal_service::thread_main(conf_cloned, pg_listener);
-            if let Err(e) = thread_result {
-                info!("safekeeper thread terminated: {}", e);
+            if let Err(e) = wal_service::thread_main(conf_cloned, pg_listener) {
+                info!("safekeeper thread terminated: {e}");
            }
        })
        .unwrap();

    threads.push(safekeeper_thread);

-    let conf_cloned = conf.clone();
-    let callmemaybe_thread = thread::Builder::new()
-        .name("callmemaybe thread".into())
-        .spawn(|| {
-            // thread code
-            let thread_result = callmemaybe::thread_main(conf_cloned, callmemaybe_rx);
-            if let Err(e) = thread_result {
-                error!("callmemaybe thread terminated: {}", e);
-            }
-        })
-        .unwrap();
-    threads.push(callmemaybe_thread);
-
    if !conf.broker_endpoints.is_empty() {
        let conf_ = conf.clone();
        threads.push(
--- a/safekeeper/src/callmemaybe.rs
+++ b/safekeeper/src/callmemaybe.rs
@@ -39,7 +39,7 @@ async fn request_callback(
        }
    });

-    // use Config parsing because SockAddr parsing doesnt allow to use host names instead of ip addresses
+    // use Config parsing because SockAddr parsing doesn't allow to use host names instead of ip addresses
    let me_connstr = format!("postgresql://no_user@{}/no_db", listen_pg_addr_str);
    let me_conf: postgres::config::Config = me_connstr.parse().unwrap();
    let (host, port) = connection_host_port(&me_conf);
--- a/safekeeper/src/control_file_upgrade.rs
+++ b/safekeeper/src/control_file_upgrade.rs
@@ -27,7 +27,7 @@ struct SafeKeeperStateV1 {
    acceptor_state: AcceptorStateV1,
    /// information about server
    server: ServerInfoV2,
-    /// Unique id of the last *elected* proposer we dealed with. Not needed
+    /// Unique id of the last *elected* proposer we dealt with. Not needed
    /// for correctness, exists for monitoring purposes.
    proposer_uuid: PgUuid,
    /// part of WAL acknowledged by quorum and available locally
@@ -57,7 +57,7 @@ pub struct SafeKeeperStateV2 {
    pub acceptor_state: AcceptorState,
    /// information about server
    pub server: ServerInfoV2,
-    /// Unique id of the last *elected* proposer we dealed with. Not needed
+    /// Unique id of the last *elected* proposer we dealt with. Not needed
    /// for correctness, exists for monitoring purposes.
    pub proposer_uuid: PgUuid,
    /// part of WAL acknowledged by quorum and available locally
@@ -89,7 +89,7 @@ pub struct SafeKeeperStateV3 {
    pub acceptor_state: AcceptorState,
    /// information about server
    pub server: ServerInfoV3,
-    /// Unique id of the last *elected* proposer we dealed with. Not needed
+    /// Unique id of the last *elected* proposer we dealt with. Not needed
    /// for correctness, exists for monitoring purposes.
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
@@ -114,7 +114,7 @@ pub struct SafeKeeperStateV4 {
    pub acceptor_state: AcceptorState,
    /// information about server
    pub server: ServerInfo,
-    /// Unique id of the last *elected* proposer we dealed with. Not needed
+    /// Unique id of the last *elected* proposer we dealt with. Not needed
    /// for correctness, exists for monitoring purposes.
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -8,12 +8,12 @@ use url::Url;
 use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};

 pub mod broker;
-pub mod callmemaybe;
 pub mod control_file;
 pub mod control_file_upgrade;
 pub mod handler;
 pub mod http;
 pub mod json_ctrl;
+pub mod metrics;
 pub mod receive_wal;
 pub mod remove_wal;
 pub mod safekeeper;
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -0,0 +1,336 @@
+//! This module exports metrics for all active timelines.
+
+use std::time::{Instant, SystemTime};
+
+use metrics::{
+    core::{AtomicU64, Collector, Desc, GenericGaugeVec, Opts},
+    proto::MetricFamily,
+    Gauge, IntGaugeVec,
+};
+use postgres_ffi::xlog_utils::XLogSegNo;
+use utils::{lsn::Lsn, zid::ZTenantTimelineId};
+
+use crate::{
+    safekeeper::{SafeKeeperState, SafekeeperMemState},
+    timeline::{GlobalTimelines, ReplicaState},
+};
+
+/// Snapshot of one timeline's state, consumed by `TimelineCollector::collect`
+/// to fill in the per-timeline gauges.
+pub struct FullTimelineInfo {
+    /// Tenant and timeline ids; rendered as the `tenant_id` / `timeline_id` labels.
+    pub zttid: ZTenantTimelineId,
+    /// Replica states; the collector scans their `zenith_feedback` and reports
+    /// the one with the highest `ps_writelsn`.
+    pub replicas: Vec<ReplicaState>,
+    /// Exported as `safekeeper_wal_backup_active` (1 or 0).
+    pub wal_backup_active: bool,
+    /// Exported as `safekeeper_timeline_active` (1 or 0).
+    pub timeline_is_active: bool,
+    /// Exported as `safekeeper_connected_computes`.
+    pub num_computes: u32,
+    /// Segment number used as the lower bound of the disk usage estimate.
+    /// When it is 0 the collector reports no disk usage for the timeline
+    /// (presumably meaning nothing has been removed yet -- confirm with the producer).
+    pub last_removed_segno: XLogSegNo,
+
+    /// Exported as `safekeeper_epoch_start_lsn`.
+    pub epoch_start_lsn: Lsn,
+    /// In-memory state; source of commit, backup, peer horizon and remote consistent LSNs.
+    pub mem_state: SafekeeperMemState,
+    /// Persisted state; source of the acceptor term and of the WAL segment size.
+    pub persisted_state: SafeKeeperState,
+
+    /// Exported as `safekeeper_flush_lsn`; also the upper bound of the disk usage estimate.
+    pub flush_lsn: Lsn,
+}
+
+/// Metrics collector that reports one set of gauges per active timeline.
+///
+/// All gauge vectors are labelled with `tenant_id` and `timeline_id`. Values
+/// are not updated incrementally: they are recomputed on every `collect` call.
+pub struct TimelineCollector {
+    /// Descriptors of every metric below, gathered at construction time.
+    descs: Vec<Desc>,
+    /// `safekeeper_commit_lsn`: current commit_lsn (not necessarily persisted to disk).
+    commit_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_backup_lsn`: LSN up to which WAL is backed up.
+    backup_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_flush_lsn`: current flush_lsn.
+    flush_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_epoch_start_lsn`: point since which compute generates new WAL
+    /// in the current consensus term.
+    epoch_start_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_peer_horizon_lsn`: LSN of the most lagging safekeeper.
+    peer_horizon_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_remote_consistent_lsn`: LSN persisted to the remote storage in pageserver.
+    remote_consistent_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_feedback_ps_write_lsn`: last LSN received by the pageserver,
+    /// as acknowledged in its feedback.
+    feedback_ps_write_lsn: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_feedback_last_time_seconds`: timestamp of the last pageserver feedback.
+    feedback_last_time_seconds: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_timeline_active`: 1 for active timelines, 0 for inactive.
+    timeline_active: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_wal_backup_active`: 1 for timelines with active WAL backup, 0 otherwise.
+    wal_backup_active: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_connected_computes`: number of active compute connections.
+    connected_computes: IntGaugeVec,
+    /// `safekeeper_disk_usage_bytes`: estimated disk space used to store WAL segments.
+    disk_usage: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_acceptor_term`: current consensus term.
+    acceptor_term: GenericGaugeVec<AtomicU64>,
+    /// `safekeeper_collect_timeline_metrics_seconds`: time spent in the last `collect` call.
+    /// Unlabelled, unlike the vectors above.
+    collect_timeline_metrics: Gauge,
+}
+
+impl Default for TimelineCollector {
+    /// Equivalent to [`TimelineCollector::new`].
+    fn default() -> Self {
+        TimelineCollector::new()
+    }
+}
+
+impl TimelineCollector {
+    /// Creates every per-timeline metric and records its descriptors, in
+    /// creation order, for later use by `Collector::desc`.
+    ///
+    /// Nothing is registered here; the caller hands the collector to a registry.
+    ///
+    /// Panics if any of the metrics cannot be constructed.
+    pub fn new() -> TimelineCollector {
+        // Label names shared by all per-timeline gauge vectors.
+        const TIMELINE_LABELS: &[&str] = &["tenant_id", "timeline_id"];
+
+        // Builds one u64 gauge vector keyed by `TIMELINE_LABELS` and appends
+        // its descriptors to `descs`.
+        fn u64_gauge_vec(
+            descs: &mut Vec<Desc>,
+            name: &str,
+            help: &str,
+        ) -> GenericGaugeVec<AtomicU64> {
+            let gauge_vec = GenericGaugeVec::new(Opts::new(name, help), TIMELINE_LABELS).unwrap();
+            descs.extend(gauge_vec.desc().into_iter().cloned());
+            gauge_vec
+        }
+
+        let mut descs = Vec::new();
+
+        let commit_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_commit_lsn",
+            "Current commit_lsn (not necessarily persisted to disk), grouped by timeline",
+        );
+        let backup_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_backup_lsn",
+            "Current backup_lsn, up to which WAL is backed up, grouped by timeline",
+        );
+        let flush_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_flush_lsn",
+            "Current flush_lsn, grouped by timeline",
+        );
+        let epoch_start_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_epoch_start_lsn",
+            "Point since which compute generates new WAL in the current consensus term",
+        );
+        let peer_horizon_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_peer_horizon_lsn",
+            "LSN of the most lagging safekeeper",
+        );
+        let remote_consistent_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_remote_consistent_lsn",
+            "LSN which is persisted to the remote storage in pageserver",
+        );
+        let feedback_ps_write_lsn = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_feedback_ps_write_lsn",
+            "Last LSN received by the pageserver, acknowledged in the feedback",
+        );
+        let feedback_last_time_seconds = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_feedback_last_time_seconds",
+            "Timestamp of the last feedback from the pageserver",
+        );
+        let timeline_active = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_timeline_active",
+            "Reports 1 for active timelines, 0 for inactive",
+        );
+        let wal_backup_active = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_wal_backup_active",
+            "Reports 1 for timelines with active WAL backup, 0 otherwise",
+        );
+
+        // The only signed gauge vector, so it is built by hand rather than
+        // through the u64 helper.
+        let connected_computes = IntGaugeVec::new(
+            Opts::new(
+                "safekeeper_connected_computes",
+                "Number of active compute connections",
+            ),
+            TIMELINE_LABELS,
+        )
+        .unwrap();
+        descs.extend(connected_computes.desc().into_iter().cloned());
+
+        let disk_usage = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_disk_usage_bytes",
+            "Estimated disk space used to store WAL segments",
+        );
+        let acceptor_term = u64_gauge_vec(
+            &mut descs,
+            "safekeeper_acceptor_term",
+            "Current consensus term",
+        );
+
+        // Unlabelled gauge: one value for the whole collection pass.
+        let collect_timeline_metrics = Gauge::new(
+            "safekeeper_collect_timeline_metrics_seconds",
+            "Time spent collecting timeline metrics, including obtaining mutex lock for all timelines",
+        )
+        .unwrap();
+        descs.extend(collect_timeline_metrics.desc().into_iter().cloned());
+
+        TimelineCollector {
+            descs,
+            commit_lsn,
+            backup_lsn,
+            flush_lsn,
+            epoch_start_lsn,
+            peer_horizon_lsn,
+            remote_consistent_lsn,
+            feedback_ps_write_lsn,
+            feedback_last_time_seconds,
+            timeline_active,
+            wal_backup_active,
+            connected_computes,
+            disk_usage,
+            acceptor_term,
+            collect_timeline_metrics,
+        }
+    }
+}
+
+impl Collector for TimelineCollector {
+    /// Returns the descriptors recorded when the collector was built.
+    fn desc(&self) -> Vec<&Desc> {
+        self.descs.iter().collect::<Vec<_>>()
+    }
+
+    /// Rebuilds all per-timeline gauges from the current set of active
+    /// timelines and returns the resulting metric families, followed by the
+    /// gauge that reports how long this pass took.
+    fn collect(&self) -> Vec<MetricFamily> {
+        let started_at = Instant::now();
+
+        // Wipe every labelled series first, so that timelines which are no
+        // longer active disappear from the output.
+        self.commit_lsn.reset();
+        self.backup_lsn.reset();
+        self.flush_lsn.reset();
+        self.epoch_start_lsn.reset();
+        self.peer_horizon_lsn.reset();
+        self.remote_consistent_lsn.reset();
+        self.feedback_ps_write_lsn.reset();
+        self.feedback_last_time_seconds.reset();
+        self.timeline_active.reset();
+        self.wal_backup_active.reset();
+        self.connected_computes.reset();
+        self.disk_usage.reset();
+        self.acceptor_term.reset();
+
+        for info in GlobalTimelines::active_timelines_metrics() {
+            let tenant_label = info.zttid.tenant_id.to_string();
+            let timeline_label = info.zttid.timeline_id.to_string();
+            let labels = &[tenant_label.as_str(), timeline_label.as_str()];
+
+            // Feedback with the highest ps_writelsn among all replicas; on a
+            // tie the replica seen first wins.
+            let most_advanced = info
+                .replicas
+                .iter()
+                .filter_map(|replica| replica.zenith_feedback)
+                .fold(
+                    None,
+                    |best: Option<utils::pq_proto::ZenithFeedback>, candidate| match best {
+                        Some(current) if current.ps_writelsn >= candidate.ps_writelsn => {
+                            Some(current)
+                        }
+                        _ => Some(candidate),
+                    },
+                );
+
+            let mem = &info.mem_state;
+            self.commit_lsn
+                .with_label_values(labels)
+                .set(mem.commit_lsn.into());
+            self.backup_lsn
+                .with_label_values(labels)
+                .set(mem.backup_lsn.into());
+            self.flush_lsn
+                .with_label_values(labels)
+                .set(info.flush_lsn.into());
+            self.epoch_start_lsn
+                .with_label_values(labels)
+                .set(info.epoch_start_lsn.into());
+            self.peer_horizon_lsn
+                .with_label_values(labels)
+                .set(mem.peer_horizon_lsn.into());
+            self.remote_consistent_lsn
+                .with_label_values(labels)
+                .set(mem.remote_consistent_lsn.into());
+            self.timeline_active
+                .with_label_values(labels)
+                .set(info.timeline_is_active as u64);
+            self.wal_backup_active
+                .with_label_values(labels)
+                .set(info.wal_backup_active as u64);
+            self.connected_computes
+                .with_label_values(labels)
+                .set(info.num_computes as i64);
+            self.acceptor_term
+                .with_label_values(labels)
+                .set(info.persisted_state.acceptor_state.term as u64);
+
+            // Feedback gauges stay absent for timelines without any feedback.
+            if let Some(feedback) = most_advanced {
+                self.feedback_ps_write_lsn
+                    .with_label_values(labels)
+                    .set(feedback.ps_writelsn);
+                // A reply time before the unix epoch is silently skipped.
+                if let Ok(since_epoch) =
+                    feedback.ps_replytime.duration_since(SystemTime::UNIX_EPOCH)
+                {
+                    self.feedback_last_time_seconds
+                        .with_label_values(labels)
+                        .set(since_epoch.as_secs());
+                }
+            }
+
+            // Disk usage is estimated as the number of segments between the
+            // last removed one and the segment holding flush_lsn, times the
+            // segment size. No estimate is reported while last_removed_segno is 0.
+            if info.last_removed_segno != 0 {
+                let seg_size = info.persisted_state.server.wal_seg_size;
+                let flush_segno = info.flush_lsn.segment_number(seg_size as usize);
+                let retained_segments = flush_segno - info.last_removed_segno;
+                self.disk_usage
+                    .with_label_values(labels)
+                    .set(retained_segments * seg_size as u64);
+            }
+        }
+
+        // Gather the per-timeline families, in a fixed order.
+        let per_timeline: &[&dyn Collector] = &[
+            &self.commit_lsn,
+            &self.backup_lsn,
+            &self.flush_lsn,
+            &self.epoch_start_lsn,
+            &self.peer_horizon_lsn,
+            &self.remote_consistent_lsn,
+            &self.feedback_ps_write_lsn,
+            &self.feedback_last_time_seconds,
+            &self.timeline_active,
+            &self.wal_backup_active,
+            &self.connected_computes,
+            &self.disk_usage,
+            &self.acceptor_term,
+        ];
+        let mut families = Vec::new();
+        for source in per_timeline.iter() {
+            families.extend(Collector::collect(*source));
+        }
+
+        // The timing gauge is set last so that it covers everything above.
+        self.collect_timeline_metrics
+            .set(started_at.elapsed().as_secs_f64());
+        families.extend(self.collect_timeline_metrics.collect());
+
+        families
+    }
+}
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -15,13 +15,10 @@ use std::fmt;
 use std::io::Read;
 use tracing::*;

-use lazy_static::lazy_static;
-
 use crate::control_file;
 use crate::send_wal::HotStandbyFeedback;

 use crate::wal_storage;
-use metrics::{register_gauge_vec, Gauge, GaugeVec};
 use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
 use utils::{
    bin_ser::LeSer,
@@ -180,7 +177,7 @@ pub struct SafeKeeperState {
    pub acceptor_state: AcceptorState,
    /// information about server
    pub server: ServerInfo,
-    /// Unique id of the last *elected* proposer we dealed with. Not needed
+    /// Unique id of the last *elected* proposer we dealt with. Not needed
    /// for correctness, exists for monitoring purposes.
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
@@ -487,45 +484,16 @@ impl AcceptorProposerMessage {
    }
 }

-lazy_static! {
-    // The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
-    // i64 is faster than f64, so update to u64 when available.
-    static ref COMMIT_LSN_GAUGE: GaugeVec = register_gauge_vec!(
-        "safekeeper_commit_lsn",
-        "Current commit_lsn (not necessarily persisted to disk), grouped by timeline",
-        &["tenant_id", "timeline_id"]
-    )
-    .expect("Failed to register safekeeper_commit_lsn gauge vec");
-}
-
-struct SafeKeeperMetrics {
-    commit_lsn: Gauge,
-    // WAL-related metrics are in WalStorageMetrics
-}
-
-impl SafeKeeperMetrics {
-    fn new(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> Self {
-        let tenant_id = tenant_id.to_string();
-        let timeline_id = timeline_id.to_string();
-        Self {
-            commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&[&tenant_id, &timeline_id]),
-        }
-    }
-}
-
 /// SafeKeeper which consumes events (messages from compute) and provides
 /// replies.
 pub struct SafeKeeper<CTRL: control_file::Storage, WAL: wal_storage::Storage> {
-    // Cached metrics so we don't have to recompute labels on each update.
-    metrics: SafeKeeperMetrics,
-
    /// Maximum commit_lsn between all nodes, can be ahead of local flush_lsn.
    /// Note: be careful to set only if we are sure our WAL (term history) matches
    /// committed one.
    pub global_commit_lsn: Lsn,
    /// LSN since the proposer safekeeper currently talking to appends WAL;
    /// determines epoch switch point.
-    epoch_start_lsn: Lsn,
+    pub epoch_start_lsn: Lsn,

    pub inmem: SafekeeperMemState, // in memory part
    pub state: CTRL,               // persistent state storage
@@ -555,7 +523,6 @@ where
        wal_store.init_storage(&state)?;

        Ok(SafeKeeper {
-            metrics: SafeKeeperMetrics::new(state.tenant_id, ztli),
            global_commit_lsn: state.commit_lsn,
            epoch_start_lsn: Lsn(0),
            inmem: SafekeeperMemState {
@@ -757,9 +724,8 @@ where
            // upgrade.
            self.global_commit_lsn = max(self.global_commit_lsn, state.timeline_start_lsn);
            self.inmem.commit_lsn = max(self.inmem.commit_lsn, state.timeline_start_lsn);
-            self.metrics.commit_lsn.set(self.inmem.commit_lsn.0 as f64);

-            // Initalizing backup_lsn is useful to avoid making backup think it should upload 0 segment.
+            // Initializing backup_lsn is useful to avoid making backup think it should upload 0 segment.
            self.inmem.backup_lsn = max(self.inmem.backup_lsn, state.timeline_start_lsn);

            state.acceptor_state.term_history = msg.term_history.clone();
@@ -777,7 +743,6 @@ where
        assert!(commit_lsn >= self.inmem.commit_lsn);

        self.inmem.commit_lsn = commit_lsn;
-        self.metrics.commit_lsn.set(self.inmem.commit_lsn.0 as f64);

        // If new commit_lsn reached epoch switch, force sync of control
        // file: walproposer in sync mode is very interested when this
--- a/safekeeper/src/send_wal.rs
+++ b/safekeeper/src/send_wal.rs
@@ -8,7 +8,6 @@ use anyhow::{bail, Context, Result};

 use postgres_ffi::xlog_utils::{get_current_timestamp, TimestampTz, MAX_SEND_SIZE};

-use crate::callmemaybe::{CallmeEvent, SubscriptionStateKey};
 use bytes::Bytes;
 use serde::{Deserialize, Serialize};
 use std::cmp::min;
@@ -17,7 +16,6 @@ use std::sync::Arc;
 use std::thread::sleep;
 use std::time::Duration;
 use std::{str, thread};
-use tokio::sync::mpsc::UnboundedSender;
 use tracing::*;
 use utils::{
    bin_ser::BeSer,
@@ -25,7 +23,6 @@ use utils::{
    postgres_backend::PostgresBackend,
    pq_proto::{BeMessage, FeMessage, WalSndKeepAlive, XLogDataBody, ZenithFeedback},
    sock_split::ReadStream,
-    zid::{ZTenantId, ZTimelineId},
 };

 // See: https://www.postgresql.org/docs/13/protocol-replication.html
@@ -83,40 +80,6 @@ impl Drop for ReplicationConnGuard {
    }
 }

-// XXX: Naming is a bit messy here.
-// This ReplicationStreamGuard lives as long as ReplicationConn
-// and current ReplicationConnGuard is tied to the background thread
-// that receives feedback.
-struct ReplicationStreamGuard {
-    tx: UnboundedSender<CallmeEvent>,
-    tenant_id: ZTenantId,
-    timeline_id: ZTimelineId,
-    pageserver_connstr: String,
-}
-
-impl Drop for ReplicationStreamGuard {
-    fn drop(&mut self) {
-        // the connection with pageserver is lost,
-        // resume callback subscription
-        debug!(
-            "Connection to pageserver is gone. Resume callmemaybe subsciption if necessary. tenantid {} timelineid {}",
-            self.tenant_id, self.timeline_id,
-        );
-
-        let subscription_key = SubscriptionStateKey::new(
-            self.tenant_id,
-            self.timeline_id,
-            self.pageserver_connstr.to_owned(),
-        );
-
-        self.tx
-            .send(CallmeEvent::Resume(subscription_key))
-            .unwrap_or_else(|e| {
-                error!("failed to send Resume request to callmemaybe thread {}", e);
-            });
-    }
-}
-
 impl ReplicationConn {
    /// Create a new `ReplicationConn`
    pub fn new(pgb: &mut PostgresBackend) -> Self {
@@ -256,36 +219,6 @@ impl ReplicationConn {
        };
        info!("Start replication from {:?} till {:?}", start_pos, stop_pos);

-        // Don't spam pageserver with callmemaybe queries
-        // when replication connection with pageserver is already established.
-        let _guard = {
-            if spg.appname == Some("wal_proposer_recovery".to_string()) {
-                None
-            } else {
-                let pageserver_connstr = pageserver_connstr.expect("there should be a pageserver connection string since this is not a wal_proposer_recovery");
-                let zttid = spg.timeline.get().zttid;
-                let tx_clone = spg.timeline.get().callmemaybe_tx.clone();
-                let subscription_key = SubscriptionStateKey::new(
-                    zttid.tenant_id,
-                    zttid.timeline_id,
-                    pageserver_connstr.clone(),
-                );
-                tx_clone
-                    .send(CallmeEvent::Pause(subscription_key))
-                    .unwrap_or_else(|e| {
-                        error!("failed to send Pause request to callmemaybe thread {}", e);
-                    });
-
-                // create a guard to subscribe callback again, when this connection will exit
-                Some(ReplicationStreamGuard {
-                    tx: tx_clone,
-                    tenant_id: zttid.tenant_id,
-                    timeline_id: zttid.timeline_id,
-                    pageserver_connstr,
-                })
-            }
-        };
-
        // switch to copy
        pgb.write_message(&BeMessage::CopyBothResponse)?;

--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -16,7 +16,7 @@ use std::fs::{self};

 use std::sync::{Arc, Condvar, Mutex, MutexGuard};
 use std::time::Duration;
-use tokio::sync::mpsc::{Sender, UnboundedSender};
+use tokio::sync::mpsc::Sender;
 use tracing::*;

 use utils::{
@@ -25,7 +25,6 @@ use utils::{
    zid::{NodeId, ZTenantId, ZTenantTimelineId},
 };

-use crate::callmemaybe::{CallmeEvent, SubscriptionStateKey};
 use crate::control_file;
 use crate::safekeeper::{
    AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, SafeKeeperState,
@@ -33,6 +32,7 @@ use crate::safekeeper::{
 };
 use crate::send_wal::HotStandbyFeedback;

+use crate::metrics::FullTimelineInfo;
 use crate::wal_storage;
 use crate::wal_storage::Storage as wal_storage_iface;
 use crate::SafeKeeperConf;
@@ -190,79 +190,33 @@ impl SharedState {
        self.wal_backup_active
    }

-    /// start/change walsender (via callmemaybe).
-    fn callmemaybe_sub(
+    /// Activate timeline's walsender: start/change timeline information propagated into etcd for further pageserver connections.
+    fn activate_walsender(
        &mut self,
        zttid: &ZTenantTimelineId,
-        pageserver_connstr: Option<&String>,
-        callmemaybe_tx: &UnboundedSender<CallmeEvent>,
-    ) -> Result<()> {
-        if let Some(ref pageserver_connstr) = self.pageserver_connstr {
-            // unsub old sub. xxx: callmemaybe is going out
-            let old_subscription_key = SubscriptionStateKey::new(
-                zttid.tenant_id,
-                zttid.timeline_id,
-                pageserver_connstr.to_owned(),
-            );
-            callmemaybe_tx
-                .send(CallmeEvent::Unsubscribe(old_subscription_key))
-                .unwrap_or_else(|e| {
-                    error!("failed to send Pause request to callmemaybe thread {}", e);
-                });
+        new_pageserver_connstr: Option<String>,
+    ) {
+        if self.pageserver_connstr != new_pageserver_connstr {
+            self.deactivate_walsender(zttid);
+
+            if new_pageserver_connstr.is_some() {
+                info!(
+                    "timeline {} has activated its walsender with connstr {new_pageserver_connstr:?}",
+                    zttid.timeline_id,
+                );
+            }
+            self.pageserver_connstr = new_pageserver_connstr;
        }
-        if let Some(pageserver_connstr) = pageserver_connstr {
-            let subscription_key = SubscriptionStateKey::new(
-                zttid.tenant_id,
-                zttid.timeline_id,
-                pageserver_connstr.to_owned(),
-            );
-            // xx: sending to channel under lock is not very cool, but
-            // shouldn't be a problem here. If it is, we can grab a counter
-            // here and later augment channel messages with it.
-            callmemaybe_tx
-                .send(CallmeEvent::Subscribe(subscription_key))
-                .unwrap_or_else(|e| {
-                    error!(
-                        "failed to send Subscribe request to callmemaybe thread {}",
-                        e
-                    );
-                });
-            info!(
-                "timeline {} is subscribed to callmemaybe to {}",
-                zttid.timeline_id, pageserver_connstr
-            );
-        }
-        self.pageserver_connstr = pageserver_connstr.map(|c| c.to_owned());
-        Ok(())
    }

-    /// Deactivate the timeline: stop callmemaybe.
-    fn callmemaybe_unsub(
-        &mut self,
-        zttid: &ZTenantTimelineId,
-        callmemaybe_tx: &UnboundedSender<CallmeEvent>,
-    ) -> Result<()> {
-        if let Some(ref pageserver_connstr) = self.pageserver_connstr {
-            let subscription_key = SubscriptionStateKey::new(
-                zttid.tenant_id,
-                zttid.timeline_id,
-                pageserver_connstr.to_owned(),
-            );
-            callmemaybe_tx
-                .send(CallmeEvent::Unsubscribe(subscription_key))
-                .unwrap_or_else(|e| {
-                    error!(
-                        "failed to send Unsubscribe request to callmemaybe thread {}",
-                        e
-                    );
-                });
+    /// Deactivate timeline's walsender: forget the stored pageserver connstr (logging it, if one was set) to stop sending the timeline data into etcd, so no pageserver can connect for WAL streaming.
+    fn deactivate_walsender(&mut self, zttid: &ZTenantTimelineId) {
+        if let Some(pageserver_connstr) = self.pageserver_connstr.take() {
            info!(
-                "timeline {} is unsubscribed from callmemaybe to {}",
+                "timeline {} had deactivated its walsender with connstr {pageserver_connstr:?}",
                zttid.timeline_id,
-                self.pageserver_connstr.as_ref().unwrap()
-            );
+            )
        }
-        Ok(())
    }

    fn get_wal_seg_size(&self) -> usize {
@@ -331,7 +285,6 @@ impl SharedState {
 /// Database instance (tenant)
 pub struct Timeline {
    pub zttid: ZTenantTimelineId,
-    pub callmemaybe_tx: UnboundedSender<CallmeEvent>,
    /// Sending here asks for wal backup launcher attention (start/stop
    /// offloading). Sending zttid instead of concrete command allows to do
    /// sending without timeline lock.
@@ -347,7 +300,6 @@ pub struct Timeline {
 impl Timeline {
    fn new(
        zttid: ZTenantTimelineId,
-        callmemaybe_tx: UnboundedSender<CallmeEvent>,
        wal_backup_launcher_tx: Sender<ZTenantTimelineId>,
        shared_state: SharedState,
    ) -> Timeline {
@@ -355,7 +307,6 @@ impl Timeline {
            watch::channel(shared_state.sk.inmem.commit_lsn);
        Timeline {
            zttid,
-            callmemaybe_tx,
            wal_backup_launcher_tx,
            commit_lsn_watch_tx,
            commit_lsn_watch_rx,
@@ -377,7 +328,7 @@ impl Timeline {
            // should have kind of generations assigned by compute to distinguish
            // the latest one or even pass it through consensus to reliably deliver
            // to all safekeepers.
-            shared_state.callmemaybe_sub(&self.zttid, pageserver_connstr, &self.callmemaybe_tx)?;
+            shared_state.activate_walsender(&self.zttid, pageserver_connstr.cloned());
        }
        // Wake up wal backup launcher, if offloading not started yet.
        if is_wal_backup_action_pending {
@@ -413,7 +364,7 @@ impl Timeline {
            (replica_state.remote_consistent_lsn != Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
             replica_state.remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
            if stop {
-                shared_state.callmemaybe_unsub(&self.zttid, &self.callmemaybe_tx)?;
+                shared_state.deactivate_walsender(&self.zttid);
                return Ok(true);
            }
        }
@@ -430,16 +381,14 @@ impl Timeline {
    /// Deactivates the timeline, assuming it is being deleted.
    /// Returns whether the timeline was already active.
    ///
-    /// The callmemaybe thread is stopped by the deactivation message. We assume all other threads
-    /// will stop by themselves eventually (possibly with errors, but no panics). There should be no
-    /// compute threads (as we're deleting the timeline), actually. Some WAL may be left unsent, but
+    /// We assume all threads will stop by themselves eventually (possibly with errors, but no panics).
+    /// There should be no compute threads (as we're deleting the timeline), actually. Some WAL may be left unsent, but
    /// we're deleting the timeline anyway.
    pub async fn deactivate_for_delete(&self) -> Result<bool> {
        let was_active: bool;
        {
-            let mut shared_state = self.mutex.lock().unwrap();
+            let shared_state = self.mutex.lock().unwrap();
            was_active = shared_state.active;
-            shared_state.callmemaybe_unsub(&self.zttid, &self.callmemaybe_tx)?;
        }
        self.wal_backup_launcher_tx.send(self.zttid).await?;
        Ok(was_active)
@@ -450,6 +399,33 @@ impl Timeline {
        shared_state.active
    }

+    /// Takes a snapshot of the timeline state that is needed to report metrics.
+    /// An inactive timeline yields None.
+    pub fn info_for_metrics(&self) -> Option<FullTimelineInfo> {
+        let shared = self.mutex.lock().unwrap();
+        if !shared.active {
+            return None;
+        }
+        let sk = &shared.sk;
+        Some(FullTimelineInfo {
+            zttid: self.zttid,
+            replicas: shared
+                .replicas
+                .iter()
+                .flatten() // skip empty replica slots
+                .copied()
+                .collect(),
+            wal_backup_active: shared.wal_backup_active,
+            timeline_is_active: shared.active,
+            num_computes: shared.num_computes,
+            last_removed_segno: shared.last_removed_segno,
+            epoch_start_lsn: sk.epoch_start_lsn,
+            mem_state: sk.inmem.clone(),
+            persisted_state: sk.state.clone(),
+            flush_lsn: sk.wal_store.flush_lsn(),
+        })
+    }
+
    /// Timed wait for an LSN to be committed.
    ///
    /// Returns the last committed LSN, which will be at least
@@ -548,7 +524,8 @@ impl Timeline {
                shared_state.sk.inmem.remote_consistent_lsn,
            )),
            peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
-            safekeeper_connection_string: Some(conf.listen_pg_addr.clone()),
+            safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
+            pageserver_connstr: shared_state.pageserver_connstr.clone(),
            backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
        })
    }
@@ -647,14 +624,12 @@ impl TimelineTools for Option<Arc<Timeline>> {

 struct GlobalTimelinesState {
    timelines: HashMap<ZTenantTimelineId, Arc<Timeline>>,
-    callmemaybe_tx: Option<UnboundedSender<CallmeEvent>>,
    wal_backup_launcher_tx: Option<Sender<ZTenantTimelineId>>,
 }

 lazy_static! {
    static ref TIMELINES_STATE: Mutex<GlobalTimelinesState> = Mutex::new(GlobalTimelinesState {
        timelines: HashMap::new(),
-        callmemaybe_tx: None,
        wal_backup_launcher_tx: None,
    });
 }
@@ -669,13 +644,8 @@ pub struct TimelineDeleteForceResult {
 pub struct GlobalTimelines;

 impl GlobalTimelines {
-    pub fn init(
-        callmemaybe_tx: UnboundedSender<CallmeEvent>,
-        wal_backup_launcher_tx: Sender<ZTenantTimelineId>,
-    ) {
+    pub fn init(wal_backup_launcher_tx: Sender<ZTenantTimelineId>) {
        let mut state = TIMELINES_STATE.lock().unwrap();
-        assert!(state.callmemaybe_tx.is_none());
-        state.callmemaybe_tx = Some(callmemaybe_tx);
        assert!(state.wal_backup_launcher_tx.is_none());
        state.wal_backup_launcher_tx = Some(wal_backup_launcher_tx);
    }
@@ -698,7 +668,6 @@ impl GlobalTimelines {

                let new_tli = Arc::new(Timeline::new(
                    zttid,
-                    state.callmemaybe_tx.as_ref().unwrap().clone(),
                    state.wal_backup_launcher_tx.as_ref().unwrap().clone(),
                    shared_state,
                ));
@@ -750,7 +719,6 @@ impl GlobalTimelines {

                let new_tli = Arc::new(Timeline::new(
                    zttid,
-                    state.callmemaybe_tx.as_ref().unwrap().clone(),
                    state.wal_backup_launcher_tx.as_ref().unwrap().clone(),
                    shared_state,
                ));
@@ -777,6 +745,16 @@ impl GlobalTimelines {
            .collect()
    }

+    /// Collect a FullTimelineInfo snapshot from every timeline that is currently active.
+    pub fn active_timelines_metrics() -> Vec<FullTimelineInfo> {
+        let global_state = TIMELINES_STATE.lock().unwrap();
+        global_state
+            .timelines
+            .values()
+            .filter_map(|timeline| timeline.info_for_metrics())
+            .collect()
+    }
+
    fn delete_force_internal(
        conf: &SafeKeeperConf,
        zttid: &ZTenantTimelineId,
--- a/safekeeper/src/wal_backup.rs
+++ b/safekeeper/src/wal_backup.rs
@@ -204,6 +204,7 @@ impl WalBackupTask {
                l.give_up().await;
            }

+            info!("acquiring leadership");
            match broker::get_leader(&self.election).await {
                Ok(l) => {
                    self.leader = Some(l);
@@ -214,6 +215,7 @@ impl WalBackupTask {
                    continue;
                }
            }
+            info!("acquired leadership");

            // offload loop
            loop {
@@ -257,7 +259,7 @@ impl WalBackupTask {
                    // Optimization idea for later:
                    //  Avoid checking election leader every time by returning current lease grant expiration time
                    //  Re-check leadership only after expiration time,
-                    //  such approach woud reduce overhead on write-intensive workloads
+                    //  such approach would reduce overhead on write-intensive workloads

                    match l
                        .check_am_i(
@@ -268,7 +270,7 @@ impl WalBackupTask {
                    {
                        Ok(leader) => {
                            if !leader {
-                                info!("leader has changed");
+                                info!("lost leadership");
                                break;
                            }
                        }
@@ -389,7 +391,7 @@ async fn backup_object(source_file: &Path, size: usize) -> Result<()> {

    let file = File::open(&source_file).await?;

-    // Storage is initialized by launcher at ths point.
+    // Storage is initialized by launcher at this point.
    match storage.as_ref().unwrap() {
        GenericRemoteStorage::Local(local_storage) => {
            let destination = local_storage.remote_object_id(source_file)?;
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -31,20 +31,11 @@ use postgres_ffi::xlog_utils::{XLogFileName, XLOG_BLCKSZ};

 use postgres_ffi::waldecoder::WalStreamDecoder;

-use metrics::{
-    register_gauge_vec, register_histogram_vec, Gauge, GaugeVec, Histogram, HistogramVec,
-    DISK_WRITE_SECONDS_BUCKETS,
-};
+use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};

 lazy_static! {
    // The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
    // i64 is faster than f64, so update to u64 when available.
-    static ref FLUSH_LSN_GAUGE: GaugeVec = register_gauge_vec!(
-        "safekeeper_flush_lsn",
-        "Current flush_lsn, grouped by timeline",
-        &["tenant_id", "timeline_id"]
-    )
-    .expect("Failed to register safekeeper_flush_lsn gauge vec");
    static ref WRITE_WAL_BYTES: HistogramVec = register_histogram_vec!(
        "safekeeper_write_wal_bytes",
        "Bytes written to WAL in a single request, grouped by timeline",
@@ -69,7 +60,6 @@ lazy_static! {
 }

 struct WalStorageMetrics {
-    flush_lsn: Gauge,
    write_wal_bytes: Histogram,
    write_wal_seconds: Histogram,
    flush_wal_seconds: Histogram,
@@ -80,7 +70,6 @@ impl WalStorageMetrics {
        let tenant_id = zttid.tenant_id.to_string();
        let timeline_id = zttid.timeline_id.to_string();
        Self {
-            flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&[&tenant_id, &timeline_id]),
            write_wal_bytes: WRITE_WAL_BYTES.with_label_values(&[&tenant_id, &timeline_id]),
            write_wal_seconds: WRITE_WAL_SECONDS.with_label_values(&[&tenant_id, &timeline_id]),
            flush_wal_seconds: FLUSH_WAL_SECONDS.with_label_values(&[&tenant_id, &timeline_id]),
@@ -126,7 +115,7 @@ pub struct PhysicalStorage {
    conf: SafeKeeperConf,

    // fields below are filled upon initialization
-    /// None if unitialized, Some(usize) if storage is initialized.
+    /// None if uninitialized, Some(usize) if storage is initialized.
    wal_seg_size: Option<usize>,

    /// Written to disk, but possibly still in the cache and not fully persisted.
@@ -171,7 +160,6 @@ impl PhysicalStorage {
    /// Wrapper for flush_lsn updates that also updates metrics.
    fn update_flush_lsn(&mut self) {
        self.flush_record_lsn = self.write_record_lsn;
-        self.metrics.flush_lsn.set(self.flush_record_lsn.0 as f64);
    }

    /// Call fdatasync if config requires so.
@@ -456,7 +444,7 @@ impl Storage for PhysicalStorage {
            segno += 1;
            let (wal_file_path, wal_file_partial_path) =
                wal_file_paths(&self.timeline_dir, segno, wal_seg_size)?;
-            // TODO: better use fs::try_exists which is currenty avaialble only in nightly build
+            // TODO: better use fs::try_exists which is currently available only in nightly build
            if wal_file_path.exists() {
                fs::remove_file(&wal_file_path)?;
            } else if wal_file_partial_path.exists() {
--- a/test_runner/batch_others/test_ancestor_branch.py
+++ b/test_runner/batch_others/test_ancestor_branch.py
@@ -24,13 +24,11 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
            'compaction_target_size': '4194304',
        })

-    with closing(env.pageserver.connect()) as psconn:
-        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
-            pscur.execute("failpoints flush-frozen=sleep(10000)")
+    env.pageserver.safe_psql("failpoints flush-frozen=sleep(10000)")

    pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
    branch0_cur = pg_branch0.connect().cursor()
-    branch0_cur.execute("SHOW zenith.zenith_timeline")
+    branch0_cur.execute("SHOW neon.timeline_id")
    branch0_timeline = branch0_cur.fetchone()[0]
    log.info(f"b0 timeline {branch0_timeline}")

@@ -55,7 +53,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
    log.info("postgres is running on 'branch1' branch")

    branch1_cur = pg_branch1.connect().cursor()
-    branch1_cur.execute("SHOW zenith.zenith_timeline")
+    branch1_cur.execute("SHOW neon.timeline_id")
    branch1_timeline = branch1_cur.fetchone()[0]
    log.info(f"b1 timeline {branch1_timeline}")

@@ -79,7 +77,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
    log.info("postgres is running on 'branch2' branch")
    branch2_cur = pg_branch2.connect().cursor()

-    branch2_cur.execute("SHOW zenith.zenith_timeline")
+    branch2_cur.execute("SHOW neon.timeline_id")
    branch2_timeline = branch2_cur.fetchone()[0]
    log.info(f"b2 timeline {branch2_timeline}")

--- a/test_runner/batch_others/test_backpressure.py
+++ b/test_runner/batch_others/test_backpressure.py
@@ -26,7 +26,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
    log.info("checks started")

    with pg_cur(pg) as cur:
-        cur.execute("CREATE EXTENSION zenith")  # TODO move it to zenith_fixtures?
+        cur.execute("CREATE EXTENSION neon")  # TODO move it to zenith_fixtures?

        cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
        res = cur.fetchone()
--- a/test_runner/batch_others/test_branch_behind.py
+++ b/test_runner/batch_others/test_branch_behind.py
@@ -31,7 +31,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

-    main_cur.execute("SHOW zenith.zenith_timeline")
+    main_cur.execute("SHOW neon.timeline_id")
    timeline = main_cur.fetchone()[0]

    # Create table, and insert the first 100 rows
--- a/test_runner/batch_others/test_broken_timeline.py
+++ b/test_runner/batch_others/test_broken_timeline.py
@@ -26,7 +26,7 @@ def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
                cur.execute("CREATE TABLE t(key int primary key, value text)")
                cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")

-                cur.execute("SHOW zenith.zenith_timeline")
+                cur.execute("SHOW neon.timeline_id")
                timeline_id = cur.fetchone()[0]
        pg.stop()
        tenant_timelines.append((tenant_id, timeline_id, pg))
--- a/test_runner/batch_others/test_clog_truncate.py
+++ b/test_runner/batch_others/test_clog_truncate.py
@@ -14,7 +14,7 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_clog_truncate', 'empty')

-    # set agressive autovacuum to make sure that truncation will happen
+    # set aggressive autovacuum to make sure that truncation will happen
    config = [
        'autovacuum_max_workers=10',
        'autovacuum_vacuum_threshold=0',
@@ -29,7 +29,7 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
    log.info('postgres is running on test_clog_truncate branch')

    # Install extension containing function needed for test
-    pg.safe_psql('CREATE EXTENSION zenith_test_utils')
+    pg.safe_psql('CREATE EXTENSION neon_test_utils')

    # Consume many xids to advance clog
    with closing(pg.connect()) as conn:
--- a/test_runner/batch_others/test_gc_aggressive.py
+++ b/test_runner/batch_others/test_gc_aggressive.py
@@ -62,7 +62,7 @@ def test_gc_aggressive(zenith_env_builder: ZenithEnvBuilder):
    conn = pg.connect()
    cur = conn.cursor()

-    cur.execute("SHOW zenith.zenith_timeline")
+    cur.execute("SHOW neon.timeline_id")
    timeline = cur.fetchone()[0]

    # Create table, and insert the first 100 rows
--- a/test_runner/batch_others/test_normal_work.py
+++ b/test_runner/batch_others/test_normal_work.py
@@ -0,0 +1,57 @@
+from fixtures.log_helper import log
+from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
+
+
+def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
+    """
+    Exercise one freshly created tenant: write data, restart compute, read the data back,
+    then stop compute and detach the timeline via the pageserver HTTP client.
+    """
+    sum_query = 'SELECT sum(key) FROM t'
+    expected_sum_row = (5000050000, )  # sum of the keys 1..100000
+
+    tenant_id, timeline_id = env.zenith_cli.create_tenant()
+    pg = env.postgres.create_start('main', tenant_id=tenant_id)
+    # we rely upon autocommit after each statement
+    results = pg.safe_psql_many(queries=[
+        'CREATE TABLE t(key int primary key, value text)',
+        'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
+        sum_query,
+    ])
+    # the sum is the first row returned for the last query
+    assert results[-1][0] == expected_sum_row
+
+    # TODO check detach on live instance
+    log.info("stopping compute")
+    pg.stop()
+    log.info("compute stopped")
+
+    # the data has to survive the compute restart
+    pg.start()
+    rows_after_restart = pg.safe_psql(sum_query)
+    assert rows_after_restart[0] == expected_sum_row
+
+    pg.stop()
+    pageserver_http.timeline_detach(tenant_id, timeline_id)
+
+
+def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
+    """
+    Basic test:
+    * create new tenant with a timeline
+    * write some data
+    * ensure that it was successfully written
+    * restart compute
+    * check that the data is there
+    * stop compute
+    * detach timeline
+
+    Repeat check for several tenants/timelines.
+    """
+
+    env = zenith_env_builder.init_start()
+    pageserver_http = env.pageserver.http_client()
+
+    tenant_count = 3
+    for _ in range(tenant_count):
+        check_tenant(env, pageserver_http)
--- a/test_runner/batch_others/test_old_request_lsn.py
+++ b/test_runner/batch_others/test_old_request_lsn.py
@@ -26,7 +26,7 @@ def test_old_request_lsn(zenith_env_builder: ZenithEnvBuilder):
    cur = pg_conn.cursor()

    # Get the timeline ID of our branch. We need it for the 'do_gc' command
-    cur.execute("SHOW zenith.zenith_timeline")
+    cur.execute("SHOW neon.timeline_id")
    timeline = cur.fetchone()[0]

    psconn = env.pageserver.connect()
--- a/test_runner/batch_others/test_pageserver_api.py
+++ b/test_runner/batch_others/test_pageserver_api.py
@@ -63,10 +63,11 @@ def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv

    tenant_id, timeline_id = env.zenith_cli.create_tenant()

-    # no PG compute node is running, so no WAL receiver is running
-    with pytest.raises(ZenithPageserverApiException) as e:
-        _ = client.wal_receiver_get(tenant_id, timeline_id)
-        assert "Not Found" in str(e.value)
+    empty_response = client.wal_receiver_get(tenant_id, timeline_id)
+
+    assert empty_response.get('wal_producer_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
+    assert empty_response.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
+    assert empty_response.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'


 def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
@@ -81,7 +82,6 @@ def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):

        # a successful `wal_receiver_get` response must contain the below fields
        assert list(res.keys()) == [
-            "thread_id",
            "wal_producer_connstr",
            "last_received_msg_lsn",
            "last_received_msg_ts",
--- a/test_runner/batch_others/test_pitr_gc.py
+++ b/test_runner/batch_others/test_pitr_gc.py
@@ -25,7 +25,7 @@ def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

-    main_cur.execute("SHOW zenith.zenith_timeline")
+    main_cur.execute("SHOW neon.timeline_id")
    timeline = main_cur.fetchone()[0]

    # Create table
@@ -55,7 +55,7 @@ def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute(f"compact {env.initial_tenant.hex} {timeline}")
-            # perform agressive GC. Data still should be kept because of the PITR setting.
+            # perform aggressive GC. Data still should be kept because of the PITR setting.
            pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)
--- a/Show More
+++ b/Show More