Update docs

Fix tests
WIP
2026-05-17 05:00:38 +00:00 · 2022-06-20 13:29:14 -04:00 · 2022-06-20 13:26:27 -04:00 · 2022-06-20 12:59:11 -04:00 · 2022-06-20 10:33:05 -04:00 · 2022-06-20 10:17:46 -04:00
220 changed files with 7919 additions and 4031 deletions
--- a/.circleci/ansible/deploy.yaml
+++ b/.circleci/ansible/deploy.yaml
@@ -57,7 +57,7 @@
      args:
        creates: "/storage/pageserver/data/tenants"
      environment:
-        ZENITH_REPO_DIR: "/storage/pageserver/data"
+        NEON_REPO_DIR: "/storage/pageserver/data"
        LD_LIBRARY_PATH: "/usr/local/lib"
      become: true
      tags:
@@ -131,7 +131,7 @@
      args:
        creates: "/storage/safekeeper/data/safekeeper.id"
      environment:
-        ZENITH_REPO_DIR: "/storage/safekeeper/data"
+        NEON_REPO_DIR: "/storage/safekeeper/data"
        LD_LIBRARY_PATH: "/usr/local/lib"
      become: true
      tags:
--- a/.circleci/ansible/get_binaries.sh
+++ b/.circleci/ansible/get_binaries.sh
@@ -6,7 +6,7 @@ RELEASE=${RELEASE:-false}

 # look at docker hub for latest tag for neon docker image
 if [ "${RELEASE}" = "true" ]; then
-    echo "search latest relase tag"
+    echo "search latest release tag"
    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
@@ -31,7 +31,7 @@ echo "found ${VERSION}"
 rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
 mkdir neon_install

-# retrive binaries from docker image
+# retrieve binaries from docker image
 echo "getting binaries from docker image"
 docker pull --quiet neondatabase/neon:${TAG}
 ID=$(docker create neondatabase/neon:${TAG})
--- a/.circleci/ansible/production.hosts
+++ b/.circleci/ansible/production.hosts
@@ -16,4 +16,3 @@ console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
 etcd_endpoints        = etcd-release.local:2379
-safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/staging.hosts
+++ b/.circleci/ansible/staging.hosts
@@ -3,7 +3,6 @@
 zenith-us-stage-ps-2 console_region_id=27

 [safekeepers]
-zenith-us-stage-sk-1 console_region_id=27
 zenith-us-stage-sk-4 console_region_id=27
 zenith-us-stage-sk-5 console_region_id=27
 zenith-us-stage-sk-6 console_region_id=27
@@ -17,4 +16,3 @@ console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
 etcd_endpoints        = etcd-staging.local:2379
-safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/systemd/pageserver.service
+++ b/.circleci/ansible/systemd/pageserver.service
@@ -5,7 +5,7 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=pageserver
-Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
 ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
--- a/.circleci/ansible/systemd/safekeeper.service
+++ b/.circleci/ansible/systemd/safekeeper.service
@@ -5,8 +5,8 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=safekeeper
-Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --enable-s3-offload={{ safekeeper_enable_s3_offload }}
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,15 +11,6 @@ executors:
      - image: zimg/rust:1.58

 jobs:
-  check-codestyle-rust:
-    executor: neon-xlarge-executor
-    steps:
-      - checkout
-      - run:
-          name: rustfmt
-          when: always
-          command: cargo fmt --all -- --check
-
  # A job to build postgres
  build-postgres:
    executor: neon-xlarge-executor
@@ -462,9 +453,6 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:latest image and push it to Docker hub
-      # TODO: this should probably also use versioned tag, not just :latest.
-      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
@@ -472,7 +460,10 @@ jobs:
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
+              --tag neondatabase/compute-tools:local \
+              --tag neondatabase/compute-tools:latest \
+              -f Dockerfile.compute-tools .
+            # Only push :latest image
            docker push neondatabase/compute-tools:latest
      - run:
          name: Init postgres submodule
@@ -482,7 +473,9 @@ jobs:
          command: |
            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
+              --tag neondatabase/compute-node:latest vendor/postgres \
+              --build-arg COMPUTE_TOOLS_TAG=local
            docker push neondatabase/compute-node:${DOCKER_TAG}
            docker push neondatabase/compute-node:latest

@@ -519,9 +512,6 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:release image and push it to Docker hub
-      # TODO: this should probably also use versioned tag, not just :latest.
-      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
@@ -529,7 +519,10 @@ jobs:
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
+              --tag neondatabase/compute-tools:release \
+              --tag neondatabase/compute-tools:local \
+              -f Dockerfile.compute-tools .
+            # Only push :release image
            docker push neondatabase/compute-tools:release
      - run:
          name: Init postgres submodule
@@ -539,7 +532,9 @@ jobs:
          command: |
            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
+              --tag neondatabase/compute-node:release vendor/postgres \
+              --build-arg COMPUTE_TOOLS_TAG=local
            docker push neondatabase/compute-node:${DOCKER_TAG}
            docker push neondatabase/compute-node:release

@@ -740,7 +735,6 @@ jobs:
 workflows:
  build_and_test:
    jobs:
-      - check-codestyle-rust
      - check-codestyle-python
      - build-postgres:
          name: build-postgres-<< matrix.build_type >>
@@ -756,7 +750,6 @@ workflows:
            - build-postgres-<< matrix.build_type >>
      - run-pytest:
          name: pg_regress-tests-<< matrix.build_type >>
-          context: PERF_TEST_RESULT_CONNSTR
          matrix:
            parameters:
              build_type: ["debug", "release"]
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,8 +9,8 @@ tmp_install
 tmp_check_cli
 test_output
 .vscode
-.zenith
-integration_tests/.zenith
+.neon
+integration_tests/.neon
 .mypy_cache

 Dockerfile
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -19,7 +19,7 @@ jobs:
  bench:
    # this workflow runs on self hosteed runner
    # it's environment is quite different from usual guthub runner
-    # probably the most important difference is that it doesnt start from clean workspace each time
+    # probably the most important difference is that it doesn't start from clean workspace each time
    # e g if you install system packages they are not cleaned up since you install them directly in host machine
    # not a container or something
    # See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -1,8 +1,10 @@
 name: Build and Test

 on:
-  pull_request:
  push:
+    branches:
+    - main
+  pull_request:

 jobs:
  regression-check:
@@ -10,7 +12,7 @@ jobs:
      matrix:
        # If we want to duplicate this job for different
        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
-        rust_toolchain: [stable]
+        rust_toolchain: [1.58]
        os: [ubuntu-latest, macos-latest]
    timeout-minutes: 30
    name: run regression test suite
@@ -23,22 +25,26 @@ jobs:
          submodules: true
          fetch-depth: 2

-      - name: install rust toolchain ${{ matrix.rust_toolchain }}
+      - name: Install rust toolchain ${{ matrix.rust_toolchain }}
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: ${{ matrix.rust_toolchain }}
+          components: rustfmt, clippy
          override: true

+      - name: Check formatting
+        run: cargo fmt --all -- --check
+
      - name: Install Ubuntu postgres dependencies
        if: matrix.os == 'ubuntu-latest'
        run: |
          sudo apt update
-          sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
+          sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev

-      - name: Install macOs postgres dependencies
+      - name: Install macOS postgres dependencies
        if: matrix.os == 'macos-latest'
-        run: brew install flex bison
+        run: brew install flex bison openssl

      - name: Set pg revision for caching
        id: pg_ver
@@ -52,10 +58,27 @@ jobs:
            tmp_install/
          key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}

+      - name: Set extra env for macOS
+        if: matrix.os == 'macos-latest'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: make postgres

+      # Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
+      # and the real cause will be inside config.log
+      - name: Print configure logs in case of failure
+        if: failure()
+        continue-on-error: true
+        run: |
+          echo '' && echo '=== config.log ===' && echo ''
+          cat tmp_install/build/config.log
+          echo '' && echo '=== configure.log ===' && echo ''
+          cat tmp_install/build/configure.log
+
      - name: Cache cargo deps
        id: cache_cargo
        uses: actions/cache@v2
@@ -64,7 +87,7 @@ jobs:
            ~/.cargo/registry
            ~/.cargo/git
            target
-          key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
+          key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}

      - name: Run cargo clippy
        run: ./run_clippy.sh
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,9 @@
 __pycache__/
 test_output/
 .vscode
-/.zenith
-/integration_tests/.zenith
+.idea
+/.neon
+/integration_tests/.neon

 # Coverage
 *.profraw
--- a/.yapfignore
+++ b/.yapfignore
@@ -6,5 +6,5 @@ target/
 tmp_install/
 __pycache__/
 test_output/
-.zenith/
+.neon/
 .git/
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -363,6 +363,16 @@ dependencies = [
 "textwrap 0.14.2",
 ]

+[[package]]
+name = "close_fds"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bc416f33de9d59e79e57560f450d21ff8393adcf1cdfc3e6d8fb93d5f88a2ed"
+dependencies = [
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "cmake"
 version = "0.1.48"
@@ -801,6 +811,7 @@ name = "etcd_broker"
 version = "0.1.0"
 dependencies = [
 "etcd-client",
+ "once_cell",
 "regex",
 "serde",
 "serde_json",
@@ -1722,9 +1733,9 @@ dependencies = [

 [[package]]
 name = "once_cell"
-version = "1.9.0"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
+checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"

 [[package]]
 name = "oorandom"
@@ -1789,6 +1800,7 @@ dependencies = [
 "bytes",
 "chrono",
 "clap 3.0.14",
+ "close_fds",
 "const_format",
 "crc32c",
 "crossbeam-utils",
@@ -1830,6 +1842,7 @@ dependencies = [
 "tracing",
 "url",
 "utils",
+ "walkdir",
 "workspace_hack",
 ]

@@ -2394,6 +2407,8 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
+ "metrics",
+ "once_cell",
 "rusoto_core",
 "rusoto_s3",
 "serde",
@@ -2401,6 +2416,7 @@ dependencies = [
 "tempfile",
 "tokio",
 "tokio-util 0.7.0",
+ "toml_edit",
 "tracing",
 "workspace_hack",
 ]
@@ -2652,6 +2668,7 @@ name = "safekeeper"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "async-trait",
 "byteorder",
 "bytes",
 "clap 3.0.14",
@@ -2660,12 +2677,14 @@ dependencies = [
 "daemonize",
 "etcd_broker",
 "fs2",
+ "futures",
 "git-version",
 "hex",
 "humantime",
 "hyper",
 "lazy_static",
 "metrics",
+ "once_cell",
 "postgres",
 "postgres-protocol",
 "postgres_ffi",
@@ -2679,6 +2698,7 @@ dependencies = [
 "tokio",
 "tokio-postgres",
 "tokio-util 0.7.0",
+ "toml_edit",
 "tracing",
 "url",
 "utils",
--- a/2
+++ b/2
@@ -25,7 +25,7 @@ COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/inclu
 COPY . .

 # Show build caching stats to check if it was used in the end.
-# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
+# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
    && sudo -E "PATH=$PATH" mold -run cargo build --release \
    && cachepot -s
--- a/Dockerfile.alpine
+++ b/Dockerfile.alpine
@@ -1,95 +0,0 @@
-#
-# Docker image for console integration testing.
-#
-# We may also reuse it in CI to unify installation process and as a general binaries building
-# tool for production servers.
-#
-# Dynamic linking is used for librocksdb and libstdc++ bacause librocksdb-sys calls
-# bindgen with "dynamic" feature flag. This also prevents usage of dockerhub alpine-rust
-# images which are statically linked and have guards against any dlopen. I would rather
-# prefer all static binaries so we may change the way librocksdb-sys builds or wait until
-# we will have our own storage and drop rockdb dependency.
-#
-# Cargo-chef is used to separate dependencies building from main binaries building. This
-# way `docker build` will download and install dependencies only of there are changes to
-# out Cargo.toml files.
-#
-
-
-#
-# build postgres separately -- this layer will be rebuilt only if one of
-# mentioned paths will get any changes
-#
-FROM alpine:3.13 as pg-build
-RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
-                     make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
-WORKDIR zenith
-COPY ./vendor/postgres vendor/postgres
-COPY ./Makefile Makefile
-# Build using clang and lld
-RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
-
-#
-# Calculate cargo dependencies.
-# This will always run, but only generate recipe.json with list of dependencies without
-# installing them.
-#
-FROM alpine:20210212 as cargo-deps-inspect
-RUN apk add --update rust cargo
-RUN cargo install cargo-chef
-WORKDIR zenith
-COPY . .
-RUN cargo chef prepare --recipe-path recipe.json
-
-#
-# Build cargo dependencies.
-# This temp cantainner would be build only if recipe.json was changed.
-#
-FROM alpine:20210212 as deps-build
-RUN apk add --update rust cargo openssl-dev clang build-base
-# rust-rocksdb can be built against system-wide rocksdb -- that saves about
-# 10 minutes during build. Rocksdb apk package is in testing now, but use it
-# anyway. In case of any troubles we can download and build rocksdb here manually
-# (to cache it as a docker layer).
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
-WORKDIR zenith
-COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
-COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
-COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
-RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
-
-#
-# Build zenith binaries
-#
-FROM alpine:20210212 as build
-RUN apk add --update rust cargo openssl-dev clang build-base
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
-WORKDIR zenith
-COPY . .
-# Copy cached dependencies
-COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
-COPY --from=deps-build /zenith/target target
-COPY --from=deps-build /root/.cargo /root/.cargo
-RUN cargo build --release
-
-#
-# Copy binaries to resulting image.
-# build-base hare to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
-# out how to statically link rocksdb or avoid it at all).
-#
-FROM alpine:3.13
-RUN apk add --update openssl build-base libseccomp-dev
-RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
-COPY --from=build /zenith/target/release/pageserver /usr/local/bin
-COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
-COPY --from=build /zenith/target/release/proxy /usr/local/bin
-COPY --from=pg-build /zenith/tmp_install /usr/local
-COPY docker-entrypoint.sh /docker-entrypoint.sh
-
-RUN addgroup zenith && adduser -h /data -D -G zenith zenith
-VOLUME ["/data"]
-WORKDIR /data
-USER zenith
-EXPOSE 6400
-ENTRYPOINT ["/docker-entrypoint.sh"]
-CMD ["pageserver"]
--- a/12
+++ b/12
@@ -26,7 +26,7 @@ endif
 # macOS with brew-installed openssl requires explicit paths
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Darwin)
-    PG_CONFIGURE_OPTS += --with-includes=/usr/local/opt/openssl/include --with-libraries=/usr/local/opt/openssl/lib
+    PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
 endif

 # Choose whether we should be silent or verbose
@@ -74,16 +74,16 @@ postgres-headers: postgres-configure
 	+@echo "Installing PostgreSQL headers"
 	$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install

-# Compile and install PostgreSQL and contrib/zenith
+# Compile and install PostgreSQL and contrib/neon
 .PHONY: postgres
 postgres: postgres-configure \
 		  postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
 	+@echo "Compiling PostgreSQL"
 	$(MAKE) -C tmp_install/build MAKELEVEL=0 install
-	+@echo "Compiling contrib/zenith"
-	$(MAKE) -C tmp_install/build/contrib/zenith install
-	+@echo "Compiling contrib/zenith_test_utils"
-	$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
+	+@echo "Compiling contrib/neon"
+	$(MAKE) -C tmp_install/build/contrib/neon install
+	+@echo "Compiling contrib/neon_test_utils"
+	$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
 	+@echo "Compiling pg_buffercache"
 	$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect"
--- a/README.md
+++ b/README.md
@@ -5,6 +5,11 @@ Neon is a serverless open source alternative to AWS Aurora Postgres. It separate
 The project used to be called "Zenith". Many of the commands and code comments
 still refer to "zenith", but we are in the process of renaming things.

+## Quick start
+[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, DBeaver, etc.) or use the online SQL editor.
+
+Alternatively, compile and run the project [locally](#running-local-installation).
+
 ## Architecture overview

 A Neon installation consists of compute nodes and Neon storage engine.
@@ -24,13 +29,18 @@ Pageserver consists of:
 ## Running local installation


-#### building on Ubuntu/ Debian (Linux)
+#### building on Linux
 1. Install build dependencies and other useful packages

-On Ubuntu or Debian this set of packages should be sufficient to build the code:
-```text
+* On Ubuntu or Debian this set of packages should be sufficient to build the code:
+```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
-libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
+libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
+```
+* On Fedora these packages are needed:
+```bash
+dnf install flex bison readline-devel zlib-devel openssl-devel \
+  libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
 ```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -39,16 +49,11 @@ libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 ```

-3. Install PostgreSQL Client
-```
-apt install postgresql-client
-```
-
-4. Build neon and patched postgres
+3. Build neon and patched postgres
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git
 cd neon
-make -j5
+make -j`nproc`
 ```

 #### building on OSX (12.3.1)
@@ -75,7 +80,7 @@ brew link --force libpq
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git
 cd neon
-make -j5
+make -j`sysctl -n hw.logicalcpu`
 ```

 #### dependency installation notes
@@ -88,7 +93,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 #### running neon database
 1. Start pageserver and postgres on top of it (should be called from repo root):
 ```sh
-# Create repository in .zenith with proper paths to binaries and data
+# Create repository in .neon with proper paths to binaries and data
 # Later that would be responsibility of a package install script
 > ./target/debug/neon_local init
 initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -98,17 +103,17 @@ pageserver init succeeded

 # start pageserver and safekeeper
 > ./target/debug/neon_local start
-Starting pageserver at '127.0.0.1:64000' in '.zenith'
+Starting pageserver at '127.0.0.1:64000' in '.neon'
 Pageserver started
 initializing for sk 1 for 7676
-Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
+Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
 Safekeeper started

 # start postgres compute node
 > ./target/debug/neon_local pg start main
 Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
-Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
+Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
+Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

 # check list of running postgres instances
 > ./target/debug/neon_local pg list
@@ -118,7 +123,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=po

 2. Now it is possible to connect to postgres and run some queries:
 ```text
-> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# CREATE TABLE t(key int primary key, value text);
 CREATE TABLE
 postgres=# insert into t values(1,1);
@@ -144,8 +149,8 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
 # start postgres on that branch
 > ./target/debug/neon_local pg start migration_check --branch-name migration_check
 Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
-Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
+Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
+Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

 # check the new list of running postgres instances
 > ./target/debug/neon_local pg list
@@ -155,7 +160,7 @@ Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=po

 # this new postgres instance will have all the data from 'main' postgres,
 # but all modifications would not affect data in original postgres
-> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# select * from t;
 key | value
 -----+-------
@@ -166,7 +171,7 @@ postgres=# insert into t values(2,2);
 INSERT 0 1

 # check that the new change doesn't affect the 'main' postgres
-> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
+> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# select * from t;
 key | value
 -----+-------
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -22,7 +22,7 @@ Also `compute_ctl` spawns two separate service threads:
 Usage example:
 ```sh
 compute_ctl -D /var/db/postgres/compute \
-            -C 'postgresql://zenith_admin@localhost/postgres' \
+            -C 'postgresql://cloud_admin@localhost/postgres' \
            -S /var/db/postgres/specs/current.json \
            -b /usr/local/bin/postgres
 ```
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -21,7 +21,7 @@
 //! Usage example:
 //! ```sh
 //! compute_ctl -D /var/db/postgres/compute \
-//!             -C 'postgresql://zenith_admin@localhost/postgres' \
+//!             -C 'postgresql://cloud_admin@localhost/postgres' \
 //!             -S /var/db/postgres/specs/current.json \
 //!             -b /usr/local/bin/postgres
 //! ```
@@ -116,17 +116,17 @@ fn main() -> Result<()> {
    let pageserver_connstr = spec
        .cluster
        .settings
-        .find("zenith.page_server_connstring")
+        .find("neon.pageserver_connstring")
        .expect("pageserver connstr should be provided");
    let tenant = spec
        .cluster
        .settings
-        .find("zenith.zenith_tenant")
+        .find("neon.tenant_id")
        .expect("tenant id should be provided");
    let timeline = spec
        .cluster
        .settings
-        .find("zenith.zenith_timeline")
+        .find("neon.timeline_id")
        .expect("tenant id should be provided");

    let compute_state = ComputeNode {
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -262,7 +262,30 @@ impl ComputeNode {
            .unwrap_or_else(|| "5432".to_string());
        wait_for_postgres(&mut pg, &port, pgdata_path)?;

-        let mut client = Client::connect(&self.connstr, NoTls)?;
+        // If connection fails,
+        // it may be the old node with `zenith_admin` superuser.
+        //
+        // In this case we need to connect with old `zenith_admin` name
+        // and create new user. We cannot simply rename connected user,
+        // but we can create a new one and grant it all privileges.
+        let mut client = match Client::connect(&self.connstr, NoTls) {
+            Err(e) => {
+                info!(
+                    "cannot connect to postgres: {}, retrying with `zenith_admin` username",
+                    e
+                );
+                let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
+
+                let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
+                client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
+                client.simple_query("GRANT zenith_admin TO cloud_admin")?;
+                drop(client);
+
+                // reconnect using the connection string with the expected name
+                Client::connect(&self.connstr, NoTls)?
+            }
+            Ok(client) => client,
+        };

        handle_roles(&self.spec, &mut client)?;
        handle_databases(&self.spec, &mut client)?;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -43,7 +43,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                         FROM pg_stat_activity
                         WHERE backend_type = 'client backend'
                            AND pid != pg_backend_pid()
-                            AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
+                            AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
                        &[],
                    );
                let mut last_active = compute.state.read().unwrap().last_active;
--- a/compute_tools/tests/cluster_spec.json
+++ b/compute_tools/tests/cluster_spec.json
@@ -85,7 +85,7 @@
                "vartype": "bool"
            },
            {
-                "name": "wal_acceptors",
+                "name": "safekeepers",
                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
                "vartype": "string"
            },
@@ -150,7 +150,7 @@
                "vartype": "integer"
            },
            {
-                "name": "zenith.zenith_tenant",
+                "name": "neon.tenant_id",
                "value": "b0554b632bd4d547a63b86c3630317e8",
                "vartype": "string"
            },
@@ -160,13 +160,13 @@
                "vartype": "integer"
            },
            {
-                "name": "zenith.zenith_timeline",
+                "name": "neon.timeline_id",
                "value": "2414a61ffc94e428f14b5758fe308e13",
                "vartype": "string"
            },
            {
                "name": "shared_preload_libraries",
-                "value": "zenith",
+                "value": "neon",
                "vartype": "string"
            },
            {
@@ -175,7 +175,7 @@
                "vartype": "string"
            },
            {
-                "name": "zenith.page_server_connstring",
+                "name": "neon.pageserver_connstring",
                "value": "host=127.0.0.1 port=6400",
                "vartype": "string"
            }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -28,7 +28,7 @@ mod pg_helpers_tests {

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),
-            "fsync = off\nwal_level = replica\nhot_standby = on\nwal_acceptors = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nzenith.zenith_tenant = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nzenith.zenith_timeline = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'zenith'\nsynchronous_standby_names = 'walproposer'\nzenith.page_server_connstring = 'host=127.0.0.1 port=6400'"
+            "fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
        );
    }

--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -148,9 +148,9 @@ impl PostgresNode {
        // Read a few options from the config file
        let context = format!("in config file {}", cfg_path_str);
        let port: u16 = conf.parse_field("port", &context)?;
-        let timeline_id: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
-        let tenant_id: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
-        let uses_wal_proposer = conf.get("wal_acceptors").is_some();
+        let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
+        let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
+        let uses_wal_proposer = conf.get("safekeepers").is_some();

        // parse recovery_target_lsn, if any
        let recovery_target_lsn: Option<Lsn> =
@@ -303,11 +303,11 @@ impl PostgresNode {
            // uses only needed variables namely host, port, user, password.
            format!("postgresql://no_user:{}@{}:{}", password, host, port)
        };
-        conf.append("shared_preload_libraries", "zenith");
+        conf.append("shared_preload_libraries", "neon");
        conf.append_line("");
-        conf.append("zenith.page_server_connstring", &pageserver_connstr);
-        conf.append("zenith.zenith_tenant", &self.tenant_id.to_string());
-        conf.append("zenith.zenith_timeline", &self.timeline_id.to_string());
+        conf.append("neon.pageserver_connstring", &pageserver_connstr);
+        conf.append("neon.tenant_id", &self.tenant_id.to_string());
+        conf.append("neon.timeline_id", &self.timeline_id.to_string());
        if let Some(lsn) = self.lsn {
            conf.append("recovery_target_lsn", &lsn.to_string());
        }
@@ -341,7 +341,7 @@ impl PostgresNode {
                .map(|sk| format!("localhost:{}", sk.pg_port))
                .collect::<Vec<String>>()
                .join(",");
-            conf.append("wal_acceptors", &safekeepers);
+            conf.append("safekeepers", &safekeepers);
        } else {
            // We only use setup without safekeepers for tests,
            // and don't care about data durability on pageserver,
@@ -352,7 +352,6 @@ impl PostgresNode {
            // This isn't really a supported configuration, but can be useful for
            // testing.
            conf.append("synchronous_standby_names", "pageserver");
-            conf.append("zenith.callmemaybe_connstring", &self.connstr());
        }

        let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
@@ -499,7 +498,7 @@ impl PostgresNode {
            "host={} port={} user={} dbname={}",
            self.address.ip(),
            self.address.port(),
-            "zenith_admin",
+            "cloud_admin",
            "postgres"
        )
    }
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -77,7 +77,7 @@ pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_pid_file_path = etcd_pid_file_path(env);
    let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
        format!(
-            "Failed to read etcd pid filea at {}",
+            "Failed to read etcd pid file at {}",
            etcd_pid_file_path.display()
        )
    })?);
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -49,3 +49,12 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
        cmd
    }
 }
+
+fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
+    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
+        if let Ok(value) = std::env::var(env_key) {
+            cmd = cmd.env(env_key, value);
+        }
+    }
+    cmd
+}
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -21,9 +21,9 @@ use utils::{
 use crate::safekeeper::SafekeeperNode;

 //
-// This data structures represents zenith CLI config
+// This data structure represents the neon_local CLI config
 //
-// It is deserialized from the .zenith/config file, or the config file passed
+// It is deserialized from the .neon/config file, or the config file passed
 // to 'zenith init --config=<path>' option. See control_plane/simple.conf for
 // an example.
 //
@@ -34,8 +34,8 @@ pub struct LocalEnv {
    // compute nodes).
    //
    // This is not stored in the config file. Rather, this is the path where the
-    // config file itself is. It is read from the ZENITH_REPO_DIR env variable or
-    // '.zenith' if not given.
+    // config file itself is. It is read from the NEON_REPO_DIR env variable or
+    // '.neon' if not given.
    #[serde(skip)]
    pub base_data_dir: PathBuf,

@@ -119,16 +119,24 @@ impl EtcdBroker {
    }

    pub fn comma_separated_endpoints(&self) -> String {
-        self.broker_endpoints.iter().map(Url::as_str).fold(
-            String::new(),
-            |mut comma_separated_urls, url| {
+        self.broker_endpoints
+            .iter()
+            .map(|url| {
+                // URL by default adds a '/' path at the end, which is not what etcd CLI wants.
+                let url_string = url.as_str();
+                if url_string.ends_with('/') {
+                    &url_string[0..url_string.len() - 1]
+                } else {
+                    url_string
+                }
+            })
+            .fold(String::new(), |mut comma_separated_urls, url| {
                if !comma_separated_urls.is_empty() {
                    comma_separated_urls.push(',');
                }
                comma_separated_urls.push_str(url);
                comma_separated_urls
-            },
-        )
+            })
    }
 }

@@ -167,6 +175,9 @@ pub struct SafekeeperConf {
    pub pg_port: u16,
    pub http_port: u16,
    pub sync: bool,
+    pub remote_storage: Option<String>,
+    pub backup_threads: Option<u32>,
+    pub auth_enabled: bool,
 }

 impl Default for SafekeeperConf {
@@ -176,6 +187,9 @@ impl Default for SafekeeperConf {
            pg_port: 0,
            http_port: 0,
            sync: true,
+            remote_storage: None,
+            backup_threads: None,
+            auth_enabled: false,
        }
    }
 }
@@ -325,7 +339,7 @@ impl LocalEnv {
    pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
        // Currently, the user first passes a config file with 'zenith init --config=<path>'
        // We read that in, in `create_config`, and fill any missing defaults. Then it's saved
-        // to .zenith/config. TODO: We lose any formatting and comments along the way, which is
+        // to .neon/config. TODO: We lose any formatting and comments along the way, which is
        // a bit sad.
        let mut conf_content = r#"# This file describes a locale deployment of the page server
 # and safekeeeper node. It is read by the 'zenith' command-line
@@ -377,6 +391,7 @@ impl LocalEnv {
            base_path != Path::new(""),
            "repository base path is missing"
        );
+
        ensure!(
            !base_path.exists(),
            "directory '{}' already exists. Perhaps already initialized?",
@@ -468,9 +483,9 @@ impl LocalEnv {
 }

 fn base_path() -> PathBuf {
-    match std::env::var_os("ZENITH_REPO_DIR") {
+    match std::env::var_os("NEON_REPO_DIR") {
        Some(val) => PathBuf::from(val),
-        None => PathBuf::from(".zenith"),
+        None => PathBuf::from(".neon"),
    }
 }

--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -23,7 +23,7 @@ use utils::{

 use crate::local_env::{LocalEnv, SafekeeperConf};
 use crate::storage::PageServerNode;
-use crate::{fill_rust_env_vars, read_pidfile};
+use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
@@ -143,6 +143,19 @@ impl SafekeeperNode {
        if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
            cmd.args(&["--broker-etcd-prefix", prefix]);
        }
+        if let Some(threads) = self.conf.backup_threads {
+            cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
+        }
+        if let Some(ref remote_storage) = self.conf.remote_storage {
+            cmd.args(&["--remote-storage", remote_storage]);
+        }
+        if self.conf.auth_enabled {
+            cmd.arg("--auth-validation-public-key-path");
+            // PathBuf is better be passed as is, not via `String`.
+            cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
+        }
+
+        fill_aws_secrets_vars(&mut cmd);

        if !cmd.status()?.success() {
            bail!(
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,6 +1,8 @@
 use std::collections::HashMap;
-use std::io::Write;
+use std::fs::File;
+use std::io::{BufReader, Write};
 use std::net::TcpStream;
+use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::process::Command;
 use std::time::Duration;
@@ -11,6 +13,7 @@ use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
 use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
+use pageserver::tenant_mgr::TenantInfo;
 use pageserver::timelines::TimelineInfo;
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -25,8 +28,7 @@ use utils::{
 };

 use crate::local_env::LocalEnv;
-use crate::{fill_rust_env_vars, read_pidfile};
-use pageserver::tenant_mgr::TenantInfo;
+use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -37,6 +39,12 @@ pub enum PageserverHttpError {
    Response(String),
 }

+impl From<anyhow::Error> for PageserverHttpError {
+    fn from(e: anyhow::Error) -> Self {
+        Self::Response(e.to_string())
+    }
+}
+
 type Result<T> = result::Result<T, PageserverHttpError>;

 pub trait ResponseErrorMessageExt: Sized {
@@ -410,6 +418,15 @@ impl PageServerNode {
                    .map(|x| x.parse::<usize>())
                    .transpose()?,
                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                walreceiver_connect_timeout: settings
+                    .get("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
+                max_lsn_wal_lag: settings
+                    .get("max_lsn_wal_lag")
+                    .map(|x| x.parse::<NonZeroU64>())
+                    .transpose()
+                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
            })
            .send()?
            .error_from_body()?
@@ -433,22 +450,41 @@ impl PageServerNode {
                tenant_id,
                checkpoint_distance: settings
                    .get("checkpoint_distance")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
                compaction_target_size: settings
                    .get("compaction_target_size")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'compaction_target_size' as an integer")?,
                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
                compaction_threshold: settings
                    .get("compaction_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
+                    .map(|x| x.parse::<usize>())
+                    .transpose()
+                    .context("Failed to parse 'compaction_threshold' as an integer")?,
                gc_horizon: settings
                    .get("gc_horizon")
-                    .map(|x| x.parse::<u64>().unwrap()),
+                    .map(|x| x.parse::<u64>())
+                    .transpose()
+                    .context("Failed to parse 'gc_horizon' as an integer")?,
                gc_period: settings.get("gc_period").map(|x| x.to_string()),
                image_creation_threshold: settings
                    .get("image_creation_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
+                    .map(|x| x.parse::<usize>())
+                    .transpose()
+                    .context("Failed to parse 'image_creation_threshold' as an integer")?,
                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                walreceiver_connect_timeout: settings
+                    .get("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
+                max_lsn_wal_lag: settings
+                    .get("max_lsn_wal_lag")
+                    .map(|x| x.parse::<NonZeroU64>())
+                    .transpose()
+                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
            })
            .send()?
            .error_from_body()?;
@@ -492,13 +528,54 @@ impl PageServerNode {

        Ok(timeline_info_response)
    }
-}

-fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
-        if let Ok(value) = std::env::var(env_key) {
-            cmd = cmd.env(env_key, value);
+    /// Import a basebackup prepared using either:
+    /// a) `pg_basebackup -F tar`, or
+    /// b) The `fullbackup` pageserver endpoint
+    ///
+    /// # Arguments
+    /// * `tenant_id` - tenant to import into. Created if not exists
+    /// * `timeline_id` - id to assign to imported timeline
+    /// * `base` - (start lsn of basebackup, path to `base.tar` file)
+    /// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
+    pub fn timeline_import(
+        &self,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        base: (Lsn, PathBuf),
+        pg_wal: Option<(Lsn, PathBuf)>,
+    ) -> anyhow::Result<()> {
+        let mut client = self.pg_connection_config.connect(NoTls)?;
+
+        // Init base reader
+        let (start_lsn, base_tarfile_path) = base;
+        let base_tarfile = File::open(base_tarfile_path)?;
+        let mut base_reader = BufReader::new(base_tarfile);
+
+        // Init wal reader if necessary
+        let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
+            let wal_tarfile = File::open(wal_tarfile_path)?;
+            let wal_reader = BufReader::new(wal_tarfile);
+            (end_lsn, Some(wal_reader))
+        } else {
+            (start_lsn, None)
+        };
+
+        // Import base
+        let import_cmd =
+            format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+        let mut writer = client.copy_in(&import_cmd)?;
+        io::copy(&mut base_reader, &mut writer)?;
+        writer.finish()?;
+
+        // Import wal if necessary
+        if let Some(mut wal_reader) = wal_reader {
+            let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+            let mut writer = client.copy_in(&import_cmd)?;
+            io::copy(&mut wal_reader, &mut writer)?;
+            writer.finish()?;
         }
+
+        Ok(())
     }
-    cmd
 }
--- a/docs/README.md
+++ b/docs/README.md
@@ -6,7 +6,7 @@
 - [docker.md](docker.md) — Docker images and building pipeline.
 - [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
 - [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
+- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
 - [pageserver/README.md](/pageserver/README.md) — pageserver overview.
 - [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
 - [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
--- a/docs/core_changes.md
+++ b/docs/core_changes.md
@@ -188,7 +188,7 @@ Not currently committed but proposed:
 3. Prefetching
 - Why?
  As far as pages in Zenith are loaded on demand, to reduce node startup time
-  and also sppedup some massive queries we need some mechanism for bulk loading to
+  and also speed up some massive queries we need some mechanism for bulk loading to
  reduce page request round-trip overhead.

  Currently Postgres is supporting prefetching only for bitmap scan.
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -2,7 +2,7 @@

 ### Authentication

-### Backpresssure
+### Backpressure

 Backpressure is used to limit the lag between pageserver and compute node or WAL service.

@@ -115,7 +115,7 @@ Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/RE
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
-* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.
+* `VCL`: the largest LSN for which we can guarantee availability of all prior records.

 Neon pageserver LSNs:
 * `last_record_lsn` - the end of last processed WAL record.
--- a/docs/multitenancy.md
+++ b/docs/multitenancy.md
@@ -6,7 +6,7 @@ Zenith supports multitenancy. One pageserver can serve multiple tenants at once.

 ### Tenants in other commands

-By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct arugment `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
+By default during `zenith init` a new tenant is created on the pageserver. The newly created tenant's id is saved to the cli config, so other commands can use it automatically if no direct argument `--tenantid=<tenantid>` is provided. So generally tenantid appears more frequently in the internal pageserver interface. Its commands take a tenantid argument to distinguish which tenant the operation should be applied to. The CLI supports creation of new tenants.

 Examples for cli:

--- a/docs/rfcs/002-storage.md
+++ b/docs/rfcs/002-storage.md
@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and

 ### **Checkpointing**

-No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
+No such mechanism is needed. Or we may look at the storage node as a kind of continuous checkpointer.

 ### **Full page writes (torn page protection)**

@@ -111,13 +111,13 @@ Since we are storing page diffs of variable sizes there is no structural depende

 ### **Chunk metadata**

-Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunck should always consult this data when merging SSTables and applying delete markers.
+Chunk metadata is a file that lies in the chunk directory and stores info about current snapshots and PITR regions. A chunk should always consult this data when merging SSTables and applying delete markers.

 ### **Chunk splitting**

 *(NB: following paragraph is about how to avoid page splitting)*

-When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global matadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
+When a chunk hits some soft storage limit (let's say 100Gb) it should be split in half and the global metadata about chunk boundaries should be updated. Here I assume that a chunk split is a local operation happening on a single node. The process of chunk splitting should look like the following:

 1. Find separation key and spawn two new chunks with [lo, mid) [mid, hi) boundaries.

@@ -166,7 +166,7 @@ Multi-tenant storage makes sense even on a laptop, when you work with different

 Few databases are stored in one chunk, replicated three times

- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we alway may manually move chunks around the cluster.
+- When a database can't fit into one storage node it can occupy lots of chunks that were split while the database was growing. Chunk placement on nodes is controlled by us with some automation, but we may always manually move chunks around the cluster.

 <img width="940" alt="Screenshot_2021-02-22_at_16 49 10" src="https://user-images.githubusercontent.com/284219/108729815-fb071e00-753b-11eb-86e0-be6703e47d82.png">

--- a/docs/rfcs/003-laptop-cli.md
+++ b/docs/rfcs/003-laptop-cli.md
@@ -123,7 +123,7 @@ Show currently attached storages. For example:
 > zenith storage list
 NAME            USED    TYPE                OPTIONS          PATH
 local           5.1G    zenith-local                         /opt/zenith/store/local
-local.compr     20.4G   zenith-local        comression=on    /opt/zenith/store/local.compr
+local.compr     20.4G   zenith-local        compression=on   /opt/zenith/store/local.compr
 zcloud          60G     zenith-remote                        zenith.tech/stas/mystore
 s3tank          80G     S3
 ```
@@ -136,9 +136,9 @@ s3tank          80G     S3

 ## pg

-Manages postgres data directories and can start postgreses with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themself.
+Manages postgres data directories and can start postgres instances with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themselves.

-Pg is a term for a single postgres running on some data. I'm trying to avoid here separation of datadir management and postgres instance management -- both that concepts bundled here together.
+Pg is a term for a single postgres running on some data. I'm trying to avoid separation of datadir management and postgres instance management -- both concepts are bundled together here.

 **zenith pg create** [--no-start --snapshot --cow] -s storage-name -n pgdata

--- a/docs/rfcs/005-zenith_local.md
+++ b/docs/rfcs/005-zenith_local.md
@@ -31,7 +31,7 @@ Ideally, just one binary that incorporates all elements we need.

 #### Components:

- **zenith-CLI** - interface for end-users.  Turns commands to REST requests and handles responces to show them in a user-friendly way.  
+- **zenith-CLI** - interface for end-users.  Turns commands to REST requests and handles responses to show them in a user-friendly way.  
 CLI proposal is here https://github.com/libzenith/rfcs/blob/003-laptop-cli.md/003-laptop-cli.md
 WIP code is here: https://github.com/libzenith/postgres/tree/main/pageserver/src/bin/cli

--- a/docs/rfcs/006-laptop-cli-v2-CLI.md
+++ b/docs/rfcs/006-laptop-cli-v2-CLI.md
@@ -25,9 +25,9 @@ To make changes in the catalog you need to run compute nodes
 zenith start /home/pipedpiper/northwind:main -- starts a compute instance
 zenith start zenith://zenith.tech/northwind:main -- starts a compute instance in the cloud
 -- you can start a compute node against any hash or branch
-zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start anothe compute instance (on different port)
+zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start another compute instance (on different port)
 -- you can start a compute node against any hash or branch
-zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start anothe compute instance (on different port)
+zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start another compute instance (on different port)

 -- After running some DML you can run 
 -- zenith status and see how there are two WAL streams one on top of 
--- a/docs/rfcs/006-laptop-cli-v2-repository-structure.md
+++ b/docs/rfcs/006-laptop-cli-v2-repository-structure.md
@@ -121,7 +121,7 @@ repository, launch an instance on the same branch in both clones, and
 later try to push/pull between them? Perhaps create a new timeline
 every time you start up an instance? Then you would detect that the
 timelines have diverged. That would match with the "epoch" concept
-that we have in the WAL safekeepr
+that we have in the WAL safekeeper

 ### zenith checkout/commit

--- a/docs/rfcs/009-snapshot-first-storage-cli.md
+++ b/docs/rfcs/009-snapshot-first-storage-cli.md
@@ -2,9 +2,9 @@ While working on export/import commands, I understood that they fit really well

 We may think about backups as snapshots in a different format (i.e plain pgdata format, basebackup tar format, WAL-G format (if they want to support it) and so on). They use same storage API, the only difference is the code that packs/unpacks files.

-Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postges to zenith.
+Even if zenith aims to maintain durability using its own snapshots, backups will be useful for uploading data from postgres to zenith.

-So here is an attemt to design consistent CLI for diferent usage scenarios:
+So here is an attempt to design a consistent CLI for different usage scenarios:

 #### 1. Start empty pageserver.
 That is what we have now.
--- a/docs/rfcs/009-snapshot-first-storage-pitr.md
+++ b/docs/rfcs/009-snapshot-first-storage-pitr.md
@@ -3,7 +3,7 @@
 GetPage@LSN can be called with older LSNs, and the page server needs
 to be able to reconstruct older page versions. That's needed for
 having read-only replicas that lag behind the primary, or that are
-"anchored" at an older LSN, and internally in the page server whne you
+"anchored" at an older LSN, and internally in the page server when you
 branch at an older point in time. How do you do that?

 For now, I'm not considering incremental snapshots at all. I don't
@@ -192,7 +192,7 @@ for a particular relation readily available alongside the snapshot
 files, and you don't need to track what snapshot LSNs exist
 separately.

-(If we wanted to minize the number of files, you could include the
+(If we wanted to minimize the number of files, you could include the
 snapshot @300 and the WAL between 200 and 300 in the same file, but I
 feel it's probably better to keep them separate)

--- a/docs/rfcs/009-snapshot-first-storage.md
+++ b/docs/rfcs/009-snapshot-first-storage.md
@@ -121,7 +121,7 @@ The properties of s3 that we depend on are:
 list objects
 streaming read of entire object
 read byte range from object
-streaming write new object (may use multipart upload for better relialibity)
+streaming write new object (may use multipart upload for better reliability)
 delete object (that should not disrupt an already-started read).

 Uploaded files, restored backups, or s3 buckets controlled by users could contain malicious content. We should always validate that objects contain the content they’re supposed to. Incorrect, Corrupt or malicious-looking contents should cause software (cloud tools, pageserver) to fail gracefully.
--- a/docs/rfcs/010-storage_details.md
+++ b/docs/rfcs/010-storage_details.md
@@ -40,7 +40,7 @@ b) overwrite older pages with the newer pages -- if there is no replica we proba

 I imagine that newly created pages would just be added to the back of PageStore (again in queue-like fashion) and this way there wouldn't be any meaningful ordering inside of that queue. When we are forming a new incremental snapshot we may prohibit any updates to the current set of pages in PageStore (giving up on single page version rule) and cut off that whole set when snapshot creation is complete.

-With option b) we can also treat PageStor as an uncompleted increamental snapshot.
+With option b) we can also treat PageStore as an uncompleted incremental snapshot.

 ### LocalStore

@@ -123,7 +123,7 @@ As far as I understand Bookfile/Aversion addresses versioning and serialization
 As for exact data that should go to snapshots I think it is the following for each snapshot:

 * format version number
-* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknow key are present. If we add something backward compatible to the file we can keep the version number.
+* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknown keys are present. If we add something backward compatible to the file we can keep the version number.
 * array of [BuffTag, corresponding offset in file] for pages -- IIUC that is analogous to ToC in Bookfile
 * array of [(BuffTag, LSN), corresponding offset in file] for the WAL records
 * pages, one by one
@@ -131,7 +131,7 @@ As for exact data that should go to snapshots I think it is the following for ea

 It is also important to be able to load metadata quickly since it would be one of the main factors impacting the time of page server start. E.g. if would store/cache about 10TB of data per page server, the size of uncompressed page references would be about 30GB (10TB / ( 8192 bytes page size / ( ~18 bytes per ObjectTag + 8 bytes offset in the file))).

-1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when realtion_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset delatas would be small).
+1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when relation_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset deltas would be small).
 2) It makes sense to keep ToC at the beginning of the file to avoid extra seeks to locate it. Doesn't matter too much with the local files but matters on S3 -- if we are accessing a lot of ~1Gb files with the size of metadata ~ 1Mb then the time to transfer this metadata would be comparable with access latency itself (which is about a half of a second). So by slurping metadata with one read of file header instead of N reads we can improve the speed of page server start by this N factor.

 I think both of that optimizations can be done later, but that is something to keep in mind when we are designing our storage serialization routines.
--- a/docs/rfcs/013-term-history.md
+++ b/docs/rfcs/013-term-history.md
@@ -7,13 +7,13 @@ and e.g. prevents electing two proposers with the same term -- it is actually
 called `term` in the code. The second, called `epoch`, reflects progress of log
 receival and this might lag behind `term`; safekeeper switches to epoch `n` when
 it has received all committed log records from all `< n` terms. This roughly
-correspones to proposed in
+corresponds to proposed in

 https://github.com/zenithdb/rfcs/pull/3/files


 This makes our biggest our difference from Raft. In Raft, every log record is
-stamped with term in which it was generated; while we essentialy store in
+stamped with term in which it was generated; while we essentially store in
 `epoch` only the term of the highest record on this safekeeper -- when we know
 it -- because during recovery generally we don't, and `epoch` is bumped directly
 to the term of the proposer who performs the recovery when it is finished. It is
--- a/docs/rfcs/015-storage-messaging.md
+++ b/docs/rfcs/015-storage-messaging.md
@@ -124,7 +124,7 @@ Each storage node can subscribe to the relevant sets of keys and maintain a loca

 ### Safekeeper address discovery

-During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertize something more useful.
+During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertise something more useful.

 ### Safekeeper behavior

@@ -195,7 +195,7 @@ sequenceDiagram
    PS1->>SK1: start replication
 ```

-#### Behavour of services during typical operations
+#### Behaviour of services during typical operations

 ```mermaid
 sequenceDiagram
@@ -250,7 +250,7 @@ sequenceDiagram
    PS2->>M: Register downloaded timeline
    PS2->>M: Get safekeepers for timeline, subscribe to changes
    PS2->>SK1: Start replication to catch up
-    note over O: PS2 catched up, time to switch compute
+    note over O: PS2 caught up, time to switch compute
    O->>C: Restart compute with new pageserver url in config
    note over C: Wal push is restarted
    loop request pages
--- a/docs/rfcs/README.md
+++ b/docs/rfcs/README.md
@@ -49,7 +49,7 @@ topics.

 RFC lifecycle:

-- Should be submitted in a pull request with and full RFC text in a commited markdown file and copy of the Summary and Motivation sections also included in the PR body.
+- Should be submitted in a pull request with the full RFC text in a committed markdown file and a copy of the Summary and Motivation sections also included in the PR body.
 - RFC should be published for review before most of the actual code is written. This isn’t a strict rule, don’t hesitate to experiment and build a POC in parallel with writing an RFC.
 - Add labels to the PR in the same manner as you do Issues. Example TBD
 - Request the review from your peers. Reviewing the RFCs from your peers is a priority, same as reviewing the actual code.
--- a/docs/rfcs/cluster-size-limits.md
+++ b/docs/rfcs/cluster-size-limits.md
@@ -22,8 +22,8 @@ so we don't want to give users access to the functionality that we don't think i

 * pageserver - calculate the size consumed by a timeline and add it to the feedback message.
 * safekeeper - pass feedback message from pageserver to compute.
-* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
-* console - set and update `zenith.max_cluster_size` setting
+* compute - receive feedback message, enforce size limit based on GUC `neon.max_cluster_size`.
+* console - set and update `neon.max_cluster_size` setting

 ## Proposed implementation

@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
 Alternatively, we could count only relation data. As in pg_database_size().
 This approach is somewhat more user-friendly because it is the data that is really affected by the user.
 On the other hand, it puts us in a weaker position than other services, i.e., RDS.
-We will need to refactor the timeline_size counter or add another counter to implement it. 
+We will need to refactor the timeline_size counter or add another counter to implement it.

 Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
 Then this size should be reported to compute node.

-`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
+`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`

 (PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).

@@ -49,7 +49,7 @@ This message is received by the safekeeper and propagated to compute node as a p

 Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.

-And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
+And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > neon.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
 (see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))

 TODO:
@@ -64,16 +64,16 @@ We should warn users if the limit is soon to be reached.
 ### **Reliability, failure modes and corner cases**

 1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
-    
+
    If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
-    
+
    So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
-    
+
    Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.


 ### **Security implications**

 We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
-Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
+Malicious users may change the `neon.max_cluster_size`, so we need an extra size limit check.
 To cover this case, we also monitor the compute node size in the console.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -23,7 +23,7 @@ gc_horizon = '67108864'
 max_file_descriptors = '100'

 # initial superuser role name to use when creating a new tenant
-initial_superuser_name = 'zenith_admin'
+initial_superuser_name = 'cloud_admin'

 broker_etcd_prefix = 'neon'
 broker_endpoints = ['some://etcd']
@@ -31,14 +31,14 @@ broker_endpoints = ['some://etcd']
 # [remote_storage]
 ```

-The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user, 
+The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
 see the corresponding section below.
 Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
 Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.

 Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and

-- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
+- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'cloud_admin'`

 - or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`

@@ -54,7 +54,7 @@ Note that TOML distinguishes between strings and integers, the former require si

 A list of endpoints (etcd currently) to connect and pull the information from.
 Mandatory, does not have a default, since requires etcd to be started as a separate process,
-and its connection url should be specified separately. 
+and its connection url should be specified separately.

 #### broker_etcd_prefix

@@ -105,17 +105,31 @@ Interval at which garbage collection is triggered. Default is 100 s.

 #### image_creation_threshold

-L0 delta layer threshold for L1 iamge layer creation. Default is 3.
+L0 delta layer threshold for L1 image layer creation. Default is 3.

 #### pitr_interval

 WAL retention duration for PITR branching. Default is 30 days.

+#### walreceiver_connect_timeout
+
+Time to wait to establish the wal receiver connection before failing.
+
+#### lagging_wal_timeout
+
+Time during which the pageserver has not received any WAL updates from the safekeeper (if any) before the connection is considered stalled.
+Prevents the pageserver from lagging by forcing it to switch away from stalled connections.
+
+#### max_lsn_wal_lag
+
+Difference between Lsn values of the latest available WAL on safekeepers: if the currently connected safekeeper starts to lag too long and too much,
+it gets swapped for a different one.
+
 #### initial_superuser_name

 Name of the initial superuser role, passed to initdb when a new tenant
 is initialized. It doesn't affect anything after initialization. The
-default is Note: The default is 'zenith_admin', and the console
+default is 'cloud_admin'. Note: the console
 depends on that, so if you change it, bad things will happen.

 #### page_cache_size
@@ -140,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
 #### workdir (-D)

 A directory in the file system, where pageserver will store its files.
-The default is `./.zenith/`.
+The default is `./.neon/`.

 This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.

@@ -185,7 +199,7 @@ If no IAM bucket access is used during the remote storage usage, use the `AWS_AC

 ###### General remote storage configuration

-Pagesever allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
+Pageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
 No default values are used for the remote storage configuration parameters.

 Besides, there are parameters common for all types of remote storage that can be configured, those have defaults:
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -10,7 +10,7 @@ Intended to be used in integration tests and in CLI tools for local installation

 `/docs`:

-Documentaion of the Zenith features and concepts.
+Documentation of the Zenith features and concepts.
 Now it is mostly dev documentation.

 `/monitoring`:
@@ -42,13 +42,13 @@ Integration tests, written in Python using the `pytest` framework.

 `/vendor/postgres`:

-PostgreSQL source tree, with the modifications needed for Zenith.
+PostgreSQL source tree, with the modifications needed for Neon.

-`/vendor/postgres/contrib/zenith`:
+`/vendor/postgres/contrib/neon`:

 PostgreSQL extension that implements storage manager API and network communications with remote page server.

-`/vendor/postgres/contrib/zenith_test_utils`:
+`/vendor/postgres/contrib/neon_test_utils`:

 PostgreSQL extension that contains functions needed for testing and debugging.

@@ -92,7 +92,7 @@ A single virtual environment with all dependencies is described in the single `P

 ### Prerequisites
 - Install Python 3.9 (the minimal supported version) or greater.
-    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
+    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
    - If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
      ```bash
      # In Ubuntu
--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -9,6 +9,7 @@
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "1.12.0"
+ once_cell = "1.8.0"

 utils = { path = "../utils" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/etcd_broker/src/lib.rs
+++ b/libs/etcd_broker/src/lib.rs
@@ -1,88 +1,43 @@
 //! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
 //! Intended to connect services to each other, not to store their data.
+
+/// All broker keys, that are used when dealing with etcd.
+pub mod subscription_key;
+/// All broker values, possible to use when dealing with etcd.
+pub mod subscription_value;
+
 use std::{
    collections::{hash_map, HashMap},
-    fmt::Display,
    str::FromStr,
 };

-use regex::{Captures, Regex};
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
-
-pub use etcd_client::*;
+use serde::de::DeserializeOwned;

+use subscription_key::SubscriptionKey;
 use tokio::{sync::mpsc, task::JoinHandle};
 use tracing::*;
-use utils::{
-    lsn::Lsn,
-    zid::{NodeId, ZTenantId, ZTenantTimelineId},
-};
+use utils::zid::{NodeId, ZTenantTimelineId};
+
+use crate::subscription_key::SubscriptionFullKey;
+
+pub use etcd_client::*;

 /// Default value to use for prefixing to all etcd keys with.
 /// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
 pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";

-#[derive(Debug, Deserialize, Serialize)]
-struct SafekeeperTimeline {
-    safekeeper_id: NodeId,
-    info: SkTimelineInfo,
-}
-
-/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
-#[serde_as]
-#[derive(Debug, Deserialize, Serialize)]
-pub struct SkTimelineInfo {
-    /// Term of the last entry.
-    pub last_log_term: Option<u64>,
-    /// LSN of the last record.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub flush_lsn: Option<Lsn>,
-    /// Up to which LSN safekeeper regards its WAL as committed.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub commit_lsn: Option<Lsn>,
-    /// LSN up to which safekeeper offloaded WAL to s3.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub s3_wal_lsn: Option<Lsn>,
-    /// LSN of last checkpoint uploaded by pageserver.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub remote_consistent_lsn: Option<Lsn>,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub peer_horizon_lsn: Option<Lsn>,
-    #[serde(default)]
-    pub safekeeper_connection_string: Option<String>,
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum BrokerError {
-    #[error("Etcd client error: {0}. Context: {1}")]
-    EtcdClient(etcd_client::Error, String),
-    #[error("Error during parsing etcd data: {0}")]
-    ParsingError(String),
-    #[error("Internal error: {0}")]
-    InternalError(String),
-}
-
 /// A way to control the data retrieval from a certain subscription.
-pub struct SkTimelineSubscription {
-    safekeeper_timeline_updates:
-        mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
-    kind: SkTimelineSubscriptionKind,
+pub struct BrokerSubscription<V> {
+    value_updates: mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>>,
+    key: SubscriptionKey,
    watcher_handle: JoinHandle<Result<(), BrokerError>>,
    watcher: Watcher,
 }

-impl SkTimelineSubscription {
+impl<V> BrokerSubscription<V> {
    /// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
-    pub async fn fetch_data(
-        &mut self,
-    ) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
-        self.safekeeper_timeline_updates.recv().await
+    pub async fn fetch_data(&mut self) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>> {
+        self.value_updates.recv().await
    }

    /// Cancels the subscription, stopping the data poller and waiting for it to shut down.
@@ -90,142 +45,90 @@ impl SkTimelineSubscription {
        self.watcher.cancel().await.map_err(|e| {
            BrokerError::EtcdClient(
                e,
-                format!(
-                    "Failed to cancel timeline subscription, kind: {:?}",
-                    self.kind
-                ),
+                format!("Failed to cancel broker subscription, kind: {:?}", self.key),
            )
        })?;
        self.watcher_handle.await.map_err(|e| {
            BrokerError::InternalError(format!(
-                "Failed to join the timeline updates task, kind: {:?}, error: {e}",
-                self.kind
+                "Failed to join the broker value updates task, kind: {:?}, error: {e}",
+                self.key
            ))
        })?
    }
 }

-/// The subscription kind to the timeline updates from safekeeper.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct SkTimelineSubscriptionKind {
-    broker_etcd_prefix: String,
-    kind: SubscriptionKind,
-}
-
-impl SkTimelineSubscriptionKind {
-    pub fn all(broker_etcd_prefix: String) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::All,
-        }
-    }
-
-    pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Tenant(tenant),
-        }
-    }
-
-    pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Timeline(timeline),
-        }
-    }
-
-    fn watch_regex(&self) -> Regex {
-        match self.kind {
-            SubscriptionKind::All => Regex::new(&format!(
-                r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'everything' subscription"),
-            SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
-                r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'tenant' subscription"),
-            SubscriptionKind::Timeline(ZTenantTimelineId {
-                tenant_id,
-                timeline_id,
-            }) => Regex::new(&format!(
-                r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'timeline' subscription"),
-        }
-    }
-
-    /// Etcd key to use for watching a certain timeline updates from safekeepers.
-    pub fn watch_key(&self) -> String {
-        match self.kind {
-            SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
-            SubscriptionKind::Tenant(tenant_id) => {
-                format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
-            }
-            SubscriptionKind::Timeline(ZTenantTimelineId {
-                tenant_id,
-                timeline_id,
-            }) => format!(
-                "{}/{tenant_id}/{timeline_id}/safekeeper",
-                self.broker_etcd_prefix
-            ),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-enum SubscriptionKind {
-    /// Get every timeline update.
-    All,
-    /// Get certain tenant timelines' updates.
-    Tenant(ZTenantId),
-    /// Get certain timeline updates.
-    Timeline(ZTenantTimelineId),
+#[derive(Debug, thiserror::Error)]
+pub enum BrokerError {
+    #[error("Etcd client error: {0}. Context: {1}")]
+    EtcdClient(etcd_client::Error, String),
+    #[error("Error during parsing etcd key: {0}")]
+    KeyNotParsed(String),
+    #[error("Internal error: {0}")]
+    InternalError(String),
 }

 /// Creates a background task to poll etcd for timeline updates from safekeepers.
 /// Stops and returns `Err` on any error during etcd communication.
 /// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
 /// exiting normally in such cases.
-pub async fn subscribe_to_safekeeper_timeline_updates(
+/// Etcd values are parsed as JSON into a type, specified in the generic parameter.
+pub async fn subscribe_for_json_values<V>(
    client: &mut Client,
-    subscription: SkTimelineSubscriptionKind,
-) -> Result<SkTimelineSubscription, BrokerError> {
-    info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
+    key: SubscriptionKey,
+) -> Result<BrokerSubscription<V>, BrokerError>
+where
+    V: DeserializeOwned + Send + 'static,
+{
+    subscribe_for_values(client, key, |_, value_str| {
+        match serde_json::from_str::<V>(value_str) {
+            Ok(value) => Some(value),
+            Err(e) => {
+                error!("Failed to parse value str '{value_str}': {e}");
+                None
+            }
+        }
+    })
+    .await
+}
+
+/// Same as [`subscribe_for_json_values`], but allows specifying a custom parser for an etcd value string.
+pub async fn subscribe_for_values<P, V>(
+    client: &mut Client,
+    key: SubscriptionKey,
+    value_parser: P,
+) -> Result<BrokerSubscription<V>, BrokerError>
+where
+    V: Send + 'static,
+    P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
+{
+    info!("Subscribing to broker value updates, key: {key:?}");
+    let subscription_key = key.clone();

    let (watcher, mut stream) = client
-        .watch(
-            subscription.watch_key(),
-            Some(WatchOptions::new().with_prefix()),
-        )
+        .watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
        .await
        .map_err(|e| {
            BrokerError::EtcdClient(
                e,
-                format!("Failed to init the watch for subscription {subscription:?}"),
+                format!("Failed to init the watch for subscription {key:?}"),
            )
        })?;

-    let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
-
-    let subscription_kind = subscription.kind;
-    let regex = subscription.watch_regex();
+    let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
    let watcher_handle = tokio::spawn(async move {
        while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
-            "Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
+            "Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
        )))? {
            if resp.canceled() {
                info!("Watch for timeline updates subscription was canceled, exiting");
                break;
            }

-            let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
+            let mut value_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, V>> = HashMap::new();
            // Keep track that the timeline data updates from etcd arrive in the right order.
            // https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
            // > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
-            let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
+            let mut value_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();


            let events = resp.events();
@@ -236,113 +139,77 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
                    if let Some(new_etcd_kv) = event.kv() {
                        let new_kv_version = new_etcd_kv.version();

-                        match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
-                            Ok(Some((zttid, timeline))) => {
-                                match timeline_updates
-                                    .entry(zttid)
-                                    .or_default()
-                                    .entry(timeline.safekeeper_id)
-                                {
-                                    hash_map::Entry::Occupied(mut o) => {
-                                        let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
-                                        if old_etcd_kv_version < new_kv_version {
-                                            o.insert(timeline.info);
-                                            timeline_etcd_versions.insert(zttid,new_kv_version);
+                        match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
+                            Ok(Some((key, value))) => match value_updates
+                                .entry(key.id)
+                                .or_default()
+                                .entry(key.node_id)
+                                    {
+                                        hash_map::Entry::Occupied(mut o) => {
+                                            let old_etcd_kv_version = value_etcd_versions.get(&key.id).copied().unwrap_or(i64::MIN);
+                                            if old_etcd_kv_version < new_kv_version {
+                                                o.insert(value);
+                                                value_etcd_versions.insert(key.id,new_kv_version);
+                                            } else {
+                                                debug!("Skipping etcd timeline update due to older version compared to one that's already stored");
+                                            }
                                        }
-                                    }
-                                    hash_map::Entry::Vacant(v) => {
-                                        v.insert(timeline.info);
-                                        timeline_etcd_versions.insert(zttid,new_kv_version);
-                                    }
-                                }
-                            }
-                            Ok(None) => {}
-                            Err(e) => error!("Failed to parse timeline update: {e}"),
+                                        hash_map::Entry::Vacant(v) => {
+                                            v.insert(value);
+                                            value_etcd_versions.insert(key.id,new_kv_version);
+                                        }
+                                    },
+                            Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
+                            Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
+                            Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
                        };
                    }
                }
            }

-            if let Err(e) = timeline_updates_sender.send(timeline_updates) {
-                info!("Timeline updates sender got dropped, exiting: {e}");
-                break;
+            if !value_updates.is_empty() {
+                if let Err(e) = value_updates_sender.send(value_updates) {
+                    info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
+                    break;
+                }
            }
        }

        Ok(())
-    });
+    }.instrument(info_span!("etcd_broker")));

-    Ok(SkTimelineSubscription {
-        kind: subscription,
-        safekeeper_timeline_updates,
+    Ok(BrokerSubscription {
+        key: subscription_key,
+        value_updates: value_updates_receiver,
        watcher_handle,
        watcher,
    })
 }

-fn parse_etcd_key_value(
-    subscription_kind: SubscriptionKind,
-    regex: &Regex,
+fn parse_etcd_kv<P, V>(
    kv: &KeyValue,
-) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
-    let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
-        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
-    })?) {
-        caps
-    } else {
-        return Ok(None);
-    };
-
-    let (zttid, safekeeper_id) = match subscription_kind {
-        SubscriptionKind::All => (
-            ZTenantTimelineId::new(
-                parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
-                parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
-            ),
-            NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
-        ),
-        SubscriptionKind::Tenant(tenant_id) => (
-            ZTenantTimelineId::new(
-                tenant_id,
-                parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
-            ),
-            NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
-        ),
-        SubscriptionKind::Timeline(zttid) => (
-            zttid,
-            NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
-        ),
-    };
-
-    let info_str = kv.value_str().map_err(|e| {
-        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
-    })?;
-    Ok(Some((
-        zttid,
-        SafekeeperTimeline {
-            safekeeper_id,
-            info: serde_json::from_str(info_str).map_err(|e| {
-                BrokerError::ParsingError(format!(
-                    "Failed to parse '{info_str}' as safekeeper timeline info: {e}"
-                ))
-            })?,
-        },
-    )))
-}
-
-fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
+    value_parser: &P,
+    cluster_prefix: &str,
+) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
 where
-    T: FromStr,
-    <T as FromStr>::Err: Display,
+    P: Fn(SubscriptionFullKey, &str) -> Option<V>,
 {
-    let capture_match = caps
-        .get(index)
-        .ok_or_else(|| format!("Failed to get capture match at index {index}"))?
-        .as_str();
-    capture_match.parse().map_err(|e| {
-        format!(
-            "Failed to parse {} from {capture_match}: {e}",
-            std::any::type_name::<T>()
-        )
-    })
+    let key_str = kv.key_str().map_err(|e| {
+        BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
+    })?;
+    let value_str = kv.value_str().map_err(|e| {
+        BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
+    })?;
+
+    if !key_str.starts_with(cluster_prefix) {
+        return Err(BrokerError::KeyNotParsed(format!(
+            "KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
+        )));
+    }
+
+    let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
+        BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
+    })?;
+
+    Ok(value_parser(key, value_str).map(|value| (key, value)))
 }
--- a/libs/etcd_broker/src/subscription_key.rs
+++ b/libs/etcd_broker/src/subscription_key.rs
@@ -0,0 +1,310 @@
+//! Etcd broker keys, used in the project and shared between instances.
+//! The keys are split into two categories:
+//!
+//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
+//! Always returned from etcd in this form, always starts with the user key provided.
+//!
+//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
+//! Full key always starts with the user input one, due to etcd subscription properties.
+
+use std::{fmt::Display, str::FromStr};
+
+use once_cell::sync::Lazy;
+use regex::{Captures, Regex};
+use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
+
+/// User-provided (partial) etcd subscription key: a cluster prefix plus the kind of updates to watch.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SubscriptionKey {
+    /// Generic cluster prefix, allowing multiple logical groups to share the same etcd instance.
+    pub cluster_prefix: String,
+    /// The subscription kind; defines which key components follow the prefix in `watch_key`.
+    pub kind: SubscriptionKind,
+}
+
+/// All currently possible key kinds of an etcd broker subscription.
+/// Etcd works so that every key that starts with the given subscription key is considered matching and
+/// returned as part of the subscription.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SubscriptionKind {
+    /// Get every update in etcd under the cluster prefix.
+    All,
+    /// Get etcd updates for any timeline of a certain tenant, affected by any operation from any node kind.
+    TenantTimelines(ZTenantId),
+    /// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
+    Timeline(ZTenantTimelineId),
+    /// Get etcd timeline updates, specific to a certain node kind.
+    Node(ZTenantTimelineId, NodeKind),
+    /// Get etcd timeline updates for a certain operation on specific nodes.
+    Operation(ZTenantTimelineId, NodeKind, OperationKind),
+}
+
+/// All kinds of nodes, able to write into etcd.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum NodeKind {
+    Safekeeper,
+    Pageserver,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum OperationKind {
+    Safekeeper(SkOperationKind),
+}
+
+/// Current operations, running inside the safekeeper node.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SkOperationKind {
+    TimelineInfo,
+    WalBackup,
+}
+
+static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
+        .expect("wrong subscription full etcd key regex")
+});
+
+/// Full key, received from etcd during any of the component's work.
+/// No other etcd keys are considered during system's work.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SubscriptionFullKey {
+    pub id: ZTenantTimelineId,
+    pub node_kind: NodeKind,
+    pub operation: OperationKind,
+    pub node_id: NodeId,
+}
+
+impl SubscriptionKey {
+    /// Subscribes for all etcd updates.
+    pub fn all(cluster_prefix: String) -> Self {
+        SubscriptionKey {
+            cluster_prefix,
+            kind: SubscriptionKind::All,
+        }
+    }
+
+    /// Subscribes to timeline info updates of a given timeline from safekeepers.
+    pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
+        Self {
+            cluster_prefix,
+            kind: SubscriptionKind::Operation(
+                timeline,
+                NodeKind::Safekeeper,
+                OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
+            ),
+        }
+    }
+
+    /// Subscribes to all timeline updates during specific operations, running on the corresponding nodes.
+    pub fn operation(
+        cluster_prefix: String,
+        timeline: ZTenantTimelineId,
+        node_kind: NodeKind,
+        operation: OperationKind,
+    ) -> Self {
+        Self {
+            cluster_prefix,
+            kind: SubscriptionKind::Operation(timeline, node_kind, operation),
+        }
+    }
+
+    /// Etcd key to watch: the cluster prefix followed by the components fixed by the subscription kind.
+    pub fn watch_key(&self) -> String {
+        let cluster_prefix = &self.cluster_prefix;
+        match self.kind {
+            SubscriptionKind::All => cluster_prefix.to_string(),
+            SubscriptionKind::TenantTimelines(tenant_id) => {
+                format!("{cluster_prefix}/{tenant_id}")
+            }
+            SubscriptionKind::Timeline(id) => {
+                format!("{cluster_prefix}/{id}")
+            }
+            SubscriptionKind::Node(id, node_kind) => {
+                format!("{cluster_prefix}/{id}/{node_kind}")
+            }
+            SubscriptionKind::Operation(id, node_kind, operation_kind) => {
+                format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
+            }
+        }
+    }
+}
+
+impl Display for OperationKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // The only variant wraps a safekeeper operation and is printed exactly like it.
+        let Self::Safekeeper(sk_operation) = self;
+        Display::fmt(sk_operation, f)
+    }
+}
+
+impl FromStr for OperationKind {
+    type Err = String;
+
+    fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
+        // Reuse the safekeeper operation parser (as `Display` does) so the names cannot diverge.
+        operation_kind_str
+            .parse::<SkOperationKind>()
+            .map(OperationKind::Safekeeper)
+            .map_err(|_| format!("Unknown operation kind: {operation_kind_str}"))
+    }
+}
+
+impl Display for SubscriptionFullKey {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self {
+            id,
+            node_kind,
+            operation,
+            node_id,
+        } = self;
+        write!(f, "{id}/{node_kind}/{operation}/{node_id}")
+    }
+}
+
+impl FromStr for SubscriptionFullKey {
+    type Err = String;
+
+    fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
+        // The regex both validates the overall key shape and splits it into its components.
+        let key_captures = SUBSCRIPTION_FULL_KEY_REGEX
+            .captures(subscription_kind_str)
+            .ok_or_else(|| {
+                format!(
+                    "Subscription kind str does not match a subscription full key regex {}",
+                    SUBSCRIPTION_FULL_KEY_REGEX.as_str()
+                )
+            })?;
+
+        Ok(Self {
+            id: ZTenantTimelineId::new(
+                parse_capture(&key_captures, 1)?,
+                parse_capture(&key_captures, 2)?,
+            ),
+            node_kind: parse_capture(&key_captures, 3)?,
+            operation: parse_capture(&key_captures, 4)?,
+            node_id: NodeId(parse_capture(&key_captures, 5)?),
+        })
+    }
+}
+
+fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
+where
+    T: FromStr,
+    <T as FromStr>::Err: Display,
+{
+    let capture_match = caps
+        .get(index)
+        .ok_or_else(|| format!("Failed to get capture match at index {index}"))?
+        .as_str();
+    capture_match.parse().map_err(|e| {
+        format!(
+            "Failed to parse {} from {capture_match}: {e}",
+            std::any::type_name::<T>()
+        )
+    })
+}
+
+impl Display for NodeKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Safekeeper => "safekeeper",
+            Self::Pageserver => "pageserver",
+        })
+    }
+}
+
+impl FromStr for NodeKind {
+    type Err = String;
+
+    fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
+        Ok(match node_kind_str {
+            "safekeeper" => Self::Safekeeper,
+            "pageserver" => Self::Pageserver,
+            unknown => return Err(format!("Invalid node kind: {unknown}")),
+        })
+    }
+}
+
+impl Display for SkOperationKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::TimelineInfo => "timeline_info",
+            Self::WalBackup => "wal_backup",
+        })
+    }
+}
+
+impl FromStr for SkOperationKind {
+    type Err = String;
+
+    fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
+        Ok(match operation_str {
+            "timeline_info" => Self::TimelineInfo,
+            "wal_backup" => Self::WalBackup,
+            unknown => return Err(format!("Invalid operation: {unknown}")),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use utils::zid::ZTimelineId;
+
+    use super::*;
+
+    #[test]
+    fn full_cluster_key_parsing() {
+        let prefix = "neon";
+        let node_kind = NodeKind::Safekeeper;
+        let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
+        let tenant_id = ZTenantId::generate();
+        let timeline_id = ZTimelineId::generate();
+        let id = ZTenantTimelineId::new(tenant_id, timeline_id);
+        let node_id = NodeId(1);
+        // Keys ordered from the least to the most specific one; the last one has the longest watch key.
+        let timeline_subscription_keys = [
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::All,
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::TenantTimelines(tenant_id),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Timeline(id),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Node(id, node_kind),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
+            },
+        ];
+
+        let full_key_string = format!(
+            "{}/{node_id}",
+            timeline_subscription_keys.last().unwrap().watch_key()
+        );
+        // Every partial watch key has to be a prefix of the full key built from the most specific one.
+        for key in timeline_subscription_keys {
+            assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with any of the keys, but {key:?} did not match");
+        }
+
+        let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
+            panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
+        });
+
+        assert_eq!(
+            full_key,
+            SubscriptionFullKey {
+                id,
+                node_kind,
+                operation: operation_kind,
+                node_id
+            }
+        )
+    }
+}
--- a/libs/etcd_broker/src/subscription_value.rs
+++ b/libs/etcd_broker/src/subscription_value.rs
@@ -0,0 +1,35 @@
+//! Module for the values to put into etcd.
+
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use utils::lsn::Lsn;
+
+/// Data about safekeeper's timeline. Fields made optional for easy migrations.
+#[serde_as]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct SkTimelineInfo {
+    /// Term of the last entry.
+    pub last_log_term: Option<u64>,
+    /// LSN of the last record.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub flush_lsn: Option<Lsn>,
+    /// Up to which LSN safekeeper regards its WAL as committed.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub commit_lsn: Option<Lsn>,
+    /// LSN up to which safekeeper has backed up WAL.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub backup_lsn: Option<Lsn>,
+    /// LSN of last checkpoint uploaded by pageserver.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub remote_consistent_lsn: Option<Lsn>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub peer_horizon_lsn: Option<Lsn>,
+    /// A connection string to use for WAL receiving.
+    #[serde(default)]
+    pub safekeeper_connstr: Option<String>,
+}
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,6 +3,7 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 use lazy_static::lazy_static;
+pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_gauge, Gauge};
 pub use prometheus::{register_gauge_vec, GaugeVec};
--- a/libs/postgres_ffi/src/waldecoder.rs
+++ b/libs/postgres_ffi/src/waldecoder.rs
@@ -73,7 +73,7 @@ impl WalStreamDecoder {
    /// Returns one of the following:
    ///     Ok((Lsn, Bytes)): a tuple containing the LSN of next record, and the record itself
    ///     Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
-    ///     Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
+    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
    ///
    pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
        let recordbuf;
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -531,7 +531,7 @@ impl CheckPoint {
    ///
    /// Returns 'true' if the XID was updated.
    pub fn update_next_xid(&mut self, xid: u32) -> bool {
-        // nextXid should nw greate than any XID in WAL, so increment provided XID and check for wraparround.
+        // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparound.
        let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
--- a/libs/postgres_ffi/wal_generate/src/lib.rs
+++ b/libs/postgres_ffi/wal_generate/src/lib.rs
@@ -4,6 +4,7 @@ use log::*;
 use postgres::types::PgLsn;
 use postgres::Client;
 use std::cmp::Ordering;
+use std::fs;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use std::time::Instant;
@@ -69,6 +70,12 @@ impl Conf {

    pub fn start_server(&self) -> Result<PostgresServer> {
        info!("Starting Postgres server in {:?}", self.datadir);
+        let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
+            format!(
+                "Failed to create pg.log file in directory {}",
+                self.datadir.display()
+            )
+        })?;
        let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
        let unix_socket_dir_path = unix_socket_dir.path().to_owned();
        let server_process = self
@@ -80,11 +87,11 @@ impl Conf {
            .arg(self.datadir.as_os_str())
            .args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
            .args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
-            .args(&["-c", "shared_preload_libraries=zenith"]) // can only be loaded at startup
+            .args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
            // Disable background processes as much as possible
            .args(&["-c", "wal_writer_delay=10s"])
            .args(&["-c", "autovacuum=off"])
-            .stderr(Stdio::null())
+            .stderr(Stdio::from(log_file))
            .spawn()?;
        let server = PostgresServer {
            process: server_process,
@@ -178,7 +185,7 @@ fn generate_internal<C: postgres::GenericClient>(
    client: &mut C,
    f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
 ) -> Result<PgLsn> {
-    client.execute("create extension if not exists zenith_test_utils", &[])?;
+    client.execute("create extension if not exists neon_test_utils", &[])?;

    let wal_segment_size = client.query_one(
        "select cast(setting as bigint) as setting, unit \
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -5,14 +5,17 @@ edition = "2021"

 [dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
-tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
-tokio-util = { version = "0.7", features = ["io"] }
-tracing = "0.1.27"
+async-trait = "0.1"
+metrics = { version = "0.1", path = "../metrics" }
+once_cell = "1.8.0"
 rusoto_core = "0.48"
 rusoto_s3 = "0.48"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
-async-trait = "0.1"
+tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
+tokio-util = { version = "0.7", features = ["io"] }
+toml_edit = { version = "0.13", features = ["easy"] }
+tracing = "0.1.27"

 workspace_hack = { version = "0.1", path = "../../workspace_hack" }

--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -16,8 +16,10 @@ use std::{
    path::{Path, PathBuf},
 };

-use anyhow::Context;
+use anyhow::{bail, Context};
+
 use tokio::io;
+use toml_edit::Item;
 use tracing::info;

 pub use self::{
@@ -203,6 +205,90 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
        .with_extension(new_extension.as_ref())
 }

+impl RemoteStorageConfig {
+    pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
+        let local_path = toml.get("local_path");
+        let bucket_name = toml.get("bucket_name");
+        let bucket_region = toml.get("bucket_region");
+
+        let max_concurrent_syncs = NonZeroUsize::new(
+            parse_optional_integer("max_concurrent_syncs", toml)?
+                .unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
+        )
+        .context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
+
+        let max_sync_errors = NonZeroU32::new(
+            parse_optional_integer("max_sync_errors", toml)?
+                .unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
+        )
+        .context("Failed to parse 'max_sync_errors' as a positive integer")?;
+
+        let concurrency_limit = NonZeroUsize::new(
+            parse_optional_integer("concurrency_limit", toml)?
+                .unwrap_or(DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
+        )
+        .context("Failed to parse 'concurrency_limit' as a positive integer")?;
+
+        let storage = match (local_path, bucket_name, bucket_region) {
+            (None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
+            (_, Some(_), None) => {
+                bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
+            }
+            (_, None, Some(_)) => {
+                bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
+            }
+            (None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
+                bucket_name: parse_toml_string("bucket_name", bucket_name)?,
+                bucket_region: parse_toml_string("bucket_region", bucket_region)?,
+                prefix_in_bucket: toml
+                    .get("prefix_in_bucket")
+                    .map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
+                    .transpose()?,
+                endpoint: toml
+                    .get("endpoint")
+                    .map(|endpoint| parse_toml_string("endpoint", endpoint))
+                    .transpose()?,
+                concurrency_limit,
+            }),
+            (Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
+                parse_toml_string("local_path", local_path)?,
+            )),
+            (Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
+        };
+
+        Ok(RemoteStorageConfig {
+            max_concurrent_syncs,
+            max_sync_errors,
+            storage,
+        })
+    }
+}
+
+// Helper functions to parse a toml Item
+fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
+where
+    I: TryFrom<i64, Error = E>,
+    E: std::error::Error + Send + Sync + 'static,
+{
+    let toml_integer = match item.get(name) {
+        Some(item) => item
+            .as_integer()
+            .with_context(|| format!("configure option {name} is not an integer"))?,
+        None => return Ok(None),
+    };
+    // The conversion fails both for too large values and for negative values of unsigned targets.
+    I::try_from(toml_integer)
+        .map(Some)
+        .with_context(|| format!("configure option {name} value {toml_integer} is out of range"))
+}
+
+fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
+    let s = item
+        .as_str()
+        .with_context(|| format!("configure option {name} is not a string"))?;
+    Ok(s.to_string())
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -23,6 +23,71 @@ use crate::{strip_path_prefix, RemoteStorage, S3Config};

 use super::StorageMetadata;

+pub(super) mod metrics {
+    use metrics::{register_int_counter_vec, IntCounterVec};
+    use once_cell::sync::Lazy;
+
+    static S3_REQUESTS_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
+        register_int_counter_vec!(
+            "remote_storage_s3_requests_count",
+            "Number of s3 requests of particular type",
+            &["request_type"],
+        )
+        .expect("failed to define a metric")
+    });
+
+    static S3_REQUESTS_FAIL_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
+        register_int_counter_vec!(
+            "remote_storage_s3_failures_count",
+            "Number of failed s3 requests of particular type",
+            &["request_type"],
+        )
+        .expect("failed to define a metric")
+    });
+
+    pub fn inc_get_object() {
+        S3_REQUESTS_COUNT.with_label_values(&["get_object"]).inc();
+    }
+
+    pub fn inc_get_object_fail() {
+        S3_REQUESTS_FAIL_COUNT
+            .with_label_values(&["get_object"])
+            .inc();
+    }
+
+    pub fn inc_put_object() {
+        S3_REQUESTS_COUNT.with_label_values(&["put_object"]).inc();
+    }
+
+    pub fn inc_put_object_fail() {
+        S3_REQUESTS_FAIL_COUNT
+            .with_label_values(&["put_object"])
+            .inc();
+    }
+
+    pub fn inc_delete_object() {
+        S3_REQUESTS_COUNT
+            .with_label_values(&["delete_object"])
+            .inc();
+    }
+
+    pub fn inc_delete_object_fail() {
+        S3_REQUESTS_FAIL_COUNT
+            .with_label_values(&["delete_object"])
+            .inc();
+    }
+
+    pub fn inc_list_objects() {
+        S3_REQUESTS_COUNT.with_label_values(&["list_objects"]).inc();
+    }
+
+    pub fn inc_list_objects_fail() {
+        S3_REQUESTS_FAIL_COUNT
+            .with_label_values(&["list_objects"])
+            .inc();
+    }
+}
+
 const S3_PREFIX_SEPARATOR: char = '/';

 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
@@ -152,6 +217,9 @@ impl RemoteStorage for S3Bucket {
                .acquire()
                .await
                .context("Concurrency limiter semaphore got closed during S3 list")?;
+
+            metrics::inc_list_objects();
+
            let fetch_response = self
                .client
                .list_objects_v2(ListObjectsV2Request {
@@ -160,7 +228,11 @@ impl RemoteStorage for S3Bucket {
                    continuation_token,
                    ..ListObjectsV2Request::default()
                })
-                .await?;
+                .await
+                .map_err(|e| {
+                    metrics::inc_list_objects_fail();
+                    e
+                })?;
            document_keys.extend(
                fetch_response
                    .contents
@@ -190,6 +262,8 @@ impl RemoteStorage for S3Bucket {
            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 upload")?;
+
+        metrics::inc_put_object();
        self.client
            .put_object(PutObjectRequest {
                body: Some(StreamingBody::new_with_size(
@@ -201,7 +275,11 @@ impl RemoteStorage for S3Bucket {
                metadata: metadata.map(|m| m.0),
                ..PutObjectRequest::default()
            })
-            .await?;
+            .await
+            .map_err(|e| {
+                metrics::inc_put_object_fail();
+                e
+            })?;
        Ok(())
    }

@@ -215,6 +293,9 @@ impl RemoteStorage for S3Bucket {
            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 download")?;
+
+        metrics::inc_get_object();
+
        let object_output = self
            .client
            .get_object(GetObjectRequest {
@@ -222,7 +303,11 @@ impl RemoteStorage for S3Bucket {
                key: from.key().to_owned(),
                ..GetObjectRequest::default()
            })
-            .await?;
+            .await
+            .map_err(|e| {
+                metrics::inc_get_object_fail();
+                e
+            })?;

        if let Some(body) = object_output.body {
            let mut from = io::BufReader::new(body.into_async_read());
@@ -251,6 +336,9 @@ impl RemoteStorage for S3Bucket {
            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 range download")?;
+
+        metrics::inc_get_object();
+
        let object_output = self
            .client
            .get_object(GetObjectRequest {
@@ -259,7 +347,11 @@ impl RemoteStorage for S3Bucket {
                range,
                ..GetObjectRequest::default()
            })
-            .await?;
+            .await
+            .map_err(|e| {
+                metrics::inc_get_object_fail();
+                e
+            })?;

        if let Some(body) = object_output.body {
            let mut from = io::BufReader::new(body.into_async_read());
@@ -275,13 +367,20 @@ impl RemoteStorage for S3Bucket {
            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 delete")?;
+
+        metrics::inc_delete_object();
+
        self.client
            .delete_object(DeleteObjectRequest {
                bucket: self.bucket_name.clone(),
                key: path.key().to_owned(),
                ..DeleteObjectRequest::default()
            })
-            .await?;
+            .await
+            .map_err(|e| {
+                metrics::inc_delete_object_fail();
+                e
+            })?;
        Ok(())
    }
 }
--- a/libs/utils/scripts/restore_from_wal.sh
+++ b/libs/utils/scripts/restore_from_wal.sh
@@ -5,7 +5,7 @@ DATA_DIR=$3
 PORT=$4
 SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
 rm -fr $DATA_DIR
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
 echo port=$PORT >> $DATA_DIR/postgresql.conf
 REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
 declare -i WAL_SIZE=$REDO_POS+114
--- a/libs/utils/scripts/restore_from_wal_archive.sh
+++ b/libs/utils/scripts/restore_from_wal_archive.sh
@@ -5,7 +5,7 @@ PORT=$4
 SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
 rm -fr $DATA_DIR /tmp/pg_wals
 mkdir /tmp/pg_wals
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
 echo port=$PORT >> $DATA_DIR/postgresql.conf
 REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
 declare -i WAL_SIZE=$REDO_POS+114
--- a/libs/utils/src/bin_ser.rs
+++ b/libs/utils/src/bin_ser.rs
@@ -71,7 +71,7 @@ impl From<bincode::Error> for SerializeError {
 /// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
 ///
 /// Does not allow trailing bytes in deserialization. If this is desired, you
-/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
+/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
 pub fn be_coder() -> impl Options {
    bincode::DefaultOptions::new()
        .with_big_endian()
@@ -85,7 +85,7 @@ pub fn be_coder() -> impl Options {
 /// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
 ///
 /// Does not allow trailing bytes in deserialization. If this is desired, you
-/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
+/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
 pub fn le_coder() -> impl Options {
    bincode::DefaultOptions::new()
        .with_little_endian()
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -64,7 +64,7 @@ pub mod signals;
 /// One thing to note is that .git is not available in docker (and it is bad to include it there).
 /// So everything becides docker build is covered by git_version crate, and docker uses a `GIT_VERSION` argument to get the value required.
 /// It takes variable from build process env and puts it to the rustc env. And then we can retrieve it here by using env! macro.
-/// Git version received from environment variable used as a fallback in git_version invokation.
+/// Git version received from environment variable used as a fallback in git_version invocation.
 /// And to avoid running buildscript every recompilation, we use rerun-if-env-changed option.
 /// So the build script will be run only when GIT_VERSION envvar has changed.
 ///
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -26,6 +26,9 @@ impl Lsn {
    /// Maximum possible value for an LSN
    pub const MAX: Lsn = Lsn(u64::MAX);

+    /// Invalid LSN value (zero); counterpart of `InvalidXLogRecPtr`, as defined in xlogdefs.h
+    pub const INVALID: Lsn = Lsn(0);
+
    /// Subtract a number, returning None on overflow.
    pub fn checked_sub<T: Into<u64>>(self, other: T) -> Option<Lsn> {
        let other: u64 = other.into();
@@ -103,6 +106,12 @@ impl Lsn {
    pub fn is_aligned(&self) -> bool {
        *self == self.align()
    }
+
+    /// Returns `true` if the LSN is valid, i.e. differs from [`Lsn::INVALID`];
+    /// this is the negation of the postgres `XLogRecPtrIsInvalid` macro.
+    pub fn is_valid(self) -> bool {
+        self != Lsn::INVALID
+    }
 }

 impl From<u64> for Lsn {
--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -13,13 +13,10 @@ use std::fmt;
 use std::io::{self, Write};
 use std::net::{Shutdown, SocketAddr, TcpStream};
 use std::str::FromStr;
-use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 use tracing::*;

-static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
-
 pub trait Handler {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
@@ -45,6 +42,10 @@ pub trait Handler {
    fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
        bail!("JWT auth failed")
    }
+
+    fn is_shutdown_requested(&self) -> bool {
+        false
+    }
 }

 /// PostgresBackend protocol state.
@@ -274,7 +275,7 @@ impl PostgresBackend {

        let mut unnamed_query_string = Bytes::new();

-        while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
+        while !handler.is_shutdown_requested() {
            match self.read_message() {
                Ok(message) => {
                    if let Some(msg) = message {
@@ -336,11 +337,11 @@ impl PostgresBackend {
        let have_tls = self.tls_config.is_some();
        match msg {
            FeMessage::StartupPacket(m) => {
-                trace!("got startup message {:?}", m);
+                trace!("got startup message {m:?}");

                match m {
                    FeStartupPacket::SslRequest => {
-                        info!("SSL requested");
+                        debug!("SSL requested");

                        self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
                        if have_tls {
@@ -349,7 +350,7 @@ impl PostgresBackend {
                        }
                    }
                    FeStartupPacket::GssEncRequest => {
-                        info!("GSS requested");
+                        debug!("GSS requested");
                        self.write_message(&BeMessage::EncryptionResponse(false))?;
                    }
                    FeStartupPacket::StartupMessage { .. } => {
@@ -433,12 +434,7 @@ impl PostgresBackend {
                    // full cause of the error, not just the top-level context + its trace.
                    // We don't want to send that in the ErrorResponse though,
                    // because it's not relevant to the compute node logs.
-                    if query_string.starts_with("callmemaybe") {
-                        // FIXME avoid printing a backtrace for tenant x not found errors until this is properly fixed
-                        error!("query handler for '{}' failed: {}", query_string, e);
-                    } else {
-                        error!("query handler for '{}' failed: {:?}", query_string, e);
-                    }
+                    error!("query handler for '{}' failed: {:?}", query_string, e);
                    self.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?;
                    // TODO: untangle convoluted control flow
                    if e.to_string().contains("failed to run") {
@@ -475,7 +471,7 @@ impl PostgresBackend {
                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
                }
                // NOTE there is no ReadyForQuery message. This handler is used
-                // for basebackup and it uses CopyOut which doesnt require
+                // for basebackup and it uses CopyOut which doesn't require
                // ReadyForQuery message and backend just switches back to
                // processing mode after sending CopyDone or ErrorResponse.
            }
@@ -498,8 +494,3 @@ impl PostgresBackend {
        Ok(ProcessMsgResult::Continue)
    }
 }
-
-// Set the flag to inform connections to cancel
-pub fn set_pgbackend_shutdown_requested() {
-    PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
-}
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -269,7 +269,14 @@ impl FeStartupPacket {
                            .next()
                            .context("expected even number of params in StartupMessage")?;
                        if name == "options" {
-                            // deprecated way of passing params as cmd line args
+                            // parsing options arguments "...&options=<var0>%3D<val0>+<var1>%3D<val1>..."
+                            // '%3D' is '=' and '+' is ' '
+
+                            // Note: we allow users that don't have SNI capabilities
+                            // to pass a special keyword argument 'project',
+                            // which the proxy uses to determine the cluster name.
+
+                            // TODO: write a unit test for this and refactor it into its own function.
                            for cmdopt in value.split(' ') {
                                let nameval: Vec<&str> = cmdopt.split('=').collect();
                                if nameval.len() == 2 {
@@ -464,7 +471,7 @@ impl BeParameterStatusMessage<'static> {
    }
 }

-// One row desciption in RowDescription packet.
+// One row description in RowDescription packet.
 #[derive(Debug)]
 pub struct RowDescriptor<'a> {
    pub name: &'a [u8],
@@ -613,7 +620,7 @@ fn cstr_to_str(b: &Bytes) -> Result<&str> {
 impl<'a> BeMessage<'a> {
    /// Write message to the given buf.
    // Unlike the reading side, we use BytesMut
-    // here as msg len preceeds its body and it is handy to write it down first
+    // here as msg len precedes its body and it is handy to write it down first
    // and then fill the length. With Write we would have to either calc it
    // manually or have one more buffer.
    pub fn write(buf: &mut BytesMut, message: &BeMessage) -> io::Result<()> {
@@ -919,10 +926,10 @@ impl<'a> BeMessage<'a> {
    }
 }

-// Zenith extension of postgres replication protocol
-// See ZENITH_STATUS_UPDATE_TAG_BYTE
+// Neon extension of postgres replication protocol
+// See NEON_STATUS_UPDATE_TAG_BYTE
 #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
-pub struct ZenithFeedback {
+pub struct ReplicationFeedback {
    // Last known size of the timeline. Used to enforce timeline size limit.
    pub current_timeline_size: u64,
    // Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -932,13 +939,13 @@ pub struct ZenithFeedback {
    pub ps_replytime: SystemTime,
 }

-// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
+// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
 // Do not remove previously available fields because this might be backwards incompatible.
-pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
+pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;

-impl ZenithFeedback {
-    pub fn empty() -> ZenithFeedback {
-        ZenithFeedback {
+impl ReplicationFeedback {
+    pub fn empty() -> ReplicationFeedback {
+        ReplicationFeedback {
            current_timeline_size: 0,
            ps_writelsn: 0,
            ps_applylsn: 0,
@@ -947,7 +954,7 @@ impl ZenithFeedback {
        }
    }

-    // Serialize ZenithFeedback using custom format
+    // Serialize ReplicationFeedback using custom format
    // to support protocol extensibility.
    //
    // Following layout is used:
@@ -958,7 +965,7 @@ impl ZenithFeedback {
    // uint32 - value length in bytes
    // value itself
    pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
-        buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
+        buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
        write_cstr(&Bytes::from("current_timeline_size"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.current_timeline_size);
@@ -985,9 +992,9 @@ impl ZenithFeedback {
        Ok(())
    }

-    // Deserialize ZenithFeedback message
-    pub fn parse(mut buf: Bytes) -> ZenithFeedback {
-        let mut zf = ZenithFeedback::empty();
+    // Deserialize ReplicationFeedback message
+    pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
+        let mut zf = ReplicationFeedback::empty();
        let nfields = buf.get_u8();
        let mut i = 0;
        while i < nfields {
@@ -1028,14 +1035,14 @@ impl ZenithFeedback {
                _ => {
                    let len = buf.get_i32();
                    warn!(
-                        "ZenithFeedback parse. unknown key {} of len {}. Skip it.",
+                        "ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
                        key, len
                    );
                    buf.advance(len as usize);
                }
            }
        }
-        trace!("ZenithFeedback parsed is {:?}", zf);
+        trace!("ReplicationFeedback parsed is {:?}", zf);
        zf
    }
 }
@@ -1045,9 +1052,9 @@ mod tests {
    use super::*;

    #[test]
-    fn test_zenithfeedback_serialization() {
-        let mut zf = ZenithFeedback::empty();
-        // Fill zf wih some values
+    fn test_replication_feedback_serialization() {
+        let mut zf = ReplicationFeedback::empty();
+        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
@@ -1055,14 +1062,14 @@ mod tests {
        let mut data = BytesMut::new();
        zf.serialize(&mut data).unwrap();

-        let zf_parsed = ZenithFeedback::parse(data.freeze());
+        let zf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(zf, zf_parsed);
    }

    #[test]
-    fn test_zenithfeedback_unknown_key() {
-        let mut zf = ZenithFeedback::empty();
-        // Fill zf wih some values
+    fn test_replication_feedback_unknown_key() {
+        let mut zf = ReplicationFeedback::empty();
+        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
@@ -1072,7 +1079,7 @@ mod tests {

        // Add an extra field to the buffer and adjust number of keys
        if let Some(first) = data.first_mut() {
-            *first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
+            *first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
        }

        write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1080,7 +1087,7 @@ mod tests {
        data.put_u64(42);

        // Parse serialized data and check that new field is not parsed
-        let zf_parsed = ZenithFeedback::parse(data.freeze());
+        let zf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(zf, zf_parsed);
    }

--- a/libs/utils/src/zid.rs
+++ b/libs/utils/src/zid.rs
@@ -193,7 +193,7 @@ pub struct ZTenantId(ZId);
 zid_newtype!(ZTenantId);

 // A pair uniquely identifying Zenith instance.
-#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct ZTenantTimelineId {
    pub tenant_id: ZTenantId,
    pub timeline_id: ZTimelineId,
@@ -218,7 +218,7 @@ impl ZTenantTimelineId {

 impl fmt::Display for ZTenantTimelineId {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}-{}", self.tenant_id, self.timeline_id)
+        write!(f, "{}/{}", self.tenant_id, self.timeline_id)
    }
 }

--- a/neon_local/src/main.rs
+++ b/neon_local/src/main.rs
@@ -14,7 +14,7 @@ use safekeeper::defaults::{
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
 use std::collections::{BTreeSet, HashMap};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::process::exit;
 use std::str::FromStr;
 use utils::{
@@ -159,6 +159,20 @@ fn main() -> Result<()> {
                .about("Create a new blank timeline")
                .arg(tenant_id_arg.clone())
                .arg(branch_name_arg.clone()))
+            .subcommand(App::new("import")
+                .about("Import timeline from basebackup directory")
+                .arg(tenant_id_arg.clone())
+                .arg(timeline_id_arg.clone())
+                .arg(Arg::new("node-name").long("node-name").takes_value(true)
+                    .help("Name to assign to the imported timeline"))
+                .arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
+                    .help("Basebackup tarfile to import"))
+                .arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
+                    .help("Lsn the basebackup starts at"))
+                .arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
+                    .help("Wal to add after base"))
+                .arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
+                    .help("Lsn the basebackup ends at")))
        ).subcommand(
            App::new("tenant")
            .setting(AppSettings::ArgRequiredElseHelp)
@@ -613,6 +627,43 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                timeline.timeline_id, last_record_lsn, tenant_id,
            );
        }
+        Some(("import", import_match)) => {
+            let tenant_id = get_tenant_id(import_match, env)?;
+            let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
+            let name = import_match
+                .value_of("node-name")
+                .ok_or_else(|| anyhow!("No node name provided"))?;
+
+            // Parse base inputs
+            let base_tarfile = import_match
+                .value_of("base-tarfile")
+                .map(|s| PathBuf::from_str(s).unwrap())
+                .ok_or_else(|| anyhow!("No base-tarfile provided"))?;
+            let base_lsn = Lsn::from_str(
+                import_match
+                    .value_of("base-lsn")
+                    .ok_or_else(|| anyhow!("No base-lsn provided"))?,
+            )?;
+            let base = (base_lsn, base_tarfile);
+
+            // Parse pg_wal inputs
+            let wal_tarfile = import_match
+                .value_of("wal-tarfile")
+                .map(|s| PathBuf::from_str(s).unwrap());
+            let end_lsn = import_match
+                .value_of("end-lsn")
+                .map(|s| Lsn::from_str(s).unwrap());
+            // TODO validate both or none are provided
+            let pg_wal = end_lsn.zip(wal_tarfile);
+
+            let mut cplane = ComputeControlPlane::load(env.clone())?;
+            println!("Importing timeline into pageserver ...");
+            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)?;
+            println!("Creating node for imported timeline ...");
+            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
+            cplane.new_node(tenant_id, name, timeline_id, None, None)?;
+            println!("Done");
+        }
        Some(("branch", branch_match)) => {
            let tenant_id = get_tenant_id(branch_match, env)?;
            let new_branch_name = branch_match
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"

 [features]
 # It is simpler infra-wise to have failpoints enabled by default
-# It shouldn't affect perf in any way because failpoints
+# It shouldn't affect performance in any way because failpoints
 # are not placed in hot code paths
 default = ["failpoints"]
 profiling = ["pprof"]
@@ -60,6 +60,8 @@ metrics = { path = "../libs/metrics" }
 utils = { path = "../libs/utils" }
 remote_storage = { path = "../libs/remote_storage" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
+close_fds = "0.3.2"
+walkdir = "2.3.2"

 [dev-dependencies]
 hex-literal = "0.3"
--- a/pageserver/README.md
+++ b/pageserver/README.md
@@ -69,7 +69,7 @@ Repository

 The repository stores all the page versions, or WAL records needed to
 reconstruct them. Each tenant has a separate Repository, which is
-stored in the .zenith/tenants/<tenantid> directory.
+stored in the .neon/tenants/<tenantid> directory.

 Repository is an abstract trait, defined in `repository.rs`. It is
 implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
 records, whenever we need to reconstruct a page version from WAL to
 satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
 for a page. The WAL redo manager uses a Postgres process running in
-special zenith wal-redo mode to do the actual WAL redo, and
+special Neon wal-redo mode to do the actual WAL redo, and
 communicates with the process using a pipe.


--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -13,6 +13,7 @@
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::{BufMut, BytesMut};
 use fail::fail_point;
+use itertools::Itertools;
 use std::fmt::Write as FmtWrite;
 use std::io;
 use std::io::Write;
@@ -21,7 +22,7 @@ use std::time::SystemTime;
 use tar::{Builder, EntryType, Header};
 use tracing::*;

-use crate::reltag::SlruKind;
+use crate::reltag::{RelTag, SlruKind};
 use crate::repository::Timeline;
 use crate::DatadirTimelineImpl;
 use postgres_ffi::xlog_utils::*;
@@ -39,11 +40,12 @@ where
    timeline: &'a Arc<DatadirTimelineImpl>,
    pub lsn: Lsn,
    prev_record_lsn: Lsn,
-
+    full_backup: bool,
    finished: bool,
 }

-// Create basebackup with non-rel data in it. Omit relational data.
+// Create basebackup with non-rel data in it.
+// Only include relational data if 'full_backup' is true.
 //
 // Currently we use empty lsn in two cases:
 //  * During the basebackup right after timeline creation
@@ -58,6 +60,7 @@ where
        write: W,
        timeline: &'a Arc<DatadirTimelineImpl>,
        req_lsn: Option<Lsn>,
+        full_backup: bool,
    ) -> Result<Basebackup<'a, W>> {
        // Compute postgres doesn't have any previous WAL files, but the first
        // record that it's going to write needs to include the LSN of the
@@ -94,8 +97,8 @@ where
        };

        info!(
-            "taking basebackup lsn={}, prev_lsn={}",
-            backup_lsn, backup_prev
+            "taking basebackup lsn={}, prev_lsn={} (full_backup={})",
+            backup_lsn, backup_prev, full_backup
        );

        Ok(Basebackup {
@@ -103,11 +106,14 @@ where
            timeline,
            lsn: backup_lsn,
            prev_record_lsn: backup_prev,
+            full_backup,
            finished: false,
        })
    }

    pub fn send_tarball(mut self) -> anyhow::Result<()> {
+        // TODO include checksum
+
        // Create pgdata subdirs structure
        for dir in pg_constants::PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(*dir)?;
@@ -140,6 +146,13 @@ where
        // Create tablespace directories
        for ((spcnode, dbnode), has_relmap_file) in self.timeline.list_dbdirs(self.lsn)? {
            self.add_dbdir(spcnode, dbnode, has_relmap_file)?;
+
+            // Gather and send relational files in each database if full backup is requested.
+            if self.full_backup {
+                for rel in self.timeline.list_rels(spcnode, dbnode, self.lsn)? {
+                    self.add_rel(rel)?;
+                }
+            }
        }
        for xid in self.timeline.list_twophase_files(self.lsn)? {
            self.add_twophase_file(xid)?;
@@ -157,6 +170,38 @@ where
        Ok(())
    }

+    fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
+        let nblocks = self.timeline.get_rel_size(tag, self.lsn)?;
+
+        // Function that adds relation segment data to archive
+        let mut add_file = |segment_index, data: &Vec<u8>| -> anyhow::Result<()> {
+            let file_name = tag.to_segfile_name(segment_index as u32);
+            let header = new_tar_header(&file_name, data.len() as u64)?;
+            self.ar.append(&header, data.as_slice())?;
+            Ok(())
+        };
+
+        // If the relation is empty, create an empty file
+        if nblocks == 0 {
+            add_file(0, &vec![])?;
+            return Ok(());
+        }
+
+        // Add a file for each chunk of blocks (aka segment)
+        let chunks = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
+        for (seg, blocks) in chunks.into_iter().enumerate() {
+            let mut segment_data: Vec<u8> = vec![];
+            for blknum in blocks {
+                let img = self.timeline.get_rel_page_at_lsn(tag, blknum, self.lsn)?;
+                segment_data.extend_from_slice(&img[..]);
+            }
+
+            add_file(seg, &segment_data)?;
+        }
+
+        Ok(())
+    }
+
    //
    // Generate SLRU segment files from repository.
    //
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> {
        return Ok(());
    }

-    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
+    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -5,9 +5,9 @@
 //! See also `settings.md` for better description on every parameter.

 use anyhow::{anyhow, bail, ensure, Context, Result};
-use remote_storage::{RemoteStorageConfig, RemoteStorageKind, S3Config};
+use remote_storage::RemoteStorageConfig;
 use std::env;
-use std::num::{NonZeroU32, NonZeroUsize};
+
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::time::Duration;
@@ -34,7 +34,7 @@ pub mod defaults {
    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

-    pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
+    pub const DEFAULT_SUPERUSER: &str = "cloud_admin";

    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
@@ -114,7 +114,7 @@ pub struct PageServerConf {
    pub default_tenant_conf: TenantConf,

    /// A prefix to add in etcd brokers before every key.
-    /// Can be used for isolating different pageserver groups withing the same etcd cluster.
+    /// Can be used for isolating different pageserver groups within the same etcd cluster.
    pub broker_etcd_prefix: String,

    /// Etcd broker endpoints to connect to.
@@ -394,7 +394,7 @@ impl PageServerConf {
                )),
                "auth_type" => builder.auth_type(parse_toml_from_str(key, item)?),
                "remote_storage" => {
-                    builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
+                    builder.remote_storage_config(Some(RemoteStorageConfig::from_toml(item)?))
                }
                "tenant_config" => {
                    t_conf = Self::parse_toml_tenant_conf(item)?;
@@ -480,68 +480,25 @@ impl PageServerConf {
        if let Some(pitr_interval) = item.get("pitr_interval") {
            t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
        }
+        if let Some(walreceiver_connect_timeout) = item.get("walreceiver_connect_timeout") {
+            t_conf.walreceiver_connect_timeout = Some(parse_toml_duration(
+                "walreceiver_connect_timeout",
+                walreceiver_connect_timeout,
+            )?);
+        }
+        if let Some(lagging_wal_timeout) = item.get("lagging_wal_timeout") {
+            t_conf.lagging_wal_timeout = Some(parse_toml_duration(
+                "lagging_wal_timeout",
+                lagging_wal_timeout,
+            )?);
+        }
+        if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
+            t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
+        }

        Ok(t_conf)
    }

-    /// subroutine of parse_config(), to parse the `[remote_storage]` table.
-    fn parse_remote_storage_config(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
-        let local_path = toml.get("local_path");
-        let bucket_name = toml.get("bucket_name");
-        let bucket_region = toml.get("bucket_region");
-
-        let max_concurrent_syncs = NonZeroUsize::new(
-            parse_optional_integer("max_concurrent_syncs", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
-        )
-        .context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
-
-        let max_sync_errors = NonZeroU32::new(
-            parse_optional_integer("max_sync_errors", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
-        )
-        .context("Failed to parse 'max_sync_errors' as a positive integer")?;
-
-        let concurrency_limit = NonZeroUsize::new(
-            parse_optional_integer("concurrency_limit", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
-        )
-        .context("Failed to parse 'concurrency_limit' as a positive integer")?;
-
-        let storage = match (local_path, bucket_name, bucket_region) {
-            (None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
-            (_, Some(_), None) => {
-                bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
-            }
-            (_, None, Some(_)) => {
-                bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
-            }
-            (None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
-                bucket_name: parse_toml_string("bucket_name", bucket_name)?,
-                bucket_region: parse_toml_string("bucket_region", bucket_region)?,
-                prefix_in_bucket: toml
-                    .get("prefix_in_bucket")
-                    .map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
-                    .transpose()?,
-                endpoint: toml
-                    .get("endpoint")
-                    .map(|endpoint| parse_toml_string("endpoint", endpoint))
-                    .transpose()?,
-                concurrency_limit,
-            }),
-            (Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
-                parse_toml_string("local_path", local_path)?,
-            )),
-            (Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
-        };
-
-        Ok(RemoteStorageConfig {
-            max_concurrent_syncs,
-            max_sync_errors,
-            storage,
-        })
-    }
-
    #[cfg(test)]
    pub fn test_repo_dir(test_name: &str) -> PathBuf {
        PathBuf::from(format!("../tmp_check/test_{test_name}"))
@@ -557,7 +514,7 @@ impl PageServerConf {
            max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
            listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
-            superuser: "zenith_admin".to_string(),
+            superuser: "cloud_admin".to_string(),
            workdir: repo_dir,
            pg_distrib_dir: PathBuf::new(),
            auth_type: AuthType::Trust,
@@ -592,23 +549,6 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
    Ok(i as u64)
 }

-fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
-where
-    I: TryFrom<i64, Error = E>,
-    E: std::error::Error + Send + Sync + 'static,
-{
-    let toml_integer = match item.get(name) {
-        Some(item) => item
-            .as_integer()
-            .with_context(|| format!("configure option {name} is not an integer"))?,
-        None => return Ok(None),
-    };
-
-    I::try_from(toml_integer)
-        .map(Some)
-        .with_context(|| format!("configure option {name} is too large"))
-}
-
 fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
    let s = item
        .as_str()
@@ -651,8 +591,12 @@ fn parse_toml_array(name: &str, item: &Item) -> anyhow::Result<Vec<String>> {

 #[cfg(test)]
 mod tests {
-    use std::fs;
+    use std::{
+        fs,
+        num::{NonZeroU32, NonZeroUsize},
+    };

+    use remote_storage::{RemoteStorageKind, S3Config};
    use tempfile::{tempdir, TempDir};

    use super::*;
--- a/pageserver/src/http/models.rs
+++ b/pageserver/src/http/models.rs
@@ -1,3 +1,5 @@
+use std::num::NonZeroU64;
+
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use utils::{
@@ -33,6 +35,9 @@ pub struct TenantCreateRequest {
    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
    pub pitr_interval: Option<String>,
+    pub walreceiver_connect_timeout: Option<String>,
+    pub lagging_wal_timeout: Option<String>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 #[serde_as]
@@ -68,6 +73,9 @@ pub struct TenantConfigRequest {
    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
    pub pitr_interval: Option<String>,
+    pub walreceiver_connect_timeout: Option<String>,
+    pub lagging_wal_timeout: Option<String>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 impl TenantConfigRequest {
@@ -82,6 +90,21 @@ impl TenantConfigRequest {
            gc_period: None,
            image_creation_threshold: None,
            pitr_interval: None,
+            walreceiver_connect_timeout: None,
+            lagging_wal_timeout: None,
+            max_lsn_wal_lag: None,
        }
    }
 }
+
+/// A WAL receiver's data stored inside the global `WAL_RECEIVERS`.
+/// We keep one WAL receiver active per timeline.
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct WalReceiverEntry {
+    pub wal_producer_connstr: Option<String>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub last_received_msg_lsn: Option<Lsn>,
+    /// the timestamp (in microseconds) of the last received message
+    pub last_received_msg_ts: Option<u128>,
+}
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -229,23 +229,16 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
    check_permission(&request, Some(tenant_id))?;

    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+    let wal_receiver_entry = crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
+        .instrument(info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id))
+        .await
+        .ok_or_else(|| {
+            ApiError::NotFound(format!(
+                "WAL receiver data not found for tenant {tenant_id} and timeline {timeline_id}"
+            ))
+        })?;

-    let wal_receiver = tokio::task::spawn_blocking(move || {
-        let _enter =
-            info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id).entered();
-
-        crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
-    })
-    .await
-    .map_err(ApiError::from_err)?
-    .ok_or_else(|| {
-        ApiError::NotFound(format!(
-            "WAL receiver not found for tenant {} and timeline {}",
-            tenant_id, timeline_id
-        ))
-    })?;
-
-    json_response(StatusCode::OK, wal_receiver)
+    json_response(StatusCode::OK, &wal_receiver_entry)
 }

 async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -402,6 +395,19 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
    }

+    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
+        tenant_conf.walreceiver_connect_timeout = Some(
+            humantime::parse_duration(&walreceiver_connect_timeout).map_err(ApiError::from_err)?,
+        );
+    }
+    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
+        tenant_conf.lagging_wal_timeout =
+            Some(humantime::parse_duration(&lagging_wal_timeout).map_err(ApiError::from_err)?);
+    }
+    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+    }
+
    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
    tenant_conf.compaction_threshold = request_data.compaction_threshold;
@@ -449,6 +455,18 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
        tenant_conf.pitr_interval =
            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
    }
+    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
+        tenant_conf.walreceiver_connect_timeout = Some(
+            humantime::parse_duration(&walreceiver_connect_timeout).map_err(ApiError::from_err)?,
+        );
+    }
+    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
+        tenant_conf.lagging_wal_timeout =
+            Some(humantime::parse_duration(&lagging_wal_timeout).map_err(ApiError::from_err)?);
+    }
+    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+    }

    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -2,7 +2,6 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! a zenith Timeline.
 //!
-use std::fs;
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom};
 use std::path::{Path, PathBuf};
@@ -10,16 +9,18 @@ use std::path::{Path, PathBuf};
 use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
 use tracing::*;
+use walkdir::WalkDir;

 use crate::pgdatadir_mapping::*;
 use crate::reltag::{RelTag, SlruKind};
 use crate::repository::Repository;
+use crate::repository::Timeline;
 use crate::walingest::WalIngest;
 use postgres_ffi::relfile_utils::*;
 use postgres_ffi::waldecoder::*;
 use postgres_ffi::xlog_utils::*;
+use postgres_ffi::Oid;
 use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
-use postgres_ffi::{Oid, TransactionId};
 use utils::lsn::Lsn;

 ///
@@ -35,100 +36,29 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
 ) -> Result<()> {
    let mut pg_control: Option<ControlFileData> = None;

+    // TODO this should be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
+    // Then fishing out pg_control would be unnecessary
    let mut modification = tline.begin_modification(lsn);
    modification.init_empty()?;

-    // Scan 'global'
-    let mut relfiles: Vec<PathBuf> = Vec::new();
-    for direntry in fs::read_dir(path.join("global"))? {
-        let direntry = direntry?;
-        match direntry.file_name().to_str() {
-            None => continue,
+    // Import all but pg_wal
+    let all_but_wal = WalkDir::new(path)
+        .into_iter()
+        .filter_entry(|entry| !entry.path().ends_with("pg_wal"));
+    for entry in all_but_wal {
+        let entry = entry.unwrap();
+        let metadata = entry.metadata().unwrap();
+        if metadata.is_file() {
+            let absolute_path = entry.path();
+            let relative_path = absolute_path.strip_prefix(path)?;

-            Some("pg_control") => {
-                pg_control = Some(import_control_file(&mut modification, &direntry.path())?);
-            }
-            Some("pg_filenode.map") => {
-                import_relmap_file(
-                    &mut modification,
-                    pg_constants::GLOBALTABLESPACE_OID,
-                    0,
-                    &direntry.path(),
-                )?;
-            }
-
-            // Load any relation files into the page server (but only after the other files)
-            _ => relfiles.push(direntry.path()),
-        }
-    }
-    for relfile in relfiles {
-        import_relfile(
-            &mut modification,
-            &relfile,
-            pg_constants::GLOBALTABLESPACE_OID,
-            0,
-        )?;
-    }
-
-    // Scan 'base'. It contains database dirs, the database OID is the filename.
-    // E.g. 'base/12345', where 12345 is the database OID.
-    for direntry in fs::read_dir(path.join("base"))? {
-        let direntry = direntry?;
-
-        //skip all temporary files
-        if direntry.file_name().to_string_lossy() == "pgsql_tmp" {
-            continue;
-        }
-
-        let dboid = direntry.file_name().to_string_lossy().parse::<u32>()?;
-
-        let mut relfiles: Vec<PathBuf> = Vec::new();
-        for direntry in fs::read_dir(direntry.path())? {
-            let direntry = direntry?;
-            match direntry.file_name().to_str() {
-                None => continue,
-
-                Some("PG_VERSION") => {
-                    //modification.put_dbdir_creation(pg_constants::DEFAULTTABLESPACE_OID, dboid)?;
-                }
-                Some("pg_filenode.map") => import_relmap_file(
-                    &mut modification,
-                    pg_constants::DEFAULTTABLESPACE_OID,
-                    dboid,
-                    &direntry.path(),
-                )?,
-
-                // Load any relation files into the page server
-                _ => relfiles.push(direntry.path()),
+            let file = File::open(absolute_path)?;
+            let len = metadata.len() as usize;
+            if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
+                pg_control = Some(control_file);
            }
        }
-        for relfile in relfiles {
-            import_relfile(
-                &mut modification,
-                &relfile,
-                pg_constants::DEFAULTTABLESPACE_OID,
-                dboid,
-            )?;
-        }
    }
-    for entry in fs::read_dir(path.join("pg_xact"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::Clog, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_multixact").join("members"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::MultiXactMembers, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_multixact").join("offsets"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::MultiXactOffsets, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_twophase"))? {
-        let entry = entry?;
-        let xid = u32::from_str_radix(&entry.path().to_string_lossy(), 16)?;
-        import_twophase_file(&mut modification, xid, &entry.path())?;
-    }
-    // TODO: Scan pg_tblspc

    // We're done importing all the data files.
    modification.commit()?;
@@ -158,11 +88,13 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
 }

 // subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
-fn import_relfile<R: Repository>(
+fn import_rel<R: Repository, Reader: Read>(
    modification: &mut DatadirModification<R>,
    path: &Path,
    spcoid: Oid,
    dboid: Oid,
+    mut reader: Reader,
+    len: usize,
 ) -> anyhow::Result<()> {
    // Does it look like a relation file?
    trace!("importing rel file {}", path.display());
@@ -173,16 +105,10 @@ fn import_relfile<R: Repository>(
            e
        })?;

-    let mut file = File::open(path)?;
    let mut buf: [u8; 8192] = [0u8; 8192];

-    let len = file.metadata().unwrap().len();
-    ensure!(len % pg_constants::BLCKSZ as u64 == 0);
-    let nblocks = len / pg_constants::BLCKSZ as u64;
-
-    if segno != 0 {
-        todo!();
-    }
+    ensure!(len % pg_constants::BLCKSZ as usize == 0);
+    let nblocks = len / pg_constants::BLCKSZ as usize;

    let rel = RelTag {
        spcnode: spcoid,
@@ -190,11 +116,22 @@ fn import_relfile<R: Repository>(
        relnode,
        forknum,
    };
-    modification.put_rel_creation(rel, nblocks as u32)?;

    let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
+
+    // Call put_rel_creation for every segment of the relation,
+    // because there is no guarantee about the order in which we are processing segments.
+    // ignore "relation already exists" error
+    if let Err(e) = modification.put_rel_creation(rel, nblocks as u32) {
+        if e.to_string().contains("already exists") {
+            debug!("relation {} already exists. we must be extending it", rel);
+        } else {
+            return Err(e);
+        }
+    }
+
    loop {
-        let r = file.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf);
        match r {
            Ok(_) => {
                modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?;
@@ -204,7 +141,9 @@ fn import_relfile<R: Repository>(
            Err(err) => match err.kind() {
                std::io::ErrorKind::UnexpectedEof => {
                    // reached EOF. That's expected.
-                    ensure!(blknum == nblocks as u32, "unexpected EOF");
+                    let relative_blknum =
+                        blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
+                    ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
                    break;
                }
                _ => {
@@ -215,96 +154,39 @@ fn import_relfile<R: Repository>(
        blknum += 1;
    }

+    // Update relation size
+    //
+    // If we process rel segments out of order,
+    // put_rel_extend will skip the update.
+    modification.put_rel_extend(rel, blknum)?;
+
    Ok(())
 }

-/// Import a relmapper (pg_filenode.map) file into the repository
-fn import_relmap_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    spcnode: Oid,
-    dbnode: Oid,
-    path: &Path,
-) -> Result<()> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing relmap file {}", path.display());
-
-    modification.put_relmap_file(spcnode, dbnode, Bytes::copy_from_slice(&buffer[..]))?;
-    Ok(())
-}
-
-/// Import a twophase state file (pg_twophase/<xid>) into the repository
-fn import_twophase_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    xid: TransactionId,
-    path: &Path,
-) -> Result<()> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing non-rel file {}", path.display());
-
-    modification.put_twophase_file(xid, Bytes::copy_from_slice(&buffer[..]))?;
-    Ok(())
-}
-
-///
-/// Import pg_control file into the repository.
-///
-/// The control file is imported as is, but we also extract the checkpoint record
-/// from it and store it separated.
-fn import_control_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    path: &Path,
-) -> Result<ControlFileData> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing control file {}", path.display());
-
-    // Import it as ControlFile
-    modification.put_control_file(Bytes::copy_from_slice(&buffer[..]))?;
-
-    // Extract the checkpoint record and import it separately.
-    let pg_control = ControlFileData::decode(&buffer)?;
-    let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
-    modification.put_checkpoint(checkpoint_bytes)?;
-
-    Ok(pg_control)
-}
-
-///
 /// Import an SLRU segment file
 ///
-fn import_slru_file<R: Repository>(
+fn import_slru<R: Repository, Reader: Read>(
    modification: &mut DatadirModification<R>,
    slru: SlruKind,
    path: &Path,
+    mut reader: Reader,
+    len: usize,
 ) -> Result<()> {
    trace!("importing slru file {}", path.display());

-    let mut file = File::open(path)?;
    let mut buf: [u8; 8192] = [0u8; 8192];
    let segno = u32::from_str_radix(&path.file_name().unwrap().to_string_lossy(), 16)?;

-    let len = file.metadata().unwrap().len();
-    ensure!(len % pg_constants::BLCKSZ as u64 == 0); // we assume SLRU block size is the same as BLCKSZ
-    let nblocks = len / pg_constants::BLCKSZ as u64;
+    ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
+    let nblocks = len / pg_constants::BLCKSZ as usize;

-    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as u64);
+    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);

    modification.put_slru_segment_creation(slru, segno, nblocks as u32)?;

    let mut rpageno = 0;
    loop {
-        let r = file.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf);
        match r {
            Ok(_) => {
                modification.put_slru_page_image(
@@ -396,10 +278,245 @@ fn import_wal<R: Repository>(
    }

    if last_lsn != startpoint {
-        debug!("reached end of WAL at {}", last_lsn);
+        info!("reached end of WAL at {}", last_lsn);
    } else {
        info!("no WAL to import at {}", last_lsn);
    }

    Ok(())
 }
+
+/// Import a base backup streamed as a tar archive into `tline` at `base_lsn`.
+/// Errors on an unreadable or unsupported entry, or if pg_control is missing.
+pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
+    tline: &mut DatadirTimeline<R>,
+    reader: Reader,
+    base_lsn: Lsn,
+) -> Result<()> {
+    info!("importing base at {}", base_lsn);
+    let mut modification = tline.begin_modification(base_lsn);
+    modification.init_empty()?;
+
+    let mut pg_control: Option<ControlFileData> = None;
+
+    // Import base. The archive is external input, so malformed entries are
+    // reported as errors instead of panicking.
+    for base_tar_entry in tar::Archive::new(reader).entries()? {
+        let entry = base_tar_entry?;
+        let header = entry.header();
+        let len = header.entry_size()? as usize;
+        let file_path = header.path()?.into_owned();
+
+        match header.entry_type() {
+            tar::EntryType::Regular => {
+                if let Some(res) = import_file(&mut modification, file_path.as_ref(), entry, len)? {
+                    // We found the pg_control file.
+                    pg_control = Some(res);
+                }
+            }
+            tar::EntryType::Directory => {
+                debug!("directory {:?}", file_path);
+            }
+            _ => {
+                bail!("tar::EntryType::?? {}", file_path.display());
+            }
+        }
+    }
+
+    // sanity check: ensure that pg_control is loaded
+    let _pg_control = pg_control.context("pg_control file not found")?;
+
+    modification.commit()?;
+    Ok(())
+}
+
+/// Ingest WAL from a tar archive of consecutive segment files into `tline`, beginning
+/// at `start_lsn`, until the LSN reported for an ingested record exceeds `end_lsn`.
+/// Malformed archives and missing segments are returned as errors.
+pub fn import_wal_from_tar<R: Repository, Reader: Read>(
+    tline: &mut DatadirTimeline<R>,
+    reader: Reader,
+    start_lsn: Lsn,
+    end_lsn: Lsn,
+) -> Result<()> {
+    // Set up walingest mutable state
+    let mut waldecoder = WalStreamDecoder::new(start_lsn);
+    let mut segno = start_lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
+    let mut offset = start_lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
+    let mut last_lsn = start_lsn;
+    let mut walingest = WalIngest::new(tline, start_lsn)?;
+
+    // Ingest wal until end_lsn
+    info!("importing wal until {}", end_lsn);
+    let mut pg_wal_tar = tar::Archive::new(reader);
+    let mut pg_wal_entries_iter = pg_wal_tar.entries()?;
+    while last_lsn <= end_lsn {
+        let bytes = {
+            let entry = pg_wal_entries_iter.next().context("expected more wal")??;
+            let header = entry.header();
+            let file_path = header.path()?.into_owned();
+
+            match header.entry_type() {
+                tar::EntryType::Regular => {
+                    // FIXME: assume postgresql tli 1 for now
+                    let expected_filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
+                    // Segments must arrive in order; this compares the last path component.
+                    ensure!(file_path.ends_with(&expected_filename));
+                    debug!("processing wal file {:?}", file_path);
+                    read_all_bytes(entry)?
+                }
+                tar::EntryType::Directory => {
+                    debug!("directory {:?}", file_path);
+                    continue;
+                }
+                _ => bail!("tar::EntryType::?? {}", file_path.display()),
+            }
+        };
+
+        waldecoder.feed_bytes(bytes.get(offset..).context("wal file too short")?);
+
+        while last_lsn <= end_lsn {
+            let (lsn, recdata) = match waldecoder.poll_decode()? {
+                Some(record) => record,
+                // NOTE(review): None presumably means the decoder needs more input, so
+                // move on to the next segment instead of polling again forever.
+                None => break,
+            };
+            walingest.ingest_record(tline, recdata, lsn)?;
+            last_lsn = lsn;
+            debug!("imported record at {} (end {})", lsn, end_lsn);
+        }
+        debug!("imported records up to {}", last_lsn);
+        segno += 1;
+        offset = 0;
+    }
+
+    if last_lsn != start_lsn {
+        info!("reached end of WAL at {}", last_lsn);
+    } else {
+        info!("there was no WAL to import at {}", last_lsn);
+    }
+
+    // Log any extra unused files
+    for e in &mut pg_wal_entries_iter {
+        let file_path = e?.header().path()?.into_owned();
+        info!("skipping {:?}", file_path);
+    }
+
+    Ok(())
+}
+
+/// Import one file of a base backup. `file_path` is relative to the data directory root
+/// and selects the handler; `reader` yields the contents, `len` is passed to the relation
+/// and SLRU importers. Returns the control file for `pg_control` under `global`, else `None`.
+pub fn import_file<R: Repository, Reader: Read>(
+    modification: &mut DatadirModification<R>,
+    file_path: &Path,
+    reader: Reader,
+    len: usize,
+) -> Result<Option<ControlFileData>> {
+    debug!("looking at {:?}", file_path);
+    let file_name = file_path.file_name().map(|name| name.to_string_lossy());
+
+    if file_path.starts_with("global") {
+        let spcnode = pg_constants::GLOBALTABLESPACE_OID;
+        let dbnode = 0;
+
+        match file_name.as_deref() {
+            None => bail!("no file name in {:?}", file_path),
+            Some("pg_control") => {
+                let bytes = read_all_bytes(reader)?;
+
+                // Extract the checkpoint record and import it separately.
+                let pg_control = ControlFileData::decode(&bytes[..])?;
+                let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
+                modification.put_checkpoint(checkpoint_bytes)?;
+                debug!("imported control file");
+
+                // Import it as ControlFile
+                modification.put_control_file(bytes)?;
+                return Ok(Some(pg_control));
+            }
+            Some("pg_filenode.map") => {
+                let bytes = read_all_bytes(reader)?;
+                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                debug!("imported relmap file")
+            }
+            Some("PG_VERSION") => {
+                debug!("ignored");
+            }
+            _ => {
+                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                debug!("imported rel creation");
+            }
+        }
+    } else if file_path.starts_with("base") {
+        let spcnode = pg_constants::DEFAULTTABLESPACE_OID;
+        let dbnode = file_path
+            .iter()
+            .nth(1)
+            .context("missing database directory in path")?
+            .to_string_lossy()
+            .parse::<u32>()
+            .with_context(|| format!("invalid database oid in {:?}", file_path))?;
+
+        match file_name.as_deref() {
+            None => bail!("no file name in {:?}", file_path),
+            Some("pg_filenode.map") => {
+                let bytes = read_all_bytes(reader)?;
+                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                debug!("imported relmap file")
+            }
+            Some("PG_VERSION") => {
+                debug!("ignored");
+            }
+            _ => {
+                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                debug!("imported rel creation");
+            }
+        }
+    } else if file_path.starts_with("pg_xact") {
+        import_slru(modification, SlruKind::Clog, file_path, reader, len)?;
+        debug!("imported clog slru");
+    } else if file_path.starts_with("pg_multixact/offsets") {
+        import_slru(modification, SlruKind::MultiXactOffsets, file_path, reader, len)?;
+        debug!("imported multixact offsets slru");
+    } else if file_path.starts_with("pg_multixact/members") {
+        import_slru(modification, SlruKind::MultiXactMembers, file_path, reader, len)?;
+        debug!("imported multixact members slru");
+    } else if file_path.starts_with("pg_twophase") {
+        let xid = u32::from_str_radix(&file_name.context("twophase file has no name")?, 16)?;
+
+        let bytes = read_all_bytes(reader)?;
+        modification.put_twophase_file(xid, bytes)?;
+        debug!("imported twophase file");
+    } else if file_path.starts_with("pg_wal") {
+        debug!("found wal file in base section. ignore it");
+    } else if file_path.starts_with("zenith.signal") {
+        // Parse zenith signal file to set correct previous LSN
+        let bytes = read_all_bytes(reader)?;
+        // zenith.signal format is "PREV LSN: prev_lsn"
+        let signal = std::str::from_utf8(&bytes)?;
+        let prev_lsn = signal.split(':').nth(1).map(str::trim);
+        let prev_lsn = prev_lsn.and_then(|s| s.parse::<Lsn>().ok());
+        let prev_lsn = prev_lsn.context("malformed zenith.signal")?;
+
+        let writer = modification.tline.tline.writer();
+        writer.finish_write(prev_lsn);
+        debug!("imported zenith signal {}", prev_lsn);
+    } else if file_path.starts_with("pg_tblspc") {
+        // TODO Backups exported from neon won't have pg_tblspc, but we will need
+        // this to import arbitrary postgres databases.
+        bail!("Importing pg_tblspc is not implemented");
+    } else {
+        debug!("ignored");
+    }
+
+    Ok(None)
+}
+
+fn read_all_bytes<Reader: Read>(mut reader: Reader) -> Result<Bytes> {
+    let mut buf: Vec<u8> = vec![];
+    reader.read_to_end(&mut buf)?;
+    Ok(Bytes::from(buf)) // takes ownership of the Vec; no second copy of the data
+}
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -15,7 +15,7 @@ pub struct KeySpace {
 impl KeySpace {
    ///
    /// Partition a key space into roughly chunks of roughly 'target_size' bytes
-    /// in each patition.
+    /// in each partition.
    ///
    pub fn partition(&self, target_size: u64) -> KeyPartitioning {
        // Assume that each value is 8k in size.
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -4,7 +4,7 @@
 //! The functions here are responsible for locating the correct layer for the
 //! get/put call, tracing timeline branching history as needed.
 //!
-//! The files are stored in the .zenith/tenants/<tenantid>/timelines/<timelineid>
+//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
 //! directory. See layered_repository/README for how the files are managed.
 //! In addition to the layer files, there is a metadata file in the same
 //! directory that contains information about the timeline, in particular its
@@ -25,6 +25,7 @@ use std::collections::{BTreeSet, HashSet};
 use std::fs;
 use std::fs::{File, OpenOptions};
 use std::io::Write;
+use std::num::NonZeroU64;
 use std::ops::{Bound::Included, Deref, Range};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{self, AtomicBool};
@@ -147,7 +148,7 @@ lazy_static! {
    .expect("failed to define a metric");
 }

-/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
+/// Parts of the `.neon/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
 pub const TIMELINES_SEGMENT_NAME: &str = "timelines";

 ///
@@ -242,15 +243,15 @@ impl Repository for LayeredRepository {
        );
        timeline.layers.write().unwrap().next_open_layer_at = Some(initdb_lsn);

+        // Insert if not exists
        let timeline = Arc::new(timeline);
-        let r = timelines.insert(
-            timelineid,
-            LayeredTimelineEntry::Loaded(Arc::clone(&timeline)),
-        );
-        ensure!(
-            r.is_none(),
-            "assertion failure, inserted duplicate timeline"
-        );
+        match timelines.entry(timelineid) {
+            Entry::Occupied(_) => bail!("Timeline already exists"),
+            Entry::Vacant(vacant) => {
+                vacant.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline)))
+            }
+        };
+
        Ok(timeline)
    }

@@ -557,6 +558,27 @@ impl LayeredRepository {
            .unwrap_or(self.conf.default_tenant_conf.pitr_interval)
    }

+    /// Per-tenant `walreceiver_connect_timeout`, or the `default_tenant_conf` value if unset.
+    pub fn get_wal_receiver_connect_timeout(&self) -> Duration {
+        let fallback = self.conf.default_tenant_conf.walreceiver_connect_timeout;
+        let overrides = self.tenant_conf.read().unwrap();
+        overrides.walreceiver_connect_timeout.unwrap_or(fallback)
+    }
+
+    /// Per-tenant `lagging_wal_timeout`, or the `default_tenant_conf` value if unset.
+    pub fn get_lagging_wal_timeout(&self) -> Duration {
+        let fallback = self.conf.default_tenant_conf.lagging_wal_timeout;
+        let overrides = self.tenant_conf.read().unwrap();
+        overrides.lagging_wal_timeout.unwrap_or(fallback)
+    }
+
+    /// Per-tenant `max_lsn_wal_lag`, or the `default_tenant_conf` value if unset.
+    pub fn get_max_lsn_wal_lag(&self) -> NonZeroU64 {
+        let fallback = self.conf.default_tenant_conf.max_lsn_wal_lag;
+        let overrides = self.tenant_conf.read().unwrap();
+        overrides.max_lsn_wal_lag.unwrap_or(fallback)
+    }
+
    pub fn update_tenant_config(&self, new_tenant_conf: TenantConfOpt) -> Result<()> {
        let mut tenant_conf = self.tenant_conf.write().unwrap();

@@ -823,7 +845,7 @@ impl LayeredRepository {
        for (timeline_id, timeline_entry) in timelines.iter() {
            timeline_ids.push(*timeline_id);

-            // This is unresolved question for now, how to do gc in presense of remote timelines
+            // This is unresolved question for now, how to do gc in presence of remote timelines
            // especially when this is combined with branching.
            // Somewhat related: https://github.com/zenithdb/zenith/issues/999
            if let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id() {
@@ -1230,7 +1252,7 @@ impl LayeredTimeline {
            }),
            disk_consistent_lsn: AtomicLsn::new(metadata.disk_consistent_lsn().0),

-            last_freeze_at: AtomicLsn::new(0),
+            last_freeze_at: AtomicLsn::new(metadata.disk_consistent_lsn().0),

            ancestor_timeline: ancestor,
            ancestor_lsn: metadata.ancestor_lsn(),
@@ -1705,9 +1727,7 @@ impl LayeredTimeline {
            new_delta_path.clone(),
            self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
        ])?;
-        fail_point!("checkpoint-before-sync");
-
-        fail_point!("flush-frozen");
+        fail_point!("flush-frozen-before-sync");

        // Finally, replace the frozen in-memory layer with the new on-disk layer
        {
@@ -1831,7 +1851,7 @@ impl LayeredTimeline {
        // collect any page versions that are no longer needed because
        // of the new image layers we created in step 2.
        //
-        // TODO: This hight level strategy hasn't been implemented yet.
+        // TODO: This high level strategy hasn't been implemented yet.
        // Below are functions compact_level0() and create_image_layers()
        // but they are a bit ad hoc and don't quite work like it's explained
        // above. Rewrite it.
@@ -2268,7 +2288,7 @@ impl LayeredTimeline {
            }

            // 3. Is it needed by a child branch?
-            // NOTE With that wee would keep data that
+            // NOTE With that we would keep data that
            // might be referenced by child branches forever.
            // We can track this in child timeline GC and delete parent layers when
            // they are no longer needed. This might be complicated with long inheritance chains.
@@ -2518,7 +2538,7 @@ fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
    bail!("couldn't find an unused backup number for {:?}", path)
 }

-fn load_metadata(
+pub fn load_metadata(
    conf: &'static PageServerConf,
    timeline_id: ZTimelineId,
    tenant_id: ZTenantId,
--- a/pageserver/src/layered_repository/README.md
+++ b/pageserver/src/layered_repository/README.md
@@ -123,7 +123,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
 a range of LSNs (or a single LSN, in case of image layers). You can think of it
 as a rectangle in the two-dimensional key-LSN space. The layer files for each
 timeline are stored in the timeline's subdirectory under
-`.zenith/tenants/<tenantid>/timelines`.
+`.neon/tenants/<tenantid>/timelines`.

 There are two kind of layer files: images, and delta layers. An image file
 contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -178,7 +178,7 @@ version, and how branching and GC works is still valid.
 The full path of a delta file looks like this:

 ```
-    .zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
+    .neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
 ```

 For simplicity, the examples below use a simplified notation for the
@@ -260,7 +260,7 @@ Whenever a GetPage@LSN request comes in from the compute node, the
 page server needs to reconstruct the requested page, as it was at the
 requested LSN. To do that, the page server first checks the recent
 in-memory layer; if the requested page version is found there, it can
-be returned immediatedly without looking at the files on
+be returned immediately without looking at the files on
 disk. Otherwise the page server needs to locate the layer file that
 contains the requested page version.

@@ -409,7 +409,7 @@ removed because there is no newer layer file for the table.

 Things get slightly more complicated with multiple branches. All of
 the above still holds, but in addition to recent files we must also
-retain older shapshot files that are still needed by child branches.
+retain older snapshot files that are still needed by child branches.
 For example, if child branch is created at LSN 150, and the 'customers'
 table is updated on the branch, you would have these files:

--- a/pageserver/src/layered_repository/disk_btree.rs
+++ b/pageserver/src/layered_repository/disk_btree.rs
@@ -7,7 +7,7 @@
 //! - Fixed-width keys
 //! - Fixed-width values (VALUE_SZ)
 //! - The tree is created in a bulk operation. Insert/deletion after creation
-//!   is not suppported
+//!   is not supported
 //! - page-oriented
 //!
 //! TODO:
@@ -498,8 +498,8 @@ where
            return Ok(());
        }

-        // It did not fit. Try to compress, and it it succeeds to make some room
-        // on the node, try appending to it again.
+        // It did not fit. Try to compress, and if it succeeds to make
+        // some room on the node, try appending to it again.
        #[allow(clippy::collapsible_if)]
        if last.compress() {
            if last.push(key, value) {
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -24,7 +24,6 @@ pub mod walredo;

 use lazy_static::lazy_static;
 use tracing::info;
-use utils::postgres_backend;

 use crate::thread_mgr::ThreadKind;
 use metrics::{register_int_gauge_vec, IntGaugeVec};
@@ -73,7 +72,6 @@ pub fn shutdown_pageserver(exit_code: i32) {
    thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);

    // Shut down any page service threads.
-    postgres_backend::set_pgbackend_shutdown_requested();
    thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);

    // Shut down all the tenants. This flushes everything to disk and kills
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -20,7 +20,7 @@
 //! assign a buffer for a page, you must hold the mapping lock and the lock on
 //! the slot at the same time.
 //!
-//! Whenever you need to hold both locks simultenously, the slot lock must be
+//! Whenever you need to hold both locks simultaneously, the slot lock must be
 //! acquired first. This consistent ordering avoids deadlocks. To look up a page
 //! in the cache, you would first look up the mapping, while holding the mapping
 //! lock, and then lock the slot. You must release the mapping lock in between,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -7,14 +7,13 @@
 //     *status* -- show actual info about this pageserver,
 //     *pagestream* -- enter mode where smgr and pageserver talk with their
 //  custom protocol.
-//     *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
 //

 use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
 use regex::Regex;
-use std::io;
+use std::io::{self, Read};
 use std::net::TcpListener;
 use std::str;
 use std::str::FromStr;
@@ -30,6 +29,8 @@ use utils::{

 use crate::basebackup;
 use crate::config::{PageServerConf, ProfilingConfig};
+use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
+use crate::layered_repository::LayeredRepository;
 use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp};
 use crate::profiling::profpoint_start;
 use crate::reltag::RelTag;
@@ -38,7 +39,6 @@ use crate::repository::Timeline;
 use crate::tenant_mgr;
 use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
-use crate::walreceiver;
 use crate::CheckpointConfig;
 use metrics::{register_histogram_vec, HistogramVec};
 use postgres_ffi::xlog_utils::to_pg_timestamp;
@@ -202,6 +202,96 @@ impl PagestreamBeMessage {
    }
 }

+/// Implements Read for the server side of CopyIn: `read` hands out the payload
+/// bytes of the CopyData messages received on `pgb`.
+struct CopyInReader<'a> {
+    /// Connection that CopyData messages are read from and errors are reported to.
+    pgb: &'a mut PostgresBackend,
+    /// Overflow buffer for bytes sent in CopyData messages
+    /// that the reader (caller of read) hasn't asked for yet.
+    /// TODO use BytesMut?
+    buf: Vec<u8>,
+    /// Bytes before `buf_begin` are considered as dropped.
+    /// This allows us to implement O(1) pop_front on Vec<u8>.
+    /// The Vec won't grow large because we only add to it
+    /// when it's empty.
+    buf_begin: usize,
+}
+
+impl<'a> CopyInReader<'a> {
+    /// Wraps `pgb`, which the caller must already have put into CopyIn state.
+    /// The overflow buffer starts out empty.
+    fn new(pgb: &'a mut PostgresBackend) -> Self {
+        let buf: Vec<u8> = Vec::new();
+        let buf_begin = 0;
+
+        Self { pgb, buf, buf_begin }
+    }
+}
+
+impl<'a> Drop for CopyInReader<'a> {
+    fn drop(&mut self) {
+        // Finalize copy protocol so that self.pgb can be reused. Drain into a sink, not a
+        // Vec, so the unread rest of a large stream is never buffered; errors are ignored.
+        // TODO instead, maybe take ownership of pgb and give it back at the end
+        let _ = io::copy(self, &mut io::sink());
+    }
+}
+
+impl<'a> Read for CopyInReader<'a> {
+    /// Serves bytes from the overflow buffer first, else from the next CopyData message.
+    /// `Ok(0)` means CopyDone; unexpected messages, disconnects and shutdown are errors.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        while !thread_mgr::is_shutdown_requested() {
+            // Return from buffer if nonempty
+            if self.buf_begin < self.buf.len() {
+                let bytes_to_read = std::cmp::min(buf.len(), self.buf.len() - self.buf_begin);
+                buf[..bytes_to_read].copy_from_slice(&self.buf[self.buf_begin..][..bytes_to_read]);
+                self.buf_begin += bytes_to_read;
+                return Ok(bytes_to_read);
+            }
+
+            // Delete garbage
+            self.buf.clear();
+            self.buf_begin = 0;
+
+            // Wait for client to send CopyData bytes
+            match self.pgb.read_message() {
+                Ok(Some(message)) => {
+                    let copy_data_bytes = match message {
+                        // Empty payload is not end-of-stream: Ok(0) would be taken for EOF
+                        FeMessage::CopyData(bytes) if bytes.is_empty() => continue,
+                        FeMessage::CopyData(bytes) => bytes,
+                        FeMessage::CopyDone => return Ok(0),
+                        FeMessage::Sync => continue,
+                        m => {
+                            let msg = format!("unexpected message {:?}", m);
+                            self.pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
+                            return Err(io::Error::new(io::ErrorKind::Other, msg));
+                        }
+                    };
+                    // Return as much as we can, saving the rest in self.buf
+                    let mut reader = copy_data_bytes.reader();
+                    let bytes_read = reader.read(buf)?;
+                    reader.read_to_end(&mut self.buf)?;
+                    return Ok(bytes_read);
+                }
+                Ok(None) => {
+                    let msg = "client closed connection";
+                    self.pgb.write_message(&BeMessage::ErrorResponse(msg))?;
+                    return Err(io::Error::new(io::ErrorKind::Other, msg));
+                }
+                // A socket read timeout just loops, re-checking the shutdown flag
+                Err(e) if is_socket_read_timed_out(&e) => continue,
+                Err(e) => return Err(io::Error::new(io::ErrorKind::Other, e)),
+            }
+        }
+
+        // Shutting down
+        Err(io::Error::new(io::ErrorKind::Other, "Importer thread was shut down"))
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////

 ///
@@ -305,7 +395,29 @@ fn page_service_conn_main(

    let mut conn_handler = PageServerHandler::new(conf, auth);
    let pgbackend = PostgresBackend::new(socket, auth_type, None, true)?;
-    pgbackend.run(&mut conn_handler)
+    match pgbackend.run(&mut conn_handler) {
+        Ok(()) => {
+            // we've been requested to shut down
+            Ok(())
+        }
+        Err(err) => {
+            let root_cause_io_err_kind = err
+                .root_cause()
+                .downcast_ref::<io::Error>()
+                .map(|e| e.kind());
+
+            // `ConnectionReset` error happens when the Postgres client closes the connection.
+            // As this disconnection happens quite often and is expected,
+            // we decided to downgrade the logging level to `INFO`.
+            // See: https://github.com/neondatabase/neon/issues/1683.
+            if root_cause_io_err_kind == Some(io::ErrorKind::ConnectionReset) {
+                info!("Postgres client disconnected");
+                Ok(())
+            } else {
+                Err(err)
+            }
+        }
+    }
 }

 #[derive(Debug)]
@@ -350,6 +462,10 @@ impl PageServerHandler {
    ) -> anyhow::Result<()> {
        let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();

+        // NOTE: pagerequests handler exits when connection is closed,
+        //       so there is no need to reset the association
+        thread_mgr::associate_with(Some(tenantid), Some(timelineid));
+
        // Check that the timeline exists
        let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
            .context("Cannot load local timeline")?;
@@ -423,6 +539,98 @@ impl PageServerHandler {
        Ok(())
    }

+    fn handle_import_basebackup(
+        &self,
+        pgb: &mut PostgresBackend,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        base_lsn: Lsn,
+        _end_lsn: Lsn,
+    ) -> anyhow::Result<()> {
+        thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+        let _enter =
+            info_span!("import basebackup", timeline = %timeline_id, tenant = %tenant_id).entered();
+
+        // Create the empty timeline that the basebackup is imported into
+        info!("creating new timeline");
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        let timeline = repo.create_empty_timeline(timeline_id, Lsn(0))?;
+        let repartition_distance = repo.get_checkpoint_distance();
+        let mut datadir_timeline =
+            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
+
+        // TODO mark timeline as not ready until it reaches end_lsn (currently unused).
+        // We might have some wal to import as well, and we should prevent compute
+        // from connecting before that and writing conflicting wal.
+        //
+        // This is not relevant for pageserver->pageserver migrations, since there's
+        // no wal to import. But should be fixed if we want to import from postgres.
+
+        // TODO leave clean state on error. For now you can use detach to clean
+        // up broken state from a failed import.
+
+        // Put the client into CopyIn mode, then import the tarball it sends via CopyData
+        info!("importing basebackup");
+        pgb.write_message(&BeMessage::CopyInResponse)?;
+        let reader = CopyInReader::new(pgb);
+        import_basebackup_from_tar(&mut datadir_timeline, reader, base_lsn)?;
+
+        // TODO check checksum
+        // Meanwhile you can verify client-side by taking fullbackup
+        // and checking that it matches in size with what was imported.
+        // It wouldn't work if base came from vanilla postgres though,
+        // since we discard some log files.
+
+        // Flush data to disk, then upload to s3
+        info!("flushing layers");
+        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+
+        info!("done");
+        Ok(())
+    }
+
+    fn handle_import_wal(
+        &self,
+        pgb: &mut PostgresBackend,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        start_lsn: Lsn,
+        end_lsn: Lsn,
+    ) -> anyhow::Result<()> {
+        thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+        let _enter =
+            info_span!("import wal", timeline = %timeline_id, tenant = %tenant_id).entered();
+        // Load the existing timeline; the wal must begin exactly at its last record LSN
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        let timeline = repo.get_timeline_load(timeline_id)?;
+        ensure!(timeline.get_last_record_lsn() == start_lsn);
+
+        let repartition_distance = repo.get_checkpoint_distance();
+        let mut datadir_timeline =
+            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
+
+        // TODO leave clean state on error. For now you can use detach to clean
+        // up broken state from a failed import.
+
+        // Put the client into CopyIn mode, then import the wal tarball it sends via CopyData
+        info!("importing wal");
+        pgb.write_message(&BeMessage::CopyInResponse)?;
+        let reader = CopyInReader::new(pgb);
+        import_wal_from_tar(&mut datadir_timeline, reader, start_lsn, end_lsn)?;
+
+        // Fail unless the import reached end_lsn. TODO Does it make sense to overshoot?
+        ensure!(datadir_timeline.tline.get_last_record_lsn() >= end_lsn);
+
+        // Flush data to disk, then upload to s3. No need for a forced checkpoint.
+        // We only want to persist the data, and it doesn't matter if it's in the
+        // shape of deltas or images.
+        info!("flushing layers");
+        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+
+        info!("done");
+        Ok(())
+    }
+
    /// Helper function to handle the LSN from client request.
    ///
    /// Each GetPage (and Exists and Nblocks) request includes information about
@@ -572,6 +780,7 @@ impl PageServerHandler {
        timelineid: ZTimelineId,
        lsn: Option<Lsn>,
        tenantid: ZTenantId,
+        full_backup: bool,
    ) -> anyhow::Result<()> {
        let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
        let _enter = span.enter();
@@ -594,7 +803,7 @@ impl PageServerHandler {
        {
            let mut writer = CopyDataSink { pgb };

-            let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
+            let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn, full_backup)?;
            span.record("lsn", &basebackup.lsn.to_string().as_str());
            basebackup.send_tarball()?;
        }
@@ -612,7 +821,7 @@ impl PageServerHandler {
            return Ok(());
        }
        // auth is some, just checked above, when auth is some
-        // then claims are always present because of checks during connetion init
+        // then claims are always present because of checks during connection init
        // so this expect won't trigger
        let claims = self
            .claims
@@ -652,6 +861,10 @@ impl postgres_backend::Handler for PageServerHandler {
        Ok(())
    }

+    fn is_shutdown_requested(&self) -> bool {
+        thread_mgr::is_shutdown_requested()
+    }
+
    fn process_query(
        &mut self,
        pgb: &mut PostgresBackend,
@@ -693,31 +906,74 @@ impl postgres_backend::Handler for PageServerHandler {
            };

            // Check that the timeline exists
-            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
+            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, false)?;
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
-        } else if query_string.starts_with("callmemaybe ") {
-            // callmemaybe <zenith tenantid as hex string> <zenith timelineid as hex string> <connstr>
-            // TODO lazy static
-            let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) ([[:xdigit:]]+) (.*)$").unwrap();
-            let caps = re
-                .captures(query_string)
-                .with_context(|| format!("invalid callmemaybe: '{}'", query_string))?;
+        }
+        // same as basebackup, but result includes relational data as well
+        else if query_string.starts_with("fullbackup ") {
+            let (_, params_raw) = query_string.split_at("fullbackup ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();

-            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
-            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
-            let connstr = caps.get(3).unwrap().as_str().to_owned();
+            ensure!(
+                params.len() == 3,
+                "invalid param number for fullbackup command"
+            );
+
+            let tenantid = ZTenantId::from_str(params[0])?;
+            let timelineid = ZTimelineId::from_str(params[1])?;

            self.check_permission(Some(tenantid))?;

-            let _enter =
-                info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
+            // Lsn is required for fullbackup, because otherwise we would not know
+            // at which lsn to upload this backup.
+            //
+            // The caller is responsible for providing a valid lsn
+            // and using it in the subsequent import.
+            let lsn = Some(Lsn::from_str(params[2])?);

            // Check that the timeline exists
-            tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
-                .context("Cannot load local timeline")?;
+            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, true)?;
+            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        } else if query_string.starts_with("import basebackup ") {
+            // Import the `base` section (everything but the wal) of a basebackup.
+            // Assumes the tenant already exists on this pageserver.
+            //
+            // Files are scheduled to be persisted to remote storage, and the
+            // caller should poll the http api to check when that is done.
+            //
+            // Example import command:
+            // 1. Get start/end LSN from backup_manifest file
+            // 2. Run:
+            // cat my_backup/base.tar | psql -h $PAGESERVER \
+            //     -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN"
+            let (_, params_raw) = query_string.split_at("import basebackup ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+            ensure!(params.len() == 4);
+            let tenant = ZTenantId::from_str(params[0])?;
+            let timeline = ZTimelineId::from_str(params[1])?;
+            let base_lsn = Lsn::from_str(params[2])?;
+            let end_lsn = Lsn::from_str(params[3])?;

-            walreceiver::launch_wal_receiver(self.conf, tenantid, timelineid, &connstr)?;
+            self.check_permission(Some(tenant))?;

+            self.handle_import_basebackup(pgb, tenant, timeline, base_lsn, end_lsn)?;
+            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        } else if query_string.starts_with("import wal ") {
+            // Import the `pg_wal` section of a basebackup.
+            //
+            // Files are scheduled to be persisted to remote storage, and the
+            // caller should poll the http api to check when that is done.
+            let (_, params_raw) = query_string.split_at("import wal ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+            ensure!(params.len() == 4);
+            let tenant = ZTenantId::from_str(params[0])?;
+            let timeline = ZTimelineId::from_str(params[1])?;
+            let start_lsn = Lsn::from_str(params[2])?;
+            let end_lsn = Lsn::from_str(params[3])?;
+
+            self.check_permission(Some(tenant))?;
+
+            self.handle_import_wal(pgb, tenant, timeline, start_lsn, end_lsn)?;
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
            // important because psycopg2 executes "SET datestyle TO 'ISO'"
@@ -806,7 +1062,6 @@ impl postgres_backend::Handler for PageServerHandler {
                .map(|h| h.as_str().parse())
                .unwrap_or_else(|| Ok(repo.get_gc_horizon()))?;

-            let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
            // Use tenant's pitr setting
            let pitr = repo.get_pitr_interval();
            let result = repo.gc_iteration(Some(timelineid), gc_horizon, pitr, true)?;
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -521,7 +521,7 @@ pub struct DatadirModification<'a, R: Repository> {

    lsn: Lsn,

-    // The modifications are not applied directly to the underyling key-value store.
+    // The modifications are not applied directly to the underlying key-value store.
    // The put-functions add the modifications here, and they are flushed to the
    // underlying key-value store by the 'finish' function.
    pending_updates: HashMap<Key, Value>,
@@ -749,6 +749,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
    }

    /// Extend relation
+    /// If new size is smaller, do nothing.
    pub fn put_rel_extend(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
        ensure!(rel.relnode != 0, "invalid relnode");

@@ -756,10 +757,13 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
        let size_key = rel_size_to_key(rel);
        let old_size = self.get(size_key)?.get_u32_le();

-        let buf = nblocks.to_le_bytes();
-        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
+        // only extend relation here. never decrease the size
+        if nblocks > old_size {
+            let buf = nblocks.to_le_bytes();
+            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));

-        self.pending_nblocks += nblocks as isize - old_size as isize;
+            self.pending_nblocks += nblocks as isize - old_size as isize;
+        }
        Ok(())
    }

--- a/pageserver/src/reltag.rs
+++ b/pageserver/src/reltag.rs
@@ -3,7 +3,7 @@ use std::cmp::Ordering;
 use std::fmt;

 use postgres_ffi::relfile_utils::forknumber_to_name;
-use postgres_ffi::Oid;
+use postgres_ffi::{pg_constants, Oid};

 ///
 /// Relation data file segment id throughout the Postgres cluster.
@@ -75,6 +75,30 @@ impl fmt::Display for RelTag {
    }
 }

+impl RelTag {
+    /// Relative data-file path for segment `segno` of this relation:
+    /// `global/` or `base/<dbnode>/`, then `<relnode>`, then `_<fork>` when
+    /// the fork number has a name, then `.<segno>` unless `segno` is zero.
+    pub fn to_segfile_name(&self, segno: u32) -> String {
+        let dir = if self.spcnode != pg_constants::GLOBALTABLESPACE_OID {
+            format!("base/{}/", self.dbnode)
+        } else {
+            String::from("global/")
+        };
+        let mut path = format!("{}{}", dir, self.relnode);
+        if let Some(fork) = forknumber_to_name(self.forknum) {
+            path.push('_');
+            path.push_str(fork);
+        }
+        // Segment zero carries no numeric suffix
+        if segno > 0 {
+            path.push('.');
+            path.push_str(&segno.to_string());
+        }
+        path
+    }
+}
+
 ///
 /// Non-relation transaction status files (clog (a.k.a. pg_xact) and
 /// pg_multixact) in Postgres are handled by SLRU (Simple LRU) buffer,
--- a/pageserver/src/remote_storage/storage_sync/delete.rs
+++ b/pageserver/src/remote_storage/storage_sync/delete.rs
@@ -1,223 +0,0 @@
-//! Timeline synchrnonization logic to delete a bulk of timeline's remote files from the remote storage.
-
-use anyhow::Context;
-use futures::stream::{FuturesUnordered, StreamExt};
-use tracing::{debug, error, info};
-use utils::zid::ZTenantTimelineId;
-
-use crate::remote_storage::{
-    storage_sync::{SyncQueue, SyncTask},
-    RemoteStorage,
-};
-
-use super::{LayersDeletion, SyncData};
-
-/// Attempts to remove the timleline layers from the remote storage.
-/// If the task had not adjusted the metadata before, the deletion will fail.
-pub(super) async fn delete_timeline_layers<'a, P, S>(
-    storage: &'a S,
-    sync_queue: &SyncQueue,
-    sync_id: ZTenantTimelineId,
-    mut delete_data: SyncData<LayersDeletion>,
-) -> bool
-where
-    P: std::fmt::Debug + Send + Sync + 'static,
-    S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
-{
-    if !delete_data.data.deletion_registered {
-        error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
-        delete_data.retries += 1;
-        sync_queue.push(sync_id, SyncTask::Delete(delete_data));
-        return false;
-    }
-
-    if delete_data.data.layers_to_delete.is_empty() {
-        info!("No layers to delete, skipping");
-        return true;
-    }
-
-    let layers_to_delete = delete_data
-        .data
-        .layers_to_delete
-        .drain()
-        .collect::<Vec<_>>();
-    debug!("Layers to delete: {layers_to_delete:?}");
-    info!("Deleting {} timeline layers", layers_to_delete.len());
-
-    let mut delete_tasks = layers_to_delete
-        .into_iter()
-        .map(|local_layer_path| async {
-            let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
-                format!(
-                    "Failed to get the layer storage path for local path '{}'",
-                    local_layer_path.display()
-                )
-            }) {
-                Ok(path) => path,
-                Err(e) => return Err((e, local_layer_path)),
-            };
-
-            match storage.delete(&storage_path).await.with_context(|| {
-                format!(
-                    "Failed to delete remote layer from storage at '{:?}'",
-                    storage_path
-                )
-            }) {
-                Ok(()) => Ok(local_layer_path),
-                Err(e) => Err((e, local_layer_path)),
-            }
-        })
-        .collect::<FuturesUnordered<_>>();
-
-    let mut errored = false;
-    while let Some(deletion_result) = delete_tasks.next().await {
-        match deletion_result {
-            Ok(local_layer_path) => {
-                debug!(
-                    "Successfully deleted layer {} for timeline {sync_id}",
-                    local_layer_path.display()
-                );
-                delete_data.data.deleted_layers.insert(local_layer_path);
-            }
-            Err((e, local_layer_path)) => {
-                errored = true;
-                error!(
-                    "Failed to delete layer {} for timeline {sync_id}: {e:?}",
-                    local_layer_path.display()
-                );
-                delete_data.data.layers_to_delete.insert(local_layer_path);
-            }
-        }
-    }
-
-    if errored {
-        debug!("Reenqueuing failed delete task for timeline {sync_id}");
-        delete_data.retries += 1;
-        sync_queue.push(sync_id, SyncTask::Delete(delete_data));
-    }
-    errored
-}
-
-#[cfg(test)]
-mod tests {
-    use std::{collections::HashSet, num::NonZeroUsize};
-
-    use itertools::Itertools;
-    use tempfile::tempdir;
-    use tokio::fs;
-    use utils::lsn::Lsn;
-
-    use crate::{
-        remote_storage::{
-            storage_sync::test_utils::{create_local_timeline, dummy_metadata},
-            LocalFs,
-        },
-        repository::repo_harness::{RepoHarness, TIMELINE_ID},
-    };
-
-    use super::*;
-
-    #[tokio::test]
-    async fn delete_timeline_negative() -> anyhow::Result<()> {
-        let harness = RepoHarness::create("delete_timeline_negative")?;
-        let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
-        let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
-        let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
-
-        let deleted = delete_timeline_layers(
-            &storage,
-            &sync_queue,
-            sync_id,
-            SyncData {
-                retries: 1,
-                data: LayersDeletion {
-                    deleted_layers: HashSet::new(),
-                    layers_to_delete: HashSet::new(),
-                    deletion_registered: false,
-                },
-            },
-        )
-        .await;
-
-        assert!(
-            !deleted,
-            "Should not start the deletion for task with delete metadata unregistered"
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn delete_timeline() -> anyhow::Result<()> {
-        let harness = RepoHarness::create("delete_timeline")?;
-        let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
-
-        let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
-        let layer_files = ["a", "b", "c", "d"];
-        let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
-        let current_retries = 3;
-        let metadata = dummy_metadata(Lsn(0x30));
-        let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
-        let timeline_upload =
-            create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
-        for local_path in timeline_upload.layers_to_upload {
-            let remote_path = storage.storage_path(&local_path)?;
-            let remote_parent_dir = remote_path.parent().unwrap();
-            if !remote_parent_dir.exists() {
-                fs::create_dir_all(&remote_parent_dir).await?;
-            }
-            fs::copy(&local_path, &remote_path).await?;
-        }
-        assert_eq!(
-            storage
-                .list()
-                .await?
-                .into_iter()
-                .map(|remote_path| storage.local_path(&remote_path).unwrap())
-                .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
-                .sorted()
-                .collect::<Vec<_>>(),
-            layer_files
-                .iter()
-                .map(|layer_str| layer_str.to_string())
-                .sorted()
-                .collect::<Vec<_>>(),
-            "Expect to have all layer files remotely before deletion"
-        );
-
-        let deleted = delete_timeline_layers(
-            &storage,
-            &sync_queue,
-            sync_id,
-            SyncData {
-                retries: current_retries,
-                data: LayersDeletion {
-                    deleted_layers: HashSet::new(),
-                    layers_to_delete: HashSet::from([
-                        local_timeline_path.join("a"),
-                        local_timeline_path.join("c"),
-                        local_timeline_path.join("something_different"),
-                    ]),
-                    deletion_registered: true,
-                },
-            },
-        )
-        .await;
-        assert!(deleted, "Should be able to delete timeline files");
-
-        assert_eq!(
-            storage
-                .list()
-                .await?
-                .into_iter()
-                .map(|remote_path| storage.local_path(&remote_path).unwrap())
-                .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
-                .sorted()
-                .collect::<Vec<_>>(),
-            vec!["b".to_string(), "d".to_string()],
-            "Expect to have only non-deleted files remotely"
-        );
-
-        Ok(())
-    }
-}
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -19,7 +19,7 @@ use utils::{
 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 /// Key used in the Repository kv-store.
 ///
-/// The Repository treates this as an opaque struct, but see the code in pgdatadir_mapping.rs
+/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
 /// for what we actually store in these fields.
 pub struct Key {
    pub field1: u8,
@@ -195,8 +195,9 @@ impl Display for TimelineSyncStatusUpdate {
        f.write_str(s)
    }
 }
+
 ///
-/// A repository corresponds to one .zenith directory. One repository holds multiple
+/// A repository corresponds to one .neon directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 pub trait Repository: Send + Sync {
    type Timeline: Timeline;
@@ -210,7 +211,7 @@ pub trait Repository: Send + Sync {
    ) -> Result<()>;

    /// Get Timeline handle for given zenith timeline ID.
-    /// This function is idempotent. It doesnt change internal state in any way.
+    /// This function is idempotent. It doesn't change internal state in any way.
    fn get_timeline(&self, timelineid: ZTimelineId) -> Option<RepositoryTimeline<Self::Timeline>>;

    /// Get Timeline handle for locally available timeline. Load it into memory if it is not loaded.
@@ -242,7 +243,7 @@ pub trait Repository: Send + Sync {
    ///
    /// 'timelineid' specifies the timeline to GC, or None for all.
    /// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
-    /// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
+    /// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
    /// to make tests more deterministic.
    /// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
    fn gc_iteration(
@@ -345,11 +346,11 @@ pub trait Timeline: Send + Sync {

    /// Look up given page version.
    ///
-    /// NOTE: It is considerd an error to 'get' a key that doesn't exist. The abstraction
+    /// NOTE: It is considered an error to 'get' a key that doesn't exist. The abstraction
    /// above this needs to store suitable metadata to track what data exists with
    /// what keys, in separate metadata entries. If a non-existent key is requested,
-    /// the Repository implementation may incorrectly return a value from an ancestore
-    /// branch, for exampel, or waste a lot of cycles chasing the non-existing key.
+    /// the Repository implementation may incorrectly return a value from an ancestor
+    /// branch, for example, or waste a lot of cycles chasing the non-existing key.
    ///
    fn get(&self, key: Key, lsn: Lsn) -> Result<Bytes>;

@@ -469,6 +470,9 @@ pub mod repo_harness {
                gc_period: Some(tenant_conf.gc_period),
                image_creation_threshold: Some(tenant_conf.image_creation_threshold),
                pitr_interval: Some(tenant_conf.pitr_interval),
+                walreceiver_connect_timeout: Some(tenant_conf.walreceiver_connect_timeout),
+                lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout),
+                max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag),
            }
        }
    }
--- a/pageserver/src/storage_sync.rs
+++ b/pageserver/src/storage_sync.rs
@@ -69,7 +69,7 @@
 //! Yet instead of keeping the `metadata` file remotely, we wrap it with more data in [`IndexPart`], containing the list of remote files.
 //! This file gets read to populate the cache, if the remote timeline data is missing from it and gets updated after every successful download.
 //! This way, we optimize S3 storage access by not running the `S3 list` command that could be expencive and slow: knowing both [`ZTenantId`] and [`ZTimelineId`],
-//! we can always reconstruct the path to the timeline, use this to get the same path on the remote storage and retrive its shard contents, if needed, same as any layer files.
+//! we can always reconstruct the path to the timeline, use this to get the same path on the remote storage and retrieve its shard contents, if needed, same as any layer files.
 //!
 //! By default, pageserver reads the remote storage index data only for timelines located locally, to synchronize those, if needed.
 //! Bulk index data download happens only initially, on pageserver startup. The rest of the remote storage stays unknown to pageserver and loaded on demand only,
@@ -96,7 +96,7 @@
 //! timeline uploads and downloads can happen concurrently, in no particular order due to incremental nature of the timeline layers.
 //! Deletion happens only after a successful upload only, otherwise the compaction output might make the timeline inconsistent until both tasks are fully processed without errors.
 //! Upload and download update the remote data (inmemory index and S3 json index part file) only after every layer is successfully synchronized, while the deletion task
-//! does otherwise: it requires to have the remote data updated first succesfully: blob files will be invisible to pageserver this way.
+//! does otherwise: it requires to have the remote data updated first successfully: blob files will be invisible to pageserver this way.
 //!
 //! During the loop startup, an initial [`RemoteTimelineIndex`] state is constructed via downloading and merging the index data for all timelines,
 //! present locally.
@@ -186,8 +186,8 @@ use crate::{
 };

 use metrics::{
-    register_histogram_vec, register_int_counter, register_int_gauge, HistogramVec, IntCounter,
-    IntGauge,
+    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
+    HistogramVec, IntCounter, IntCounterVec, IntGauge,
 };
 use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

@@ -208,14 +208,17 @@ lazy_static! {
    static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
        "pageserver_remote_storage_image_sync_seconds",
        "Time took to synchronize (download or upload) a whole pageserver image. \
-        Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
-        &["operation_kind", "status"],
-        vec![
-            0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
-            8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
-        ]
+        Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
+        &["tenant_id", "timeline_id", "operation_kind", "status"],
+        vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
    )
    .expect("failed to register pageserver image sync time histogram vec");
+    static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
+        "pageserver_remote_storage_remote_index_uploads_total",
+        "Number of remote index uploads",
+        &["tenant_id", "timeline_id"],
+    )
+    .expect("failed to register pageserver remote index upload vec");
 }

 static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
@@ -440,7 +443,7 @@ fn collect_timeline_files(
    //   initial collect will fail because there is no metadata.
    //   We either need to start download if we see empty dir after restart or attach caller should
    //   be aware of that and retry attach if awaits_download for timeline switched from true to false
-    //   but timelinne didnt appear locally.
+    //   but timeline didn't appear locally.
    //   Check what happens with remote index in that case.
    let timeline_metadata_path = match timeline_metadata_path {
        Some(path) => path,
@@ -892,7 +895,7 @@ fn storage_sync_loop<P, S>(

        REMAINING_SYNC_ITEMS.set(remaining_queue_length as i64);
        if remaining_queue_length > 0 || !batched_tasks.is_empty() {
-            info!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
+            debug!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
        } else {
            debug!("No tasks to process");
            continue;
@@ -1007,7 +1010,7 @@ where
    // in local (implicitly, via Lsn values and related memory state) or remote (explicitly via remote layer file paths) metadata.
    // When operating in a system without tasks failing over the error threshold,
    // current batching and task processing systems aim to update the layer set and metadata files (remote and local),
-    // without "loosing" such layer files.
+    // without "losing" such layer files.
    let (upload_result, status_update) = tokio::join!(
        async {
            if let Some(upload_data) = upload_data {
@@ -1146,30 +1149,30 @@ where
    .await
    {
        DownloadedTimeline::Abort => {
-            register_sync_status(sync_start, task_name, None);
+            register_sync_status(sync_id, sync_start, task_name, None);
            if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
                error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
            }
        }
        DownloadedTimeline::FailedAndRescheduled => {
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
        }
        DownloadedTimeline::Successful(mut download_data) => {
            match update_local_metadata(conf, sync_id, current_remote_timeline).await {
                Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
                    Ok(()) => {
-                        register_sync_status(sync_start, task_name, Some(true));
+                        register_sync_status(sync_id, sync_start, task_name, Some(true));
                        return Some(TimelineSyncStatusUpdate::Downloaded);
                    }
                    Err(e) => {
-                        error!("Timeline {sync_id} was expected to be in the remote index after a sucessful download, but it's absent: {e:?}");
+                        error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
                    }
                },
                Err(e) => {
                    error!("Failed to update local timeline metadata: {e:?}");
                    download_data.retries += 1;
                    sync_queue.push(sync_id, SyncTask::Download(download_data));
-                    register_sync_status(sync_start, task_name, Some(false));
+                    register_sync_status(sync_id, sync_start, task_name, Some(false));
                }
            }
        }
@@ -1186,7 +1189,7 @@ async fn update_local_metadata(
    let remote_metadata = match remote_timeline {
        Some(timeline) => &timeline.metadata,
        None => {
-            info!("No remote timeline to update local metadata from, skipping the update");
+            debug!("No remote timeline to update local metadata from, skipping the update");
            return Ok(());
        }
    };
@@ -1265,14 +1268,14 @@ async fn delete_timeline_data<P, S>(
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            new_delete_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
            return;
        }
    }
    timeline_delete.deletion_registered = true;

    let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
-    register_sync_status(sync_start, task_name, Some(sync_status));
+    register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
 }

 async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1306,7 +1309,7 @@ async fn upload_timeline_data<P, S>(
    .await
    {
        UploadedTimeline::FailedAndRescheduled => {
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
            return;
        }
        UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1325,13 +1328,13 @@ async fn upload_timeline_data<P, S>(
    .await
    {
        Ok(()) => {
-            register_sync_status(sync_start, task_name, Some(true));
+            register_sync_status(sync_id, sync_start, task_name, Some(true));
        }
        Err(e) => {
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            uploaded_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
        }
    }
 }
@@ -1421,7 +1424,14 @@ where
        IndexPart::from_remote_timeline(&timeline_path, updated_remote_timeline)
            .context("Failed to create an index part from the updated remote timeline")?;

-    info!("Uploading remote index for the timeline");
+    debug!("Uploading remote index for the timeline");
+    REMOTE_INDEX_UPLOAD
+        .with_label_values(&[
+            &sync_id.tenant_id.to_string(),
+            &sync_id.timeline_id.to_string(),
+        ])
+        .inc();
+
    upload_index_part(conf, storage, sync_id, new_index_part)
        .await
        .context("Failed to upload new index part")
@@ -1549,10 +1559,10 @@ fn compare_local_and_remote_timeline(
    let remote_files = remote_entry.stored_files();

    // TODO probably here we need more sophisticated logic,
-    //   if more data is available remotely can we just download whats there?
+    //   if more data is available remotely can we just download what's there?
    //   without trying to upload something. It may be tricky, needs further investigation.
    //   For now looks strange that we can request upload
-    //   and dowload for the same timeline simultaneously.
+    //   and download for the same timeline simultaneously.
    //   (upload needs to be only for previously unsynced files, not whole timeline dir).
    //   If one of the tasks fails they will be reordered in the queue which can lead
    //   to timeline being stuck in evicted state
@@ -1565,7 +1575,7 @@ fn compare_local_and_remote_timeline(
            }),
        ));
        (LocalTimelineInitStatus::NeedsSync, true)
-        // we do not need to manupulate with remote consistent lsn here
+        // we do not need to manipulate the remote consistent lsn here
        // because it will be updated when sync will be completed
    } else {
        (LocalTimelineInitStatus::LocallyComplete, false)
@@ -1590,12 +1600,24 @@ fn compare_local_and_remote_timeline(
    (initial_timeline_status, awaits_download)
 }

-fn register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option<bool>) {
+fn register_sync_status(
+    sync_id: ZTenantTimelineId,
+    sync_start: Instant,
+    sync_name: &str,
+    sync_status: Option<bool>,
+) {
    let secs_elapsed = sync_start.elapsed().as_secs_f64();
-    info!("Processed a sync task in {secs_elapsed:.2} seconds");
+    debug!("Processed a sync task in {secs_elapsed:.2} seconds");
+
+    let tenant_id = sync_id.tenant_id.to_string();
+    let timeline_id = sync_id.timeline_id.to_string();
    match sync_status {
-        Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]),
-        Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]),
+        Some(true) => {
+            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
+        }
+        Some(false) => {
+            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
+        }
        None => return,
    }
    .observe(secs_elapsed)
--- a/pageserver/src/storage_sync/delete.rs
+++ b/pageserver/src/storage_sync/delete.rs
@@ -1,4 +1,4 @@
-//! Timeline synchrnonization logic to delete a bulk of timeline's remote files from the remote storage.
+//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
--- a/pageserver/src/storage_sync/download.rs
+++ b/pageserver/src/storage_sync/download.rs
@@ -1,4 +1,4 @@
-//! Timeline synchrnonization logic to fetch the layer files from remote storage into pageserver's local directory.
+//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.

 use std::{collections::HashSet, fmt::Debug, path::Path};

--- a/pageserver/src/storage_sync/index.rs
+++ b/pageserver/src/storage_sync/index.rs
@@ -273,7 +273,7 @@ mod tests {
        };

        let index_part = IndexPart::from_remote_timeline(&timeline_path, remote_timeline.clone())
-            .expect("Correct remote timeline should be convertable to index part");
+            .expect("Correct remote timeline should be convertible to index part");

        assert_eq!(
            index_part.timeline_layers.iter().collect::<BTreeSet<_>>(),
@@ -305,7 +305,7 @@ mod tests {
        );

        let restored_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
-            .expect("Correct index part should be convertable to remote timeline");
+            .expect("Correct index part should be convertible to remote timeline");

        let original_metadata = &remote_timeline.metadata;
        let restored_metadata = &restored_timeline.metadata;
--- a/pageserver/src/storage_sync/upload.rs
+++ b/pageserver/src/storage_sync/upload.rs
@@ -4,6 +4,7 @@ use std::{fmt::Debug, path::PathBuf};

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
+use lazy_static::lazy_static;
 use remote_storage::RemoteStorage;
 use tokio::fs;
 use tracing::{debug, error, info, warn};
@@ -17,6 +18,16 @@ use super::{
 use crate::{
    config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
 };
+use metrics::{register_int_counter_vec, IntCounterVec};
+
+lazy_static! {
+    static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
+        "pageserver_remote_storage_no_layers_uploads_total",
+        "Number of skipped uploads due to no layers",
+        &["tenant_id", "timeline_id"],
+    )
+    .expect("failed to register pageserver no layers upload vec");
+}

 /// Serializes and uploads the given index part data to the remote storage.
 pub(super) async fn upload_index_part<P, S>(
@@ -102,7 +113,13 @@ where
        .collect::<Vec<_>>();

    if layers_to_upload.is_empty() {
-        info!("No layers to upload after filtering, aborting");
+        debug!("No layers to upload after filtering, aborting");
+        NO_LAYERS_UPLOAD
+            .with_label_values(&[
+                &sync_id.tenant_id.to_string(),
+                &sync_id.timeline_id.to_string(),
+            ])
+            .inc();
        return UploadedTimeline::Successful(upload_data);
    }

@@ -391,7 +408,7 @@ mod tests {
        assert_eq!(
            upload.metadata,
            Some(metadata),
-            "Successful upload should not chage its metadata"
+            "Successful upload should not change its metadata"
        );

        let storage_files = storage.list().await?;
--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -10,6 +10,7 @@
 //!
 use crate::config::PageServerConf;
 use serde::{Deserialize, Serialize};
+use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::time::Duration;
 use utils::zid::ZTenantId;
@@ -34,6 +35,9 @@ pub mod defaults {
    pub const DEFAULT_GC_PERIOD: &str = "100 s";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
+    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
+    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
+    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
 }

 /// Per-tenant configuration options
@@ -68,6 +72,17 @@ pub struct TenantConf {
    // Page versions older than this are garbage collected away.
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Duration,
+    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
+    #[serde(with = "humantime_serde")]
+    pub walreceiver_connect_timeout: Duration,
+    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
+    /// A stalled safekeeper will be changed to a newer one when it appears.
+    #[serde(with = "humantime_serde")]
+    pub lagging_wal_timeout: Duration,
+    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
+    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
+    /// to avoid eager reconnects.
+    pub max_lsn_wal_lag: NonZeroU64,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -85,6 +100,11 @@ pub struct TenantConfOpt {
    pub image_creation_threshold: Option<usize>,
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Option<Duration>,
+    #[serde(with = "humantime_serde")]
+    pub walreceiver_connect_timeout: Option<Duration>,
+    #[serde(with = "humantime_serde")]
+    pub lagging_wal_timeout: Option<Duration>,
+    pub max_lsn_wal_lag: Option<NonZeroU64>,
 }

 impl TenantConfOpt {
@@ -108,6 +128,13 @@ impl TenantConfOpt {
                .image_creation_threshold
                .unwrap_or(global_conf.image_creation_threshold),
            pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),
+            walreceiver_connect_timeout: self
+                .walreceiver_connect_timeout
+                .unwrap_or(global_conf.walreceiver_connect_timeout),
+            lagging_wal_timeout: self
+                .lagging_wal_timeout
+                .unwrap_or(global_conf.lagging_wal_timeout),
+            max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),
        }
    }

@@ -136,6 +163,15 @@ impl TenantConfOpt {
        if let Some(pitr_interval) = other.pitr_interval {
            self.pitr_interval = Some(pitr_interval);
        }
+        if let Some(walreceiver_connect_timeout) = other.walreceiver_connect_timeout {
+            self.walreceiver_connect_timeout = Some(walreceiver_connect_timeout);
+        }
+        if let Some(lagging_wal_timeout) = other.lagging_wal_timeout {
+            self.lagging_wal_timeout = Some(lagging_wal_timeout);
+        }
+        if let Some(max_lsn_wal_lag) = other.max_lsn_wal_lag {
+            self.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+        }
    }
 }

@@ -155,6 +191,14 @@ impl TenantConf {
            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
                .expect("cannot parse default PITR interval"),
+            walreceiver_connect_timeout: humantime::parse_duration(
+                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
+            )
+            .expect("cannot parse default walreceiver connect timeout"),
+            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
+                .expect("cannot parse default walreceiver lagging wal timeout"),
+            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
+                .expect("cannot parse default max walreceiver Lsn wal lag"),
        }
    }

@@ -175,6 +219,16 @@ impl TenantConf {
            gc_period: Duration::from_secs(10),
            image_creation_threshold: defaults::DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: Duration::from_secs(60 * 60),
+            walreceiver_connect_timeout: humantime::parse_duration(
+                defaults::DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
+            )
+            .unwrap(),
+            lagging_wal_timeout: humantime::parse_duration(
+                defaults::DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT,
+            )
+            .unwrap(),
+            max_lsn_wal_lag: NonZeroU64::new(defaults::DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
+                .unwrap(),
        }
    }
 }
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -2,17 +2,16 @@
 //! page server.

 use crate::config::PageServerConf;
-use crate::layered_repository::LayeredRepository;
+use crate::layered_repository::{load_metadata, LayeredRepository};
 use crate::pgdatadir_mapping::DatadirTimeline;
 use crate::repository::{Repository, TimelineSyncStatusUpdate};
 use crate::storage_sync::index::RemoteIndex;
 use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
 use crate::tenant_config::TenantConfOpt;
-use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
-use crate::timelines;
 use crate::timelines::CreateRepo;
 use crate::walredo::PostgresRedoManager;
+use crate::{thread_mgr, timelines, walreceiver};
 use crate::{DatadirTimelineImpl, RepositoryImpl};
 use anyhow::{bail, Context};
 use serde::{Deserialize, Serialize};
@@ -21,22 +20,30 @@ use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::fmt;
 use std::sync::Arc;
+use tokio::sync::mpsc;
 use tracing::*;
+use utils::lsn::Lsn;

-use utils::zid::{ZTenantId, ZTimelineId};
+use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

 mod tenants_state {
+    use anyhow::ensure;
    use std::{
        collections::HashMap,
        sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
    };
+    use tokio::sync::mpsc;
+    use tracing::{debug, error};

    use utils::zid::ZTenantId;

-    use crate::tenant_mgr::Tenant;
+    use crate::tenant_mgr::{LocalTimelineUpdate, Tenant};

    lazy_static::lazy_static! {
        static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>> = RwLock::new(HashMap::new());
+        /// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
+        /// so that it can enable/disable corresponding processes.
+        static ref TIMELINE_UPDATE_SENDER: RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>> = RwLock::new(None);
    }

    pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<ZTenantId, Tenant>> {
@@ -50,6 +57,39 @@ mod tenants_state {
            .write()
            .expect("Failed to write() tenants lock, it got poisoned")
    }
+
+    pub(super) fn set_timeline_update_sender(
+        timeline_updates_sender: mpsc::UnboundedSender<LocalTimelineUpdate>,
+    ) -> anyhow::Result<()> {
+        let mut sender_guard = TIMELINE_UPDATE_SENDER
+            .write()
+            .expect("Failed to write() timeline_update_sender lock, it got poisoned");
+        ensure!(sender_guard.is_none(), "Timeline update sender already set");
+        *sender_guard = Some(timeline_updates_sender);
+        Ok(())
+    }
+
+    pub(super) fn try_send_timeline_update(update: LocalTimelineUpdate) {
+        match TIMELINE_UPDATE_SENDER
+            .read()
+            .expect("Failed to read() timeline_update_sender lock, it got poisoned")
+            .as_ref()
+        {
+            Some(sender) => {
+                if let Err(e) = sender.send(update) {
+                    error!("Failed to send timeline update: {}", e);
+                }
+            }
+            None => debug!("Timeline update sender is not enabled, cannot send update {update:?}"),
+        }
+    }
+
+    pub(super) fn stop_timeline_update_sender() {
+        TIMELINE_UPDATE_SENDER
+            .write()
+            .expect("Failed to write() timeline_update_sender lock, it got poisoned")
+            .take();
+    }
 }

 struct Tenant {
@@ -86,10 +126,10 @@ pub enum TenantState {
 impl fmt::Display for TenantState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
-            TenantState::Active => f.write_str("Active"),
-            TenantState::Idle => f.write_str("Idle"),
-            TenantState::Stopping => f.write_str("Stopping"),
-            TenantState::Broken => f.write_str("Broken"),
+            Self::Active => f.write_str("Active"),
+            Self::Idle => f.write_str("Idle"),
+            Self::Stopping => f.write_str("Stopping"),
+            Self::Broken => f.write_str("Broken"),
        }
    }
 }
@@ -98,6 +138,11 @@ impl fmt::Display for TenantState {
 /// Timelines that are only partially available locally (remote storage has more data than this pageserver)
 /// are scheduled for download and added to the repository once download is completed.
 pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIndex> {
+    let (timeline_updates_sender, timeline_updates_receiver) =
+        mpsc::unbounded_channel::<LocalTimelineUpdate>();
+    tenants_state::set_timeline_update_sender(timeline_updates_sender)?;
+    walreceiver::init_wal_receiver_main_thread(conf, timeline_updates_receiver)?;
+
    let SyncStartupData {
        remote_index,
        local_timeline_init_statuses,
@@ -112,16 +157,27 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
            // loading a tenant is serious, but it's better to complete the startup and
            // serve other tenants, than fail completely.
            error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
-            let mut m = tenants_state::write_tenants();
-            if let Some(tenant) = m.get_mut(&tenant_id) {
-                tenant.state = TenantState::Broken;
-            }
+            set_tenant_state(tenant_id, TenantState::Broken)?;
        }
    }

    Ok(remote_index)
 }

+pub enum LocalTimelineUpdate {
+    Detach(ZTenantTimelineId),
+    Attach(ZTenantTimelineId, Arc<DatadirTimelineImpl>),
+}
+
+impl std::fmt::Debug for LocalTimelineUpdate {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self { // print each variant under its own name with its ids; the timeline handle is not printed
+            Self::Detach(ttid) => f.debug_tuple("Detach").field(ttid).finish(),
+            Self::Attach(ttid, _) => f.debug_tuple("Attach").field(ttid).finish(),
+        }
+    }
+}
+
 /// Updates tenants' repositories, changing their timelines state in memory.
 pub fn apply_timeline_sync_status_updates(
    conf: &'static PageServerConf,
@@ -159,6 +215,7 @@ pub fn apply_timeline_sync_status_updates(
 /// Shut down all tenants. This runs as part of pageserver shutdown.
 ///
 pub fn shutdown_all_tenants() {
+    tenants_state::stop_timeline_update_sender();
    let mut m = tenants_state::write_tenants();
    let mut tenantids = Vec::new();
    for (tenantid, tenant) in m.iter_mut() {
@@ -172,7 +229,7 @@ pub fn shutdown_all_tenants() {
    }
    drop(m);

-    thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiver), None, None);
+    thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
    thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
    thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);

@@ -246,32 +303,49 @@ pub fn get_tenant_state(tenantid: ZTenantId) -> Option<TenantState> {
    Some(tenants_state::read_tenants().get(&tenantid)?.state)
 }

-///
-/// Change the state of a tenant to Active and launch its compactor and GC
-/// threads. If the tenant was already in Active state or Stopping, does nothing.
-///
-pub fn activate_tenant(tenant_id: ZTenantId) -> anyhow::Result<()> {
+pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow::Result<()> {
    let mut m = tenants_state::write_tenants();
    let tenant = m
        .get_mut(&tenant_id)
        .with_context(|| format!("Tenant not found for id {tenant_id}"))?;
+    let old_state = tenant.state;
+    tenant.state = new_state;
+    drop(m);

-    info!("activating tenant {tenant_id}");
-
-    match tenant.state {
-        // If the tenant is already active, nothing to do.
-        TenantState::Active => {}
-
-        // If it's Idle, launch the compactor and GC threads
-        TenantState::Idle => {
-            thread_mgr::spawn(
+    match (old_state, new_state) {
+        (TenantState::Broken, TenantState::Broken)
+        | (TenantState::Active, TenantState::Active)
+        | (TenantState::Idle, TenantState::Idle)
+        | (TenantState::Stopping, TenantState::Stopping) => {
+            debug!("tenant {tenant_id} already in state {new_state}");
+        }
+        (TenantState::Broken, ignored) => {
+            debug!("Ignoring {ignored} since tenant {tenant_id} is in broken state");
+        }
+        (_, TenantState::Broken) => {
+            debug!("Setting tenant {tenant_id} status to broken");
+        }
+        (TenantState::Stopping, ignored) => {
+            debug!("Ignoring {ignored} since tenant {tenant_id} is in stopping state");
+        }
+        (TenantState::Idle, TenantState::Active) => {
+            info!("activating tenant {tenant_id}");
+            let compactor_spawn_result = thread_mgr::spawn(
                ThreadKind::Compactor,
                Some(tenant_id),
                None,
                "Compactor thread",
                false,
                move || crate::tenant_threads::compact_loop(tenant_id),
-            )?;
+            );
+            if compactor_spawn_result.is_err() {
+                let mut m = tenants_state::write_tenants();
+                m.get_mut(&tenant_id)
+                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
+                    .state = old_state;
+                drop(m);
+            }
+            compactor_spawn_result?;

            let gc_spawn_result = thread_mgr::spawn(
                ThreadKind::GarbageCollector,
@@ -285,21 +359,31 @@ pub fn activate_tenant(tenant_id: ZTenantId) -> anyhow::Result<()> {
            .with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));

            if let Err(e) = &gc_spawn_result {
+                let mut m = tenants_state::write_tenants();
+                m.get_mut(&tenant_id)
+                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
+                    .state = old_state;
+                drop(m);
                error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
                thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
                return gc_spawn_result;
            }
-            tenant.state = TenantState::Active;
        }
-
-        TenantState::Stopping => {
-            // don't re-activate it if it's being stopped
+        (TenantState::Idle, TenantState::Stopping) => {
+            info!("stopping idle tenant {tenant_id}");
        }
-
-        TenantState::Broken => {
-            // cannot activate
+        (TenantState::Active, TenantState::Stopping | TenantState::Idle) => {
+            info!("stopping tenant {tenant_id} threads due to new state {new_state}");
+            thread_mgr::shutdown_threads(
+                Some(ThreadKind::WalReceiverManager),
+                Some(tenant_id),
+                None,
+            );
+            thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
+            thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
        }
    }
+
    Ok(())
 }

@@ -324,15 +408,15 @@ pub fn get_local_timeline_with_load(
        .with_context(|| format!("Tenant {tenant_id} not found"))?;

    if let Some(page_tline) = tenant.local_timelines.get(&timeline_id) {
-        return Ok(Arc::clone(page_tline));
+        Ok(Arc::clone(page_tline))
+    } else {
+        let page_tline = load_local_timeline(&tenant.repo, timeline_id)
+            .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}"))?;
+        tenant
+            .local_timelines
+            .insert(timeline_id, Arc::clone(&page_tline));
+        Ok(page_tline)
    }
-
-    let page_tline = new_local_timeline(&tenant.repo, timeline_id)
-        .with_context(|| format!("Failed to create new local timeline for tenant {tenant_id}"))?;
-    tenant
-        .local_timelines
-        .insert(timeline_id, Arc::clone(&page_tline));
-    Ok(page_tline)
 }

 pub fn detach_timeline(
@@ -350,6 +434,9 @@ pub fn detach_timeline(
                .detach_timeline(timeline_id)
                .context("Failed to detach inmem tenant timeline")?;
            tenant.local_timelines.remove(&timeline_id);
+            tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach(
+                ZTenantTimelineId::new(tenant_id, timeline_id),
+            ));
        }
        None => bail!("Tenant {tenant_id} not found in local tenant state"),
    }
@@ -365,7 +452,7 @@ pub fn detach_timeline(
    Ok(())
 }

-fn new_local_timeline(
+fn load_local_timeline(
    repo: &RepositoryImpl,
    timeline_id: ZTimelineId,
 ) -> anyhow::Result<Arc<DatadirTimeline<LayeredRepository>>> {
@@ -378,6 +465,12 @@ fn new_local_timeline(
        repartition_distance,
    ));
    page_tline.init_logical_size()?;
+
+    tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach(
+        ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
+        Arc::clone(&page_tline),
+    ));
+
    Ok(page_tline)
 }

@@ -399,6 +492,26 @@ pub fn list_tenants() -> Vec<TenantInfo> {
        .collect()
 }

+/// Check if a given timeline is "broken" \[1\].
+/// The function returns an error if the timeline is "broken".
+///
+/// \[1\]: it is not yet clear how we should classify a timeline as broken.
+/// For now, a timeline is categorized as broken when any of the following conditions is true:
+/// - failed to load the timeline's metadata
+/// - the timeline's disk consistent LSN is zero
+fn check_broken_timeline(repo: &LayeredRepository, timeline_id: ZTimelineId) -> anyhow::Result<()> {
+    let metadata = load_metadata(repo.conf, timeline_id, repo.tenant_id())
+        .context("failed to load metadata")?;
+
+    // A timeline with zero disk consistent LSN can happen when the page server
+    // failed to checkpoint the timeline import data when creating that timeline.
+    if metadata.disk_consistent_lsn() == Lsn::INVALID {
+        bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
+    }
+
+    Ok(())
+}
+
 fn init_local_repository(
    conf: &'static PageServerConf,
    tenant_id: ZTenantId,
@@ -414,7 +527,13 @@ fn init_local_repository(
        match init_status {
            LocalTimelineInitStatus::LocallyComplete => {
                debug!("timeline {timeline_id} for tenant {tenant_id} is locally complete, registering it in repository");
-                status_updates.insert(timeline_id, TimelineSyncStatusUpdate::Downloaded);
+                if let Err(err) = check_broken_timeline(&repo, timeline_id) {
+                    info!(
+                        "Found a broken timeline {timeline_id} (err={err:?}), skip registering it in repository"
+                    );
+                } else {
+                    status_updates.insert(timeline_id, TimelineSyncStatusUpdate::Downloaded);
+                }
            }
            LocalTimelineInitStatus::NeedsSync => {
                debug!(
@@ -458,8 +577,8 @@ fn apply_timeline_remote_sync_status_updates(
                    bail!("Local timeline {timeline_id} already registered")
                }
                Entry::Vacant(v) => {
-                    v.insert(new_local_timeline(repo, timeline_id).with_context(|| {
-                        format!("Failed to register new local timeline for tenant {tenant_id}")
+                    v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
+                        format!("Failed to register add local timeline for tenant {tenant_id}")
                    })?);
                }
            },
--- a/pageserver/src/thread_mgr.rs
+++ b/pageserver/src/thread_mgr.rs
@@ -91,8 +91,8 @@ pub enum ThreadKind {
    // associated with one later, after receiving a command from the client.
    PageRequestHandler,

-    // Thread that connects to a safekeeper to fetch WAL for one timeline.
-    WalReceiver,
+    // Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
+    WalReceiverManager,

    // Thread that handles compaction of all timelines for a tenant.
    Compactor,
@@ -108,15 +108,21 @@ pub enum ThreadKind {
    StorageSync,
 }

+struct MutableThreadState {
+    /// Tenant and timeline that this thread is associated with.
+    tenant_id: Option<ZTenantId>,
+    timeline_id: Option<ZTimelineId>,
+
+    /// Handle for waiting for the thread to exit. It is None until the thread has
+    /// been spawned, and again once the handle has been taken to join the thread.
+    join_handle: Option<JoinHandle<()>>,
+}
+
 struct PageServerThread {
    _thread_id: u64,

    kind: ThreadKind,

-    /// Tenant and timeline that this thread is associated with.
-    tenant_id: Option<ZTenantId>,
-    timeline_id: Option<ZTimelineId>,
-
    name: String,

    // To request thread shutdown, set the flag, and send a dummy message to the
@@ -124,9 +130,7 @@ struct PageServerThread {
    shutdown_requested: AtomicBool,
    shutdown_tx: watch::Sender<()>,

-    /// Handle for waiting for the thread to exit. It can be None, if the
-    /// the thread has already exited.
-    join_handle: Mutex<Option<JoinHandle<()>>>,
+    mutable: Mutex<MutableThreadState>,
 }

 /// Launch a new thread
@@ -145,29 +149,27 @@ where
 {
    let (shutdown_tx, shutdown_rx) = watch::channel(());
    let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
-    let thread = PageServerThread {
+    let thread = Arc::new(PageServerThread {
        _thread_id: thread_id,
        kind,
-        tenant_id,
-        timeline_id,
        name: name.to_string(),
-
        shutdown_requested: AtomicBool::new(false),
        shutdown_tx,
-
-        join_handle: Mutex::new(None),
-    };
-
-    let thread_rc = Arc::new(thread);
-
-    let mut jh_guard = thread_rc.join_handle.lock().unwrap();
+        mutable: Mutex::new(MutableThreadState {
+            tenant_id,
+            timeline_id,
+            join_handle: None,
+        }),
+    });

    THREADS
        .lock()
        .unwrap()
-        .insert(thread_id, Arc::clone(&thread_rc));
+        .insert(thread_id, Arc::clone(&thread));

-    let thread_rc2 = Arc::clone(&thread_rc);
+    let mut thread_mut = thread.mutable.lock().unwrap();
+
+    let thread_cloned = Arc::clone(&thread);
    let thread_name = name.to_string();
    let join_handle = match thread::Builder::new()
        .name(name.to_string())
@@ -175,7 +177,7 @@ where
            thread_wrapper(
                thread_name,
                thread_id,
-                thread_rc2,
+                thread_cloned,
                shutdown_rx,
                shutdown_process_on_error,
                f,
@@ -189,8 +191,8 @@ where
            return Err(err);
        }
    };
-    *jh_guard = Some(join_handle);
-    drop(jh_guard);
+    thread_mut.join_handle = Some(join_handle);
+    drop(thread_mut);

    // The thread is now running. Nothing more to do here
    Ok(thread_id)
@@ -229,19 +231,20 @@ fn thread_wrapper<F>(
        .remove(&thread_id)
        .expect("no thread in registry");

+    let thread_mut = thread.mutable.lock().unwrap();
    match result {
        Ok(Ok(())) => debug!("Thread '{}' exited normally", thread_name),
        Ok(Err(err)) => {
            if shutdown_process_on_error {
                error!(
                    "Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
                shutdown_pageserver(1);
            } else {
                error!(
                    "Thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
            }
        }
@@ -249,19 +252,29 @@ fn thread_wrapper<F>(
            if shutdown_process_on_error {
                error!(
                    "Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
                shutdown_pageserver(1);
            } else {
                error!(
                    "Thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
            }
        }
    }
 }

+// Sets the tenant and timeline of the calling thread; panics if CURRENT_THREAD is unset, so call it only from a thread that has it set.
+pub fn associate_with(tenant_id: Option<ZTenantId>, timeline_id: Option<ZTimelineId>) {
+    CURRENT_THREAD.with(|ct| {
+        let borrowed = ct.borrow();
+        let mut thread_mut = borrowed.as_ref().unwrap().mutable.lock().unwrap();
+        thread_mut.tenant_id = tenant_id;
+        thread_mut.timeline_id = timeline_id;
+    });
+}
+
 /// Is there a thread running that matches the criteria

 /// Signal and wait for threads to shut down.
@@ -285,9 +298,10 @@ pub fn shutdown_threads(

    let threads = THREADS.lock().unwrap();
    for thread in threads.values() {
+        let thread_mut = thread.mutable.lock().unwrap();
        if (kind.is_none() || Some(thread.kind) == kind)
-            && (tenant_id.is_none() || thread.tenant_id == tenant_id)
-            && (timeline_id.is_none() || thread.timeline_id == timeline_id)
+            && (tenant_id.is_none() || thread_mut.tenant_id == tenant_id)
+            && (timeline_id.is_none() || thread_mut.timeline_id == timeline_id)
        {
            thread.shutdown_requested.store(true, Ordering::Relaxed);
            // FIXME: handle error?
@@ -298,8 +312,10 @@ pub fn shutdown_threads(
    drop(threads);

    for thread in victim_threads {
+        let mut thread_mut = thread.mutable.lock().unwrap();
        info!("waiting for {} to shut down", thread.name);
-        if let Some(join_handle) = thread.join_handle.lock().unwrap().take() {
+        if let Some(join_handle) = thread_mut.join_handle.take() {
+            drop(thread_mut);
            let _ = join_handle.join();
        } else {
            // The thread had not even fully started yet. Or it was shut down
--- a/pageserver/src/timelines.rs
+++ b/pageserver/src/timelines.rs
@@ -283,9 +283,9 @@ fn bootstrap_timeline<R: Repository>(
    tli: ZTimelineId,
    repo: &R,
 ) -> Result<()> {
-    let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
-
-    let initdb_path = conf.tenant_path(&tenantid).join("tmp");
+    let initdb_path = conf
+        .tenant_path(&tenantid)
+        .join(format!("tmp-timeline-{}", tli));

    // Init temporarily repo to get bootstrap data
    run_initdb(conf, &initdb_path)?;
@@ -300,10 +300,15 @@ fn bootstrap_timeline<R: Repository>(
    let timeline = repo.create_empty_timeline(tli, lsn)?;
    let mut page_tline: DatadirTimeline<R> = DatadirTimeline::new(timeline, u64::MAX);
    import_datadir::import_timeline_from_postgres_datadir(&pgdata_path, &mut page_tline, lsn)?;
+
+    fail::fail_point!("before-checkpoint-new-timeline", |_| {
+        bail!("failpoint before-checkpoint-new-timeline");
+    });
+
    page_tline.tline.checkpoint(CheckpointConfig::Forced)?;

-    println!(
-        "created initial timeline {} timeline.lsn {}",
+    info!(
+        "created root timeline {} timeline.lsn {}",
        tli,
        page_tline.tline.get_last_record_lsn()
    );
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -336,7 +336,7 @@ impl VirtualFile {
        // library RwLock doesn't allow downgrading without releasing the lock,
        // and that doesn't seem worth the trouble.
        //
-        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implemenation is fair and
+        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
        // may deadlock on subsequent read calls.
        // Simply replacing all `RwLock` in project causes deadlocks, so use it sparingly.
        let result = STORAGE_IO_TIME
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -12,7 +12,7 @@
 //! The zenith Repository can store page versions in two formats: as
 //! page images, or a WAL records. WalIngest::ingest_record() extracts
 //! page images out of some WAL records, but most it stores as WAL
-//! records. If a WAL record modifies multple pages, WalIngest
+//! records. If a WAL record modifies multiple pages, WalIngest
 //! will call Repository::put_wal_record or put_page_image functions
 //! separately for each modified page.
 //!
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
--- a/pageserver/src/walreceiver/connection_handler.rs
+++ b/pageserver/src/walreceiver/connection_handler.rs
@@ -0,0 +1,405 @@
+//! Actual Postgres connection handler to stream WAL to the server.
+//! Runs as a separate, cancellable Tokio task.
+use std::{
+    str::FromStr,
+    sync::Arc,
+    time::{Duration, SystemTime},
+};
+
+use anyhow::{bail, ensure, Context};
+use bytes::BytesMut;
+use fail::fail_point;
+use postgres::{SimpleQueryMessage, SimpleQueryRow};
+use postgres_ffi::waldecoder::WalStreamDecoder;
+use postgres_protocol::message::backend::ReplicationMessage;
+use postgres_types::PgLsn;
+use tokio::{pin, select, sync::watch, time};
+use tokio_postgres::{replication::ReplicationStream, Client};
+use tokio_stream::StreamExt;
+use tracing::{debug, error, info, info_span, trace, warn, Instrument};
+use utils::{
+    lsn::Lsn,
+    pq_proto::ReplicationFeedback,
+    zid::{NodeId, ZTenantTimelineId},
+};
+
+use crate::{
+    http::models::WalReceiverEntry,
+    repository::{Repository, Timeline},
+    tenant_mgr,
+    walingest::WalIngest,
+};
+
+#[derive(Debug, Clone)]
+pub enum WalConnectionEvent {
+    Started,
+    NewWal(ReplicationFeedback),
+    End(Result<(), String>),
+}
+
+/// A wrapper around a standalone Tokio task, to poll its updates or cancel the task.
+#[derive(Debug)]
+pub struct WalReceiverConnection {
+    handle: tokio::task::JoinHandle<()>,
+    cancellation: watch::Sender<()>,
+    events_receiver: watch::Receiver<WalConnectionEvent>,
+}
+
+impl WalReceiverConnection {
+    /// Initializes the connection task, returning a set of handles on top of it.
+    /// The task is started immediately on creation; it fails if no connection is established within the given timeout.
+    pub fn open(
+        id: ZTenantTimelineId,
+        safekeeper_id: NodeId,
+        wal_producer_connstr: String,
+        connect_timeout: Duration,
+    ) -> Self {
+        let (cancellation, mut cancellation_receiver) = watch::channel(());
+        let (events_sender, events_receiver) = watch::channel(WalConnectionEvent::Started);
+
+        let handle = tokio::spawn(
+            async move {
+                let connection_result = handle_walreceiver_connection(
+                    id,
+                    &wal_producer_connstr,
+                    &events_sender,
+                    &mut cancellation_receiver,
+                    connect_timeout,
+                )
+                .await
+                .map_err(|e| {
+                    format!("Walreceiver connection for id {id} failed with error: {e:#}")
+                });
+
+                match &connection_result {
+                    Ok(()) => {
+                        debug!("Walreceiver connection for id {id} ended successfully")
+                    }
+                    Err(e) => warn!("{e}"),
+                }
+                events_sender
+                    .send(WalConnectionEvent::End(connection_result))
+                    .ok();
+            }
+            .instrument(info_span!("safekeeper_handle", sk = %safekeeper_id)),
+        );
+
+        Self {
+            handle,
+            cancellation,
+            events_receiver,
+        }
+    }
+
+    /// Polls for the next WAL receiver event, if there's any available since the last check.
+    /// Blocks if there's no new event available, returns `None` if no new events will ever occur.
+    /// Only the last event is returned, all events received between observations are lost.
+    pub async fn next_event(&mut self) -> Option<WalConnectionEvent> {
+        match self.events_receiver.changed().await {
+            Ok(()) => Some(self.events_receiver.borrow().clone()),
+            Err(_cancellation_error) => None,
+        }
+    }
+
+    /// Gracefully stops the current WAL streaming task: signals cancellation, then waits for the task to finish.
+    pub async fn shutdown(&mut self) -> anyhow::Result<()> {
+        self.cancellation.send(()).ok();
+        let handle = &mut self.handle;
+        handle
+            .await
+            .context("Failed to join on a walreceiver connection task")?;
+        Ok(())
+    }
+}
+
+async fn handle_walreceiver_connection(
+    id: ZTenantTimelineId,
+    wal_producer_connstr: &str,
+    events_sender: &watch::Sender<WalConnectionEvent>,
+    cancellation: &mut watch::Receiver<()>,
+    connect_timeout: Duration,
+) -> anyhow::Result<()> {
+    // Connect to the database in replication mode.
+    info!("connecting to {wal_producer_connstr}");
+    let connect_cfg =
+        format!("{wal_producer_connstr} application_name=pageserver replication=true");
+
+    let (mut replication_client, connection) = time::timeout(
+        connect_timeout,
+        tokio_postgres::connect(&connect_cfg, postgres::NoTls),
+    )
+    .await
+    .context("Timed out while waiting for walreceiver connection to open")?
+    .context("Failed to open walreceiver conection")?;
+    // The connection object performs the actual communication with the database,
+    // so spawn it off to run on its own.
+    let mut connection_cancellation = cancellation.clone();
+    tokio::spawn(
+        async move {
+            info!("connected!");
+            select! {
+                    connection_result = connection => match connection_result{
+                            Ok(()) => info!("Walreceiver db connection closed"),
+                            Err(connection_error) => {
+                                if connection_error.is_closed() {
+                                    info!("Connection closed regularly: {connection_error}")
+                                } else {
+                                    warn!("Connection aborted: {connection_error}")
+                                }
+                            }
+                        },
+
+                    _ = connection_cancellation.changed() => info!("Connection cancelled"),
+            }
+        }
+        .instrument(info_span!("safekeeper_handle_db")),
+    );
+
+    // Immediately increment the gauge, then create a job to decrement it on task exit.
+    // One of the pros of `defer!` is that this will *most probably*
+    // get called, even in presence of panics.
+    let gauge = crate::LIVE_CONNECTIONS_COUNT.with_label_values(&["wal_receiver"]);
+    gauge.inc();
+    scopeguard::defer! {
+        gauge.dec();
+    }
+
+    let identify = identify_system(&mut replication_client).await?;
+    info!("{identify:?}");
+    let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
+    let mut caught_up = false;
+    let ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    } = id;
+
+    let (repo, timeline) = tokio::task::spawn_blocking(move || {
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
+            .with_context(|| format!("no repository found for tenant {tenant_id}"))?;
+        let timeline = tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id)
+            .with_context(|| {
+                format!("local timeline {timeline_id} not found for tenant {tenant_id}")
+            })?;
+        Ok::<_, anyhow::Error>((repo, timeline))
+    })
+    .await
+    .with_context(|| format!("Failed to spawn blocking task to get repository and timeline for tenant {tenant_id} timeline {timeline_id}"))??;
+
+    //
+    // Start streaming the WAL, from where we left off previously.
+    //
+    // If we had previously received WAL up to some point in the middle of a WAL record, we
+    // better start from the end of last full WAL record, not in the middle of one.
+    let mut last_rec_lsn = timeline.get_last_record_lsn();
+    let mut startpoint = last_rec_lsn;
+
+    if startpoint == Lsn(0) {
+        bail!("No previous WAL position");
+    }
+
+    // There might be some padding after the last full record, skip it.
+    startpoint += startpoint.calc_padding(8u32);
+
+    info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, server is at {end_of_wal}...");
+
+    let query = format!("START_REPLICATION PHYSICAL {startpoint}");
+
+    let copy_stream = replication_client.copy_both_simple(&query).await?;
+    let physical_stream = ReplicationStream::new(copy_stream);
+    pin!(physical_stream);
+
+    let mut waldecoder = WalStreamDecoder::new(startpoint);
+
+    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint)?;
+
+    while let Some(replication_message) = {
+        select! {
+            // check for shutdown first
+            biased;
+            _ = cancellation.changed() => {
+                info!("walreceiver interrupted");
+                None
+            }
+            replication_message = physical_stream.next() => replication_message,
+        }
+    } {
+        let replication_message = replication_message?;
+        let status_update = match replication_message {
+            ReplicationMessage::XLogData(xlog_data) => {
+                // Pass the WAL data to the decoder, and see if we can decode
+                // more records as a result.
+                let data = xlog_data.data();
+                let startlsn = Lsn::from(xlog_data.wal_start());
+                let endlsn = startlsn + data.len() as u64;
+
+                trace!("received XLogData between {startlsn} and {endlsn}");
+
+                waldecoder.feed_bytes(data);
+
+                while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
+                    let _enter = info_span!("processing record", lsn = %lsn).entered();
+
+                    // It is important to deal with the aligned records as lsn in getPage@LSN is
+                    // aligned and can be several bytes bigger. Without this alignment we are
+                    // at risk of hitting a deadlock.
+                    ensure!(lsn.is_aligned());
+
+                    walingest.ingest_record(&timeline, recdata, lsn)?;
+
+                    fail_point!("walreceiver-after-ingest");
+
+                    last_rec_lsn = lsn;
+                }
+
+                if !caught_up && endlsn >= end_of_wal {
+                    info!("caught up at LSN {endlsn}");
+                    caught_up = true;
+                }
+
+                let timeline_to_check = Arc::clone(&timeline.tline);
+                tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
+                    .await
+                    .with_context(|| {
+                        format!("Spawned checkpoint check task panicked for timeline {id}")
+                    })?
+                    .with_context(|| {
+                        format!("Failed to check checkpoint distance for timeline {id}")
+                    })?;
+
+                Some(endlsn)
+            }
+
+            ReplicationMessage::PrimaryKeepAlive(keepalive) => {
+                let wal_end = keepalive.wal_end();
+                let timestamp = keepalive.timestamp();
+                let reply_requested = keepalive.reply() != 0;
+
+                trace!("received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})");
+
+                if reply_requested {
+                    Some(last_rec_lsn)
+                } else {
+                    None
+                }
+            }
+
+            _ => None,
+        };
+
+        if let Some(last_lsn) = status_update {
+            let remote_index = repo.get_remote_index();
+            let timeline_remote_consistent_lsn = remote_index
+                .read()
+                .await
+                // here we either do not have this timeline in remote index
+                // or there were no checkpoints for it yet
+                .timeline_entry(&ZTenantTimelineId {
+                    tenant_id,
+                    timeline_id,
+                })
+                .map(|remote_timeline| remote_timeline.metadata.disk_consistent_lsn())
+                // no checkpoint was uploaded
+                .unwrap_or(Lsn(0));
+
+            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
+            let write_lsn = u64::from(last_lsn);
+            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
+            let flush_lsn = u64::from(timeline.tline.get_disk_consistent_lsn());
+            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
+            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
+            let apply_lsn = u64::from(timeline_remote_consistent_lsn);
+            let ts = SystemTime::now();
+
+            // Update the current WAL receiver's data stored inside the global hash table `WAL_RECEIVER_ENTRIES`
+            {
+                super::WAL_RECEIVER_ENTRIES.write().await.insert(
+                    id,
+                    WalReceiverEntry {
+                        wal_producer_connstr: Some(wal_producer_connstr.to_owned()),
+                        last_received_msg_lsn: Some(last_lsn),
+                        last_received_msg_ts: Some(
+                            ts.duration_since(SystemTime::UNIX_EPOCH)
+                                .expect("Received message time should be before UNIX EPOCH!")
+                                .as_micros(),
+                        ),
+                    },
+                );
+            }
+
+            // Send zenith feedback message.
+            // Regular standby_status_update fields are put into this message.
+            let zenith_status_update = ReplicationFeedback {
+                current_timeline_size: timeline.get_current_logical_size() as u64,
+                ps_writelsn: write_lsn,
+                ps_flushlsn: flush_lsn,
+                ps_applylsn: apply_lsn,
+                ps_replytime: ts,
+            };
+
+            debug!("zenith_status_update {zenith_status_update:?}");
+
+            let mut data = BytesMut::new();
+            zenith_status_update.serialize(&mut data)?;
+            physical_stream
+                .as_mut()
+                .zenith_status_update(data.len() as u64, &data)
+                .await?;
+            if let Err(e) = events_sender.send(WalConnectionEvent::NewWal(zenith_status_update)) {
+                warn!("Wal connection event listener dropped, aborting the connection: {e}");
+                return Ok(());
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Data returned from the postgres `IDENTIFY_SYSTEM` command
+///
+/// See the [postgres docs] for more details.
+///
+/// [postgres docs]: https://www.postgresql.org/docs/current/protocol-replication.html
+#[derive(Debug)]
+// As of nightly 2021-09-11, fields that are only read by the type's `Debug` impl still count as
+// unused. Relevant issue: https://github.com/rust-lang/rust/issues/88900
+#[allow(dead_code)]
+struct IdentifySystem {
+    systemid: u64,
+    timeline: u32,
+    xlogpos: PgLsn,
+    dbname: Option<String>,
+}
+
+/// There was a problem parsing the response to
+/// a postgres IDENTIFY_SYSTEM command.
+#[derive(Debug, thiserror::Error)]
+#[error("IDENTIFY_SYSTEM parse error")]
+struct IdentifyError;
+
+/// Run the postgres `IDENTIFY_SYSTEM` command
+async fn identify_system(client: &mut Client) -> anyhow::Result<IdentifySystem> {
+    let query_str = "IDENTIFY_SYSTEM";
+    let response = client.simple_query(query_str).await?;
+
+    // get(N) from row, then parse it as some destination type.
+    fn get_parse<T>(row: &SimpleQueryRow, idx: usize) -> Result<T, IdentifyError>
+    where
+        T: FromStr,
+    {
+        let val = row.get(idx).ok_or(IdentifyError)?;
+        val.parse::<T>().or(Err(IdentifyError))
+    }
+
+    // Extract the row contents into an IdentifySystem struct.
+    // `dbname` is optional: if it is missing or fails to parse, it becomes `None` instead of an error.
+    if let Some(SimpleQueryMessage::Row(first_row)) = response.get(0) {
+        Ok(IdentifySystem {
+            systemid: get_parse(first_row, 0)?,
+            timeline: get_parse(first_row, 1)?,
+            xlogpos: get_parse(first_row, 2)?,
+            dbname: get_parse(first_row, 3).ok(),
+        })
+    } else {
+        Err(IdentifyError.into())
+    }
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bojan Serafimov	0a59884b75	Update docs	2022-06-20 13:29:14 -04:00
Bojan Serafimov	1303cf77a3	Fix tests	2022-06-20 13:26:27 -04:00
Bojan Serafimov	3e647f8a27	WIP	2022-06-20 12:59:11 -04:00
Bojan Serafimov	8b38295f02	Return Err on shutdown	2022-06-20 10:33:05 -04:00
Bojan Serafimov	bdfe7dacef	Address comments	2022-06-20 10:17:46 -04:00
Bojan Serafimov	cdab7bc83b	Merge branch 'main' into basebackup-import	2022-06-18 15:21:50 -04:00
Bojan Serafimov	2ce653fd79	Add checksum todos	2022-06-18 11:19:02 -04:00
Bojan Serafimov	010132dbb0	Check file sizes	2022-06-18 11:14:21 -04:00
Bojan Serafimov	443e409950	Fix test	2022-06-18 10:48:21 -04:00
Bojan Serafimov	43d1531e66	Add example usage command	2022-06-17 22:13:19 -04:00
Bojan Serafimov	71afd06f83	Test failed import	2022-06-17 21:42:04 -04:00
Bojan Serafimov	45b470e206	Bump timeout	2022-06-17 16:30:29 -04:00
Bojan Serafimov	dc97f95636	Add docstring	2022-06-17 15:39:18 -04:00
Anastasia Lubennikova	11d7743b39	basebackup import fixes (#1955 )	2022-06-17 15:29:32 -04:00
Arthur Petukhovsky	f862373ac0	Fix WAL timeout in test_s3_wal_replay (#1953 )	2022-06-17 20:43:54 +03:00
Bojan Serafimov	0e556b2782	Error on tblspc file	2022-06-17 13:34:41 -04:00
Bojan Serafimov	4fff237f00	Flush layers	2022-06-17 13:18:51 -04:00
Arthur Petukhovsky	699f46cd84	Download WAL from S3 if it's not available in safekeeper dir (#1932 ) `send_wal.rs` and `WalReader` are now async. `test_s3_wal_replay` checks that WAL can be replayed after offloaded.	2022-06-17 15:33:39 +03:00
Bojan Serafimov	ba79946af1	Fix create_empty_timeline	2022-06-17 00:00:48 -04:00
Bojan Serafimov	e3ce99a711	Merge branch 'main' into basebackup-import	2022-06-16 22:00:21 -04:00
Bojan Serafimov	ee3462c6ea	Attempt to fix flaky test	2022-06-16 21:38:59 -04:00
Bojan Serafimov	290ad416a5	Remove repeated code	2022-06-16 18:31:07 -04:00
Anastasia Lubennikova	36ee182d26	Implement page servise 'fullbackup' endpoint (#1923 ) * Implement page servise 'fullbackup' endpoint that works like basebackup, but also sends relational files * Add test_runner/batch_others/test_fullbackup.py Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>	2022-06-16 14:07:11 +03:00
Anastasia Lubennikova	d11c9f9fcb	Use random ports for the proxy and local pg in tests Fixes #1931 Author: Dmitry Ivanov	2022-06-15 20:21:58 +03:00
Bojan Serafimov	670c8ab7be	Merge branch 'basebackup-import' of github.com:neondatabase/neon into basebackup-import	2022-06-14 17:08:31 -04:00
Bojan Serafimov	6463be13fc	Fix typo	2022-06-14 17:07:34 -04:00
Bojan Serafimov	05151e643f	Merge branch 'main' into basebackup-import	2022-06-14 17:05:52 -04:00
Anastasia Lubennikova	9ccb7b75a6	Fix import of multi-segment relation files	2022-06-14 21:42:07 +03:00
Bojan Serafimov	05b5ae32a1	Fix comment	2022-06-12 13:01:13 -04:00
Bojan Serafimov	23d4f12cdd	Handle shutdown	2022-06-12 12:37:52 -04:00
Bojan Serafimov	cd081280bf	Improve errors	2022-06-12 12:23:43 -04:00
Bojan Serafimov	909a0df048	Run yapf	2022-06-12 11:17:44 -04:00
Bojan Serafimov	557fde6c57	fmt + clippy	2022-06-12 10:52:02 -04:00
Bojan Serafimov	2283eb871a	Add assertion	2022-06-12 10:05:28 -04:00
Bojan Serafimov	938100058d	Don't read all into memory	2022-06-11 15:55:52 -04:00
Kirill Bulatov	d8a37452c8	Rename ZenithFeedback (#1912 )	2022-06-11 00:44:05 +03:00
Bojan Serafimov	978ef167e0	Test works now	2022-06-10 13:54:02 -04:00
Bojan Serafimov	a568c49111	WIP	2022-06-10 10:53:52 -04:00
Bojan Serafimov	ea97135fa8	Create user in test	2022-06-09 23:38:18 -04:00
Bojan Serafimov	f03c14170d	Fix bug. Should work now	2022-06-09 23:28:20 -04:00
Bojan Serafimov	31cf43724c	WIP	2022-06-09 22:51:32 -04:00
Bojan Serafimov	1380a1cce1	Pass lsn	2022-06-09 12:39:29 -04:00
chaitanya sharma	e1336f451d	renamed .zenith data-dir to .neon.	2022-06-09 18:19:18 +02:00
Arseny Sher	a4d8261390	Save Postgres log in test_find_end_of_wal_* tests.	2022-06-09 19:16:43 +04:00
Egor Suvorov	e2a5a31595	Safekeeper HTTP router: add comment about /v1/timeline	2022-06-09 17:14:46 +02:00
Egor Suvorov	0ac0fba77a	test_runner: test Safekeeper HTTP API Auth All endpoints except for POST /v1/timeline are tested, this one is not tested in any way yet. Three attempts for each endpoint: correctly authenticated, badly authenticated, unauthenticated.	2022-06-09 17:14:46 +02:00
Egor Suvorov	a001052cdd	test_runner: SafekeeperHttpClient: support auth	2022-06-09 17:14:46 +02:00
Egor Suvorov	1f1d852204	ZenithEnvBuilder: rename pageserver_auth_enabled --> auth_enabled	2022-06-09 17:14:46 +02:00
Egor Suvorov	f7b878611a	Implement JWT authentication in Safekeeper HTTP API (#1753 ) * `control_plane` crate (used by `neon_local`) now parses an `auth_enabled` bool for each Safekeeper * If auth is enabled, a Safekeeper is passed a path to a public key via a new command line argument * Added TODO comments to other places needing auth	2022-06-09 17:14:46 +02:00
Bojan Serafimov	5babab9461	Fix bug	2022-06-09 11:10:06 -04:00
Bojan Serafimov	7fca35e262	PG_VERSION is not a relfile	2022-06-09 10:57:10 -04:00
Bojan Serafimov	3dfdda4e49	Improve logs, fix bug	2022-06-09 10:53:13 -04:00
Arseny Sher	a51b2dac9a	Don't s3 offload from newly joined safekeeper not having required WAL. I made the check at launcher level with the perspective of generally moving election (decision who offloads) there. Also log timeline 'active' changes.	2022-06-09 18:30:16 +04:00
Bojan Serafimov	b6b4424673	commit changes	2022-06-09 10:29:08 -04:00
Bojan Serafimov	c59dea5390	Import relfiles, add todos	2022-06-09 00:09:51 -04:00
Bojan Serafimov	6a182b5c1b	Fix protocol bug	2022-06-08 20:10:43 -04:00
Bojan Serafimov	0277c37759	Don't compress tar	2022-06-08 19:31:23 -04:00
Bojan Serafimov	7fb732a39f	Create timeline if not exists	2022-06-08 19:24:34 -04:00
Bojan Serafimov	76b126be35	Implement Read	2022-06-08 18:41:38 -04:00
Bojan Serafimov	3e42367603	Copy tar reader	2022-06-08 15:57:01 -04:00
Bojan Serafimov	ecee80d1bf	Send tar	2022-06-08 14:37:46 -04:00
Bojan Serafimov	28acefb31f	Add todos	2022-06-08 11:50:01 -04:00
Bojan Serafimov	db4be89577	Start working on api	2022-06-08 11:41:30 -04:00
Bojan Serafimov	a39501beee	Add todos	2022-06-08 10:36:35 -04:00
Thang Pham	e22d9cee3a	fix `ZeroDivisionError` in `scripts/generate_perf_report_page` (#1906 ) Fixes the `ZeroDivisionError` error by adding `EPS=1e-6` when doing the calculation.	2022-06-08 09:15:12 -04:00
Arthur Petukhovsky	a01999bc4a	Replace most common remote logs with metrics (#1909 )	2022-06-08 13:36:49 +03:00
chaitanya sharma	32e64afd54	Use better parallel build instructions in readme.md (#1908 )	2022-06-08 11:25:37 +03:00
Kirill Bulatov	8a53472e4f	Force etcd broker keys to not to intersect	2022-06-08 11:21:05 +03:00
Bojan Serafimov	0677fb7ae7	Add neon_local command	2022-06-07 22:08:35 -04:00
Bojan Serafimov	99260b18ab	Add test	2022-06-07 19:16:29 -04:00
Dmitry Rodionov	6e26588d17	Allow to customize shutdown condition in PostgresBackend Use it in PageServerHandler to check per thread shutdown condition from thread_mgr which takes into account tenants and timelines	2022-06-07 22:11:54 +03:00
Arseny Sher	0b93253b3c	Fix leaked keepalive task in s3 offloading leader election. I still don't like the surroundings and feel we'd better get away without using election API at all, but this is a quick fix to keep CI green. ref #1815	2022-06-07 15:17:57 +04:00
Dmitry Rodionov	7dc6beacbd	make it possible to associate thread with a tenant after thread start	2022-06-07 12:59:35 +03:00
Thang Pham	6cfebc096f	Add read/write throughput performance tests (#1883 ) Part of #1467 This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) obtained when running PG benchmarks against Neon and vanilla PG to measure the read/write throughput of the DB.	2022-06-06 12:32:10 -04:00
KlimentSerafimov	fecad1ca34	Resolving issue #1745 . Added cluster option for SNI data (#1813 ) * Added project option in case SNI data is missing. Resolving issue #1745. * Added invariant checking for project name: if both sni_data and project_name are available then they should match.	2022-06-06 08:14:41 -04:00
bojanserafimov	92de8423af	Remove dead code (#1886 )	2022-06-05 09:18:11 -04:00
Dmitry Rodionov	e442f5357b	unify two identical failpoints in flush_frozen_layer probably is a merge artfact	2022-06-03 19:36:09 +03:00
Arseny Sher	5a723d44cd	Parametrize test_normal_work. I like to run small test locally, but let's avoid duplication.	2022-06-03 20:32:53 +04:00
Kirill Bulatov	2623193876	Remove pageserver_connstr from WAL stream logic	2022-06-03 17:30:36 +03:00
Arseny Sher	70a53c4b03	Get backup test_safekeeper_normal_work, but skip by default. It is handy for development.	2022-06-03 16:12:14 +04:00
Arseny Sher	9e108102b3	Silence etcd safekeeper info key parse errors. When we subscribe to everything, it is ok to receive not only safekeeper timeline updates.	2022-06-03 16:12:14 +04:00
huming	9c846a93e8	chore(doc)	2022-06-03 14:24:27 +03:00
Kirill Bulatov	c5007d3916	Remove unused module	2022-06-03 00:23:13 +03:00
Kirill Bulatov	5b06599770	Simplify etcd key regex parsing	2022-06-03 00:23:13 +03:00
Kirill Bulatov	1d16ee92d4	Fix the Lsn difference reconnection	2022-06-03 00:23:13 +03:00
Kirill Bulatov	7933804284	Fix and test regex parsing	2022-06-03 00:23:13 +03:00
Kirill Bulatov	a91e0c299d	Reproduce etcd parsing bug in Python tests	2022-06-03 00:23:13 +03:00
Kirill Bulatov	b0c4ec0594	Log storage sync and etcd events a bit better	2022-06-03 00:23:13 +03:00
bojanserafimov	90e2c9ee1f	Rename zenith to neon in python tests (#1871 )	2022-06-02 16:21:28 -04:00
Egor Suvorov	aba5e5f8b5	GitHub Actions: pin Rust version to 1.58 like on CircleCI * Fix failing `cargo clippy` while we're here. The behavior has been changed in Rust 1.60: https://github.com/rust-lang/rust-clippy/issues/8928 * Add Rust version to the Cargo deps cache key	2022-06-02 17:45:53 +02:00
Dmitry Rodionov	b155fe0e2f	avoid perf test result context for pg regress	2022-06-02 17:41:34 +03:00
Ryan Russell	c71faae2c6	Docs readability cont Signed-off-by: Ryan Russell <git@ryanrussell.org>	2022-06-02 15:05:12 +02:00
Kirill Bulatov	de7eda2dc6	Fix url path printing	2022-06-02 00:48:10 +03:00
Dmitry Rodionov	1188c9a95c	remove extra span as this code is already covered by create timeline span E g this log line contains duplicated data: INFO /timeline_create{tenant=8d367870988250a755101b5189bbbc17 new_timeline=Some(27e2580f51f5660642d8ce124e9ee4ac) lsn=None}: bootstrapping{timeline=27e2580f51f5660642d8ce124e9ee4ac tenant=8d367870988250a755101b5189bbbc17}: created root timeline 27e2580f51f5660642d8ce124e9ee4ac timeline.lsn 0/16960E8 this avoids variable duplication in `bootstrapping` subspan	2022-06-01 19:29:17 +03:00
Kirill Bulatov	e5cb727572	Replace callmemaybe with etcd subscriptions on safekeeper timeline info	2022-06-01 16:07:04 +03:00
Dmitry Rodionov	6623c5b9d5	add installation instructions for Fedora Linux	2022-06-01 15:59:53 +03:00
Anton Chaporgin	e5a2b0372d	remove sk1 from inventory (#1845 ) https://github.com/neondatabase/cloud/issues/1454	2022-06-01 15:40:45 +03:00
Alexey Kondratov	af6143ea1f	Install missing openssl packages in the Github Actions workflow	2022-05-31 23:12:30 +03:00
Alexey Kondratov	ff233cf4c2	Use :local compute-tools tag to build compute-node image	2022-05-31 23:12:30 +03:00
Dmitry Rodionov	b1b67cc5a0	improve test normal work to start several computes	2022-05-31 22:42:11 +03:00
bojanserafimov	ca10cc12c1	Close file descriptors for redo process (#1834 )	2022-05-31 14:14:09 -04:00
Thang Pham	c97cd684e0	Use `HOMEBREW_PREFIX` instead of hard-coded path (#1833 )	2022-05-31 11:20:51 -04:00
Ryan Russell	54e163ac03	Improve Readability in Docs Signed-off-by: Ryan Russell <ryanrussell@users.noreply.github.com>	2022-05-31 17:22:47 +03:00
Konstantin Knizhnik	595a6bc1e1	Bump vendor/postgres to fix basebackup LSN comparison. (#1835 ) Co-authored-by: Arseny Sher <sher-ars@yandex.ru>	2022-05-31 14:47:06 +03:00
Arthur Petukhovsky	c3e0b6c839	Implement timeline-based metrics in safekeeper (#1823 ) Now there's timelines metrics collector, which goes through all timelines and reports metrics only for active ones	2022-05-31 11:10:50 +03:00
Arseny Sher	36281e3b47	Extend test_wal_backup with compute restart.	2022-05-30 13:57:17 +04:00
Anastasia Lubennikova	e014cb6026	rename zenith.zenith_tenant to neon.tenant_id in test	2022-05-30 12:24:44 +03:00
Anastasia Lubennikova	915e5c9114	Rename 'zenith_admin' to 'cloud_admin' on compute node start	2022-05-30 11:11:01 +03:00
Anastasia Lubennikova	67d6ff4100	Rename custom GUCs: - zenith.zenith_tenant -> neon.tenant_id - zenith.zenith_timeline -> neon.timeline_id	2022-05-30 11:11:01 +03:00
Anastasia Lubennikova	6a867bce6d	Rename 'zenith_admin' role to 'cloud_admin'	2022-05-30 11:11:01 +03:00
Anastasia Lubennikova	751f1191b4	Rename 'wal_acceptors' GUC to 'safekeepers'	2022-05-30 11:11:01 +03:00
Anastasia Lubennikova	3accde613d	Rename contrib/zenith to contrib/neon. Rename custom GUCs: - zenith.page_server_connstring -> neon.pageserver_connstring - zenith.zenith_tenant -> neon.tenantid - zenith.zenith_timeline -> neon.timelineid - zenith.max_cluster_size -> neon.max_cluster_size	2022-05-30 11:11:01 +03:00
Heikki Linnakangas	e3b320daab	Remove obsolete Dockerfile.alpine It hasn't been used for anything for a long time. The comments still talked about librocksdb, which we also haven't used for a long time.	2022-05-28 21:22:19 +03:00
Heikki Linnakangas	4b4d3073b8	Fix misc typos	2022-05-28 14:56:23 +03:00
Kian-Meng Ang	f1c51a1267	Fix typos	2022-05-28 14:02:05 +03:00
bojanserafimov	500e8772f0	Add quick-start guide in readme (#1816 )	2022-05-27 17:48:11 -04:00
Dmitry Ivanov	b3ec6e0661	[proxy] Propagate SASL/SCRAM auth errors to the user This will replace the vague (and incorrect) "Internal error" with a nice and helpful authentication error, e.g. "password doesn't match".	2022-05-27 21:50:43 +03:00
Dmitry Ivanov	5d813f9738	[proxy] Refactoring This patch attempts to fix some of the technical debt we had to introduce in previous patches.	2022-05-27 21:50:43 +03:00
Thang Pham	757746b571	Fix `test_pageserver_http_get_wal_receiver_success` flaky test. (#1786 ) Fixes #1768. ## Context Previously, to test `get_wal_receiver` API, we make run some DB transactions then call the API to check the latest message's LSN from the WAL receiver. However, this test won't work because it's not guaranteed that the WAL receiver will get the latest WAL from the postgres/safekeeper at the time of making the API call. This PR resolves the above issue by adding a "poll and wait" code that waits to retrieve the latest data from the WAL receiver. This PR also fixes a bug that tries to compare two hex LSNs, should convert to number before the comparison. See: https://github.com/neondatabase/neon/issues/1768#issuecomment-1133752122.	2022-05-27 13:33:53 -04:00
Arseny Sher	cb8bf1beb6	Prevent commit_lsn <= flush_lsn violation after `a42eba3cd7`. Nothing complained about that yet, but we definitely don't hold at least one assert, so let's keep it this way until better version.	2022-05-27 20:23:30 +04:00
Thang Pham	75f71a6380	Handle broken timelines on startup (#1809 ) Resolve #1663. ## Changes - ignore a "broken" [1] timeline on page server startup - fix the race condition when creating multiple timelines in parallel for a tenant - added tests for the above changes [1]: a timeline is marked as "broken" if either - failed to load the timeline's metadata or - the timeline's disk consistent LSN is zero	2022-05-27 11:43:06 -04:00
Arseny Sher	54b75248ff	s3 WAL offloading staging review. - Uncomment accidently `self.keep_alive.abort()` commented line, due to this task never finished, which blocked launcher. - Mess up with initialization one more time, to fix offloader trying to back up segment 0. Now we initialize all required LSNs in handle_elected, where we learn start LSN for the first time. - Fix blind attempt to provide safekeeper service file with remote storage params.	2022-05-27 14:02:52 +04:00
Arseny Sher	0e1bd57c53	Add WAL offloading to s3 on safekeepers. Separate task is launched for each timeline and stopped when timeline doesn't need offloading. Decision who offloads is done through etcd leader election; currently there is no pre condition for participating, that's a TODO. neon_local and tests infrastructure for remote storage in safekeepers added, along with the test itself. ref #1009 Co-authored-by: Anton Shyrabokau <ahtoxa@Antons-MacBook-Pro.local>	2022-05-27 06:19:23 +04:00
bojanserafimov	1d71949c51	Change proxy welcome message (#1808 ) Remove zenith sun and outdated instructions around .pgpass	2022-05-26 14:59:03 -04:00
Thang Pham	7d565aa4b9	Reduce the logging level when PG client disconnected to `INFO` (#1713 ) Fixes #1683.	2022-05-26 12:21:15 -04:00
Dmitry Rodionov	72a7220dc8	Tidy up some log messages * turn println into an info with proper message * rename new_local_timeline to load_local_timeline because it does not create new timeline, it registers timeline that exists on disk in pageserver in-memory structures	2022-05-26 18:37:40 +03:00
Konstantin Knizhnik	b0d114ee3f	Initialize last_freeze_at with disk consistent LSN to avoid creation of small L0 delta layer on startup refer #1736	2022-05-26 15:42:18 +03:00
Dmitry Rodionov	38f2d165b7	allow TLS 1.2 in proxy to be compatible with older client libraries	2022-05-26 13:21:29 +03:00
Dmitry Rodionov	5a5737278e	add simple metrics for remote storage operations track number of operations and number of their failures	2022-05-26 01:24:52 +03:00
Kirill Bulatov	06f5e017a1	Move rustfmt check to GH Action	2022-05-26 01:03:48 +03:00
Kirill Bulatov	887b0e14d9	Run basic checks on PRs and pushes to main only	2022-05-26 01:03:48 +03:00