Fix the markdown rendering of the 004-durability.md RFC

Continue with pageserver startup even if loading some tenants fails.
Fixes https://github.com/neondatabase/neon/issues/1664
2026-02-09 21:50:37 +00:00 · 2022-05-16 10:11:39 +03:00 · 2022-05-15 00:25:38 +03:00 · 2022-05-13 21:41:00 +03:00 · 2022-05-13 20:41:54 +03:00 · 2022-05-13 17:36:18 +02:00
265 changed files with 19162 additions and 9124 deletions
--- a/.circleci/ansible/.gitignore
+++ b/.circleci/ansible/.gitignore
@@ -1,2 +1,4 @@
 zenith_install.tar.gz
 .zenith_current_version
+neon_install.tar.gz
+.neon_current_version
--- a/.circleci/ansible/deploy.yaml
+++ b/.circleci/ansible/deploy.yaml
@@ -1,14 +1,14 @@
- name: Upload Zenith binaries
+- name: Upload Neon binaries
  hosts: storage
  gather_facts: False
  remote_user: admin

  tasks:

-    - name: get latest version of Zenith binaries
+    - name: get latest version of Neon binaries
      register: current_version_file
      set_fact:
-        current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
+        current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
      tags:
      - pageserver
      - safekeeper
@@ -19,11 +19,11 @@
      - pageserver
      - safekeeper

-    - name: upload and extract Zenith binaries to /usr/local
+    - name: upload and extract Neon binaries to /usr/local
      ansible.builtin.unarchive:
        owner: root
        group: root
-        src: zenith_install.tar.gz
+        src: neon_install.tar.gz
        dest: /usr/local
      become: true
      tags:
@@ -63,21 +63,18 @@
      tags:
      - pageserver

-    # It seems that currently S3 integration does not play well
-    # even with fresh pageserver without a burden of old data.
-    # TODO: turn this back on once the issue is solved.
-    # - name: update remote storage (s3) config
-    #   lineinfile:
-    #     path: /storage/pageserver/data/pageserver.toml
-    #     line: "{{ item }}"
-    #   loop:
-    #     - "[remote_storage]"
-    #     - "bucket_name = '{{ bucket_name }}'"
-    #     - "bucket_region = '{{ bucket_region }}'"
-    #     - "prefix_in_bucket = '{{ inventory_hostname }}'"
-    #   become: true
-    #   tags:
-    #   - pageserver
+    - name: update remote storage (s3) config
+      lineinfile:
+        path: /storage/pageserver/data/pageserver.toml
+        line: "{{ item }}"
+      loop:
+        - "[remote_storage]"
+        - "bucket_name = '{{ bucket_name }}'"
+        - "bucket_region = '{{ bucket_region }}'"
+        - "prefix_in_bucket = '{{ inventory_hostname }}'"
+      become: true
+      tags:
+      - pageserver

    - name: upload systemd service definition
      ansible.builtin.template:
@@ -116,6 +113,30 @@

  tasks:

+    - name: upload init script
+      when: console_mgmt_base_url is defined
+      ansible.builtin.template:
+        src: scripts/init_safekeeper.sh
+        dest: /tmp/init_safekeeper.sh
+        owner: root
+        group: root
+        mode: '0755'
+      become: true
+      tags:
+      - safekeeper
+
+    - name: init safekeeper
+      when: console_mgmt_base_url is defined  # the init script is only uploaded when the console URL is set
+      shell:
+        cmd: /tmp/init_safekeeper.sh
+        creates: "/storage/safekeeper/data/safekeeper.id"  # skip when the safekeeper is already initialized
+      environment:
+        ZENITH_REPO_DIR: "/storage/safekeeper/data"
+        LD_LIBRARY_PATH: "/usr/local/lib"
+      become: true
+      tags:
+      - safekeeper
+
    # in the future safekeepers should discover pageservers byself
    # but currently use first pageserver that was discovered
    - name: set first pageserver var for safekeepers
--- a/.circleci/ansible/get_binaries.sh
+++ b/.circleci/ansible/get_binaries.sh
@@ -4,10 +4,10 @@ set -e

 RELEASE=${RELEASE:-false}

-# look at docker hub for latest tag fo zenith docker image
+# look at docker hub for latest tag for neon docker image
 if [ "${RELEASE}" = "true" ]; then
    echo "search latest relase tag"
-    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
        exit 1
@@ -16,7 +16,7 @@ if [ "${RELEASE}" = "true" ]; then
    fi
 else
    echo "search latest dev tag"
-    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
        exit 1
@@ -28,25 +28,25 @@ fi
 echo "found ${VERSION}"

 # do initial cleanup
-rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
-mkdir zenith_install
+rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
+mkdir neon_install

 # retrive binaries from docker image
 echo "getting binaries from docker image"
-docker pull --quiet zenithdb/zenith:${TAG}
-ID=$(docker create zenithdb/zenith:${TAG})
+docker pull --quiet neondatabase/neon:${TAG}
+ID=$(docker create neondatabase/neon:${TAG})
 docker cp ${ID}:/data/postgres_install.tar.gz .
-tar -xzf postgres_install.tar.gz -C zenith_install
-docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
-docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
-docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
-docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
+tar -xzf postgres_install.tar.gz -C neon_install
+docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
+docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
+docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
+docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
 docker rm -vf ${ID}

 # store version to file (for ansible playbooks) and create binaries tarball
-echo ${VERSION} > zenith_install/.zenith_current_version
-echo ${VERSION} > .zenith_current_version
-tar -czf zenith_install.tar.gz -C zenith_install .
+echo ${VERSION} > neon_install/.neon_current_version
+echo ${VERSION} > .neon_current_version
+tar -czf neon_install.tar.gz -C neon_install .

 # do final cleaup
-rm -rf zenith_install postgres_install.tar.gz
+rm -rf neon_install postgres_install.tar.gz
--- a/.circleci/ansible/production.hosts
+++ b/.circleci/ansible/production.hosts
@@ -14,3 +14,5 @@ safekeepers
 console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
+etcd_endpoints        = etcd-release.local:2379
+safekeeper_enable_s3_offload = true
--- a/.circleci/ansible/scripts/init_safekeeper.sh
+++ b/.circleci/ansible/scripts/init_safekeeper.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# get instance id from meta-data service
+INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
+
+# store fqdn hostname in var
+HOST=$(hostname -f)
+
+
+cat <<EOF | tee /tmp/payload
+{
+  "version": 1,
+  "host": "${HOST}",
+  "port": 6500,
+  "region_id": {{ console_region_id }},
+  "instance_id": "${INSTANCE_ID}",
+  "http_host": "${HOST}",
+  "http_port": 7676
+}
+EOF
+
+# check if safekeeper already registered or not
+if ! curl -sf -X PATCH -d '{}' "{{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID}" -o /dev/null; then
+
+    # not registered, so register it now; abort if the request fails or the reply has no ID (jq -e exits non-zero)
+    ID=$(curl -sf -X POST "{{ console_mgmt_base_url }}/api/v1/safekeepers" -d@/tmp/payload | jq -er '.ID') || exit 1
+
+    # init safekeeper with the id assigned by the console
+    sudo -u safekeeper /usr/local/bin/safekeeper --id "${ID}" --init -D /storage/safekeeper/data
+fi
--- a/.circleci/ansible/staging.hosts
+++ b/.circleci/ansible/staging.hosts
@@ -4,8 +4,8 @@ zenith-us-stage-ps-2 console_region_id=27

 [safekeepers]
 zenith-us-stage-sk-1 console_region_id=27
-zenith-us-stage-sk-2 console_region_id=27
-zenith-us-stage-sk-3 console_region_id=27
+zenith-us-stage-sk-4 console_region_id=27
+zenith-us-stage-sk-5 console_region_id=27

 [storage:children]
 pageservers
@@ -15,3 +15,5 @@ safekeepers
 console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
+etcd_endpoints        = etcd-staging.local:2379
+safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/systemd/safekeeper.service
+++ b/.circleci/ansible/systemd/safekeeper.service
@@ -6,7 +6,7 @@ After=network.target auditd.service
 Type=simple
 User=safekeeper
 Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --enable-s3-offload={{ safekeeper_enable_s3_offload }}
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,18 +1,18 @@
 version: 2.1

 executors:
-  zenith-xlarge-executor:
+  neon-xlarge-executor:
    resource_class: xlarge
    docker:
      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
-      - image: zimg/rust:1.56
-  zenith-executor:
+      - image: zimg/rust:1.58
+  neon-executor:
    docker:
-      - image: zimg/rust:1.56
+      - image: zimg/rust:1.58

 jobs:
  check-codestyle-rust:
-    executor: zenith-xlarge-executor
+    executor: neon-xlarge-executor
    steps:
      - checkout
      - run:
@@ -22,7 +22,7 @@ jobs:

  # A job to build postgres
  build-postgres:
-    executor: zenith-xlarge-executor
+    executor: neon-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -67,9 +67,9 @@ jobs:
          paths:
            - tmp_install

-  # A job to build zenith rust code
-  build-zenith:
-    executor: zenith-xlarge-executor
+  # A job to build Neon rust code
+  build-neon:
+    executor: neon-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -113,7 +113,7 @@ jobs:
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
              cov_prefix=()
-              CARGO_FLAGS=--release
+              CARGO_FLAGS="--release --features profiling"
            fi

            export CARGO_INCREMENTAL=0
@@ -121,7 +121,7 @@ jobs:
            export RUSTC_WRAPPER=cachepot
            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
-            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests
+            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
            cachepot -s

      - save_cache:
@@ -132,20 +132,6 @@ jobs:
            - ~/.cargo/git
            - target

-        # Run style checks
-        # has to run separately from cargo fmt section
-        # since needs to run with dependencies
-      - run:
-          name: cargo clippy
-          command: |
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
-            fi
-
-            "${cov_prefix[@]}" ./run_clippy.sh
-
        # Run rust unit tests
      - run:
          name: cargo test
@@ -223,7 +209,7 @@ jobs:
            - "*"

  check-codestyle-python:
-    executor: zenith-executor
+    executor: neon-executor
    steps:
      - checkout
      - restore_cache:
@@ -246,7 +232,7 @@ jobs:
          command: poetry run mypy .

  run-pytest:
-    executor: zenith-executor
+    executor: neon-executor
    parameters:
      # pytest args to specify the tests to run.
      #
@@ -369,7 +355,7 @@ jobs:
          when: always
          command: |
            du -sh /tmp/test_output/*
-            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
+            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" -delete
            du -sh /tmp/test_output/*
      - store_artifacts:
          path: /tmp/test_output
@@ -390,7 +376,7 @@ jobs:
            - "*"

  coverage-report:
-    executor: zenith-xlarge-executor
+    executor: neon-xlarge-executor
    steps:
      - attach_workspace:
          at: /tmp/zenith
@@ -405,7 +391,7 @@ jobs:
      - run:
          name: Build coverage report
          command: |
-            COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
+            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1

            scripts/coverage \
              --dir=/tmp/zenith/coverage report \
@@ -416,11 +402,11 @@ jobs:
          name: Upload coverage report
          command: |
            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-            REPORT_URL=https://zenithdb.github.io/zenith-coverage-data/$CIRCLE_SHA1
-            COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
+            REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
+            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1

            scripts/git-upload \
-              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-coverage-data.git \
+              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
              --message="Add code coverage for $COMMIT_URL" \
              copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE

@@ -437,7 +423,7 @@ jobs:
                \"target_url\": \"$REPORT_URL\"
              }"

-  # Build zenithdb/zenith:latest image and push it to Docker hub
+  # Build neondatabase/neon:latest image and push it to Docker hub
  docker-image:
    docker:
      - image: cimg/base:2021.04
@@ -451,18 +437,18 @@ jobs:
      - run:
          name: Build and push Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
            docker build \
              --pull \
              --build-arg GIT_VERSION=${CIRCLE_SHA1} \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:latest .
-            docker push zenithdb/zenith:${DOCKER_TAG}
-            docker push zenithdb/zenith:latest
+              --tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
+            docker push neondatabase/neon:${DOCKER_TAG}
+            docker push neondatabase/neon:latest

-  # Build zenithdb/compute-node:latest image and push it to Docker hub
+  # Build neondatabase/compute-node:latest image and push it to Docker hub
  docker-image-compute:
    docker:
      - image: cimg/base:2021.04
@@ -470,31 +456,31 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build zenithdb/compute-tools:latest image and push it to Docker hub
+      # Build neondatabase/compute-tools:latest image and push it to Docker hub
      # TODO: this should probably also use versioned tag, not just :latest.
      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag zenithdb/compute-tools:latest -f Dockerfile.compute-tools .
-            docker push zenithdb/compute-tools:latest
+              --tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
+            docker push neondatabase/compute-tools:latest
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push compute-node Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
-            docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:latest vendor/postgres
-            docker push zenithdb/compute-node:${DOCKER_TAG}
-            docker push zenithdb/compute-node:latest
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
+            docker push neondatabase/compute-node:${DOCKER_TAG}
+            docker push neondatabase/compute-node:latest

-  # Build production zenithdb/zenith:release image and push it to Docker hub
+  # Build production neondatabase/neon:release image and push it to Docker hub
  docker-image-release:
    docker:
      - image: cimg/base:2021.04
@@ -508,18 +494,18 @@ jobs:
      - run:
          name: Build and push Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
            docker build \
              --pull \
              --build-arg GIT_VERSION=${CIRCLE_SHA1} \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:release .
-            docker push zenithdb/zenith:${DOCKER_TAG}
-            docker push zenithdb/zenith:release
+              --tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
+            docker push neondatabase/neon:${DOCKER_TAG}
+            docker push neondatabase/neon:release

-  # Build production zenithdb/compute-node:release image and push it to Docker hub
+  # Build production neondatabase/compute-node:release image and push it to Docker hub
  docker-image-compute-release:
    docker:
      - image: cimg/base:2021.04
@@ -527,29 +513,29 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build zenithdb/compute-tools:release image and push it to Docker hub
+      # Build neondatabase/compute-tools:release image and push it to Docker hub
      # TODO: this should probably also use versioned tag, not just :latest.
      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag zenithdb/compute-tools:release -f Dockerfile.compute-tools .
-            docker push zenithdb/compute-tools:release
+              --tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
+            docker push neondatabase/compute-tools:release
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push compute-node Docker image
          command: |
-            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
-            docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:release vendor/postgres
-            docker push zenithdb/compute-node:${DOCKER_TAG}
-            docker push zenithdb/compute-node:release
+            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
+            docker push neondatabase/compute-node:${DOCKER_TAG}
+            docker push neondatabase/compute-node:release

  deploy-staging:
    docker:
@@ -575,7 +561,7 @@ jobs:
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i staging.hosts
-            rm -f zenith_install.tar.gz .zenith_current_version
+            rm -f neon_install.tar.gz .neon_current_version

  deploy-staging-proxy:
    docker:
@@ -593,13 +579,13 @@ jobs:
          name: Setup helm v3
          command: |
            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-            helm repo add zenithdb https://zenithdb.github.io/helm-charts
+            helm repo add neondatabase https://neondatabase.github.io/helm-charts
      - run:
          name: Re-deploy proxy
          command: |
            DOCKER_TAG=$(git log --oneline|wc -l)
-            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
-
+            helm upgrade neon-proxy       neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
+            helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait

  deploy-release:
    docker:
@@ -625,7 +611,7 @@ jobs:
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i production.hosts
-            rm -f zenith_install.tar.gz .zenith_current_version
+            rm -f neon_install.tar.gz .neon_current_version

  deploy-release-proxy:
    docker:
@@ -643,7 +629,7 @@ jobs:
          name: Setup helm v3
          command: |
            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-            helm repo add zenithdb https://zenithdb.github.io/helm-charts
+            helm repo add zenithdb https://neondatabase.github.io/helm-charts
      - run:
          name: Re-deploy proxy
          command: |
@@ -672,7 +658,7 @@ jobs:
            --data \
              "{
                \"state\": \"pending\",
-                \"context\": \"zenith-remote-ci\",
+                \"context\": \"neon-cloud-e2e\",
                \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
              }"
      - run:
@@ -688,7 +674,7 @@ jobs:
              "{
                \"ref\": \"main\",
                \"inputs\": {
-                  \"ci_job_name\": \"zenith-remote-ci\",
+                  \"ci_job_name\": \"neon-cloud-e2e\",
                  \"commit_hash\": \"$CIRCLE_SHA1\",
                  \"remote_repo\": \"$LOCAL_REPO\"
                }
@@ -704,8 +690,8 @@ workflows:
          matrix:
            parameters:
              build_type: ["debug", "release"]
-      - build-zenith:
-          name: build-zenith-<< matrix.build_type >>
+      - build-neon:
+          name: build-neon-<< matrix.build_type >>
          matrix:
            parameters:
              build_type: ["debug", "release"]
@@ -720,7 +706,7 @@ workflows:
          test_selection: batch_pg_regress
          needs_postgres_source: true
          requires:
-            - build-zenith-<< matrix.build_type >>
+            - build-neon-<< matrix.build_type >>
      - run-pytest:
          name: other-tests-<< matrix.build_type >>
          matrix:
@@ -728,7 +714,7 @@ workflows:
              build_type: ["debug", "release"]
          test_selection: batch_others
          requires:
-            - build-zenith-<< matrix.build_type >>
+            - build-neon-<< matrix.build_type >>
      - run-pytest:
          name: benchmarks
          context: PERF_TEST_RESULT_CONNSTR
@@ -737,7 +723,7 @@ workflows:
          run_in_parallel: false
          save_perf_report: true
          requires:
-            - build-zenith-release
+            - build-neon-release
      - coverage-report:
          # Context passes credentials for gh api
          context: CI_ACCESS_TOKEN
@@ -828,11 +814,11 @@ workflows:
      - remote-ci-trigger:
          # Context passes credentials for gh api
          context: CI_ACCESS_TOKEN
-          remote_repo: "zenithdb/console"
+          remote_repo: "neondatabase/cloud"
          requires:
            # XXX: Successful build doesn't mean everything is OK, but
            # the job to be triggered takes so much time to complete (~22 min)
            # that it's better not to wait for the commented-out steps
-            - build-zenith-debug
+            - build-neon-release
            # - pg_regress-tests-release
            # - other-tests-release
--- a/.circleci/helm-values/production.proxy.yaml
+++ b/.circleci/helm-values/production.proxy.yaml
@@ -1,9 +1,12 @@
 # Helm chart values for zenith-proxy.
 # This is a YAML-formatted file.

+image:
+  repository: neondatabase/neon
+
 settings:
-  authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
-  uri: "https://console.zenith.tech/psql_session/"
+  authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
+  uri: "https://console.neon.tech/psql_session/"

 # -- Additional labels for zenith-proxy pods
 podLabels:
@@ -25,7 +28,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: start.zenith.tech
+    external-dns.alpha.kubernetes.io/hostname: start.zenith.tech,connect.neon.tech,pg.neon.tech

 metrics:
  enabled: true
--- a/.circleci/helm-values/staging.proxy-scram.yaml
+++ b/.circleci/helm-values/staging.proxy-scram.yaml
@@ -0,0 +1,31 @@
+# Helm chart values for neon-proxy (SCRAM auth variant).
+# This is a YAML-formatted file.
+
+image:
+  repository: neondatabase/neon
+
+settings:
+  authBackend: "console"
+  authEndpoint: "http://console-staging.local/management/api/v2"
+  domain: "*.cloud.stage.neon.tech"
+
+# -- Additional labels for neon-proxy pods
+podLabels:
+  zenith_service: proxy-scram
+  zenith_env: staging
+  zenith_region: us-east-1
+  zenith_region_slug: virginia
+
+exposedService:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    external-dns.alpha.kubernetes.io/hostname: cloud.stage.neon.tech
+
+metrics:
+  enabled: true
+  serviceMonitor:
+    enabled: true
+    selector:
+      release: kube-prometheus-stack
--- a/.circleci/helm-values/staging.proxy.yaml
+++ b/.circleci/helm-values/staging.proxy.yaml
@@ -1,9 +1,12 @@
 # Helm chart values for zenith-proxy.
 # This is a YAML-formatted file.

+image:
+  repository: neondatabase/neon
+
 settings:
-  authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
-  uri: "https://console.stage.zenith.tech/psql_session/"
+  authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
+  uri: "https://console.stage.neon.tech/psql_session/"

 # -- Additional labels for zenith-proxy pods
 podLabels:
@@ -17,7 +20,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: start.stage.zenith.tech
+    external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech

 metrics:
  enabled: true
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -10,6 +10,8 @@ dep-format-version = "2"
 # Hakari works much better with the new feature resolver.
 # For more about the new feature resolver, see:
 # https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
+# The resolver must still be set here because hakari requires this field,
+# even though it is now the default for the 2021 edition and cargo.
 resolver = "2"

 # Add triples corresponding to platforms commonly used by developers here.
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -26,7 +26,7 @@ jobs:
    runs-on: [self-hosted, zenith-benchmarker]

    env:
-      PG_BIN: "/usr/pgsql-13/bin"
+      POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"

    steps:
    - name: Checkout zenith repo
@@ -51,7 +51,7 @@ jobs:
        echo Poetry
        poetry --version
        echo Pgbench
-        $PG_BIN/pgbench --version
+        $POSTGRES_DISTRIB_DIR/bin/pgbench --version

    # FIXME cluster setup is skipped due to various changes in console API
    # for now pre created cluster is used. When API gain some stability
@@ -66,7 +66,7 @@ jobs:

        echo "Starting cluster"
        # wake up the cluster
-        $PG_BIN/psql $BENCHMARK_CONNSTR -c "SELECT 1"
+        $POSTGRES_DISTRIB_DIR/bin/psql $BENCHMARK_CONNSTR -c "SELECT 1"

    - name: Run benchmark
      # pgbench is installed system wide from official repo
@@ -83,8 +83,11 @@ jobs:
      # sudo yum install postgresql13-contrib
      # actual binaries are located in /usr/pgsql-13/bin/
      env:
-        TEST_PG_BENCH_TRANSACTIONS_MATRIX: "5000,10000,20000"
-        TEST_PG_BENCH_SCALES_MATRIX: "10,15"
+        # The pgbench test runs two tests of given duration against each scale.
+        # So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
+        # Plus time needed to initialize the test databases.
+        TEST_PG_BENCH_DURATIONS_MATRIX: "300"
+        TEST_PG_BENCH_SCALES_MATRIX: "10,100"
        PLATFORM: "zenith-staging"
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
        REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -36,8 +36,7 @@ jobs:

      - name: Install macOs postgres dependencies
        if: matrix.os == 'macos-latest'
-        run: |
-          brew install flex bison
+        run: brew install flex bison

      - name: Set pg revision for caching
        id: pg_ver
@@ -53,8 +52,7 @@ jobs:

      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          make postgres
+        run: make postgres

      - name: Cache cargo deps
        id: cache_cargo
@@ -64,13 +62,10 @@ jobs:
            ~/.cargo/registry
            ~/.cargo/git
            target
-          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}

-      # Use `env CARGO_INCREMENTAL=0` to mitigate https://github.com/rust-lang/rust/issues/91696 for rustc 1.57.0
-      - name: Run cargo build
-        run: |
-          env CARGO_INCREMENTAL=0 cargo build --workspace --bins --examples --tests
+      - name: Run cargo clippy
+        run: ./run_clippy.sh

      - name: Run cargo test
-        run: |
-          env CARGO_INCREMENTAL=0 cargo test -- --nocapture --test-threads=1
+        run: cargo test --all --all-targets
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@ test_output/
 # Coverage
 *.profraw
 *.profdata
+
+*.key
+*.crt
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,22 +3,19 @@ members = [
    "compute_tools",
    "control_plane",
    "pageserver",
-    "postgres_ffi",
    "proxy",
-    "walkeeper",
+    "safekeeper",
    "workspace_hack",
-    "zenith",
-    "zenith_metrics",
-    "zenith_utils",
+    "neon_local",
+    "libs/*",
 ]
-resolver = "2"

 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
 debug = true

-# This is only needed for proxy's tests
-# TODO: we should probably fork tokio-postgres-rustls instead
+# This is only needed for proxy's tests.
+# TODO: we should probably fork `tokio-postgres-rustls` instead.
 [patch.crates-io]
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,5 @@
 # Build Postgres
-#
-#FROM zimg/rust:1.56 AS pg-build
-FROM zenithdb/build:buster-20220309 AS pg-build
+FROM zimg/rust:1.58 AS pg-build
 WORKDIR /pg

 USER root
@@ -11,27 +9,26 @@ COPY Makefile Makefile

 ENV BUILD_TYPE release
 RUN set -e \
-    && make -j $(nproc) -s postgres \
+    && mold -run make -j $(nproc) -s postgres \
    && rm -rf tmp_install/build \
    && tar -C tmp_install -czf /postgres_install.tar.gz .

 # Build zenith binaries
-#
-#FROM zimg/rust:1.56 AS build
-FROM zenithdb/build:buster-20220309 AS build
+FROM zimg/rust:1.58 AS build
 ARG GIT_VERSION=local

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
-ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot

 COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
 COPY . .

 # Show build caching stats to check if it was used in the end.
 # Has to be part of the same RUN since the cachepot daemon is killed at the end of this RUN, losing the compilation stats.
-RUN cargo build --release && /usr/local/cargo/bin/cachepot -s
+RUN set -e \
+    && sudo -E "PATH=$PATH" mold -run cargo build --release \
+    && cachepot -s

 # Build final image
 #
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -1,23 +0,0 @@
-FROM rust:1.56.1-slim-buster
-WORKDIR /home/circleci/project
-
-RUN set -e \
-    && apt-get update \
-    && apt-get -yq install \
-        automake \
-        libtool \
-        build-essential \
-        bison \
-        flex \
-        libreadline-dev \
-        zlib1g-dev \
-        libxml2-dev \
-        libseccomp-dev \
-        pkg-config \
-        libssl-dev \
-        clang
-
-RUN set -e \
-    && rustup component add clippy \
-    && cargo install cargo-audit \
-    && cargo install --git https://github.com/paritytech/cachepot
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,19 +1,18 @@
 # First transient image to build compute_tools binaries
 # NB: keep in sync with rust image version in .circleci/config.yml
-FROM zenithdb/build:buster-20220309 AS rust-build
-
-WORKDIR /zenith
+FROM zimg/rust:1.58 AS rust-build

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
-ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot

 COPY . .

-RUN cargo build -p compute_tools --release && /usr/local/cargo/bin/cachepot -s
+RUN set -e \
+    && sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
+    && cachepot -s

 # Final image that only has one binary
 FROM debian:buster-slim

-COPY --from=rust-build /zenith/target/release/zenith_ctl /usr/local/bin/zenith_ctl
+COPY --from=rust-build /home/circleci/project/target/release/zenith_ctl /usr/local/bin/zenith_ctl
--- a/README.md
+++ b/README.md
@@ -1,19 +1,22 @@
-# Zenith
+# Neon

-Zenith is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
+Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
+
+The project used to be called "Zenith". Many of the commands and code comments
+still refer to "zenith", but we are in the process of renaming things.

 ## Architecture overview

-A Zenith installation consists of compute nodes and Zenith storage engine.
+A Neon installation consists of compute nodes and Neon storage engine.

-Compute nodes are stateless PostgreSQL nodes, backed by Zenith storage engine.
+Compute nodes are stateless PostgreSQL nodes, backed by Neon storage engine.

-Zenith storage engine consists of two major components:
+Neon storage engine consists of two major components:
 - Pageserver. Scalable storage backend for compute nodes.
 - WAL service. The service that receives WAL from compute node and ensures that it is stored durably.

 Pageserver consists of:
- Repository - Zenith storage implementation.
+- Repository - Neon storage implementation.
 - WAL receiver - service that receives WAL from WAL service and stores it in the repository.
 - Page service - service that communicates with compute nodes and responds with pages from the repository.
 - WAL redo - service that builds pages from base images and WAL records on Page service request.
@@ -28,17 +31,17 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
 libssl-dev clang pkg-config libpq-dev
 ```

-[Rust] 1.56.1 or later is also required.
+[Rust] 1.58 or later is also required.

 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
 Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.

-2. Build zenith and patched postgres
+2. Build neon and patched postgres
 ```sh
-git clone --recursive https://github.com/zenithdb/zenith.git
-cd zenith
+git clone --recursive https://github.com/neondatabase/neon.git
+cd neon
 make -j5
 ```

@@ -46,32 +49,30 @@ make -j5
 ```sh
 # Create repository in .zenith with proper paths to binaries and data
 # Later that would be responsibility of a package install script
-> ./target/debug/zenith init
-initializing tenantid c03ba6b7ad4c5e9cf556f059ade44229
-created initial timeline 5b014a9e41b4b63ce1a1febc04503636 timeline.lsn 0/169C3C8
-created main branch
+> ./target/debug/neon_local init
+initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
+created initial timeline de200bd42b49cc1814412c7e592dd6e9 timeline.lsn 0/16B5A50
+initial timeline de200bd42b49cc1814412c7e592dd6e9 created
 pageserver init succeeded

 # start pageserver and safekeeper
-> ./target/debug/zenith start
-Starting pageserver at 'localhost:64000' in '.zenith'
+> ./target/debug/neon_local start
+Starting pageserver at '127.0.0.1:64000' in '.zenith'
 Pageserver started
-initializing for single for 7676
-Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/single'
+initializing for sk 1 for 7676
+Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
 Safekeeper started

 # start postgres compute node
-> ./target/debug/zenith pg start main
-Starting new postgres main on timeline 5b014a9e41b4b63ce1a1febc04503636 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
+> ./target/debug/neon_local pg start main
+Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
+Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
 Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
-waiting for server to start.... done
-server started

 # check list of running postgres instances
-> ./target/debug/zenith pg list
-NODE	ADDRESS	TIMELINES	BRANCH NAME	LSN		STATUS
-main	127.0.0.1:55432	5b014a9e41b4b63ce1a1febc04503636	main	0/1609610	running
+> ./target/debug/neon_local pg list
+ NODE  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
+ main  127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```

 4. Now it is possible to connect to postgres and run some queries:
@@ -91,18 +92,25 @@ postgres=# select * from t;
 5. And create branches and run postgres on them:
 ```sh
 # create branch named migration_check
-> ./target/debug/zenith timeline branch --branch-name migration_check
-Created timeline '0e9331cad6efbafe6a88dd73ae21a5c9' at Lsn 0/16F5830 for tenant: c03ba6b7ad4c5e9cf556f059ade44229. Ancestor timeline: 'main'
+> ./target/debug/neon_local timeline branch --branch-name migration_check
+Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'

 # check branches tree
-> ./target/debug/zenith timeline list
- main [5b014a9e41b4b63ce1a1febc04503636]
- ┗━ @0/1609610: migration_check [0e9331cad6efbafe6a88dd73ae21a5c9]
+> ./target/debug/neon_local timeline list
+(L) main [de200bd42b49cc1814412c7e592dd6e9]
+(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

 # start postgres on that branch
-> ./target/debug/zenith pg start migration_check
-Starting postgres node at 'host=127.0.0.1 port=55433 user=stas'
-waiting for server to start.... done
+> ./target/debug/neon_local pg start migration_check --branch-name migration_check
+Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
+Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
+Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
+
+# check the new list of running postgres instances
+> ./target/debug/neon_local pg list
+ NODE             ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
+ main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running
+ migration_check  127.0.0.1:55433  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running

 # this new postgres instance will have all the data from 'main' postgres,
 # but all modifications would not affect data in original postgres
@@ -115,18 +123,26 @@ postgres=# select * from t;

 postgres=# insert into t values(2,2);
 INSERT 0 1
+
+# check that the new change doesn't affect the 'main' postgres
+> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
+postgres=# select * from t;
+ key | value
+-----+-------
+   1 | 1
+(1 row)
 ```

 6. If you want to run tests afterwards (see below), you have to stop all the running pageserver, safekeeper and postgres instances
   you have just started. You can stop them all with one command:
 ```sh
-> ./target/debug/zenith stop
+> ./target/debug/neon_local stop
 ```

 ## Running tests

 ```sh
-git clone --recursive https://github.com/zenithdb/zenith.git
+git clone --recursive https://github.com/neondatabase/neon.git
 make # builds also postgres and installs it to ./tmp_install
 ./scripts/pytest
 ```
@@ -141,14 +157,14 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d

 ### Postgres-specific terms

-Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
+Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
 The same applies to certain spellings: e.g. we use MB to denote 1024 * 1024 bytes; while MiB would be technically more correct, it is inconsistent with what the PostgreSQL code and its documentation use.

 To get more familiar with this aspect, refer to:

- [Zenith glossary](/docs/glossary.md)
+- [Neon glossary](/docs/glossary.md)
 - [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))
+- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))

 ## Join the development

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -11,10 +11,11 @@ clap = "3.0"
 env_logger = "0.9"
 hyper = { version = "0.14", features = ["full"] }
 log = { version = "0.4", features = ["std", "serde"] }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 regex = "1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 tar = "0.4"
 tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/compute_tools/src/bin/zenith_ctl.rs
+++ b/compute_tools/src/bin/zenith_ctl.rs
@@ -38,6 +38,7 @@ use clap::Arg;
 use log::info;
 use postgres::{Client, NoTls};

+use compute_tools::checker::create_writablity_check_data;
 use compute_tools::config;
 use compute_tools::http_api::launch_http_server;
 use compute_tools::logger::*;
@@ -128,6 +129,8 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {

    handle_roles(&read_state.spec, &mut client)?;
    handle_databases(&read_state.spec, &mut client)?;
+    handle_grants(&read_state.spec, &mut client)?;
+    create_writablity_check_data(&mut client)?;

    // 'Close' connection
    drop(client);
@@ -155,7 +158,7 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
 }

 fn main() -> Result<()> {
-    // TODO: re-use `zenith_utils::logging` later
+    // TODO: re-use `utils::logging` later
    init_logger(DEFAULT_LOG_LEVEL)?;

    // Env variable is set by `cargo`
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -0,0 +1,46 @@
+use std::sync::{Arc, RwLock};
+
+use anyhow::{anyhow, Result};
+use log::error;
+use postgres::Client;
+use tokio_postgres::NoTls;
+
+use crate::zenith::ComputeState;
+
+pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
+    let query = "
+    CREATE TABLE IF NOT EXISTS health_check (
+        id serial primary key,
+        updated_at timestamptz default now()
+    );
+    INSERT INTO health_check VALUES (1, now())
+        ON CONFLICT (id) DO UPDATE
+         SET updated_at = now();";
+    let result = client.simple_query(query)?;
+    if result.len() < 2 {
+        return Err(anyhow::format_err!("executed  {} queries", result.len()));
+    }
+    Ok(())
+}
+
+pub async fn check_writability(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
+    let connstr = state.read().unwrap().connstr.clone();
+    let (client, connection) = tokio_postgres::connect(&connstr, NoTls).await?;
+    if client.is_closed() {
+        return Err(anyhow!("connection to postgres closed"));
+    }
+    tokio::spawn(async move {
+        if let Err(e) = connection.await {
+            error!("connection error: {}", e);
+        }
+    });
+
+    let result = client
+        .simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
+        .await?;
+
+    if result.len() != 1 {
+        return Err(anyhow!("statement can't be executed"));
+    }
+    Ok(())
+}
--- a/compute_tools/src/http_api.rs
+++ b/compute_tools/src/http_api.rs
@@ -11,7 +11,7 @@ use log::{error, info};
 use crate::zenith::*;

 // Service function to handle all available routes.
-fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
+async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
    match (req.method(), req.uri().path()) {
        // Timestamp of the last Postgres activity in the plain text.
        (&Method::GET, "/last_activity") => {
@@ -29,6 +29,15 @@ fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body
            Response::new(Body::from(format!("{}", state.ready)))
        }

+        (&Method::GET, "/check_writability") => {
+            info!("serving /check_writability GET request");
+            let res = crate::checker::check_writability(&state).await;
+            match res {
+                Ok(_) => Response::new(Body::from("true")),
+                Err(e) => Response::new(Body::from(e.to_string())),
+            }
+        }
+
        // Return the `404 Not Found` for any other routes.
        _ => {
            let mut not_found = Response::new(Body::from("404 Not Found"));
@@ -48,7 +57,7 @@ async fn serve(state: Arc<RwLock<ComputeState>>) {
        async move {
            Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
                let state = state.clone();
-                async move { Ok::<_, Infallible>(routes(req, state)) }
+                async move { Ok::<_, Infallible>(routes(req, state).await) }
            }))
        }
    });
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -2,6 +2,7 @@
 //! Various tools and helpers to handle cluster / compute node (Postgres)
 //! configuration.
 //!
+pub mod checker;
 pub mod config;
 pub mod http_api;
 #[macro_use]
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -132,7 +132,14 @@ impl Role {
        let mut params: String = "LOGIN".to_string();

        if let Some(pass) = &self.encrypted_password {
-            params.push_str(&format!(" PASSWORD 'md5{}'", pass));
+            // Some time ago we supported only md5 and treated all encrypted_password as md5.
+            // Now we also support SCRAM-SHA-256 and to preserve compatibility
+            // we treat every encrypted_password as md5 unless it starts with SCRAM-SHA-256.
+            if pass.starts_with("SCRAM-SHA-256") {
+                params.push_str(&format!(" PASSWORD '{}'", pass));
+            } else {
+                params.push_str(&format!(" PASSWORD 'md5{}'", pass));
+            }
        } else {
            params.push_str(" PASSWORD NULL");
        }
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -136,13 +136,20 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
                xact.execute(query.as_str(), &[])?;
            }
        } else {
-            info!("role name {}", &name);
+            info!("role name: '{}'", &name);
            let mut query: String = format!("CREATE ROLE {} ", name.quote());
-            info!("role create query {}", &query);
+            info!("role create query: '{}'", &query);
            info_print!(" -> create");

            query.push_str(&role.to_pg_options());
            xact.execute(query.as_str(), &[])?;
+
+            let grant_query = format!(
+                "grant pg_read_all_data, pg_write_all_data to {}",
+                name.quote()
+            );
+            xact.execute(grant_query.as_str(), &[])?;
+            info!("role grant query: '{}'", &grant_query);
        }

        info_print!("\n");
@@ -244,3 +251,24 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {

    Ok(())
 }
+
+// Grant CREATE ON DATABASE to the database owner
+// to allow clients to create trusted extensions.
+pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+    info!("cluster spec grants:");
+
+    for db in &spec.cluster.databases {
+        let dbname = &db.name;
+
+        let query: String = format!(
+            "GRANT CREATE ON DATABASE {} TO {}",
+            dbname.quote(),
+            db.owner.quote()
+        );
+        info!("grant query {}", &query);
+
+        client.execute(query.as_str(), &[])?;
+    }
+
+    Ok(())
+}
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"

 [dependencies]
 tar = "0.4.33"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 serde = { version = "1.0", features = ["derive"] }
 serde_with = "1.12.0"
 toml = "0.5"
@@ -18,6 +18,6 @@ url = "2.2.2"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

 pageserver = { path = "../pageserver" }
-walkeeper = { path = "../walkeeper" }
-zenith_utils = { path = "../zenith_utils" }
+safekeeper = { path = "../safekeeper" }
+utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -11,11 +11,12 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, Result};
-use zenith_utils::connstring::connection_host_port;
-use zenith_utils::lsn::Lsn;
-use zenith_utils::postgres_backend::AuthType;
-use zenith_utils::zid::ZTenantId;
-use zenith_utils::zid::ZTimelineId;
+use utils::{
+    connstring::connection_host_port,
+    lsn::Lsn,
+    postgres_backend::AuthType,
+    zid::{ZTenantId, ZTimelineId},
+};

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
@@ -272,12 +273,7 @@ impl PostgresNode {
        conf.append("wal_sender_timeout", "5s");
        conf.append("listen_addresses", &self.address.ip().to_string());
        conf.append("port", &self.address.port().to_string());
-
-        // Never clean up old WAL. TODO: We should use a replication
-        // slot or something proper, to prevent the compute node
-        // from removing WAL that hasn't been streamed to the safekeeper or
-        // page server yet. (gh issue #349)
-        conf.append("wal_keep_size", "10TB");
+        conf.append("wal_keep_size", "0");

        // Configure the node to fetch pages from pageserver
        let pageserver_connstr = {
@@ -331,14 +327,14 @@ impl PostgresNode {
            // Configure the node to connect to the safekeepers
            conf.append("synchronous_standby_names", "walproposer");

-            let wal_acceptors = self
+            let safekeepers = self
                .env
                .safekeepers
                .iter()
                .map(|sk| format!("localhost:{}", sk.pg_port))
                .collect::<Vec<String>>()
                .join(",");
-            conf.append("wal_acceptors", &wal_acceptors);
+            conf.append("wal_acceptors", &safekeepers);
        } else {
            // We only use setup without safekeepers for tests,
            // and don't care about data durability on pageserver,
@@ -420,10 +416,15 @@ impl PostgresNode {
        if let Some(token) = auth_token {
            cmd.env("ZENITH_AUTH_TOKEN", token);
        }
-        let pg_ctl = cmd.status().context("pg_ctl failed")?;

-        if !pg_ctl.success() {
-            anyhow::bail!("pg_ctl failed");
+        let pg_ctl = cmd.output().context("pg_ctl failed")?;
+        if !pg_ctl.status.success() {
+            anyhow::bail!(
+                "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
+                pg_ctl.status,
+                String::from_utf8_lossy(&pg_ctl.stdout),
+                String::from_utf8_lossy(&pg_ctl.stderr),
+            );
        }
        Ok(())
    }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -11,9 +11,11 @@ use std::env;
 use std::fs;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
-use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
-use zenith_utils::postgres_backend::AuthType;
-use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
+use utils::{
+    auth::{encode_from_key_file, Claims, Scope},
+    postgres_backend::AuthType,
+    zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
+};

 use crate::safekeeper::SafekeeperNode;

@@ -61,6 +63,10 @@ pub struct LocalEnv {
    #[serde(default)]
    pub broker_endpoints: Option<String>,

+    /// A prefix to add to every key when pushing/polling etcd from a node.
+    #[serde(default)]
+    pub broker_etcd_prefix: Option<String>,
+
    pub pageserver: PageServerConf,

    #[serde(default)]
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -13,15 +13,17 @@ use nix::unistd::Pid;
 use postgres::Config;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
+use safekeeper::http::models::TimelineCreateRequest;
 use thiserror::Error;
-use walkeeper::http::models::TimelineCreateRequest;
-use zenith_utils::http::error::HttpErrorBody;
-use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
+use utils::{
+    connstring::connection_address,
+    http::error::HttpErrorBody,
+    zid::{ZNodeId, ZTenantId, ZTimelineId},
+};

 use crate::local_env::{LocalEnv, SafekeeperConf};
 use crate::storage::PageServerNode;
 use crate::{fill_rust_env_vars, read_pidfile};
-use zenith_utils::connstring::connection_address;

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
@@ -75,6 +77,7 @@ pub struct SafekeeperNode {
    pub pageserver: Arc<PageServerNode>,

    broker_endpoints: Option<String>,
+    broker_etcd_prefix: Option<String>,
 }

 impl SafekeeperNode {
@@ -92,6 +95,7 @@ impl SafekeeperNode {
            http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
            pageserver,
            broker_endpoints: env.broker_endpoints.clone(),
+            broker_etcd_prefix: env.broker_etcd_prefix.clone(),
        }
    }

@@ -141,6 +145,9 @@ impl SafekeeperNode {
        if let Some(ref ep) = self.broker_endpoints {
            cmd.args(&["--broker-endpoints", ep]);
        }
+        if let Some(prefix) = self.broker_etcd_prefix.as_deref() {
+            cmd.args(&["--broker-etcd-prefix", prefix]);
+        }

        if !cmd.status()?.success() {
            bail!(
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::io::Write;
 use std::net::TcpStream;
 use std::path::PathBuf;
@@ -9,21 +10,23 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver::http::models::{TenantCreateRequest, TimelineCreateRequest};
+use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
 use pageserver::timelines::TimelineInfo;
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-use zenith_utils::http::error::HttpErrorBody;
-use zenith_utils::lsn::Lsn;
-use zenith_utils::postgres_backend::AuthType;
-use zenith_utils::zid::{ZTenantId, ZTimelineId};
+use utils::{
+    connstring::connection_address,
+    http::error::HttpErrorBody,
+    lsn::Lsn,
+    postgres_backend::AuthType,
+    zid::{ZTenantId, ZTimelineId},
+};

 use crate::local_env::LocalEnv;
 use crate::{fill_rust_env_vars, read_pidfile};
 use pageserver::tenant_mgr::TenantInfo;
-use zenith_utils::connstring::connection_address;

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -164,6 +167,9 @@ impl PageServerNode {
            );
        }

+        // echo the captured output of the init command
+        println!("{}", String::from_utf8_lossy(&init_output.stdout));
+
        Ok(initial_timeline_id)
    }

@@ -183,8 +189,6 @@ impl PageServerNode {
        );
        io::stdout().flush().unwrap();

-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-
        let repo_path = self.repo_path();
        let mut args = vec!["-D", repo_path.to_str().unwrap()];

@@ -192,9 +196,11 @@ impl PageServerNode {
            args.extend(["-c", config_override]);
        }

-        fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
+        let mut cmd = Command::new(self.env.pageserver_bin()?);
+        let mut filled_cmd = fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
+        filled_cmd = fill_aws_secrets_vars(filled_cmd);

-        if !cmd.status()?.success() {
+        if !filled_cmd.status()?.success() {
            bail!(
                "Pageserver failed to start. See '{}' for details.",
                self.repo_path().join("pageserver.log").display()
@@ -342,10 +348,36 @@ impl PageServerNode {
    pub fn tenant_create(
        &self,
        new_tenant_id: Option<ZTenantId>,
+        settings: HashMap<&str, &str>,
    ) -> anyhow::Result<Option<ZTenantId>> {
        let tenant_id_string = self
            .http_request(Method::POST, format!("{}/tenant", self.http_base_url))
-            .json(&TenantCreateRequest { new_tenant_id })
+            .json(&TenantCreateRequest {
+                new_tenant_id,
+                checkpoint_distance: settings
+                    .get("checkpoint_distance")
+                    .map(|x| x.parse::<u64>())
+                    .transpose()?,
+                compaction_target_size: settings
+                    .get("compaction_target_size")
+                    .map(|x| x.parse::<u64>())
+                    .transpose()?,
+                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
+                compaction_threshold: settings
+                    .get("compaction_threshold")
+                    .map(|x| x.parse::<usize>())
+                    .transpose()?,
+                gc_horizon: settings
+                    .get("gc_horizon")
+                    .map(|x| x.parse::<u64>())
+                    .transpose()?,
+                gc_period: settings.get("gc_period").map(|x| x.to_string()),
+                image_creation_threshold: settings
+                    .get("image_creation_threshold")
+                    .map(|x| x.parse::<usize>())
+                    .transpose()?,
+                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+            })
            .send()?
            .error_from_body()?
            .json::<Option<String>>()?;
@@ -362,6 +394,35 @@ impl PageServerNode {
            .transpose()
    }

+    /// Sends a `PUT {http_base_url}/tenant/config` request carrying the
+    /// settings for `tenant_id` that are present in `settings`.
+    ///
+    /// Only the keys read below are looked up; a key missing from `settings`
+    /// leaves the corresponding request field as `None`. The `*_period` and
+    /// `pitr_interval` values are forwarded as strings without being parsed
+    /// here.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if sending the request fails or if
+    /// `error_from_body()` reports a failure for the response.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `checkpoint_distance`, `compaction_target_size`,
+    /// `compaction_threshold`, `gc_horizon` or `image_creation_threshold` is
+    /// present but is not a valid integer, because the parse result is
+    /// unwrapped.
+    ///
+    /// NOTE(review): `tenant_create` propagates the same parse failures with
+    /// `?` instead of panicking. Doing so here presumably requires this
+    /// function's `Result` error type to accept `ParseIntError` -- confirm
+    /// against the `Result` alias in scope before changing it.
+    pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
+        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))
+            .json(&TenantConfigRequest {
+                tenant_id,
+                checkpoint_distance: settings
+                    .get("checkpoint_distance")
+                    .map(|x| x.parse::<u64>().unwrap()),
+                compaction_target_size: settings
+                    .get("compaction_target_size")
+                    .map(|x| x.parse::<u64>().unwrap()),
+                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
+                compaction_threshold: settings
+                    .get("compaction_threshold")
+                    .map(|x| x.parse::<usize>().unwrap()),
+                gc_horizon: settings
+                    .get("gc_horizon")
+                    .map(|x| x.parse::<u64>().unwrap()),
+                gc_period: settings.get("gc_period").map(|x| x.to_string()),
+                image_creation_threshold: settings
+                    .get("image_creation_threshold")
+                    .map(|x| x.parse::<usize>().unwrap()),
+                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+            })
+            .send()?
+            .error_from_body()?;
+
+        Ok(())
+    }
+
    pub fn timeline_list(&self, tenant_id: &ZTenantId) -> anyhow::Result<Vec<TimelineInfo>> {
        let timeline_infos: Vec<TimelineInfo> = self
            .http_request(
@@ -399,3 +460,12 @@ impl PageServerNode {
        Ok(timeline_info_response)
    }
 }
+
+/// Copies `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` from the current
+/// process environment into the environment of `cmd`.
+///
+/// A variable is skipped when `std::env::var` returns an error for it, i.e.
+/// when it is unset or its value is not valid Unicode. The same `Command`
+/// reference is returned so the caller can keep using or chaining it.
+fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
+    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
+        if let Ok(value) = std::env::var(env_key) {
+            cmd = cmd.env(env_key, value);
+        }
+    }
+    cmd
+}
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,8 +7,8 @@
 - [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
 - [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
 - [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
- [pageserver/README](/pageserver/README) — pageserver overview.
- [postgres_ffi/README](/postgres_ffi/README) — Postgres FFI overview.
+- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
+- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
 - [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
- [walkeeper/README](/walkeeper/README) — WAL service overview.
+- [safekeeper/README.md](/safekeeper/README.md) — WAL service overview.
 - [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core
--- a/docs/authentication.md
+++ b/docs/authentication.md
@@ -27,4 +27,4 @@ management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algori
 tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
 ```

-Utility functions to work with jwts in rust are located in zenith_utils/src/auth.rs
+Utility functions to work with jwts in rust are located in libs/utils/src/auth.rs
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -29,7 +29,7 @@ Each Branch lives in a corresponding timeline[] and has an ancestor[].

 NOTE: This is an overloaded term.

-A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint; 
+A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;

 ### Checkpoint (Layered repository)

@@ -108,10 +108,10 @@ PostgreSQL LSNs and functions to monitor them:
 * `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
 * `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
 * `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
-* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically. 
+* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
 [source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

-Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
+Zenith safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
@@ -190,7 +190,7 @@ or we do not support them in zenith yet (pg_commit_ts).
 Tenant represents a single customer, interacting with Zenith.
 Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
 One pageserver[] can serve multiple tenants at once.
-One safekeeper 
+One safekeeper

 See `docs/multitenancy.md` for more.

--- a/docs/rfcs/004-durability.md
+++ b/docs/rfcs/004-durability.md
@@ -22,7 +22,7 @@ In addition to the WAL safekeeper nodes, the WAL is archived in
 S3. WAL that has been archived to S3 can be removed from the
 safekeepers, so the safekeepers don't need a lot of disk space.

-
+```
                                +----------------+
                        +-----> | WAL safekeeper |
                        |       +----------------+
@@ -42,23 +42,23 @@ safekeepers, so the safekeepers don't need a lot of disk space.
                  \
                   \
                    \
-                     \      +--------+
-					  \		|        |
-					   +-->	|   S3   |
-							|        |
-                            +--------+
-
+                     \          +--------+
+                      \         |        |
+                       +------> |   S3   |
+                                |        |
+                                +--------+

+```
 Every WAL safekeeper holds a section of WAL, and a VCL value.
 The WAL can be divided into three portions:

-
+```
                                    VCL                   LSN
                                     |                     |
                                     V                     V
 .................ccccccccccccccccccccXXXXXXXXXXXXXXXXXXXXXXX
 Archived WAL       Completed WAL          In-flight WAL
-
+```

 Note that all this WAL kept in a safekeeper is a contiguous section.
 This is different from Aurora: In Aurora, there can be holes in the
--- a/docs/rfcs/009-snapshot-first-storage-cli.md
+++ b/docs/rfcs/009-snapshot-first-storage-cli.md
@@ -12,7 +12,7 @@ Init empty pageserver using `initdb` in temporary directory.

 `--storage_dest=FILE_PREFIX | S3_PREFIX |...` option defines object storage type, all other parameters are passed via env variables. Inspired by WAL-G style naming : https://wal-g.readthedocs.io/STORAGES/.

-Save`storage_dest` and other parameters in config. 
+Save `storage_dest` and other parameters in config.
 Push snapshots to `storage_dest` in background.

 ```
@@ -21,7 +21,7 @@ zenith start
 ```

 #### 2. Restart pageserver (manually or crash-recovery).
-Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`. 
+Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
 Push snapshots to `storage_dest` in background.

 ```
@@ -32,7 +32,7 @@ zenith start
 Start pageserver from existing snapshot.
 Path to snapshot provided via `--snapshot_path=FILE_PREFIX | S3_PREFIX | ...`
 Do not save `snapshot_path` and `snapshot_format` in config, as it is a one-time operation.
-Save`storage_dest` parameters in config. 
+Save `storage_dest` parameters in config.
 Push snapshots to `storage_dest` in background.
 ```
 //I.e. we want to start zenith on top of existing $PGDATA and use s3 as a persistent storage.
@@ -42,15 +42,15 @@ zenith start
 How to pass credentials needed for `snapshot_path`?

 #### 4. Export.
-Manually push snapshot to `snapshot_path` which differs from `storage_dest` 
+Manually push snapshot to `snapshot_path` which differs from `storage_dest`
 Optionally set `snapshot_format`, which can be plain pgdata format or zenith format.
 ```
 zenith export --snapshot_path=FILE_PREFIX --snapshot_format=pgdata
 ```

 #### Notes and questions
- walkeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
+- safekeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
 - Why do we need `zenith init` as a separate command? Can't we init everything at first start?
 - We can think of better names for all options.
 - Export to plain postgres format will be useless, if we are not 100% compatible on page level.
-I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
+I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
--- a/docs/rfcs/016-connection-routing.md
+++ b/docs/rfcs/016-connection-routing.md
@@ -0,0 +1,151 @@
+# Dispatching a connection
+
+For each client connection, Neon service needs to authenticate the
+connection, and route it to the right PostgreSQL instance.
+
+## Authentication
+
+There are three different ways to authenticate:
+
+- anonymous; no authentication needed
+- PostgreSQL authentication
+- github single sign-on using browser
+
+In anonymous access, the user doesn't need to perform any
+authentication at all. This can be used e.g. in interactive PostgreSQL
+documentation, allowing you to run the examples very quickly. Similar
+to sqlfiddle.com.
+
+PostgreSQL authentication works the same as always. All the different
+PostgreSQL authentication options like SCRAM, kerberos, etc. are
+available. [1]
+
+The third option is to authenticate with github single sign-on. When
+you open the connection in psql, you get a link that you open with
+your browser. Opening the link redirects you to github authentication,
+and lets the connection proceed. This is also known as "Link auth" [2].
+
+
+## Routing the connection
+
+When a client starts a connection, it needs to be routed to the
+correct PostgreSQL instance. Routing can be done by the proxy, acting
+as a man-in-the-middle, or the connection can be routed at the network
+level based on the hostname or IP address.
+
+Either way, Neon needs to identify which PostgreSQL instance the
+connection should be routed to. If the instance is not already
+running, it needs to be started. Some connections always require a new
+PostgreSQL instance to be created, e.g. if you want to run a one-off
+query against a particular point-in-time.
+
+The PostgreSQL instance is identified by:
+- Neon account (possibly anonymous)
+- cluster (known as tenant in the storage?)
+- branch or snapshot name
+- timestamp (PITR)
+- primary or read-replica
+- one-off read replica
+- one-off writeable branch
+
+When you are using regular PostgreSQL authentication or anonymous
+access, the connection URL needs to contain all the information needed
+for the routing. With github single sign-on, the browser is involved
+and some details - the Neon account in particular - can be deduced
+from the authentication exchange.
+
+There are three methods for identifying the PostgreSQL instance:
+
+- Browser interaction (link auth)
+- Options in the connection URL and the domain name
+- A pre-defined endpoint, identified by domain name or IP address
+
+### Link Auth
+
+    postgres://<username>@start.neon.tech/<dbname>
+
+This gives you a link that you open in browser. Clicking the link
+performs github authentication, and the Neon account name is
+provided to the proxy behind the scenes. The proxy routes the
+connection to the primary PostgreSQL instance in cluster called
+"main", branch "main".
+
+Further ideas:
+- You could pre-define a different target for link auth
+  connections in the UI.
+- You could have a drop-down in the browser, allowing you to connect
+  to any cluster you want. Link Auth can be like Teleport.
+
+### Connection URL
+
+The connection URL looks like this:
+
+    postgres://<username>@<cluster-id>.db.neon.tech/<dbname>
+
+By default, this connects you to the primary PostgreSQL instance
+running on the "main" branch in the named cluster [3]. However, you can
+change that by specifying options in the connection URL. The following
+options are supported:
+
+| option name  | Description                                                                                       | Examples                                            |
+| ---          | ---                                                                                               | ---                                                 |
+| cluster      | Cluster name                                                                                      | cluster:myproject                                   |
+| branch       | Branch name                                                                                       | branch:main                                         |
+| timestamp    | Connect to an instance at given point-in-time.                                                    | timestamp:2022-04-08 timestamp:2022-04-08T11:42:16Z |
+| lsn          | Connect to an instance at given LSN                                                               | lsn:0/12FF0420                                      |
+| read-replica | Connect to a read-replica. If the parameter is 'new', a new instance is created for this session. | read-replica read-replica:new                       |
+
+For example, to read branch 'testing' as it was on Mar 31, 2022, you could
+specify a timestamp in the connection URL [4]:
+
+    postgres://alice@cluster-1234.db.neon.tech/postgres?options=branch:testing,timestamp:2022-03-31
+
+Connecting with cluster name and options can be disabled in the UI. If
+disabled, you can only connect using a pre-defined endpoint.
+
+### Pre-defined Endpoint
+
+Instead of providing the cluster name, branch, and all those options
+in the connection URL, you can define a named endpoint with the same
+options.
+
+In the UI, click "create endpoint". Fill in the details:
+
+- Cluster name
+- Branch
+- timestamp or LSN
+- is this for the primary or for a read replica
+- etc.
+
+When you click Finish, a named endpoint is created. You can now use the endpoint ID to connect:
+
+    postgres://<username>@<endpoint-id>.endpoint.neon.tech/<dbname>
+
+
+An endpoint can be assigned a static or dynamic IP address, so that
+you can connect to it with clients that don't support TLS SNI. Maybe
+bypass the proxy altogether, but that ought to be invisible to the
+user.
+
+You can limit the range of source IP addresses that are allowed to
+connect to an endpoint. An endpoint can also be exposed in an Amazon
+VPC, allowing direct connections from applications.
+
+
+# Footnotes
+
+[1] I'm not sure how feasible it is to set up and configure something like Kerberos
+or LDAP in a cloud environment. But in principle I think we should
+allow customers to have the full power of PostgreSQL, including all
+authentication options. However, it's up to the customer to configure
+it correctly.
+
+[2] Link auth is a way to both authenticate and route the connection.
+
+[3] This assumes that cluster-ids are globally unique, across all
+Neon accounts.
+
+[4] The syntax accepted in the connection URL is limited by libpq. The
+only way to pass arbitrary options to the server (or our proxy) is
+with the "options" keyword, and the options must be percent-encoded. I
+think the above would work, but I haven't tested it.
--- a/docs/rfcs/cluster-size-limits.md
+++ b/docs/rfcs/cluster-size-limits.md
@@ -0,0 +1,79 @@
+Cluster size limits
+==================
+
+## Summary
+
+One of the resource consumption limits for free-tier users is a cluster size limit.
+
+To enforce it, we need to calculate the timeline size and check if the limit is reached before relation create/extend operations.
+If the limit is reached, the query must fail with some meaningful error/warning.
+We may want to exempt some operations from the quota to allow users free space to fit back into the limit.
+
+The stateless compute node that performs validation is separate from the storage that calculates the usage, so we need to exchange cluster size information between those components.
+
+## Motivation
+
+Limit the maximum size of a PostgreSQL instance to limit free tier users (and other tiers in the future).
+First of all, this is needed to control our free tier production costs.
+Another reason to limit resources is risk management — we haven't (fully) tested and optimized zenith for big clusters,
+so we don't want to give users access to the functionality that we don't think is ready.
+
+## Components
+
+* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
+* safekeeper - pass feedback message from pageserver to compute.
+* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
+* console - set and update `zenith.max_cluster_size` setting
+
+## Proposed implementation
+
+First of all, it's necessary to define timeline size.
+
+The current approach is to count all data, including SLRUs (but not including WAL).
+Here we think of it as a physical disk underneath the Postgres cluster.
+This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
+
+Alternatively, we could count only relation data. As in pg_database_size().
+This approach is somewhat more user-friendly because it is the data that is really affected by the user.
+On the other hand, it puts us in a weaker position than other services, e.g., RDS.
+We will need to refactor the timeline_size counter or add another counter to implement it. 
+
+Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
+Then this size should be reported to compute node.
+
+`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback`.
+
+(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
+
+This message is received by the safekeeper and propagated to compute node as a part of `AppendResponse`.
+
+Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
+
+And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
+(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
+
+TODO:
+We can allow autovacuum processes to bypass this check, simply checking `IsAutoVacuumWorkerProcess()`.
+It would be nice to allow manual VACUUM and VACUUM FULL to bypass the check, but it's not easy to distinguish these operations at the low level.
+See issues https://github.com/neondatabase/neon/issues/1245
+https://github.com/zenithdb/zenith/issues/1445
+
+TODO:
+We should warn users if the limit is soon to be reached.
+
+### **Reliability, failure modes and corner cases**
+
+1. `current_timeline_size` is valid as of the last LSN received and digested by the pageserver.
+    
+    If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
+    
+    So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (e.g., CREATE DATABASE) or free (e.g., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
+    
+    Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
+
+
+### **Security implications**
+
+We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
+Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
+To cover this case, we also monitor the compute node size in the console.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -6,7 +6,6 @@ If there's no such file during `init` phase of the server, it creates the file i
 There's a possibility to pass an arbitrary config value to the pageserver binary as an argument: such values override
 the values in the config file, if any are specified for the same key and get into the final config during init phase.

-
 ### Config example

 ```toml
@@ -35,9 +34,9 @@ Yet, it validates the config values it can (e.g. postgres install dir) and error

 Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and

-* either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
+- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`

-* or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`
+- or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`

 ### Config values

@@ -57,7 +56,7 @@ but it will trigger a checkpoint operation to get it back below the
 limit.

 `checkpoint_distance` also determines how much WAL needs to be kept
-durable in the safekeeper.  The safekeeper must have capacity to hold
+durable in the safekeeper. The safekeeper must have capacity to hold
 this much WAL, with some headroom, otherwise you can get stuck in a
 situation where the safekeeper is full and stops accepting new WAL,
 but the pageserver is not flushing out and releasing the space in the
@@ -72,7 +71,11 @@ The unit is # of bytes.

 Every `compaction_period` seconds, the page server checks if
 maintenance operations, like compaction, are needed on the layer
-files.  Default is 1 s, which should be fine.
+files. Default is 1 s, which should be fine.
+
+#### compaction_target_size
+
+File sizes for L0 delta and L1 image layers. Default is 128MB.

 #### gc_horizon

@@ -85,6 +88,14 @@ away.

 Interval at which garbage collection is triggered. Default is 100 s.

+#### image_creation_threshold
+
+L0 delta layer threshold for L1 image layer creation. Default is 3.
+
+#### pitr_interval
+
+WAL retention duration for PITR branching. Default is 30 days.
+
 #### initial_superuser_name

 Name of the initial superuser role, passed to initdb when a new tenant
@@ -151,13 +162,12 @@ bucket_region = 'eu-north-1'
 # Optional, pageserver uses entire bucket if the prefix is not specified.
 prefix_in_bucket = '/some/prefix/'

-# Access key to connect to the bucket ("login" part of the credentials)
-access_key_id = 'SOMEKEYAAAAASADSAH*#'
-
-# Secret access key to connect to the bucket ("password" part of the credentials)
-secret_access_key = 'SOMEsEcReTsd292v'
+# S3 API query limit to avoid getting errors/throttling from AWS.
+concurrency_limit = 100
 ```

+If IAM-based bucket access is not used for the remote storage, set the access credentials via the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables.
+
 ###### General remote storage configuration

 Pageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
@@ -167,14 +177,13 @@ Besides, there are parameters common for all types of remote storage that can be

 ```toml
 [remote_storage]
-# Max number of concurrent connections to open for uploading to or downloading from the remote storage.
-max_concurrent_sync = 100
+# Max number of timelines synchronized (layers uploaded or downloaded) with the remote storage at the same time.
+max_concurrent_syncs = 50

 # Max number of errors a single task can have before it's considered failed and not attempted to run anymore.
 max_sync_errors = 10
 ```

-
 ## safekeeper

 TODO
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -28,12 +28,7 @@ The pageserver has a few different duties:
 - Receive WAL from the WAL service and decode it.
 - Replay WAL that's applicable to the chunks that the Page Server maintains

-For more detailed info, see `/pageserver/README`
-
-`/postgres_ffi`:
-
-Utility functions for interacting with PostgreSQL file formats.
-Misc constants, copied from PostgreSQL headers.
+For more detailed info, see [/pageserver/README](/pageserver/README.md)

 `/proxy`:

@@ -57,12 +52,12 @@ PostgreSQL extension that implements storage manager API and network communicati

 PostgreSQL extension that contains functions needed for testing and debugging.

-`/walkeeper`:
+`/safekeeper`:

 The zenith WAL service that receives WAL from primary compute nodes and streams it to the pageserver.
 It acts as a holding area and redistribution center for recently generated WAL.

-For more detailed info, see `/walkeeper/README`
+For more detailed info, see [/safekeeper/README](/safekeeper/README.md)

 `/workspace_hack`:
 The workspace_hack crate exists only to pin down some dependencies.
@@ -74,14 +69,21 @@ We use [cargo-hakari](https://crates.io/crates/cargo-hakari) for automation.
 Main entry point for the 'zenith' CLI utility.
 TODO: Doesn't it belong to control_plane?

-`/zenith_metrics`:
+`/libs`:
+Unites granular neon helper crates under the hood.

+`/libs/postgres_ffi`:
+
+Utility functions for interacting with PostgreSQL file formats.
+Misc constants, copied from PostgreSQL headers.
+
+`/libs/utils`:
+Generic helpers that are shared between other crates in this repository.
+A subject for future modularization.
+
+`/libs/metrics`:
 Helpers for exposing Prometheus metrics from the server.

-`/zenith_utils`:
-
-Helpers that are shared between other crates in this repository.
-
 ## Using Python
 Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
 so manual installation of dependencies is not recommended.
--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+ name = "etcd_broker"
+ version = "0.1.0"
+ edition = "2021"
+
+ [dependencies]
+ etcd-client = "0.9.0"
+ regex = "1.4.5"
+ serde = { version = "1.0", features = ["derive"] }
+ serde_json = "1"
+ serde_with = "1.12.0"
+
+ utils = { path = "../utils" }
+ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
+ tokio = "1"
+ tracing = "0.1"
+ thiserror = "1"
--- a/libs/etcd_broker/src/lib.rs
+++ b/libs/etcd_broker/src/lib.rs
@@ -0,0 +1,344 @@
+//! A set of primitives to access shared data and updates propagated via the etcd broker (not persistent).
+//! Intended to connect services to each other, not to store their data.
+use std::{
+    collections::{hash_map, HashMap},
+    fmt::Display,
+    str::FromStr,
+};
+
+use regex::{Captures, Regex};
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+
+pub use etcd_client::*;
+
+use tokio::{sync::mpsc, task::JoinHandle};
+use tracing::*;
+use utils::{
+    lsn::Lsn,
+    zid::{ZNodeId, ZTenantId, ZTenantTimelineId},
+};
+
+/// One safekeeper's data about a timeline, as parsed from a single etcd key-value pair:
+/// the safekeeper id is taken from the key, the info is deserialized from the JSON value.
+#[derive(Debug, Deserialize, Serialize)]
+struct SafekeeperTimeline {
+    /// Id of the safekeeper that the key belongs to.
+    safekeeper_id: ZNodeId,
+    /// Timeline data stored in the value of that key.
+    info: SkTimelineInfo,
+}
+
+/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
+///
+/// LSN fields are (de)serialized through their `Display`/`FromStr` representation (`DisplayFromStr`).
+/// Fields marked `#[serde(default)]` fall back to `None` when they are missing from the input.
+#[serde_as]
+#[derive(Debug, Deserialize, Serialize)]
+pub struct SkTimelineInfo {
+    /// Term of the last entry.
+    pub last_log_term: Option<u64>,
+    /// LSN of the last record.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub flush_lsn: Option<Lsn>,
+    /// Up to which LSN safekeeper regards its WAL as committed.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub commit_lsn: Option<Lsn>,
+    /// LSN up to which safekeeper offloaded WAL to s3.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub s3_wal_lsn: Option<Lsn>,
+    /// LSN of last checkpoint uploaded by pageserver.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub remote_consistent_lsn: Option<Lsn>,
+    /// NOTE(review): meaning is not visible in this file; presumably the LSN below which
+    /// no peer safekeeper needs WAL anymore. Confirm against the safekeeper code.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub peer_horizon_lsn: Option<Lsn>,
+    /// NOTE(review): presumably the connection string other services use to reach
+    /// the publishing safekeeper. Confirm against the publisher.
+    #[serde(default)]
+    pub safekeeper_connection_string: Option<String>,
+}
+
+/// Errors returned by the broker primitives of this module.
+#[derive(Debug, thiserror::Error)]
+pub enum BrokerError {
+    /// An error reported by the etcd client, paired with a description of what was being done.
+    #[error("Etcd client error: {0}. Context: {1}")]
+    EtcdClient(etcd_client::Error, String),
+    /// An etcd key or value could not be parsed into the expected data.
+    #[error("Error during parsing etcd data: {0}")]
+    ParsingError(String),
+    /// A failure inside the subscription machinery itself, e.g. reading from the watch stream
+    /// or joining the background watcher task.
+    #[error("Internal error: {0}")]
+    InternalError(String),
+}
+
+/// A way to control the data retrieval from a certain subscription.
+pub struct SkTimelineSubscription {
+    /// Receiving end of the channel that the background watcher task sends batches of updates into.
+    safekeeper_timeline_updates:
+        mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>>,
+    /// What this subscription watches; used in error messages.
+    kind: SkTimelineSubscriptionKind,
+    /// Handle of the spawned watcher task, joined on cancellation.
+    watcher_handle: JoinHandle<Result<(), BrokerError>>,
+    /// etcd watcher, used to cancel the watch.
+    watcher: Watcher,
+}
+
+impl SkTimelineSubscription {
+    /// Waits for the next batch of updates sent by the background watcher task.
+    /// Resolves to `None` once the sending side of the channel is gone.
+    pub async fn fetch_data(
+        &mut self,
+    ) -> Option<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>> {
+        let next_batch = self.safekeeper_timeline_updates.recv().await;
+        next_batch
+    }
+
+    /// Cancels the etcd watch first, then joins the background watcher task
+    /// and returns whatever result that task finished with.
+    pub async fn cancel(mut self) -> Result<(), BrokerError> {
+        if let Err(e) = self.watcher.cancel().await {
+            return Err(BrokerError::EtcdClient(
+                e,
+                format!(
+                    "Failed to cancel timeline subscription, kind: {:?}",
+                    self.kind
+                ),
+            ));
+        }
+
+        match self.watcher_handle.await {
+            // The task was joined: propagate its own result as is.
+            Ok(watcher_result) => watcher_result,
+            Err(e) => Err(BrokerError::InternalError(format!(
+                "Failed to join the timeline updates task, kind: {:?}, error: {e}",
+                self.kind
+            ))),
+        }
+    }
+}
+
+/// The subscription kind to the timeline updates from safekeeper.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SkTimelineSubscriptionKind {
+    /// Leading part of every etcd key the subscription deals with.
+    broker_prefix: String,
+    /// Which subset of timelines to get the updates for.
+    kind: SubscriptionKind,
+}
+
+impl SkTimelineSubscriptionKind {
+    /// Subscription to the updates of every timeline stored under `broker_prefix`.
+    pub fn all(broker_prefix: String) -> Self {
+        Self {
+            broker_prefix,
+            kind: SubscriptionKind::All,
+        }
+    }
+
+    /// Subscription to the updates of all timelines of a single tenant.
+    pub fn tenant(broker_prefix: String, tenant: ZTenantId) -> Self {
+        Self {
+            broker_prefix,
+            kind: SubscriptionKind::Tenant(tenant),
+        }
+    }
+
+    /// Subscription to the updates of a single timeline.
+    pub fn timeline(broker_prefix: String, timeline: ZTenantTimelineId) -> Self {
+        Self {
+            broker_prefix,
+            kind: SubscriptionKind::Timeline(timeline),
+        }
+    }
+
+    /// Builds the regex that matches the full etcd keys of this subscription.
+    ///
+    /// Expected key layout: `<broker_prefix>/<tenant_id>/<timeline_id>/safekeeper/<safekeeper_id>`.
+    /// Only the parts that are not fixed by the subscription kind are captured, in key order:
+    /// tenant id and timeline id (hex digits), then safekeeper id (decimal digits).
+    fn watch_regex(&self) -> Regex {
+        // The prefix is plain text, not a pattern: escape it so that any regex
+        // metacharacters it contains are matched literally.
+        let prefix = regex::escape(&self.broker_prefix);
+        // Safekeeper ids are captured with `+`: an id may consist of more than one digit.
+        match self.kind {
+            SubscriptionKind::All => Regex::new(&format!(
+                r"^{prefix}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]]+)$"
+            ))
+            .expect("wrong regex for 'everything' subscription"),
+            SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
+                r"^{prefix}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]]+)$"
+            ))
+            .expect("wrong regex for 'tenant' subscription"),
+            SubscriptionKind::Timeline(ZTenantTimelineId {
+                tenant_id,
+                timeline_id,
+            }) => Regex::new(&format!(
+                r"^{prefix}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]]+)$"
+            ))
+            .expect("wrong regex for 'timeline' subscription"),
+        }
+    }
+
+    /// Etcd key to use for watching a certain timeline updates from safekeepers.
+    ///
+    /// The key is used as a prefix for the watch, so it has to be a leading part
+    /// of every full key that `watch_regex` is able to match.
+    pub fn watch_key(&self) -> String {
+        match self.kind {
+            SubscriptionKind::All => self.broker_prefix.to_string(),
+            SubscriptionKind::Tenant(tenant_id) => {
+                // The timeline id sits between the tenant id and the `safekeeper` segment,
+                // so the watched prefix must end at the tenant id to cover all its timelines.
+                format!("{}/{tenant_id}", self.broker_prefix)
+            }
+            SubscriptionKind::Timeline(ZTenantTimelineId {
+                tenant_id,
+                timeline_id,
+            }) => format!(
+                "{}/{tenant_id}/{timeline_id}/safekeeper",
+                self.broker_prefix
+            ),
+        }
+    }
+}
+
+/// Which timelines a subscription receives the updates for.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+enum SubscriptionKind {
+    /// Get every timeline update.
+    All,
+    /// Get certain tenant timelines' updates.
+    Tenant(ZTenantId),
+    /// Get certain timeline updates.
+    Timeline(ZTenantTimelineId),
+}
+
+/// Creates a background task to poll etcd for timeline updates from safekeepers.
+/// Stops and returns `Err` on any error during etcd communication.
+/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
+/// exiting normally in such cases.
+///
+/// Every watch response produces one message in the subscription channel: for each timeline,
+/// the newest data found in that response for each of its safekeepers.
+/// Only `Put` events are considered. Key-value pairs that do not match the subscription regex are skipped,
+/// the ones that fail to parse are logged and skipped.
+pub async fn subscribe_to_safekeeper_timeline_updates(
+    client: &mut Client,
+    subscription: SkTimelineSubscriptionKind,
+) -> Result<SkTimelineSubscription, BrokerError> {
+    info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
+
+    let (watcher, mut stream) = client
+        .watch(
+            subscription.watch_key(),
+            Some(WatchOptions::new().with_prefix()),
+        )
+        .await
+        .map_err(|e| {
+            BrokerError::EtcdClient(
+                e,
+                format!("Failed to init the watch for subscription {subscription:?}"),
+            )
+        })?;
+
+    let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
+
+    let subscription_kind = subscription.kind;
+    let regex = subscription.watch_regex();
+    let watcher_handle = tokio::spawn(async move {
+        while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
+            "Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
+        )))? {
+            if resp.canceled() {
+                info!("Watch for timeline updates subscription was canceled, exiting");
+                break;
+            }
+
+            // Keep track that the timeline data updates from etcd arrive in the right order.
+            // https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
+            // > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
+            //
+            // Every safekeeper publishes a timeline under its own etcd key, and a key version is only
+            // comparable with other versions of the same key. Hence the version is stored next to the data
+            // it arrived with, per (timeline, safekeeper) pair, and is never compared across safekeepers.
+            let mut versioned_updates: HashMap<ZTenantTimelineId, HashMap<ZNodeId, (i64, SkTimelineInfo)>> = HashMap::new();
+
+            let events = resp.events();
+            debug!("Processing {} events", events.len());
+
+            for event in events {
+                if EventType::Put != event.event_type() {
+                    continue;
+                }
+                let new_etcd_kv = match event.kv() {
+                    Some(kv) => kv,
+                    None => continue,
+                };
+                let new_kv_version = new_etcd_kv.version();
+
+                match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
+                    Ok(Some((zttid, timeline))) => {
+                        match versioned_updates
+                            .entry(zttid)
+                            .or_default()
+                            .entry(timeline.safekeeper_id)
+                        {
+                            hash_map::Entry::Occupied(mut o) => {
+                                // Replace the data only with a newer version of the same key.
+                                if o.get().0 < new_kv_version {
+                                    o.insert((new_kv_version, timeline.info));
+                                }
+                            }
+                            hash_map::Entry::Vacant(v) => {
+                                v.insert((new_kv_version, timeline.info));
+                            }
+                        }
+                    }
+                    // The key does not belong to this subscription.
+                    Ok(None) => {}
+                    Err(e) => error!("Failed to parse timeline update: {e}"),
+                };
+            }
+
+            // The versions were needed for ordering only, subscribers get the bare data.
+            let timeline_updates: HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>> = versioned_updates
+                .into_iter()
+                .map(|(zttid, safekeeper_updates)| {
+                    let safekeeper_updates = safekeeper_updates
+                        .into_iter()
+                        .map(|(safekeeper_id, (_, info))| (safekeeper_id, info))
+                        .collect::<HashMap<_, _>>();
+                    (zttid, safekeeper_updates)
+                })
+                .collect();
+
+            if let Err(e) = timeline_updates_sender.send(timeline_updates) {
+                info!("Timeline updates sender got dropped, exiting: {e}");
+                break;
+            }
+        }
+
+        Ok(())
+    });
+
+    Ok(SkTimelineSubscription {
+        kind: subscription,
+        safekeeper_timeline_updates,
+        watcher_handle,
+        watcher,
+    })
+}
+
+/// Turns a single etcd key-value pair into the timeline id and the safekeeper data published for it.
+///
+/// Returns `Ok(None)` if the key does not match `regex`.
+/// Returns `Err` if the key or the value cannot be represented as a string,
+/// if an id captured from the key fails to parse, or if the value is not valid JSON for `SkTimelineInfo`.
+///
+/// The capture indices depend on `subscription_kind`: ids that are fixed by the subscription
+/// are not captured by the regex and are taken from the subscription itself.
+fn parse_etcd_key_value(
+    subscription_kind: SubscriptionKind,
+    regex: &Regex,
+    kv: &KeyValue,
+) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
+    let key_str = kv.key_str().map_err(|e| {
+        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
+    })?;
+    let caps = match regex.captures(key_str) {
+        Some(caps) => caps,
+        None => return Ok(None),
+    };
+
+    let (zttid, safekeeper_id) = match subscription_kind {
+        SubscriptionKind::All => {
+            let tenant_id = parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?;
+            let timeline_id = parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?;
+            let node_id = parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?;
+            (ZTenantTimelineId::new(tenant_id, timeline_id), ZNodeId(node_id))
+        }
+        SubscriptionKind::Tenant(tenant_id) => {
+            let timeline_id = parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?;
+            let node_id = parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?;
+            (ZTenantTimelineId::new(tenant_id, timeline_id), ZNodeId(node_id))
+        }
+        SubscriptionKind::Timeline(zttid) => {
+            let node_id = parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?;
+            (zttid, ZNodeId(node_id))
+        }
+    };
+
+    let info_str = kv.value_str().map_err(|e| {
+        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
+    })?;
+    let info = serde_json::from_str(info_str).map_err(|e| {
+        BrokerError::ParsingError(format!(
+            "Failed to parse '{info_str}' as safekeeper timeline info: {e}"
+        ))
+    })?;
+
+    Ok(Some((
+        zttid,
+        SafekeeperTimeline {
+            safekeeper_id,
+            info,
+        },
+    )))
+}
+
+/// Parses the capture group number `index` of `caps` into `T`.
+///
+/// Returns a human-readable error message if there is no such capture group match
+/// or if its text fails to parse as `T`.
+fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
+where
+    T: FromStr,
+    <T as FromStr>::Err: Display,
+{
+    let capture_match = match caps.get(index) {
+        Some(found) => found.as_str(),
+        None => return Err(format!("Failed to get capture match at index {index}")),
+    };
+
+    match capture_match.parse::<T>() {
+        Ok(parsed) => Ok(parsed),
+        Err(e) => Err(format!(
+            "Failed to parse {} from {capture_match}: {e}",
+            std::any::type_name::<T>()
+        )),
+    }
+}
--- a/zenith_metrics/Cargo.toml
+++ b/zenith_metrics/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "zenith_metrics"
+name = "metrics"
 version = "0.1.0"
 edition = "2021"

@@ -8,4 +8,4 @@ prometheus = {version = "0.13", default_features=false} # removes protobuf depen
 libc = "0.2"
 lazy_static = "1.4"
 once_cell = "1.8.0"
-workspace_hack = { version = "0.1", path = "../workspace_hack" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/zenith_metrics/src/lib.rs
+++ b/zenith_metrics/src/lib.rs
--- a/zenith_metrics/src/wrappers.rs
+++ b/zenith_metrics/src/wrappers.rs
@@ -8,8 +8,8 @@ use std::io::{Read, Result, Write};
 ///
 /// ```
 /// # use std::io::{Result, Read};
-/// # use zenith_metrics::{register_int_counter, IntCounter};
-/// # use zenith_metrics::CountedReader;
+/// # use metrics::{register_int_counter, IntCounter};
+/// # use metrics::CountedReader;
 /// #
 /// # lazy_static::lazy_static! {
 /// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
@@ -83,8 +83,8 @@ impl<T: Read> Read for CountedReader<'_, T> {
 ///
 /// ```
 /// # use std::io::{Result, Write};
-/// # use zenith_metrics::{register_int_counter, IntCounter};
-/// # use zenith_metrics::CountedWriter;
+/// # use metrics::{register_int_counter, IntCounter};
+/// # use metrics::CountedWriter;
 /// #
 /// # lazy_static::lazy_static! {
 /// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -17,8 +17,8 @@ log = "0.4.14"
 memoffset = "0.6.2"
 thiserror = "1.0"
 serde = { version = "1.0", features = ["derive"] }
-zenith_utils = { path = "../zenith_utils" }
-workspace_hack = { version = "0.1", path = "../workspace_hack" }
+utils = { path = "../utils" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }

 [build-dependencies]
 bindgen = "0.59.1"
--- a/libs/postgres_ffi/README.md
+++ b/libs/postgres_ffi/README.md
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -88,8 +88,8 @@ fn main() {
        // 'pg_config --includedir-server' would perhaps be the more proper way to find it,
        // but this will do for now.
        //
-        .clang_arg("-I../tmp_install/include/server")
-        .clang_arg("-I../tmp_install/include/postgresql/server")
+        .clang_arg("-I../../tmp_install/include/server")
+        .clang_arg("-I../../tmp_install/include/postgresql/server")
        //
        // Finish the builder and generate the bindings.
        //
--- a/libs/postgres_ffi/pg_control_ffi.h
+++ b/libs/postgres_ffi/pg_control_ffi.h
--- a/libs/postgres_ffi/samples/pg_hba.conf
+++ b/libs/postgres_ffi/samples/pg_hba.conf
--- a/libs/postgres_ffi/src/controlfile_utils.rs
+++ b/libs/postgres_ffi/src/controlfile_utils.rs
@@ -43,7 +43,7 @@ impl ControlFileData {
    /// Interpret a slice of bytes as a Postgres control file.
    ///
    pub fn decode(buf: &[u8]) -> Result<ControlFileData> {
-        use zenith_utils::bin_ser::LeSer;
+        use utils::bin_ser::LeSer;

        // Check that the slice has the expected size. The control file is
        // padded with zeros up to a 512 byte sector size, so accept a
@@ -77,7 +77,7 @@ impl ControlFileData {
    ///
    /// The CRC is recomputed to match the contents of the fields.
    pub fn encode(&self) -> Bytes {
-        use zenith_utils::bin_ser::LeSer;
+        use utils::bin_ser::LeSer;

        // Serialize into a new buffer.
        let b = self.ser().unwrap();
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -8,6 +8,7 @@
 #![allow(deref_nullptr)]

 use serde::{Deserialize, Serialize};
+use utils::lsn::Lsn;

 include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

@@ -37,3 +38,21 @@ pub const fn transaction_id_precedes(id1: TransactionId, id2: TransactionId) ->
    let diff = id1.wrapping_sub(id2) as i32;
    diff < 0
 }
+
+// Check whether the page has never been initialized (port of the Postgres PageIsNew() macro):
+// both bytes at offsets 14 and 15 of the page header (pd_upper) must be zero.
+pub fn page_is_new(pg: &[u8]) -> bool {
+    if pg[14] != 0 {
+        return false;
+    }
+    pg[15] == 0
+}
+
+// Extract the LSN from the page header: bytes 0..4 hold the high 32 bits,
+// bytes 4..8 hold the low 32 bits, each stored as a little-endian u32.
+pub fn page_get_lsn(pg: &[u8]) -> Lsn {
+    let high_half = u32::from_le_bytes(pg[0..4].try_into().unwrap());
+    let low_half = u32::from_le_bytes(pg[4..8].try_into().unwrap());
+    Lsn((u64::from(high_half) << 32) | u64::from(low_half))
+}
+
+// Store the LSN into the page header: the high 32 bits go to bytes 0..4,
+// the low 32 bits go to bytes 4..8, each written as a little-endian u32.
+pub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {
+    let high_half = (lsn.0 >> 32) as u32;
+    let low_half = lsn.0 as u32;
+    pg[0..4].copy_from_slice(&high_half.to_le_bytes());
+    pg[4..8].copy_from_slice(&low_half.to_le_bytes());
+}
--- a/libs/postgres_ffi/src/nonrelfile_utils.rs
+++ b/libs/postgres_ffi/src/nonrelfile_utils.rs
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
--- a/libs/postgres_ffi/src/relfile_utils.rs
+++ b/libs/postgres_ffi/src/relfile_utils.rs
--- a/libs/postgres_ffi/src/waldecoder.rs
+++ b/libs/postgres_ffi/src/waldecoder.rs
@@ -4,7 +4,7 @@
 //! This understands the WAL page and record format, enough to figure out where the WAL record
 //! boundaries are, and to reassemble WAL records that cross page boundaries.
 //!
-//! This functionality is needed by both the pageserver and the walkeepers. The pageserver needs
+//! This functionality is needed by both the pageserver and the safekeepers. The pageserver needs
 //! to look deeper into the WAL records to also understand which blocks they modify, the code
 //! for that is in pageserver/src/walrecord.rs
 //!
@@ -18,7 +18,7 @@ use crc32c::*;
 use log::*;
 use std::cmp::min;
 use thiserror::Error;
-use zenith_utils::lsn::Lsn;
+use utils::lsn::Lsn;

 pub struct WalStreamDecoder {
    lsn: Lsn,
@@ -89,7 +89,12 @@ impl WalStreamDecoder {
                    return Ok(None);
                }

-                let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf);
+                let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
+                    WalDecodeError {
+                        msg: format!("long header deserialization failed {}", e),
+                        lsn: self.lsn,
+                    }
+                })?;

                if hdr.std.xlp_pageaddr != self.lsn.0 {
                    return Err(WalDecodeError {
@@ -106,7 +111,12 @@ impl WalStreamDecoder {
                    return Ok(None);
                }

-                let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf);
+                let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
+                    WalDecodeError {
+                        msg: format!("header deserialization failed {}", e),
+                        lsn: self.lsn,
+                    }
+                })?;

                if hdr.xlp_pageaddr != self.lsn.0 {
                    return Err(WalDecodeError {
@@ -188,7 +198,13 @@ impl WalStreamDecoder {
        }

        // We now have a record in the 'recordbuf' local variable.
-        let xlogrec = XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]);
+        let xlogrec =
+            XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {
+                WalDecodeError {
+                    msg: format!("xlog record deserialization failed {}", e),
+                    lsn: self.lsn,
+                }
+            })?;

        let mut crc = 0;
        crc = crc32c_append(crc, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -15,7 +15,7 @@ use crate::XLogPageHeaderData;
 use crate::XLogRecord;
 use crate::XLOG_PAGE_MAGIC;

-use anyhow::{bail, Result};
+use anyhow::bail;
 use byteorder::{ByteOrder, LittleEndian};
 use bytes::BytesMut;
 use bytes::{Buf, Bytes};
@@ -28,7 +28,9 @@ use std::io::prelude::*;
 use std::io::SeekFrom;
 use std::path::{Path, PathBuf};
 use std::time::SystemTime;
-use zenith_utils::lsn::Lsn;
+use utils::bin_ser::DeserializeError;
+use utils::bin_ser::SerializeError;
+use utils::lsn::Lsn;

 pub const XLOG_FNAME_LEN: usize = 24;
 pub const XLOG_BLCKSZ: usize = 8192;
@@ -118,11 +120,15 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
 }

 pub fn get_current_timestamp() -> TimestampTz {
+    to_pg_timestamp(SystemTime::now())
+}
+
+pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
    const UNIX_EPOCH_JDATE: u64 = 2440588; /* == date2j(1970, 1, 1) */
    const POSTGRES_EPOCH_JDATE: u64 = 2451545; /* == date2j(2000, 1, 1) */
    const SECS_PER_DAY: u64 = 86400;
    const USECS_PER_SEC: u64 = 1000000;
-    match SystemTime::now().duration_since(SystemTime::UNIX_EPOCH) {
+    match time.duration_since(SystemTime::UNIX_EPOCH) {
        Ok(n) => {
            ((n.as_secs() - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY))
                * USECS_PER_SEC
@@ -140,7 +146,7 @@ fn find_end_of_wal_segment(
    tli: TimeLineID,
    wal_seg_size: usize,
    start_offset: usize, // start reading at this point
-) -> Result<u32> {
+) -> anyhow::Result<u32> {
    // step back to the beginning of the page to read it in...
    let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
    let mut contlen: usize = 0;
@@ -268,7 +274,7 @@ pub fn find_end_of_wal(
    wal_seg_size: usize,
    precise: bool,
    start_lsn: Lsn, // start reading WAL at this point or later
-) -> Result<(XLogRecPtr, TimeLineID)> {
+) -> anyhow::Result<(XLogRecPtr, TimeLineID)> {
    let mut high_segno: XLogSegNo = 0;
    let mut high_tli: TimeLineID = 0;
    let mut high_ispartial = false;
@@ -350,19 +356,19 @@ pub fn main() {
 }

 impl XLogRecord {
-    pub fn from_slice(buf: &[u8]) -> XLogRecord {
-        use zenith_utils::bin_ser::LeSer;
-        XLogRecord::des(buf).unwrap()
+    pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {
+        use utils::bin_ser::LeSer;
+        XLogRecord::des(buf)
    }

-    pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogRecord {
-        use zenith_utils::bin_ser::LeSer;
-        XLogRecord::des_from(&mut buf.reader()).unwrap()
+    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogRecord, DeserializeError> {
+        use utils::bin_ser::LeSer;
+        XLogRecord::des_from(&mut buf.reader())
    }

-    pub fn encode(&self) -> Bytes {
-        use zenith_utils::bin_ser::LeSer;
-        self.ser().unwrap().into()
+    pub fn encode(&self) -> Result<Bytes, SerializeError> {
+        use utils::bin_ser::LeSer;
+        Ok(self.ser()?.into())
    }

    // Is this record an XLOG_SWITCH record? They need some special processing,
@@ -372,35 +378,35 @@ impl XLogRecord {
 }

 impl XLogPageHeaderData {
-    pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogPageHeaderData {
-        use zenith_utils::bin_ser::LeSer;
-        XLogPageHeaderData::des_from(&mut buf.reader()).unwrap()
+    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogPageHeaderData, DeserializeError> {
+        use utils::bin_ser::LeSer;
+        XLogPageHeaderData::des_from(&mut buf.reader())
    }
 }

 impl XLogLongPageHeaderData {
-    pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogLongPageHeaderData {
-        use zenith_utils::bin_ser::LeSer;
-        XLogLongPageHeaderData::des_from(&mut buf.reader()).unwrap()
+    pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogLongPageHeaderData, DeserializeError> {
+        use utils::bin_ser::LeSer;
+        XLogLongPageHeaderData::des_from(&mut buf.reader())
    }

-    pub fn encode(&self) -> Bytes {
-        use zenith_utils::bin_ser::LeSer;
-        self.ser().unwrap().into()
+    pub fn encode(&self) -> Result<Bytes, SerializeError> {
+        use utils::bin_ser::LeSer;
+        self.ser().map(|b| b.into())
    }
 }

 pub const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();

 impl CheckPoint {
-    pub fn encode(&self) -> Bytes {
-        use zenith_utils::bin_ser::LeSer;
-        self.ser().unwrap().into()
+    pub fn encode(&self) -> Result<Bytes, SerializeError> {
+        use utils::bin_ser::LeSer;
+        Ok(self.ser()?.into())
    }

-    pub fn decode(buf: &[u8]) -> Result<CheckPoint, anyhow::Error> {
-        use zenith_utils::bin_ser::LeSer;
-        Ok(CheckPoint::des(buf)?)
+    pub fn decode(buf: &[u8]) -> Result<CheckPoint, DeserializeError> {
+        use utils::bin_ser::LeSer;
+        CheckPoint::des(buf)
    }

    /// Update next XID based on provided new_xid and stored epoch.
@@ -438,7 +444,7 @@ impl CheckPoint {
 // Generate new, empty WAL segment.
 // We need this segment to start compute node.
 //
-pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
+pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, SerializeError> {
    let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);

    let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE);
@@ -458,12 +464,12 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
        xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
    };

-    let hdr_bytes = hdr.encode();
+    let hdr_bytes = hdr.encode()?;
    seg_buf.extend_from_slice(&hdr_bytes);

    //zero out the rest of the file
    seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
-    seg_buf.freeze()
+    Ok(seg_buf.freeze())
 }

 #[cfg(test)]
@@ -477,7 +483,9 @@ mod tests {
    #[test]
    pub fn test_find_end_of_wal() {
        // 1. Run initdb to generate some WAL
-        let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("..");
+        let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("..")
+            .join("..");
        let data_dir = top_path.join("test_output/test_find_end_of_wal");
        let initdb_path = top_path.join("tmp_install/bin/initdb");
        let lib_path = top_path.join("tmp_install/lib");
@@ -495,7 +503,13 @@ mod tests {
            .env("DYLD_LIBRARY_PATH", &lib_path)
            .output()
            .unwrap();
-        assert!(initdb_output.status.success());
+        assert!(
+            initdb_output.status.success(),
+            "initdb failed. Status: '{}', stdout: '{}', stderr: '{}'",
+            initdb_output.status,
+            String::from_utf8_lossy(&initdb_output.stdout),
+            String::from_utf8_lossy(&initdb_output.stderr),
+        );

        // 2. Pick WAL generated by initdb
        let wal_dir = data_dir.join("pg_wal");
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "remote_storage"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { version = "1.0", features = ["backtrace"] }
+tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
+tokio-util = { version = "0.7", features = ["io"] }
+tracing = "0.1.27"
+rusoto_core = "0.48"
+rusoto_s3 = "0.48"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1"
+async-trait = "0.1"
+
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
+
+[dev-dependencies]
+tempfile = "3.2"
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -0,0 +1,232 @@
+//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.
+//! No other modules from this tree are supposed to be used directly by the external code.
+//!
+//! [`RemoteStorage`] trait is a CRUD-like generic abstraction to use for adapting external storages with a few implementations:
+//!   * [`local_fs`] allows to use local file system as an external storage
+//!   * [`s3_bucket`] uses AWS S3 bucket as an external storage
+//!
+mod local_fs;
+mod s3_bucket;
+
+use std::{
+    borrow::Cow,
+    collections::HashMap,
+    ffi::OsStr,
+    num::{NonZeroU32, NonZeroUsize},
+    path::{Path, PathBuf},
+};
+
+use anyhow::Context;
+use tokio::io;
+use tracing::info;
+
+pub use self::{
+    local_fs::LocalFs,
+    s3_bucket::{S3Bucket, S3ObjectKey},
+};
+
+/// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
+/// During regular work, pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
+/// during start (where local and remote timelines are compared and initial sync tasks are scheduled) and timeline attach.
+/// Both cases may trigger timeline download, that might download a lot of layers. This concurrency is limited by the clients internally, if needed.
+pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS: usize = 50;
+pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
+/// Currently, sync happens with AWS S3, that has two limits on requests per second:
+/// * ~200 RPS for IAM services
+///   <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
+/// * ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
+///   <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
+pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
+
+/// Storage (potentially remote) API to manage its state.
+/// This storage tries to be unaware of any layered repository context,
+/// providing basic CRUD operations for storage files.
+#[async_trait::async_trait]
+pub trait RemoteStorage: Send + Sync {
+    /// A way to uniquely reference a file in the remote storage.
+    type RemoteObjectId;
+
+    /// Attempts to derive the storage path out of the local path, if the latter is correct.
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
+
+    /// Gets the download path of the given storage file.
+    fn local_path(&self, remote_object_id: &Self::RemoteObjectId) -> anyhow::Result<PathBuf>;
+
+    /// Lists all items the storage has right now.
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
+
+    /// Streams the local file contents into the remote storage entry, optionally attaching metadata to it.
+    async fn upload(
+        &self,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        // S3 PUT request requires the content length to be specified,
+        // otherwise it starts to fail with the concurrent connection count increasing.
+        from_size_bytes: usize,
+        to: &Self::RemoteObjectId,
+        metadata: Option<StorageMetadata>,
+    ) -> anyhow::Result<()>;
+
+    /// Streams the remote storage entry contents into the given writer.
+    /// Returns the metadata, if any was stored with the file previously.
+    async fn download(
+        &self,
+        from: &Self::RemoteObjectId,
+        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
+    ) -> anyhow::Result<Option<StorageMetadata>>;
+
+    /// Streams a given byte range of the remote storage entry contents into the given writer.
+    /// Returns the metadata, if any was stored with the file previously.
+    async fn download_byte_range(
+        &self,
+        from: &Self::RemoteObjectId,
+        start_inclusive: u64,
+        end_exclusive: Option<u64>,
+        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
+    ) -> anyhow::Result<Option<StorageMetadata>>;
+
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
+}
+
+/// Every kind of remote storage client that [`GenericRemoteStorage::new`] can create from a [`RemoteStorageConfig`].
+pub enum GenericRemoteStorage {
+    Local(LocalFs),
+    S3(S3Bucket),
+}
+
+impl GenericRemoteStorage {
+    pub fn new(
+        working_directory: PathBuf,
+        storage_config: &RemoteStorageConfig,
+    ) -> anyhow::Result<Self> {
+        Ok(match &storage_config.storage {
+            RemoteStorageKind::LocalFs(root) => {
+                info!("Using fs root '{}' as a remote storage", root.display());
+                Self::Local(LocalFs::new(root.clone(), working_directory)?)
+            }
+            RemoteStorageKind::AwsS3(s3_config) => {
+                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
+                    s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
+                Self::S3(S3Bucket::new(s3_config, working_directory)?)
+            }
+        })
+    }
+}
+
+/// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
+/// Immutable, cannot be changed once the file is created: the inner map field is private.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StorageMetadata(HashMap<String, String>);
+
+/// Returns `path` relative to `prefix`; fails if the two are equal or `path` does not start with `prefix`.
+fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
+    // Equal paths would otherwise strip to an empty remainder, so they are rejected up front.
+    if prefix == path {
+        anyhow::bail!("Prefix and the path are equal, cannot strip: '{}'", prefix.display());
+    }
+
+    let describe_mismatch = || {
+        format!(
+            "Path '{}' is not prefixed with '{}'",
+            path.display(),
+            prefix.display(),
+        )
+    };
+    path.strip_prefix(prefix).with_context(describe_mismatch)
+}
+
+/// External backup storage configuration, enough for creating a client for that storage.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RemoteStorageConfig {
+    /// Max allowed number of concurrent sync operations between the API user and the remote storage.
+    pub max_concurrent_syncs: NonZeroUsize,
+    /// Max allowed errors before the sync task is considered failed and evicted.
+    pub max_sync_errors: NonZeroU32,
+    /// The storage connection configuration, see [`RemoteStorageKind`] for the supported storages.
+    pub storage: RemoteStorageKind,
+}
+
+/// A kind of a remote storage to connect to, with its connection configuration.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum RemoteStorageKind {
+    /// Storage based on the local file system, see [`LocalFs`].
+    /// Specify a root folder to place all stored files into.
+    LocalFs(PathBuf),
+    /// AWS S3 based storage, storing all files in the S3 bucket
+    /// specified by the config, see [`S3Bucket`].
+    AwsS3(S3Config),
+}
+
+/// AWS S3 bucket coordinates and client settings used to manage the bucket contents (read and write).
+#[derive(Clone, PartialEq, Eq)]
+pub struct S3Config {
+    /// Name of the bucket to connect to.
+    pub bucket_name: String,
+    /// The region where the bucket is located at.
+    pub bucket_region: String,
+    /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
+    pub prefix_in_bucket: Option<String>,
+    /// A base URL to send S3 requests to.
+    /// By default, the endpoint is derived from a region name, assuming it's an AWS S3 region name,
+    /// erroring on wrong region name. Endpoint provides a way to support other S3 flavors and their regions.
+    ///
+    /// Example: `http://127.0.0.1:5000`
+    pub endpoint: Option<String>,
+    /// AWS S3 has various limits on its API calls that must not be exceeded.
+    /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
+    pub concurrency_limit: NonZeroUsize,
+}
+
+impl std::fmt::Debug for S3Config {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("S3Config")
+            .field("bucket_name", &self.bucket_name)
+            .field("bucket_region", &self.bucket_region)
+            .field("prefix_in_bucket", &self.prefix_in_bucket)
+            .field("endpoint", &self.endpoint)
+            .field("concurrency_limit", &self.concurrency_limit)
+            .finish()
+    }
+}
+
+/// Appends `.{suffix}` to the path, keeping the existing extension (if any) in front of the suffix.
+pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str) -> PathBuf {
+    // Built as an `OsString`: a lossy `str` round-trip would corrupt non-UTF-8 extensions.
+    let new_extension = match original_path.as_ref().extension() {
+        Some(extension) => {
+            let mut extended = extension.to_os_string();
+            extended.push(format!(".{suffix}"));
+            Cow::Owned(extended)
+        }
+        None => Cow::Borrowed(OsStr::new(suffix)),
+    };
+    original_path.as_ref().with_extension(new_extension)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_path_with_suffix_extension() {
+        let p = PathBuf::from("/foo/bar"); // no extension: the suffix becomes the extension
+        assert_eq!(
+            &path_with_suffix_extension(&p, "temp").to_string_lossy(),
+            "/foo/bar.temp"
+        );
+        let p = PathBuf::from("/foo/bar"); // no extension, suffix that itself contains a dot
+        assert_eq!(
+            &path_with_suffix_extension(&p, "temp.temp").to_string_lossy(),
+            "/foo/bar.temp.temp"
+        );
+        let p = PathBuf::from("/foo/bar.baz"); // the existing extension is kept in front of the suffix
+        assert_eq!(
+            &path_with_suffix_extension(&p, "temp.temp").to_string_lossy(),
+            "/foo/bar.baz.temp.temp"
+        );
+        let p = PathBuf::from("/foo/bar.baz"); // a leading dot in the suffix is not stripped, hence the double dot
+        assert_eq!(
+            &path_with_suffix_extension(&p, ".temp").to_string_lossy(),
+            "/foo/bar.baz..temp"
+        );
+    }
+}
--- a/pageserver/src/remote_storage/local_fs.rs
+++ b/pageserver/src/remote_storage/local_fs.rs
@@ -1,11 +1,10 @@
 //! Local filesystem acting as a remote storage.
-//! Multiple pageservers can use the same "storage" of this kind by using different storage roots.
+//! Multiple API users can use the same "storage" of this kind by using different storage roots.
 //!
-//! This storage used in pageserver tests, but can also be used in cases when a certain persistent
+//! This storage is used in tests, but can also be used in cases when a certain persistent
 //! volume is mounted to the local FS.

 use std::{
-    ffi::OsString,
    future::Future,
    path::{Path, PathBuf},
    pin::Pin,
@@ -18,16 +17,18 @@ use tokio::{
 };
 use tracing::*;

-use super::{strip_path_prefix, RemoteStorage};
+use crate::path_with_suffix_extension;
+
+use super::{strip_path_prefix, RemoteStorage, StorageMetadata};

 pub struct LocalFs {
-    pageserver_workdir: &'static Path,
-    root: PathBuf,
+    working_directory: PathBuf,
+    storage_root: PathBuf,
 }

 impl LocalFs {
    /// Attempts to create local FS storage, along with its root directory.
-    pub fn new(root: PathBuf, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
+    pub fn new(root: PathBuf, working_directory: PathBuf) -> anyhow::Result<Self> {
        if !root.exists() {
            std::fs::create_dir_all(&root).with_context(|| {
                format!(
@@ -37,15 +38,15 @@ impl LocalFs {
            })?;
        }
        Ok(Self {
-            pageserver_workdir,
-            root,
+            working_directory,
+            storage_root: root,
        })
    }

    fn resolve_in_storage(&self, path: &Path) -> anyhow::Result<PathBuf> {
        if path.is_relative() {
-            Ok(self.root.join(path))
-        } else if path.starts_with(&self.root) {
+            Ok(self.storage_root.join(path))
+        } else if path.starts_with(&self.storage_root) {
            Ok(path.to_path_buf())
        } else {
            bail!(
@@ -54,46 +55,68 @@ impl LocalFs {
            )
        }
    }
+
+    /// Reads the metadata stored next to `file_path` (at `storage_metadata_path(file_path)`), if there is any.
+    async fn read_storage_metadata(
+        &self,
+        file_path: &Path,
+    ) -> anyhow::Result<Option<StorageMetadata>> {
+        let metadata_path = storage_metadata_path(file_path);
+        // A missing metadata file is not an error: uploads without metadata do not write one.
+        if !(metadata_path.exists() && metadata_path.is_file()) {
+            return Ok(None);
+        }
+
+        let metadata_string = fs::read_to_string(&metadata_path).await.with_context(|| {
+            format!(
+                "Failed to read metadata from the local storage at '{}'",
+                metadata_path.display()
+            )
+        })?;
+        let metadata = serde_json::from_str(&metadata_string).with_context(|| {
+            format!(
+                "Failed to deserialize metadata from the local storage at '{}'",
+                metadata_path.display()
+            )
+        })?;
+        Ok(Some(StorageMetadata(metadata)))
+    }
 }

 #[async_trait::async_trait]
 impl RemoteStorage for LocalFs {
-    type StoragePath = PathBuf;
+    type RemoteObjectId = PathBuf;

-    fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::StoragePath> {
-        Ok(self.root.join(
-            strip_path_prefix(self.pageserver_workdir, local_path)
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId> {
+        Ok(self.storage_root.join(
+            strip_path_prefix(&self.working_directory, local_path)
                .context("local path does not belong to this storage")?,
        ))
    }

-    fn local_path(&self, storage_path: &Self::StoragePath) -> anyhow::Result<PathBuf> {
-        let relative_path = strip_path_prefix(&self.root, storage_path)
+    fn local_path(&self, storage_path: &Self::RemoteObjectId) -> anyhow::Result<PathBuf> {
+        let relative_path = strip_path_prefix(&self.storage_root, storage_path)
            .context("local path does not belong to this storage")?;
-        Ok(self.pageserver_workdir.join(relative_path))
+        Ok(self.working_directory.join(relative_path))
    }

-    async fn list(&self) -> anyhow::Result<Vec<Self::StoragePath>> {
-        get_all_files(&self.root).await
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
+        get_all_files(&self.storage_root).await
    }

    async fn upload(
        &self,
-        mut from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
-        to: &Self::StoragePath,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        from_size_bytes: usize,
+        to: &Self::RemoteObjectId,
+        metadata: Option<StorageMetadata>,
    ) -> anyhow::Result<()> {
        let target_file_path = self.resolve_in_storage(to)?;
        create_target_directory(&target_file_path).await?;
        // We need this dance with sort of durable rename (without fsyncs)
        // to prevent partial uploads. This was really hit when pageserver shutdown
        // cancelled the upload and partial file was left on the fs
-        let mut temp_extension = target_file_path
-            .extension()
-            .unwrap_or_default()
-            .to_os_string();
-
-        temp_extension.push(OsString::from(".temp"));
-        let temp_file_path = target_file_path.with_extension(temp_extension);
+        let temp_file_path = path_with_suffix_extension(&target_file_path, "temp");
        let mut destination = io::BufWriter::new(
            fs::OpenOptions::new()
                .write(true)
@@ -108,7 +131,11 @@ impl RemoteStorage for LocalFs {
                })?,
        );

-        io::copy(&mut from, &mut destination)
+        let from_size_bytes = from_size_bytes as u64;
+        // Require to read 1 byte more than the expected to check later, that the stream and its size match.
+        let mut buffer_to_read = from.take(from_size_bytes + 1);
+
+        let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
            .await
            .with_context(|| {
                format!(
@@ -117,6 +144,19 @@ impl RemoteStorage for LocalFs {
                )
            })?;

+        // The reader is capped at `from_size_bytes + 1`: fewer bytes copied means a short stream, one more a long one.
+        ensure!(
+            bytes_read >= from_size_bytes,
+            "Provided stream has actual size {} that is smaller than the given stream size {}",
+            bytes_read,
+            from_size_bytes
+        );
+        ensure!(
+            bytes_read == from_size_bytes && buffer_to_read.read(&mut [0]).await? == 0,
+            "Provided stream has bigger size than the given stream size {}",
+            from_size_bytes
+        );
+
        destination.flush().await.with_context(|| {
            format!(
                "Failed to upload (flush temp) file to the local storage at '{}'",
@@ -132,14 +172,31 @@ impl RemoteStorage for LocalFs {
                    target_file_path.display()
                )
            })?;
+
+        if let Some(storage_metadata) = metadata {
+            let storage_metadata_path = storage_metadata_path(&target_file_path);
+            fs::write(
+                &storage_metadata_path,
+                serde_json::to_string(&storage_metadata.0)
+                    .context("Failed to serialize storage metadata as json")?,
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "Failed to write metadata to the local storage at '{}'",
+                    storage_metadata_path.display()
+                )
+            })?;
+        }
+
        Ok(())
    }

    async fn download(
        &self,
-        from: &Self::StoragePath,
+        from: &Self::RemoteObjectId,
        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<Option<StorageMetadata>> {
        let file_path = self.resolve_in_storage(from)?;

        if file_path.exists() && file_path.is_file() {
@@ -162,7 +219,8 @@ impl RemoteStorage for LocalFs {
                )
            })?;
            source.flush().await?;
-            Ok(())
+
+            self.read_storage_metadata(&file_path).await
        } else {
            bail!(
                "File '{}' either does not exist or is not a file",
@@ -171,13 +229,13 @@ impl RemoteStorage for LocalFs {
        }
    }

-    async fn download_range(
+    async fn download_byte_range(
        &self,
-        from: &Self::StoragePath,
+        from: &Self::RemoteObjectId,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<Option<StorageMetadata>> {
        if let Some(end_exclusive) = end_exclusive {
            ensure!(
                end_exclusive > start_inclusive,
@@ -186,7 +244,7 @@ impl RemoteStorage for LocalFs {
                end_exclusive
            );
            if start_inclusive == end_exclusive.saturating_sub(1) {
-                return Ok(());
+                return Ok(None);
            }
        }
        let file_path = self.resolve_in_storage(from)?;
@@ -220,7 +278,8 @@ impl RemoteStorage for LocalFs {
                    file_path.display()
                )
            })?;
-            Ok(())
+
+            self.read_storage_metadata(&file_path).await
        } else {
            bail!(
                "File '{}' either does not exist or is not a file",
@@ -229,7 +288,7 @@ impl RemoteStorage for LocalFs {
        }
    }

-    async fn delete(&self, path: &Self::StoragePath) -> anyhow::Result<()> {
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
        let file_path = self.resolve_in_storage(path)?;
        if file_path.exists() && file_path.is_file() {
            Ok(fs::remove_file(file_path).await?)
@@ -242,6 +301,10 @@ impl RemoteStorage for LocalFs {
    }
 }

+fn storage_metadata_path(original_path: &Path) -> PathBuf {
+    path_with_suffix_extension(original_path, "metadata") // e.g. `foo/bar.baz` -> `foo/bar.baz.metadata`
+}
+
 fn get_all_files<'a, P>(
    directory_path: P,
 ) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
@@ -291,29 +354,30 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>

 #[cfg(test)]
 mod pure_tests {
-    use crate::{
-        layered_repository::metadata::METADATA_FILE_NAME,
-        repository::repo_harness::{RepoHarness, TIMELINE_ID},
-    };
+    use tempfile::tempdir;

    use super::*;

    #[test]
    fn storage_path_positive() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("storage_path_positive")?;
+        let workdir = tempdir()?.path().to_owned();
+
        let storage_root = PathBuf::from("somewhere").join("else");
        let storage = LocalFs {
-            pageserver_workdir: &repo_harness.conf.workdir,
-            root: storage_root.clone(),
+            working_directory: workdir.clone(),
+            storage_root: storage_root.clone(),
        };

-        let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("file_name");
-        let expected_path = storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?);
+        let local_path = workdir
+            .join("timelines")
+            .join("some_timeline")
+            .join("file_name");
+        let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);

        assert_eq!(
            expected_path,
-            storage.storage_path(&local_path).expect("Matching path should map to storage path normally"),
-            "File paths from pageserver workdir should be stored in local fs storage with the same path they have relative to the workdir"
+            storage.remote_object_id(&local_path).expect("Matching path should map to storage path normally"),
+            "File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
        );

        Ok(())
@@ -323,7 +387,7 @@ mod pure_tests {
    fn storage_path_negatives() -> anyhow::Result<()> {
        #[track_caller]
        fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
-            match storage.storage_path(mismatching_path) {
+            match storage.remote_object_id(mismatching_path) {
                Ok(wrong_path) => panic!(
                    "Expected path '{}' to error, but got storage path: {:?}",
                    mismatching_path.display(),
@@ -333,16 +397,16 @@ mod pure_tests {
            }
        }

-        let repo_harness = RepoHarness::create("storage_path_negatives")?;
+        let workdir = tempdir()?.path().to_owned();
        let storage_root = PathBuf::from("somewhere").join("else");
        let storage = LocalFs {
-            pageserver_workdir: &repo_harness.conf.workdir,
-            root: storage_root,
+            working_directory: workdir.clone(),
+            storage_root,
        };

-        let error_string = storage_path_error(&storage, &repo_harness.conf.workdir);
+        let error_string = storage_path_error(&storage, &workdir);
        assert!(error_string.contains("does not belong to this storage"));
-        assert!(error_string.contains(repo_harness.conf.workdir.to_str().unwrap()));
+        assert!(error_string.contains(workdir.to_str().unwrap()));

        let mismatching_path_str = "/something/else";
        let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
@@ -351,7 +415,7 @@ mod pure_tests {
            "Error should mention wrong path"
        );
        assert!(
-            error_message.contains(repo_harness.conf.workdir.to_str().unwrap()),
+            error_message.contains(workdir.to_str().unwrap()),
            "Error should mention server workdir"
        );
        assert!(error_message.contains("does not belong to this storage"));
@@ -361,29 +425,28 @@ mod pure_tests {

    #[test]
    fn local_path_positive() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("local_path_positive")?;
+        let workdir = tempdir()?.path().to_owned();
        let storage_root = PathBuf::from("somewhere").join("else");
        let storage = LocalFs {
-            pageserver_workdir: &repo_harness.conf.workdir,
-            root: storage_root.clone(),
+            working_directory: workdir.clone(),
+            storage_root: storage_root.clone(),
        };

        let name = "not a metadata";
-        let local_path = repo_harness.timeline_path(&TIMELINE_ID).join(name);
+        let local_path = workdir.join("timelines").join("some_timeline").join(name);
        assert_eq!(
            local_path,
            storage
-                .local_path(
-                    &storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?)
-                )
+                .local_path(&storage_root.join(local_path.strip_prefix(&workdir)?))
                .expect("For a valid input, valid local path should be parsed"),
            "Should be able to parse metadata out of the correctly named remote delta file"
        );

-        let local_metadata_path = repo_harness
-            .timeline_path(&TIMELINE_ID)
-            .join(METADATA_FILE_NAME);
-        let remote_metadata_path = storage.storage_path(&local_metadata_path)?;
+        let local_metadata_path = workdir
+            .join("timelines")
+            .join("some_timeline")
+            .join("metadata");
+        let remote_metadata_path = storage.remote_object_id(&local_metadata_path)?;
        assert_eq!(
            local_metadata_path,
            storage
@@ -409,11 +472,10 @@ mod pure_tests {
            }
        }

-        let repo_harness = RepoHarness::create("local_path_negatives")?;
        let storage_root = PathBuf::from("somewhere").join("else");
        let storage = LocalFs {
-            pageserver_workdir: &repo_harness.conf.workdir,
-            root: storage_root,
+            working_directory: tempdir()?.path().to_owned(),
+            storage_root,
        };

        let totally_wrong_path = "wrong_wrong_wrong";
@@ -425,16 +487,19 @@ mod pure_tests {

    #[test]
    fn download_destination_matches_original_path() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("download_destination_matches_original_path")?;
-        let original_path = repo_harness.timeline_path(&TIMELINE_ID).join("some name");
+        let workdir = tempdir()?.path().to_owned();
+        let original_path = workdir
+            .join("timelines")
+            .join("some_timeline")
+            .join("some name");

        let storage_root = PathBuf::from("somewhere").join("else");
        let dummy_storage = LocalFs {
-            pageserver_workdir: &repo_harness.conf.workdir,
-            root: storage_root,
+            working_directory: workdir,
+            storage_root,
        };

-        let storage_path = dummy_storage.storage_path(&original_path)?;
+        let storage_path = dummy_storage.remote_object_id(&original_path)?;
        let download_destination = dummy_storage.local_path(&storage_path)?;

        assert_eq!(
@@ -449,23 +514,22 @@ mod pure_tests {
 #[cfg(test)]
 mod fs_tests {
    use super::*;
-    use crate::repository::repo_harness::{RepoHarness, TIMELINE_ID};

-    use std::io::Write;
+    use std::{collections::HashMap, io::Write};
    use tempfile::tempdir;

    #[tokio::test]
    async fn upload_file() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("upload_file")?;
+        let workdir = tempdir()?.path().to_owned();
        let storage = create_storage()?;

-        let source = create_file_for_upload(
-            &storage.pageserver_workdir.join("whatever"),
+        let (file, size) = create_file_for_upload(
+            &storage.working_directory.join("whatever"),
            "whatever_contents",
        )
        .await?;
        let target_path = PathBuf::from("/").join("somewhere").join("else");
-        match storage.upload(source, &target_path).await {
+        match storage.upload(file, size, &target_path, None).await {
            Ok(()) => panic!("Should not allow storing files with wrong target path"),
            Err(e) => {
                let message = format!("{:?}", e);
@@ -475,14 +539,14 @@ mod fs_tests {
        }
        assert!(storage.list().await?.is_empty());

-        let target_path_1 = upload_dummy_file(&repo_harness, &storage, "upload_1").await?;
+        let target_path_1 = upload_dummy_file(&workdir, &storage, "upload_1", None).await?;
        assert_eq!(
            storage.list().await?,
            vec![target_path_1.clone()],
            "Should list a single file after first upload"
        );

-        let target_path_2 = upload_dummy_file(&repo_harness, &storage, "upload_2").await?;
+        let target_path_2 = upload_dummy_file(&workdir, &storage, "upload_2", None).await?;
        assert_eq!(
            list_files_sorted(&storage).await?,
            vec![target_path_1.clone(), target_path_2.clone()],
@@ -493,22 +557,25 @@ mod fs_tests {
    }

    fn create_storage() -> anyhow::Result<LocalFs> {
-        let pageserver_workdir = Box::leak(Box::new(tempdir()?.path().to_owned()));
-        let storage = LocalFs::new(tempdir()?.path().to_owned(), pageserver_workdir)?;
-        Ok(storage)
+        LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
    }

    #[tokio::test]
    async fn download_file() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("download_file")?;
+        let workdir = tempdir()?.path().to_owned();
+
        let storage = create_storage()?;
        let upload_name = "upload_1";
-        let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
+        let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;

        let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
-        storage.download(&upload_target, &mut content_bytes).await?;
-        content_bytes.flush().await?;
+        let metadata = storage.download(&upload_target, &mut content_bytes).await?;
+        assert!(
+            metadata.is_none(),
+            "No metadata should be returned for no metadata upload"
+        );

+        content_bytes.flush().await?;
        let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
        assert_eq!(
            dummy_contents(upload_name),
@@ -530,15 +597,20 @@ mod fs_tests {

    #[tokio::test]
    async fn download_file_range_positive() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("download_file_range_positive")?;
+        let workdir = tempdir()?.path().to_owned();
+
        let storage = create_storage()?;
        let upload_name = "upload_1";
-        let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
+        let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;

        let mut full_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
-        storage
-            .download_range(&upload_target, 0, None, &mut full_range_bytes)
+        let metadata = storage
+            .download_byte_range(&upload_target, 0, None, &mut full_range_bytes)
            .await?;
+        assert!(
+            metadata.is_none(),
+            "No metadata should be returned for no metadata upload"
+        );
        full_range_bytes.flush().await?;
        assert_eq!(
            dummy_contents(upload_name),
@@ -548,14 +620,18 @@ mod fs_tests {

        let mut zero_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
        let same_byte = 1_000_000_000;
-        storage
-            .download_range(
+        let metadata = storage
+            .download_byte_range(
                &upload_target,
                same_byte,
                Some(same_byte + 1), // exclusive end
                &mut zero_range_bytes,
            )
            .await?;
+        assert!(
+            metadata.is_none(),
+            "No metadata should be returned for no metadata upload"
+        );
        zero_range_bytes.flush().await?;
        assert!(
            zero_range_bytes.into_inner().into_inner().is_empty(),
@@ -566,14 +642,19 @@ mod fs_tests {
        let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);

        let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
-        storage
-            .download_range(
+        let metadata = storage
+            .download_byte_range(
                &upload_target,
                0,
                Some(first_part_local.len() as u64),
                &mut first_part_remote,
            )
            .await?;
+        assert!(
+            metadata.is_none(),
+            "No metadata should be returned for no metadata upload"
+        );
+
        first_part_remote.flush().await?;
        let first_part_remote = first_part_remote.into_inner().into_inner();
        assert_eq!(
@@ -583,14 +664,19 @@ mod fs_tests {
        );

        let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
-        storage
-            .download_range(
+        let metadata = storage
+            .download_byte_range(
                &upload_target,
                first_part_local.len() as u64,
                Some((first_part_local.len() + second_part_local.len()) as u64),
                &mut second_part_remote,
            )
            .await?;
+        assert!(
+            metadata.is_none(),
+            "No metadata should be returned for no metadata upload"
+        );
+
        second_part_remote.flush().await?;
        let second_part_remote = second_part_remote.into_inner().into_inner();
        assert_eq!(
@@ -604,16 +690,17 @@ mod fs_tests {

    #[tokio::test]
    async fn download_file_range_negative() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("download_file_range_negative")?;
+        let workdir = tempdir()?.path().to_owned();
+
        let storage = create_storage()?;
        let upload_name = "upload_1";
-        let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
+        let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;

        let start = 10000;
        let end = 234;
        assert!(start > end, "Should test an incorrect range");
        match storage
-            .download_range(&upload_target, start, Some(end), &mut io::sink())
+            .download_byte_range(&upload_target, start, Some(end), &mut io::sink())
            .await
        {
            Ok(_) => panic!("Should not allow downloading wrong ranges"),
@@ -627,7 +714,7 @@ mod fs_tests {

        let non_existing_path = PathBuf::from("somewhere").join("else");
        match storage
-            .download_range(&non_existing_path, 1, Some(3), &mut io::sink())
+            .download_byte_range(&non_existing_path, 1, Some(3), &mut io::sink())
            .await
        {
            Ok(_) => panic!("Should not allow downloading non-existing storage file ranges"),
@@ -642,10 +729,11 @@ mod fs_tests {

    #[tokio::test]
    async fn delete_file() -> anyhow::Result<()> {
-        let repo_harness = RepoHarness::create("delete_file")?;
+        let workdir = tempdir()?.path().to_owned();
+
        let storage = create_storage()?;
        let upload_name = "upload_1";
-        let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
+        let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;

        storage.delete(&upload_target).await?;
        assert!(storage.list().await?.is_empty());
@@ -661,31 +749,85 @@ mod fs_tests {
        Ok(())
    }

-    async fn upload_dummy_file(
-        harness: &RepoHarness<'_>,
-        storage: &LocalFs,
-        name: &str,
-    ) -> anyhow::Result<PathBuf> {
-        let timeline_path = harness.timeline_path(&TIMELINE_ID);
-        let relative_timeline_path = timeline_path.strip_prefix(&harness.conf.workdir)?;
-        let storage_path = storage.root.join(relative_timeline_path).join(name);
-        storage
-            .upload(
-                create_file_for_upload(
-                    &storage.pageserver_workdir.join(name),
-                    &dummy_contents(name),
-                )
-                .await?,
-                &storage_path,
+    #[tokio::test]
+    async fn file_with_metadata() -> anyhow::Result<()> {
+        let workdir = tempdir()?.path().to_owned();
+
+        let storage = create_storage()?;
+        let upload_name = "upload_1";
+        let metadata = StorageMetadata(HashMap::from([
+            ("one".to_string(), "1".to_string()),
+            ("two".to_string(), "2".to_string()),
+        ]));
+        let upload_target =
+            upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
+
+        let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
+        let full_download_metadata = storage.download(&upload_target, &mut content_bytes).await?;
+
+        content_bytes.flush().await?;
+        let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
+        assert_eq!(
+            dummy_contents(upload_name),
+            contents,
+            "We should upload and download the same contents"
+        );
+
+        assert_eq!(
+            full_download_metadata.as_ref(),
+            Some(&metadata),
+            "We should get the same metadata back for full download"
+        );
+
+        let uploaded_bytes = dummy_contents(upload_name).into_bytes();
+        let (first_part_local, _) = uploaded_bytes.split_at(3);
+
+        let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
+        let partial_download_metadata = storage
+            .download_byte_range(
+                &upload_target,
+                0,
+                Some(first_part_local.len() as u64),
+                &mut first_part_remote,
            )
            .await?;
+        first_part_remote.flush().await?;
+        let first_part_remote = first_part_remote.into_inner().into_inner();
+        assert_eq!(
+            first_part_local,
+            first_part_remote.as_slice(),
+            "First part bytes should be returned when requested"
+        );
+
+        assert_eq!(
+            partial_download_metadata.as_ref(),
+            Some(&metadata),
+            "We should get the same metadata back for partial download"
+        );
+
+        Ok(())
+    }
+
+    async fn upload_dummy_file(
+        workdir: &Path,
+        storage: &LocalFs,
+        name: &str,
+        metadata: Option<StorageMetadata>,
+    ) -> anyhow::Result<PathBuf> {
+        let timeline_path = workdir.join("timelines").join("some_timeline");
+        let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
+        let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
+
+        let from_path = storage.working_directory.join(name);
+        let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
+        storage.upload(file, size, &storage_path, metadata).await?;
        Ok(storage_path)
    }

    async fn create_file_for_upload(
        path: &Path,
        contents: &str,
-    ) -> anyhow::Result<io::BufReader<fs::File>> {
+    ) -> anyhow::Result<(io::BufReader<fs::File>, usize)> {
        std::fs::create_dir_all(path.parent().unwrap())?;
        let mut file_for_writing = std::fs::OpenOptions::new()
            .write(true)
@@ -693,8 +835,10 @@ mod fs_tests {
            .open(path)?;
        write!(file_for_writing, "{}", contents)?;
        drop(file_for_writing);
-        Ok(io::BufReader::new(
-            fs::OpenOptions::new().read(true).open(&path).await?,
+        let file_size = path.metadata()?.len() as usize;
+        Ok((
+            io::BufReader::new(fs::OpenOptions::new().read(true).open(&path).await?),
+            file_size,
        ))
    }

--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -0,0 +1,465 @@
+//! AWS S3 storage wrapper around `rusoto` library.
+//!
+//! Respects `prefix_in_bucket` property from [`S3Config`],
+//! allowing multiple api users to independently work with the same S3 bucket, if
+//! their bucket prefixes are both specified and different.
+
+use std::path::{Path, PathBuf};
+
+use anyhow::Context;
+use rusoto_core::{
+    credential::{InstanceMetadataProvider, StaticProvider},
+    HttpClient, Region,
+};
+use rusoto_s3::{
+    DeleteObjectRequest, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, S3Client,
+    StreamingBody, S3,
+};
+use tokio::{io, sync::Semaphore};
+use tokio_util::io::ReaderStream;
+use tracing::debug;
+
+use crate::{strip_path_prefix, RemoteStorage, S3Config};
+
+use super::StorageMetadata;
+
+const S3_PREFIX_SEPARATOR: char = '/';
+
+#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct S3ObjectKey(String);
+
+impl S3ObjectKey {
+    /// Borrows the full object key exactly as it is stored in this wrapper.
+    fn key(&self) -> &str {
+        self.0.as_str()
+    }
+
+    /// Maps this key onto a local path under `workdir`.
+    ///
+    /// When `prefix_to_strip` is given it is removed from the start of the key first.
+    /// The remainder is split on the S3 separator, the parts are collected into a
+    /// relative path, and that path is appended to `workdir`.
+    ///
+    /// Panics if `prefix_to_strip` is given but the key does not start with it.
+    fn download_destination(&self, workdir: &Path, prefix_to_strip: Option<&str>) -> PathBuf {
+        let key_without_prefix = if let Some(prefix) = prefix_to_strip {
+            match self.0.strip_prefix(prefix) {
+                Some(stripped) => stripped,
+                None => panic!(
+                    "Could not strip prefix '{}' from S3 object key '{}'",
+                    prefix, self.0
+                ),
+            }
+        } else {
+            self.0.as_str()
+        };
+
+        let relative_path: PathBuf = key_without_prefix.split(S3_PREFIX_SEPARATOR).collect();
+        let mut destination = workdir.to_path_buf();
+        destination.push(relative_path);
+        destination
+    }
+}
+
+/// AWS S3 storage.
+pub struct S3Bucket {
+    /// Local directory that maps onto the bucket: it is stripped from local paths to
+    /// form object keys, and prepended to stripped keys to form download destinations.
+    workdir: PathBuf,
+    /// Client used to issue every S3 request.
+    client: S3Client,
+    /// Name of the bucket all requests are sent to.
+    bucket_name: String,
+    /// Optional key prefix: prepended to generated keys and used as the listing filter.
+    /// `S3Bucket::new` stores it with leading and trailing separators removed.
+    prefix_in_bucket: Option<String>,
+    // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
+    // Same goes to IAM, which is queried before every S3 request, if enabled. IAM has even lower RPS threshold.
+    // This helps to ensure we don't exceed the thresholds.
+    concurrency_limiter: Semaphore,
+}
+
+impl S3Bucket {
+    /// Builds an S3-backed storage from `aws_config`, rooted locally at `workdir`.
+    ///
+    /// * Region: a configured custom endpoint yields `Region::Custom` named after
+    ///   `bucket_region`; otherwise `bucket_region` is parsed as a region name.
+    /// * Credentials: read from the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
+    ///   environment variables. If neither is set, the instance metadata provider is
+    ///   used instead; if only one is set, the other one defaults to an empty string.
+    /// * Prefix: `prefix_in_bucket` is stored without leading and trailing separators.
+    ///
+    /// Fails if the region cannot be parsed or the HTTP client cannot be created.
+    pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result<Self> {
+        debug!(
+            "Creating s3 remote storage for S3 bucket {}",
+            aws_config.bucket_name
+        );
+
+        let region = if let Some(custom_endpoint) = aws_config.endpoint.clone() {
+            Region::Custom {
+                name: aws_config.bucket_region.clone(),
+                endpoint: custom_endpoint,
+            }
+        } else {
+            aws_config
+                .bucket_region
+                .parse::<Region>()
+                .context("Failed to parse the s3 region from config")?
+        };
+        let request_dispatcher = HttpClient::new().context("Failed to create S3 http client")?;
+
+        let env_credentials = (
+            std::env::var("AWS_ACCESS_KEY_ID").ok(),
+            std::env::var("AWS_SECRET_ACCESS_KEY").ok(),
+        );
+        let client = match env_credentials {
+            (None, None) => {
+                debug!("Using IAM-based AWS access");
+                S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
+            }
+            (access_key_id, secret_access_key) => {
+                debug!("Using credentials-based AWS access");
+                let credentials = StaticProvider::new_minimal(
+                    access_key_id.unwrap_or_default(),
+                    secret_access_key.unwrap_or_default(),
+                );
+                S3Client::new_with(request_dispatcher, credentials, region)
+            }
+        };
+
+        // Separators at either end of the prefix are dropped, separators inside it are kept.
+        let prefix_in_bucket = aws_config
+            .prefix_in_bucket
+            .as_deref()
+            .map(|prefix| prefix.trim_matches(S3_PREFIX_SEPARATOR).to_string());
+
+        Ok(Self {
+            client,
+            workdir,
+            bucket_name: aws_config.bucket_name.clone(),
+            prefix_in_bucket,
+            concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
+        })
+    }
+}
+
+#[async_trait::async_trait]
+impl RemoteStorage for S3Bucket {
+    type RemoteObjectId = S3ObjectKey;
+
+    /// Converts a local path under the storage workdir into its S3 object key.
+    ///
+    /// The key starts with `prefix_in_bucket` (or nothing when no prefix is set) and
+    /// continues with every path segment after the workdir, each one preceded by the
+    /// S3 separator. Fails when `strip_path_prefix` rejects the path.
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId> {
+        let key = strip_path_prefix(&self.workdir, local_path)?.iter().fold(
+            self.prefix_in_bucket.clone().unwrap_or_default(),
+            |mut key, segment| {
+                key.push(S3_PREFIX_SEPARATOR);
+                key.push_str(&segment.to_string_lossy());
+                key
+            },
+        );
+        Ok(S3ObjectKey(key))
+    }
+
+    /// Returns the local path an object with the given key is downloaded to:
+    /// the key with `prefix_in_bucket` stripped, placed under the storage workdir.
+    fn local_path(&self, storage_path: &Self::RemoteObjectId) -> anyhow::Result<PathBuf> {
+        let prefix_to_strip = self.prefix_in_bucket.as_deref();
+        let destination = storage_path.download_destination(&self.workdir, prefix_to_strip);
+        Ok(destination)
+    }
+
+    /// Lists the keys of all objects in the bucket that match `prefix_in_bucket`
+    /// (no prefix filter is sent when the prefix is not configured).
+    ///
+    /// The listing is requested page by page until a response carries no
+    /// `next_continuation_token`. A concurrency limiter permit is acquired
+    /// separately for every page request. Objects reported without a key are skipped.
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
+        let mut document_keys = Vec::new();
+
+        let mut continuation_token = None;
+        loop {
+            let _guard = self
+                .concurrency_limiter
+                .acquire()
+                .await
+                .context("Concurrency limiter semaphore got closed during S3 list")?;
+            let fetch_response = self
+                .client
+                .list_objects_v2(ListObjectsV2Request {
+                    bucket: self.bucket_name.clone(),
+                    prefix: self.prefix_in_bucket.clone(),
+                    continuation_token,
+                    ..ListObjectsV2Request::default()
+                })
+                .await?;
+            document_keys.extend(
+                fetch_response
+                    .contents
+                    .unwrap_or_default()
+                    .into_iter()
+                    .filter_map(|o| Some(S3ObjectKey(o.key?))),
+            );
+
+            // The response's `continuation_token` only echoes the token that was sent with
+            // the request, so it is always empty for the first page. The token that
+            // unlocks the following page is `next_continuation_token`.
+            match fetch_response.next_continuation_token {
+                Some(new_token) => continuation_token = Some(new_token),
+                None => break,
+            }
+        }
+
+        Ok(document_keys)
+    }
+
+    /// Streams `from_size_bytes` bytes read from `from` into the object with key `to`,
+    /// attaching `metadata` to the object when it is given.
+    ///
+    /// Waits for a concurrency limiter permit before the request is sent.
+    async fn upload(
+        &self,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        from_size_bytes: usize,
+        to: &Self::RemoteObjectId,
+        metadata: Option<StorageMetadata>,
+    ) -> anyhow::Result<()> {
+        let _permit = self
+            .concurrency_limiter
+            .acquire()
+            .await
+            .context("Concurrency limiter semaphore got closed during S3 upload")?;
+
+        let body = StreamingBody::new_with_size(ReaderStream::new(from), from_size_bytes);
+        let request = PutObjectRequest {
+            body: Some(body),
+            bucket: self.bucket_name.clone(),
+            key: to.key().to_owned(),
+            metadata: metadata.map(|m| m.0),
+            ..PutObjectRequest::default()
+        };
+        self.client.put_object(request).await?;
+        Ok(())
+    }
+
+    /// Copies the whole object with key `from` into `to` and returns the metadata
+    /// stored with the object, if the response carried any.
+    ///
+    /// Waits for a concurrency limiter permit before the request is sent. Nothing is
+    /// written to `to` when the response has no body.
+    async fn download(
+        &self,
+        from: &Self::RemoteObjectId,
+        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
+    ) -> anyhow::Result<Option<StorageMetadata>> {
+        let _permit = self
+            .concurrency_limiter
+            .acquire()
+            .await
+            .context("Concurrency limiter semaphore got closed during S3 download")?;
+
+        let request = GetObjectRequest {
+            bucket: self.bucket_name.clone(),
+            key: from.key().to_owned(),
+            ..GetObjectRequest::default()
+        };
+        let response = self.client.get_object(request).await?;
+
+        let metadata = response.metadata.map(StorageMetadata);
+        if let Some(body) = response.body {
+            let mut body_reader = io::BufReader::new(body.into_async_read());
+            io::copy(&mut body_reader, to).await?;
+        }
+
+        Ok(metadata)
+    }
+
+    /// Copies the bytes `start_inclusive..end_exclusive` of the object with key `from`
+    /// into `to`; with `end_exclusive` absent, everything from `start_inclusive` to the
+    /// end of the object is copied. Returns the metadata stored with the object, if the
+    /// response carried any.
+    ///
+    /// Fails without sending a request when `end_exclusive` is given and is not greater
+    /// than `start_inclusive`: such a range cannot be expressed as a valid `Range`
+    /// header, so sending it would not return the requested bytes.
+    ///
+    /// Waits for a concurrency limiter permit before the request is sent.
+    async fn download_byte_range(
+        &self,
+        from: &Self::RemoteObjectId,
+        start_inclusive: u64,
+        end_exclusive: Option<u64>,
+        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
+    ) -> anyhow::Result<Option<StorageMetadata>> {
+        if let Some(end_exclusive) = end_exclusive {
+            anyhow::ensure!(
+                end_exclusive > start_inclusive,
+                "Invalid range, start ({}) is not smaller than the exclusive end ({})",
+                start_inclusive,
+                end_exclusive
+            );
+        }
+
+        // S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
+        // and needs both ends to be inclusive
+        let range = Some(match end_exclusive {
+            // The check above guarantees `end_exclusive >= 1`, so this cannot underflow.
+            Some(end_exclusive) => format!("bytes={}-{}", start_inclusive, end_exclusive - 1),
+            None => format!("bytes={}-", start_inclusive),
+        });
+        let _guard = self
+            .concurrency_limiter
+            .acquire()
+            .await
+            .context("Concurrency limiter semaphore got closed during S3 range download")?;
+        let object_output = self
+            .client
+            .get_object(GetObjectRequest {
+                bucket: self.bucket_name.clone(),
+                key: from.key().to_owned(),
+                range,
+                ..GetObjectRequest::default()
+            })
+            .await?;
+
+        if let Some(body) = object_output.body {
+            let mut from = io::BufReader::new(body.into_async_read());
+            io::copy(&mut from, to).await?;
+        }
+
+        Ok(object_output.metadata.map(StorageMetadata))
+    }
+
+    /// Deletes the object with key `path` from the bucket.
+    ///
+    /// Waits for a concurrency limiter permit before the request is sent.
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
+        let _permit = self
+            .concurrency_limiter
+            .acquire()
+            .await
+            .context("Concurrency limiter semaphore got closed during S3 delete")?;
+
+        let request = DeleteObjectRequest {
+            bucket: self.bucket_name.clone(),
+            key: path.key().to_owned(),
+            ..DeleteObjectRequest::default()
+        };
+        self.client.delete_object(request).await?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tempfile::tempdir;
+
+    use super::*;
+
+    /// A key made of separator-prefixed path segments maps back onto the same local path
+    /// when no bucket prefix has to be stripped.
+    #[test]
+    fn download_destination() -> anyhow::Result<()> {
+        // Only the path value is used below, nothing is created on disk by this test.
+        let workdir = tempdir()?.path().to_owned();
+        let local_path = workdir.join("one").join("two").join("test_name");
+
+        let separator = S3_PREFIX_SEPARATOR.to_string();
+        let key_segments: Vec<&str> = local_path
+            .strip_prefix(&workdir)?
+            .iter()
+            .map(|segment| segment.to_str().unwrap())
+            .collect();
+        let key = S3ObjectKey(format!(
+            "{}{}",
+            separator,
+            key_segments.join(&separator),
+        ));
+
+        let destination = key.download_destination(&workdir, None);
+        assert_eq!(
+            local_path, destination,
+            "Download destination should consist of s3 path joined with the workdir prefix"
+        );
+
+        Ok(())
+    }
+
+    /// A path inside the workdir turns into `<prefix>/<segment>/<segment>`.
+    #[test]
+    fn storage_path_positive() -> anyhow::Result<()> {
+        let workdir = tempdir()?.path().to_owned();
+
+        let segment_1 = "matching";
+        let segment_2 = "file";
+        let local_path = workdir.join(segment_1).join(segment_2);
+
+        let storage = dummy_storage(workdir);
+
+        let prefix = storage.prefix_in_bucket.as_deref().unwrap_or_default();
+        let expected_key = S3ObjectKey(format!(
+            "{}{S3_PREFIX_SEPARATOR}{segment_1}{S3_PREFIX_SEPARATOR}{segment_2}",
+            prefix,
+        ));
+
+        let actual_key = storage
+            .remote_object_id(&local_path)
+            .expect("Matching path should map to S3 path normally");
+        assert_eq!(
+            expected_key,
+            actual_key,
+            "S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
+        );
+
+        Ok(())
+    }
+
+    /// Paths that cannot be mapped to an S3 key must produce descriptive errors:
+    /// first the workdir itself, then a path that is not located under the workdir.
+    #[test]
+    fn storage_path_negatives() -> anyhow::Result<()> {
+        /// Returns the error text produced for `mismatching_path`,
+        /// panics if the path unexpectedly maps to a key.
+        #[track_caller]
+        fn storage_path_error(storage: &S3Bucket, mismatching_path: &Path) -> String {
+            match storage.remote_object_id(mismatching_path) {
+                Ok(wrong_key) => panic!(
+                    "Expected path '{}' to error, but got S3 key: {:?}",
+                    mismatching_path.display(),
+                    wrong_key,
+                ),
+                Err(e) => e.to_string(),
+            }
+        }
+
+        let workdir = tempdir()?.path().to_owned();
+        let storage = dummy_storage(workdir.clone());
+
+        // Case 1: the path is the workdir itself, nothing is left to build a key from.
+        let error_message = storage_path_error(&storage, &workdir);
+        assert!(
+            error_message.contains("Prefix and the path are equal"),
+            "Message '{}' does not contain the required string",
+            error_message
+        );
+
+        // Case 2: a relative path outside of the workdir; the error has to name both
+        // the offending path and the workdir.
+        let mismatching_path = PathBuf::from("somewhere").join("else");
+        let error_message = storage_path_error(&storage, &mismatching_path);
+        assert!(
+            error_message.contains(mismatching_path.to_str().unwrap()),
+            "Error should mention wrong path"
+        );
+        assert!(
+            error_message.contains(workdir.to_str().unwrap()),
+            "Error should mention server workdir"
+        );
+        assert!(
+            error_message.contains("is not prefixed with"),
+            "Message '{}' does not contain a required string",
+            error_message
+        );
+
+        Ok(())
+    }
+
+    /// `local_path` must agree with `download_destination` called with the storage's own
+    /// workdir and prefix, both for a file named "not a metadata" and for one named "metadata".
+    // NOTE(review): the assertion messages talk about parsing metadata out of file names,
+    // which this test does not do; presumably they are leftovers from an older API — confirm.
+    #[test]
+    fn local_path_positive() -> anyhow::Result<()> {
+        let workdir = tempdir()?.path().to_owned();
+        let storage = dummy_storage(workdir.clone());
+        let timeline_dir = workdir.join("timelines").join("test_timeline");
+        let relative_timeline_path = timeline_dir.strip_prefix(&workdir)?;
+
+        // A file whose name differs from "metadata".
+        let s3_key = create_s3_key(
+            &relative_timeline_path.join("not a metadata"),
+            storage.prefix_in_bucket.as_deref(),
+        );
+        assert_eq!(
+            s3_key.download_destination(&workdir, storage.prefix_in_bucket.as_deref()),
+            storage
+                .local_path(&s3_key)
+                .expect("For a valid input, valid S3 info should be parsed"),
+            "Should be able to parse metadata out of the correctly named remote delta file"
+        );
+
+        // A file named exactly "metadata".
+        let s3_key = create_s3_key(
+            &relative_timeline_path.join("metadata"),
+            storage.prefix_in_bucket.as_deref(),
+        );
+        assert_eq!(
+            s3_key.download_destination(&workdir, storage.prefix_in_bucket.as_deref()),
+            storage
+                .local_path(&s3_key)
+                .expect("For a valid input, valid S3 info should be parsed"),
+            "Should be able to parse metadata out of the correctly named remote metadata file"
+        );
+
+        Ok(())
+    }
+
+    /// Round trip: local path -> S3 key -> local path has to end where it started.
+    #[test]
+    fn download_destination_matches_original_path() -> anyhow::Result<()> {
+        let workdir = tempdir()?.path().to_owned();
+        let original_path = workdir
+            .join("timelines")
+            .join("some_timeline")
+            .join("some name");
+        let storage = dummy_storage(workdir);
+
+        let round_tripped_path = storage.local_path(&storage.remote_object_id(&original_path)?)?;
+
+        assert_eq!(
+            original_path, round_tripped_path,
+            "'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
+        );
+
+        Ok(())
+    }
+
+    /// Builds an `S3Bucket` for tests directly from its fields, without going through
+    /// `S3Bucket::new`. It uses a fixed region, bucket name and prefix, and a
+    /// concurrency limit of 1.
+    fn dummy_storage(workdir: PathBuf) -> S3Bucket {
+        S3Bucket {
+            workdir,
+            client: S3Client::new("us-east-1".parse().unwrap()),
+            bucket_name: "dummy-bucket".to_string(),
+            // No trailing separator: `S3Bucket::new` trims separators from both ends of the
+            // prefix, so the fixture holds the same normalized form a real storage would
+            // and the tests do not exercise keys with a doubled separator.
+            prefix_in_bucket: Some("dummy_prefix".to_string()),
+            concurrency_limiter: Semaphore::new(1),
+        }
+    }
+
+    /// Builds the key expected for `relative_file_path`: `prefix` (empty if absent)
+    /// followed by every path segment, each one preceded by the S3 separator.
+    /// Panics on segments that are not valid UTF-8.
+    fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> S3ObjectKey {
+        let mut key = prefix.unwrap_or_default().to_string();
+        for segment in relative_file_path.iter() {
+            key.push(S3_PREFIX_SEPARATOR);
+            key.push_str(segment.to_str().unwrap());
+        }
+        S3ObjectKey(key)
+    }
+}
--- a/zenith_utils/Cargo.toml
+++ b/zenith_utils/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "zenith_utils"
+name = "utils"
 version = "0.1.0"
 edition = "2021"

@@ -10,8 +10,8 @@ bytes = "1.0.1"
 hyper = { version = "0.14.7", features = ["full"] }
 lazy_static = "1.4.0"
 pin-project-lite = "0.2.7"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 routerify = "3"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
@@ -22,23 +22,23 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 nix = "0.23.0"
 signal-hook = "0.3.10"
 rand = "0.8.3"
-jsonwebtoken = "7"
+jsonwebtoken = "8"
 hex = { version = "0.4.3", features = ["serde"] }
-rustls = "0.19.1"
-rustls-split = "0.2.1"
+rustls = "0.20.2"
+rustls-split = "0.3.0"
 git-version = "0.3.5"
 serde_with = "1.12.0"

-zenith_metrics = { path = "../zenith_metrics" }
-workspace_hack = { version = "0.1", path = "../workspace_hack" }
+metrics = { path = "../metrics" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }

 [dev-dependencies]
 byteorder = "1.4.3"
 bytes = "1.0.1"
 hex-literal = "0.3"
 tempfile = "3.2"
-webpki = "0.21"
 criterion = "0.3"
+rustls-pemfile = "0.2.1"

 [[bench]]
 name = "benchmarks"
--- a/zenith_utils/benches/benchmarks.rs
+++ b/zenith_utils/benches/benchmarks.rs
@@ -1,7 +1,7 @@
 #![allow(unused)]

 use criterion::{criterion_group, criterion_main, Criterion};
-use zenith_utils::zid;
+use utils::zid;

 pub fn bench_zid_stringify(c: &mut Criterion) {
    // Can only use public methods.
--- a/libs/utils/scripts/restore_from_wal.sh
+++ b/libs/utils/scripts/restore_from_wal.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+PG_BIN=$1
+WAL_PATH=$2
+DATA_DIR=$3
+PORT=$4
+SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
+rm -fr $DATA_DIR
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+echo port=$PORT >> $DATA_DIR/postgresql.conf
+REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
+declare -i WAL_SIZE=$REDO_POS+114
+$PG_BIN/pg_ctl -D $DATA_DIR -l logfile start
+$PG_BIN/pg_ctl -D $DATA_DIR -l logfile stop -m immediate
+cp $DATA_DIR/pg_wal/000000010000000000000001 .
+cp $WAL_PATH/* $DATA_DIR/pg_wal/
+if [ -f $DATA_DIR/pg_wal/*.partial ]
+then
+	(cd $DATA_DIR/pg_wal ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
+fi
+dd if=000000010000000000000001 of=$DATA_DIR/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
+rm -f 000000010000000000000001
--- a/libs/utils/scripts/restore_from_wal_archive.sh
+++ b/libs/utils/scripts/restore_from_wal_archive.sh
@@ -0,0 +1,20 @@
+PG_BIN=$1
+WAL_PATH=$2
+DATA_DIR=$3
+PORT=$4
+SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
+rm -fr $DATA_DIR /tmp/pg_wals
+mkdir /tmp/pg_wals
+env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
+echo port=$PORT >> $DATA_DIR/postgresql.conf
+REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
+declare -i WAL_SIZE=$REDO_POS+114
+cp $WAL_PATH/* /tmp/pg_wals
+if [ -f $DATA_DIR/pg_wal/*.partial ]
+then
+	(cd /tmp/pg_wals ; for partial in \*.partial ; do  mv $partial `basename $partial .partial` ; done)
+fi
+dd if=$DATA_DIR/pg_wal/000000010000000000000001 of=/tmp/pg_wals/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
+echo > $DATA_DIR/recovery.signal
+rm -f $DATA_DIR/pg_wal/*
+echo "restore_command = 'cp /tmp/pg_wals/%f %p'" >> $DATA_DIR/postgresql.conf
--- a/zenith_utils/src/accum.rs
+++ b/zenith_utils/src/accum.rs
@@ -5,7 +5,7 @@
 /// For example, to calculate the smallest value among some integers:
 ///
 /// ```
-/// use zenith_utils::accum::Accum;
+/// use utils::accum::Accum;
 ///
 /// let values = [1, 2, 3];
 ///
--- a/zenith_utils/src/auth.rs
+++ b/zenith_utils/src/auth.rs
@@ -1,8 +1,6 @@
 // For details about authentication see docs/authentication.md
-// TODO there are two issues for our use case in jsonwebtoken library which will be resolved in next release
-// The first one is that there is no way to disable expiration claim, but it can be excluded from validation, so use this as a workaround for now.
-// Relevant issue: https://github.com/Keats/jsonwebtoken/issues/190
-// The second one is that we wanted to use ed25519 keys, but they are also not supported until next version. So we go with RSA keys for now.
+//
+// TODO: use ed25519 keys
 // Relevant issue: https://github.com/Keats/jsonwebtoken/issues/162

 use serde;
@@ -59,19 +57,19 @@ pub fn check_permission(claims: &Claims, tenantid: Option<ZTenantId>) -> Result<
 }

 pub struct JwtAuth {
-    decoding_key: DecodingKey<'static>,
+    decoding_key: DecodingKey,
    validation: Validation,
 }

 impl JwtAuth {
-    pub fn new(decoding_key: DecodingKey<'_>) -> Self {
+    pub fn new(decoding_key: DecodingKey) -> Self {
+        let mut validation = Validation::new(JWT_ALGORITHM);
+        // The default 'required_spec_claims' is 'exp'. But we don't want to require
+        // expiration.
+        validation.required_spec_claims = [].into();
        Self {
-            decoding_key: decoding_key.into_static(),
-            validation: Validation {
-                algorithms: vec![JWT_ALGORITHM],
-                validate_exp: false,
-                ..Default::default()
-            },
+            decoding_key,
+            validation,
        }
    }

--- a/zenith_utils/src/bin_ser.rs
+++ b/zenith_utils/src/bin_ser.rs
--- a/zenith_utils/src/connstring.rs
+++ b/zenith_utils/src/connstring.rs
--- a/zenith_utils/src/crashsafe_dir.rs
+++ b/zenith_utils/src/crashsafe_dir.rs
--- a/zenith_utils/src/http/endpoint.rs
+++ b/zenith_utils/src/http/endpoint.rs
@@ -5,12 +5,11 @@ use anyhow::anyhow;
 use hyper::header::AUTHORIZATION;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
 use lazy_static::lazy_static;
+use metrics::{new_common_metric_name, register_int_counter, Encoder, IntCounter, TextEncoder};
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
 use tracing::info;
-use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
-use zenith_metrics::{Encoder, TextEncoder};

 use std::future::Future;
 use std::net::TcpListener;
@@ -36,7 +35,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    let mut buffer = vec![];
    let encoder = TextEncoder::new();

-    let metrics = zenith_metrics::gather();
+    let metrics = metrics::gather();
    encoder.encode(&metrics, &mut buffer).unwrap();

    let response = Response::builder()
--- a/zenith_utils/src/http/error.rs
+++ b/zenith_utils/src/http/error.rs
@@ -17,6 +17,9 @@ pub enum ApiError {
    #[error("NotFound: {0}")]
    NotFound(String),

+    #[error("Conflict: {0}")]
+    Conflict(String),
+
    #[error(transparent)]
    InternalServerError(#[from] anyhow::Error),
 }
@@ -42,6 +45,9 @@ impl ApiError {
            ApiError::NotFound(_) => {
                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::NOT_FOUND)
            }
+            ApiError::Conflict(_) => {
+                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::CONFLICT)
+            }
            ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
                err.to_string(),
                StatusCode::INTERNAL_SERVER_ERROR,
--- a/zenith_utils/src/http/json.rs
+++ b/zenith_utils/src/http/json.rs
@@ -10,8 +10,8 @@ pub async fn json_request<T: for<'de> Deserialize<'de>>(
    let whole_body = hyper::body::aggregate(request.body_mut())
        .await
        .map_err(ApiError::from_err)?;
-    Ok(serde_json::from_reader(whole_body.reader())
-        .map_err(|err| ApiError::BadRequest(format!("Failed to parse json request {}", err)))?)
+    serde_json::from_reader(whole_body.reader())
+        .map_err(|err| ApiError::BadRequest(format!("Failed to parse json request {}", err)))
 }

 pub fn json_response<T: Serialize>(
--- a/zenith_utils/src/http/mod.rs
+++ b/zenith_utils/src/http/mod.rs
--- a/zenith_utils/src/http/request.rs
+++ b/zenith_utils/src/http/request.rs
@@ -1,7 +1,7 @@
 use std::str::FromStr;

 use super::error::ApiError;
-use hyper::{Body, Request};
+use hyper::{body::HttpBody, Body, Request};
 use routerify::ext::RequestExt;

 pub fn get_request_param<'a>(
@@ -31,3 +31,10 @@ pub fn parse_request_param<T: FromStr>(
        ))),
    }
 }
+
+pub async fn ensure_no_body(request: &mut Request<Body>) -> Result<(), ApiError> {
+    match request.body_mut().data().await {
+        Some(_) => Err(ApiError::BadRequest("Unexpected request body".into())),
+        None => Ok(()),
+    }
+}
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -0,0 +1,97 @@
+//! `utils` is intended to be a place to put code that is shared
+//! between other crates in this repository.
+
+#![allow(clippy::manual_range_contains)]
+
+/// `Lsn` type implements common tasks on Log Sequence Numbers
+pub mod lsn;
+/// SeqWait allows waiting for a future sequence number to arrive
+pub mod seqwait;
+
+/// append only ordered map implemented with a Vec
+pub mod vec_map;
+
+// Async version of SeqWait. Currently unused.
+// pub mod seqwait_async;
+
+pub mod bin_ser;
+pub mod postgres_backend;
+pub mod pq_proto;
+
+// dealing with connstring parsing and handy access to its parts
+pub mod connstring;
+
+// helper functions for creating and fsyncing directories/trees
+pub mod crashsafe_dir;
+
+// common authentication routines
+pub mod auth;
+
+// utility functions and helper traits for unified unique id generation/serialization etc.
+pub mod zid;
+// http endpoint utils
+pub mod http;
+
+// socket splitting utils
+pub mod sock_split;
+
+// common log initialisation routine
+pub mod logging;
+
+// Misc
+pub mod accum;
+pub mod shutdown;
+
+// Tools for calling certain async methods in sync contexts
+pub mod sync;
+
+// Utility for binding TcpListeners with proper socket options.
+pub mod tcp_listener;
+
+// Utility for putting a raw file descriptor into non-blocking mode
+pub mod nonblock;
+
+// Default signal handling
+pub mod signals;
+
+/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
+///
+/// we have several cases:
+/// * building locally from git repo
+/// * building in CI from git repo
+/// * building in docker (either in CI or locally)
+///
+/// One thing to note is that .git is not available in docker (and it is bad to include it there).
+/// So everything besides docker build is covered by git_version crate, and docker uses a `GIT_VERSION` argument to get the value required.
+/// It takes variable from build process env and puts it to the rustc env. And then we can retrieve it here by using env! macro.
+/// Git version received from environment variable is used as a fallback in git_version invocation.
+/// And to avoid running buildscript every recompilation, we use rerun-if-env-changed option.
+/// So the build script will be run only when GIT_VERSION envvar has changed.
+///
+/// Why not use a build script to get the git commit sha directly, without a proc macro from a different crate?
+/// Caching and workspaces complicate that. In case `utils` is not
+/// recompiled due to caching then version may become outdated.
+/// git_version crate handles that case by introducing a dependency on .git internals via include_bytes! macro,
+/// so if we changed the index state git_version will pick that up and rerun the macro.
+///
+/// Note that with git_version the prefix is `git:`, and in case of a git version from env it's `git-env:`.
+///
+/// #############################################################################################
+/// TODO this macro is not the way the library is intended to be used, see https://github.com/neondatabase/neon/issues/1565 for details.
+/// We use `cachepot` to reduce our current CI build times: https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036
+/// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains
+/// `println!("cargo:rerun-if-env-changed=GIT_VERSION");` code for cachepot cache invalidation.
+/// The problem needs further investigation and regular `const` declaration instead of a macro.
+#[macro_export]
+macro_rules! project_git_version {
+    ($const_identifier:ident) => {
+        const $const_identifier: &str = git_version::git_version!(
+            prefix = "git:",
+            fallback = concat!(
+                "git-env:",
+                env!("GIT_VERSION", "Missing GIT_VERSION envvar")
+            ),
+            args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha
+        );
+    };
+}
--- a/zenith_utils/src/logging.rs
+++ b/zenith_utils/src/logging.rs
--- a/zenith_utils/src/lsn.rs
+++ b/zenith_utils/src/lsn.rs
--- a/zenith_utils/src/nonblock.rs
+++ b/zenith_utils/src/nonblock.rs
--- a/zenith_utils/src/postgres_backend.rs
+++ b/zenith_utils/src/postgres_backend.rs
@@ -304,8 +304,8 @@ impl PostgresBackend {
    pub fn start_tls(&mut self) -> anyhow::Result<()> {
        match self.stream.take() {
            Some(Stream::Bidirectional(bidi_stream)) => {
-                let session = rustls::ServerSession::new(&self.tls_config.clone().unwrap());
-                self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(session)?));
+                let conn = rustls::ServerConnection::new(self.tls_config.clone().unwrap())?;
+                self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(conn)?));
                Ok(())
            }
            stream => {
@@ -375,9 +375,8 @@ impl PostgresBackend {
                            }
                            AuthType::MD5 => {
                                rand::thread_rng().fill(&mut self.md5_salt);
-                                let md5_salt = self.md5_salt;
                                self.write_message(&BeMessage::AuthenticationMD5Password(
-                                    &md5_salt,
+                                    self.md5_salt,
                                ))?;
                                self.state = ProtoState::Authentication;
                            }
@@ -434,7 +433,12 @@ impl PostgresBackend {
                    // full cause of the error, not just the top-level context + its trace.
                    // We don't want to send that in the ErrorResponse though,
                    // because it's not relevant to the compute node logs.
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
+                    if query_string.starts_with("callmemaybe") {
+                        // FIXME avoid printing a backtrace for tenant x not found errors until this is properly fixed
+                        error!("query handler for '{}' failed: {}", query_string, e);
+                    } else {
+                        error!("query handler for '{}' failed: {:?}", query_string, e);
+                    }
                    self.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?;
                    // TODO: untangle convoluted control flow
                    if e.to_string().contains("failed to run") {
--- a/zenith_utils/src/pq_proto.rs
+++ b/zenith_utils/src/pq_proto.rs
@@ -100,6 +100,21 @@ pub struct FeExecuteMessage {
 #[derive(Debug)]
 pub struct FeCloseMessage {}

+/// Retry a read on EINTR
+///
+/// This runs the enclosed expression, and if it returns
+/// Err(io::ErrorKind::Interrupted), retries it.
+macro_rules! retry_read {
+    ( $x:expr ) => {
+        loop {
+            match $x {
+                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                res => break res,
+            }
+        }
+    };
+}
+
 impl FeMessage {
    /// Read one message from the stream.
    /// This function returns `Ok(None)` in case of EOF.
@@ -107,7 +122,7 @@ impl FeMessage {
    ///
    /// ```
    /// # use std::io;
-    /// # use zenith_utils::pq_proto::FeMessage;
+    /// # use utils::pq_proto::FeMessage;
    /// #
    /// # fn process_message(msg: FeMessage) -> anyhow::Result<()> {
    /// #     Ok(())
@@ -141,12 +156,12 @@ impl FeMessage {
            // Each libpq message begins with a message type byte, followed by message length
            // If the client closes the connection, return None. But if the client closes the
            // connection in the middle of a message, we will return an error.
-            let tag = match stream.read_u8().await {
+            let tag = match retry_read!(stream.read_u8().await) {
                Ok(b) => b,
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
                Err(e) => return Err(e.into()),
            };
-            let len = stream.read_u32().await?;
+            let len = retry_read!(stream.read_u32().await)?;

            // The message length includes itself, so it better be at least 4
            let bodylen = len
@@ -207,7 +222,7 @@ impl FeStartupPacket {
            // reading 4 bytes, to be precise), return None to indicate that the connection
            // was closed. This matches the PostgreSQL server's behavior, which avoids noise
            // in the log if the client opens connection but closes it immediately.
-            let len = match stream.read_u32().await {
+            let len = match retry_read!(stream.read_u32().await) {
                Ok(len) => len as usize,
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
                Err(e) => return Err(e.into()),
@@ -217,7 +232,7 @@ impl FeStartupPacket {
                bail!("invalid message length");
            }

-            let request_code = stream.read_u32().await?;
+            let request_code = retry_read!(stream.read_u32().await)?;

            // the rest of startup packet are params
            let params_len = len - 8;
@@ -401,7 +416,8 @@ fn read_null_terminated(buf: &mut Bytes) -> anyhow::Result<Bytes> {
 #[derive(Debug)]
 pub enum BeMessage<'a> {
    AuthenticationOk,
-    AuthenticationMD5Password(&'a [u8; 4]),
+    AuthenticationMD5Password([u8; 4]),
+    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),
    AuthenticationCleartextPassword,
    BackendKeyData(CancelKeyData),
    BindComplete,
@@ -429,6 +445,13 @@ pub enum BeMessage<'a> {
    KeepAlive(WalSndKeepAlive),
 }

+#[derive(Debug)]
+pub enum BeAuthenticationSaslMessage<'a> {
+    Methods(&'a [&'a str]),
+    Continue(&'a [u8]),
+    Final(&'a [u8]),
+}
+
 #[derive(Debug)]
 pub enum BeParameterStatusMessage<'a> {
    Encoding(&'a str),
@@ -480,6 +503,18 @@ impl RowDescriptor<'_> {
            formatcode: 0,
        }
    }
+
+    pub const fn text_col(name: &[u8]) -> RowDescriptor {
+        RowDescriptor {
+            name,
+            tableoid: 0,
+            attnum: 0,
+            typoid: TEXT_OID,
+            typlen: -1,
+            typmod: 0,
+            formatcode: 0,
+        }
+    }
 }

 #[derive(Debug)]
@@ -611,6 +646,32 @@ impl<'a> BeMessage<'a> {
                .unwrap(); // write into BytesMut can't fail
            }

+            BeMessage::AuthenticationSasl(msg) => {
+                buf.put_u8(b'R');
+                write_body(buf, |buf| {
+                    use BeAuthenticationSaslMessage::*;
+                    match msg {
+                        Methods(methods) => {
+                            buf.put_i32(10); // Specifies that SASL auth method is used.
+                            for method in methods.iter() {
+                                write_cstr(method.as_bytes(), buf)?;
+                            }
+                            buf.put_u8(0); // zero terminator for the list
+                        }
+                        Continue(extra) => {
+                            buf.put_i32(11); // Continue SASL auth.
+                            buf.put_slice(extra);
+                        }
+                        Final(extra) => {
+                            buf.put_i32(12); // Send final SASL message.
+                            buf.put_slice(extra);
+                        }
+                    }
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap()
+            }
+
            BeMessage::BackendKeyData(key_data) => {
                buf.put_u8(b'K');
                write_body(buf, |buf| {
--- a/zenith_utils/src/seqwait.rs
+++ b/zenith_utils/src/seqwait.rs
--- a/zenith_utils/src/seqwait_async.rs
+++ b/zenith_utils/src/seqwait_async.rs
--- a/zenith_utils/src/shutdown.rs
+++ b/zenith_utils/src/shutdown.rs
--- a/zenith_utils/src/signals.rs
+++ b/zenith_utils/src/signals.rs
--- a/zenith_utils/src/sock_split.rs
+++ b/zenith_utils/src/sock_split.rs
@@ -4,7 +4,7 @@ use std::{
    sync::Arc,
 };

-use rustls::Session;
+use rustls::Connection;

 /// Wrapper supporting reads of a shared TcpStream.
 pub struct ArcTcpRead(Arc<TcpStream>);
@@ -56,7 +56,7 @@ impl BufStream {

 pub enum ReadStream {
    Tcp(BufReader<ArcTcpRead>),
-    Tls(rustls_split::ReadHalf<rustls::ServerSession>),
+    Tls(rustls_split::ReadHalf),
 }

 impl io::Read for ReadStream {
@@ -79,7 +79,7 @@ impl ReadStream {

 pub enum WriteStream {
    Tcp(Arc<TcpStream>),
-    Tls(rustls_split::WriteHalf<rustls::ServerSession>),
+    Tls(rustls_split::WriteHalf),
 }

 impl WriteStream {
@@ -107,11 +107,11 @@ impl io::Write for WriteStream {
    }
 }

-type TlsStream<T> = rustls::StreamOwned<rustls::ServerSession, T>;
+type TlsStream<T> = rustls::StreamOwned<rustls::ServerConnection, T>;

 pub enum BidiStream {
    Tcp(BufStream),
-    /// This variant is boxed, because [`rustls::ServerSession`] is quite larger than [`BufStream`].
+    /// This variant is boxed, because [`rustls::ServerConnection`] is much larger than [`BufStream`].
    Tls(Box<TlsStream<BufStream>>),
 }

@@ -127,7 +127,7 @@ impl BidiStream {
                if how == Shutdown::Read {
                    tls_boxed.sock.get_ref().shutdown(how)
                } else {
-                    tls_boxed.sess.send_close_notify();
+                    tls_boxed.conn.send_close_notify();
                    let res = tls_boxed.flush();
                    tls_boxed.sock.get_ref().shutdown(how)?;
                    res
@@ -154,19 +154,23 @@ impl BidiStream {
                // TODO would be nice to avoid the Arc here
                let socket = Arc::try_unwrap(reader.into_inner().0).unwrap();

-                let (read_half, write_half) =
-                    rustls_split::split(socket, tls_boxed.sess, read_buf_cfg, write_buf_cfg);
+                let (read_half, write_half) = rustls_split::split(
+                    socket,
+                    Connection::Server(tls_boxed.conn),
+                    read_buf_cfg,
+                    write_buf_cfg,
+                );
                (ReadStream::Tls(read_half), WriteStream::Tls(write_half))
            }
        }
    }

-    pub fn start_tls(self, mut session: rustls::ServerSession) -> io::Result<Self> {
+    pub fn start_tls(self, mut conn: rustls::ServerConnection) -> io::Result<Self> {
        match self {
            Self::Tcp(mut stream) => {
-                session.complete_io(&mut stream)?;
-                assert!(!session.is_handshaking());
-                Ok(Self::Tls(Box::new(TlsStream::new(session, stream))))
+                conn.complete_io(&mut stream)?;
+                assert!(!conn.is_handshaking());
+                Ok(Self::Tls(Box::new(TlsStream::new(conn, stream))))
            }
            Self::Tls { .. } => Err(io::Error::new(
                io::ErrorKind::InvalidInput,
--- a/zenith_utils/src/sync.rs
+++ b/zenith_utils/src/sync.rs
@@ -29,7 +29,7 @@ impl<S, T: Future> SyncFuture<S, T> {
    /// Example:
    ///
    /// ```
-    /// # use zenith_utils::sync::SyncFuture;
+    /// # use utils::sync::SyncFuture;
    /// # use std::future::Future;
    /// # use tokio::io::AsyncReadExt;
    /// #
--- a/zenith_utils/src/tcp_listener.rs
+++ b/zenith_utils/src/tcp_listener.rs
--- a/zenith_utils/src/vec_map.rs
+++ b/zenith_utils/src/vec_map.rs
@@ -1,11 +1,9 @@
 use std::{alloc::Layout, cmp::Ordering, ops::RangeBounds};

-use serde::{Deserialize, Serialize};
-
 /// Ordered map datastructure implemented in a Vec.
 /// Append only - can only add keys that are larger than the
 /// current max key.
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug)]
 pub struct VecMap<K, V>(Vec<(K, V)>);

 impl<K, V> Default for VecMap<K, V> {
--- a/zenith_utils/src/zid.rs
+++ b/zenith_utils/src/zid.rs
@@ -224,7 +224,7 @@ impl fmt::Display for ZTenantTimelineId {

 // Unique ID of a storage node (safekeeper or pageserver). Supposed to be issued
 // by the console.
-#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Debug, Serialize, Deserialize)]
+#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)]
 #[serde(transparent)]
 pub struct ZNodeId(pub u64);

--- a/zenith_utils/tests/bin_ser_test.rs
+++ b/zenith_utils/tests/bin_ser_test.rs
@@ -2,7 +2,7 @@ use bytes::{Buf, BytesMut};
 use hex_literal::hex;
 use serde::Deserialize;
 use std::io::Read;
-use zenith_utils::bin_ser::LeSer;
+use utils::bin_ser::LeSer;

 #[derive(Debug, PartialEq, Deserialize)]
 pub struct HeaderData {
--- a/zenith_utils/tests/cert.pem
+++ b/zenith_utils/tests/cert.pem
--- a/zenith_utils/tests/key.pem
+++ b/zenith_utils/tests/key.pem
--- a/zenith_utils/tests/ssl_test.rs
+++ b/zenith_utils/tests/ssl_test.rs
@@ -8,9 +8,8 @@ use std::{
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
-use rustls::Session;

-use zenith_utils::postgres_backend::{AuthType, Handler, PostgresBackend};
+use utils::postgres_backend::{AuthType, Handler, PostgresBackend};

 fn make_tcp_pair() -> (TcpStream, TcpStream) {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
@@ -23,11 +22,11 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
 lazy_static! {
    static ref KEY: rustls::PrivateKey = {
        let mut cursor = Cursor::new(include_bytes!("key.pem"));
-        rustls::internal::pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone()
+        rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
    };
    static ref CERT: rustls::Certificate = {
        let mut cursor = Cursor::new(include_bytes!("cert.pem"));
-        rustls::internal::pemfile::certs(&mut cursor).unwrap()[0].clone()
+        rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
    };
 }

@@ -45,17 +44,23 @@ fn ssl() {
        let ssl_response = client_sock.read_u8().unwrap();
        assert_eq!(b'S', ssl_response);

-        let mut cfg = rustls::ClientConfig::new();
-        cfg.root_store.add(&CERT).unwrap();
+        let cfg = rustls::ClientConfig::builder()
+            .with_safe_defaults()
+            .with_root_certificates({
+                let mut store = rustls::RootCertStore::empty();
+                store.add(&CERT).unwrap();
+                store
+            })
+            .with_no_client_auth();
        let client_config = Arc::new(cfg);

-        let dns_name = webpki::DNSNameRef::try_from_ascii_str("localhost").unwrap();
-        let mut session = rustls::ClientSession::new(&client_config, dns_name);
+        let dns_name = "localhost".try_into().unwrap();
+        let mut conn = rustls::ClientConnection::new(client_config, dns_name).unwrap();

-        session.complete_io(&mut client_sock).unwrap();
-        assert!(!session.is_handshaking());
+        conn.complete_io(&mut client_sock).unwrap();
+        assert!(!conn.is_handshaking());

-        let mut stream = rustls::Stream::new(&mut session, &mut client_sock);
+        let mut stream = rustls::Stream::new(&mut conn, &mut client_sock);

        // StartupMessage
        stream.write_u32::<BigEndian>(9).unwrap();
@@ -105,8 +110,10 @@ fn ssl() {
    }
    let mut handler = TestHandler { got_query: false };

-    let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
-    cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
+    let cfg = rustls::ServerConfig::builder()
+        .with_safe_defaults()
+        .with_no_client_auth()
+        .with_single_cert(vec![CERT.clone()], KEY.clone())
        .unwrap();
    let tls_config = Some(Arc::new(cfg));

@@ -209,8 +216,10 @@ fn server_forces_ssl() {
    }
    let mut handler = TestHandler;

-    let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
-    cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
+    let cfg = rustls::ServerConfig::builder()
+        .with_safe_defaults()
+        .with_no_client_auth()
+        .with_single_cert(vec![CERT.clone()], KEY.clone())
        .unwrap();
    let tls_config = Some(Arc::new(cfg));

--- a/neon_local/Cargo.toml
+++ b/neon_local/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "zenith"
+name = "neon_local"
 version = "0.1.0"
 edition = "2021"

@@ -7,12 +7,14 @@ edition = "2021"
 clap = "3.0"
 anyhow = "1.0"
 serde_json = "1"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+comfy-table = "5.0.1"
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+git-version = "0.3.5"

 # FIXME: 'pageserver' is needed for BranchInfo. Refactor
 pageserver = { path = "../pageserver" }
 control_plane = { path = "../control_plane" }
-walkeeper = { path = "../walkeeper" }
-postgres_ffi = { path = "../postgres_ffi" }
-zenith_utils = { path = "../zenith_utils" }
+safekeeper = { path = "../safekeeper" }
+postgres_ffi = { path = "../libs/postgres_ffi" }
+utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/neon_local/src/main.rs
+++ b/neon_local/src/main.rs
@@ -9,18 +9,20 @@ use pageserver::config::defaults::{
    DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
    DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
 };
-use std::collections::{BTreeSet, HashMap};
-use std::process::exit;
-use std::str::FromStr;
-use walkeeper::defaults::{
+use safekeeper::defaults::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
-use zenith_utils::auth::{Claims, Scope};
-use zenith_utils::lsn::Lsn;
-use zenith_utils::postgres_backend::AuthType;
-use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
-use zenith_utils::GIT_VERSION;
+use std::collections::{BTreeSet, HashMap};
+use std::process::exit;
+use std::str::FromStr;
+use utils::{
+    auth::{Claims, Scope},
+    lsn::Lsn,
+    postgres_backend::AuthType,
+    project_git_version,
+    zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
+};

 use pageserver::timelines::TimelineInfo;

@@ -28,6 +30,7 @@ use pageserver::timelines::TimelineInfo;
 const DEFAULT_SAFEKEEPER_ID: ZNodeId = ZNodeId(1);
 const DEFAULT_PAGESERVER_ID: ZNodeId = ZNodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
+project_git_version!(GIT_VERSION);

 fn default_conf() -> String {
    format!(
@@ -60,15 +63,15 @@ http_port = {safekeeper_http_port}
 struct TimelineTreeEl {
    /// `TimelineInfo` received from the `pageserver` via the `timeline_list` http API call.
    pub info: TimelineInfo,
-    /// Name, recovered from zenith config mappings
+    /// Name, recovered from neon config mappings
    pub name: Option<String>,
    /// Holds all direct children of this timeline referenced using `timeline_id`.
    pub children: BTreeSet<ZTimelineId>,
 }

-// Main entry point for the 'zenith' CLI utility
+// Main entry point for the 'neon_local' CLI utility
 //
-// This utility helps to manage zenith installation. That includes following:
+// This utility helps to manage a neon installation. That includes the following:
 //   * Management of local postgres installations running on top of the
 //     pageserver.
 //   * Providing CLI api to the pageserver
@@ -123,12 +126,12 @@ fn main() -> Result<()> {
        .takes_value(true)
        .required(false);

-    let matches = App::new("Zenith CLI")
+    let matches = App::new("Neon CLI")
        .setting(AppSettings::ArgRequiredElseHelp)
        .version(GIT_VERSION)
        .subcommand(
            App::new("init")
-                .about("Initialize a new Zenith repository")
+                .about("Initialize a new Neon repository")
                .arg(pageserver_config_args.clone())
                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
                .arg(
@@ -164,7 +167,12 @@ fn main() -> Result<()> {
            .subcommand(App::new("create")
                .arg(tenant_id_arg.clone())
                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
-        )
+				.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
+				)
+            .subcommand(App::new("config")
+                .arg(tenant_id_arg.clone())
+				.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
+				)
        )
        .subcommand(
            App::new("pageserver")
@@ -251,7 +259,7 @@ fn main() -> Result<()> {
        None => bail!("no subcommand provided"),
    };

-    // Check for 'zenith init' command first.
+    // Check for 'neon init' command first.
    let subcommand_result = if sub_name == "init" {
        handle_init(sub_args).map(Some)
    } else {
@@ -474,9 +482,8 @@ fn handle_init(init_match: &ArgMatches) -> Result<LocalEnv> {
    };

    let mut env =
-        LocalEnv::create_config(&toml_file).context("Failed to create zenith configuration")?;
-    env.init()
-        .context("Failed to initialize zenith repository")?;
+        LocalEnv::create_config(&toml_file).context("Failed to create neon configuration")?;
+    env.init().context("Failed to initialize neon repository")?;

    // default_tenantid was generated by the `env.init()` call above
    let initial_tenant_id = env.default_tenant_id.unwrap();
@@ -511,7 +518,7 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
        .collect()
 }

-fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
+fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {
    let pageserver = PageServerNode::from_env(env);
    match tenant_match.subcommand() {
        Some(("list", _)) => {
@@ -521,8 +528,12 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Re
        }
        Some(("create", create_match)) => {
            let initial_tenant_id = parse_tenant_id(create_match)?;
+            let tenant_conf: HashMap<_, _> = create_match
+                .values_of("config")
+                .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
+                .unwrap_or_default();
            let new_tenant_id = pageserver
-                .tenant_create(initial_tenant_id)?
+                .tenant_create(initial_tenant_id, tenant_conf)?
                .ok_or_else(|| {
                    anyhow!("Tenant with id {:?} was already created", initial_tenant_id)
                })?;
@@ -530,6 +541,41 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Re
                "tenant {} successfully created on the pageserver",
                new_tenant_id
            );
+
+            // Create an initial timeline for the new tenant
+            let new_timeline_id = parse_timeline_id(create_match)?;
+            let timeline = pageserver
+                .timeline_create(new_tenant_id, new_timeline_id, None, None)?
+                .context(format!(
+                    "Failed to create initial timeline for tenant {new_tenant_id}"
+                ))?;
+            let new_timeline_id = timeline.timeline_id;
+            let last_record_lsn = timeline
+                .local
+                .context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
+                .last_record_lsn;
+
+            env.register_branch_mapping(
+                DEFAULT_BRANCH_NAME.to_string(),
+                new_tenant_id,
+                new_timeline_id,
+            )?;
+
+            println!(
+                "Created an initial timeline '{new_timeline_id}' at Lsn {last_record_lsn} for tenant: {new_tenant_id}",
+            );
+        }
+        Some(("config", create_match)) => {
+            let tenant_id = get_tenant_id(create_match, env)?;
+            let tenant_conf: HashMap<_, _> = create_match
+                .values_of("config")
+                .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
+                .unwrap_or_default();
+
+            pageserver
+                .tenant_config(tenant_id, tenant_conf)
+                .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
+            println!("tenant {tenant_id} successfully configured on the pageserver");
        }
        Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
        None => bail!("no tenant subcommand provided"),
@@ -550,7 +596,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            let tenant_id = get_tenant_id(create_match, env)?;
            let new_branch_name = create_match
                .value_of("branch-name")
-                .ok_or(anyhow!("No branch name provided"))?;
+                .ok_or_else(|| anyhow!("No branch name provided"))?;
            let timeline = pageserver
                .timeline_create(tenant_id, None, None, None)?
                .ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
@@ -571,7 +617,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            let tenant_id = get_tenant_id(branch_match, env)?;
            let new_branch_name = branch_match
                .value_of("branch-name")
-                .ok_or(anyhow!("No branch name provided"))?;
+                .ok_or_else(|| anyhow!("No branch name provided"))?;
            let ancestor_branch_name = branch_match
                .value_of("ancestor-branch-name")
                .unwrap_or(DEFAULT_BRANCH_NAME);
@@ -633,35 +679,56 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {

            let timeline_name_mappings = env.timeline_name_mappings();

-            println!("NODE\tADDRESS\tTIMELINE\tBRANCH NAME\tLSN\t\tSTATUS");
+            let mut table = comfy_table::Table::new();
+
+            table.load_preset(comfy_table::presets::NOTHING);
+
+            table.set_header(&[
+                "NODE",
+                "ADDRESS",
+                "TIMELINE",
+                "BRANCH NAME",
+                "LSN",
+                "STATUS",
+            ]);
+
            for ((_, node_name), node) in cplane
                .nodes
                .iter()
                .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
            {
-                // FIXME: This shows the LSN at the end of the timeline. It's not the
-                // right thing to do for read-only nodes that might be anchored at an
-                // older point in time, or following but lagging behind the primary.
-                let lsn_str = timeline_infos
-                    .get(&node.timeline_id)
-                    .and_then(|bi| bi.local.as_ref().map(|l| l.last_record_lsn.to_string()))
-                    .unwrap_or_else(|| "?".to_string());
+                let lsn_str = match node.lsn {
+                    None => {
+                        // -> primary node
+                        // Use the LSN at the end of the timeline.
+                        timeline_infos
+                            .get(&node.timeline_id)
+                            .and_then(|bi| bi.local.as_ref().map(|l| l.last_record_lsn.to_string()))
+                            .unwrap_or_else(|| "?".to_string())
+                    }
+                    Some(lsn) => {
+                        // -> read-only node
+                        // Use the node's LSN.
+                        lsn.to_string()
+                    }
+                };

                let branch_name = timeline_name_mappings
                    .get(&ZTenantTimelineId::new(tenant_id, node.timeline_id))
                    .map(|name| name.as_str())
                    .unwrap_or("?");

-                println!(
-                    "{}\t{}\t{}\t{}\t{}\t{}",
-                    node_name,
-                    node.address,
-                    node.timeline_id,
+                table.add_row(&[
+                    node_name.as_str(),
+                    &node.address.to_string(),
+                    &node.timeline_id.to_string(),
                    branch_name,
-                    lsn_str,
+                    lsn_str.as_str(),
                    node.status(),
-                );
+                ]);
            }
+
+            println!("{table}");
        }
        "create" => {
            let branch_name = sub_args
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -3,25 +3,32 @@ name = "pageserver"
 version = "0.1.0"
 edition = "2021"

+[features]
+# It is simpler infra-wise to have failpoints enabled by default.
+# This shouldn't affect performance in any way, because failpoints
+# are not placed in hot code paths.
+default = ["failpoints"]
+profiling = ["pprof"]
+failpoints = ["fail/failpoints"]
+
 [dependencies]
-bookfile = { git = "https://github.com/zenithdb/bookfile.git", rev="bf6e43825dfb6e749ae9b80e8372c8fea76cec2f" }
 chrono = "0.4.19"
 rand = "0.8.3"
 regex = "1.4.5"
 bytes = { version = "1.0.1", features = ['serde'] }
 byteorder = "1.4.3"
 futures = "0.3.13"
+hex = "0.4.3"
 hyper = "0.14"
 itertools = "0.10.3"
 lazy_static = "1.4.0"
-log = "0.4.14"
 clap = "3.0"
 daemonize = "0.4.1"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
-postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 tokio-stream = "0.1.8"
 anyhow = { version = "1.0", features = ["backtrace"] }
 crc32c = "0.6.0"
@@ -31,26 +38,26 @@ humantime = "2.1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "1.12.0"
+humantime-serde = "1.1.1"
+
+pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }

 toml_edit = { version = "0.13", features = ["easy"] }
 scopeguard = "1.1.0"
-async-trait = "0.1"
 const_format = "0.2.21"
 tracing = "0.1.27"
-tracing-futures = "0.2"
 signal-hook = "0.3.10"
 url = "2"
 nix = "0.23"
 once_cell = "1.8.0"
 crossbeam-utils = "0.8.5"
 fail = "0.5.0"
+git-version = "0.3.5"

-rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
-async-compression = {version = "0.3", features = ["zstd", "tokio"]}
-
-postgres_ffi = { path = "../postgres_ffi" }
-zenith_metrics = { path = "../zenith_metrics" }
-zenith_utils = { path = "../zenith_utils" }
+postgres_ffi = { path = "../libs/postgres_ffi" }
+metrics = { path = "../libs/metrics" }
+utils = { path = "../libs/utils" }
+remote_storage = { path = "../libs/remote_storage" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }

 [dev-dependencies]
--- a/pageserver/README.md
+++ b/pageserver/README.md
@@ -135,7 +135,7 @@ The backup service is disabled by default and can be enabled to interact with a

 CLI examples:
 * Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
-* AWS S3  : `${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/',access_key_id='SOMEKEYAAAAASADSAH*#',secret_access_key='SOMEsEcReTsd292v'}"`
+* AWS S3  : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`

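-For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
-For local S3 installations, refer to the their documentation for name format and credentials.
+For Amazon AWS S3, the key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS Settings page for the relevant user. Also note that bucket names do not contain any protocol when used on AWS.
+For local S3 installations, refer to their documentation for the name format and credentials.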
@@ -155,11 +155,9 @@ or
 bucket_name = 'some-sample-bucket'
 bucket_region = 'eu-north-1'
 prefix_in_bucket = '/test_prefix/'
-access_key_id = 'SOMEKEYAAAAASADSAH*#'
-secret_access_key = 'SOMEsEcReTsd292v'
 ```

-Also, `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` variables can be used to specify the credentials instead of any of the ways above.
+The `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` environment variables can be used to specify the S3 credentials if needed.

 TODO: Sharding
 --------------------
--- a/Show More
+++ b/Show More