Merge branch 'main' into bojan-psbench-over-kvstore

Parse output
Simplify
2026-05-25 09:00:37 +00:00 · 2022-04-12 13:04:59 -04:00 · 2022-04-12 10:53:14 -04:00 · 2022-04-11 21:50:50 -04:00 · 2022-04-05 20:33:57 -04:00 · 2022-03-28 15:42:16 -04:00
302 changed files with 9245 additions and 17390 deletions
--- a/.circleci/ansible/.gitignore
+++ b/.circleci/ansible/.gitignore
@@ -1,4 +1,2 @@
 zenith_install.tar.gz
 .zenith_current_version
-neon_install.tar.gz
-.neon_current_version
--- a/.circleci/ansible/deploy.yaml
+++ b/.circleci/ansible/deploy.yaml
@@ -1,14 +1,14 @@
- name: Upload Neon binaries
+- name: Upload Zenith binaries
  hosts: storage
  gather_facts: False
  remote_user: admin

  tasks:

-    - name: get latest version of Neon binaries
+    - name: get latest version of Zenith binaries
      register: current_version_file
      set_fact:
-        current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
+        current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
      tags:
      - pageserver
      - safekeeper
@@ -19,11 +19,11 @@
      - pageserver
      - safekeeper

-    - name: upload and extract Neon binaries to /usr/local
+    - name: upload and extract Zenith binaries to /usr/local
      ansible.builtin.unarchive:
        owner: root
        group: root
-        src: neon_install.tar.gz
+        src: zenith_install.tar.gz
        dest: /usr/local
      become: true
      tags:
@@ -63,18 +63,21 @@
      tags:
      - pageserver

-    - name: update remote storage (s3) config
-      lineinfile:
-        path: /storage/pageserver/data/pageserver.toml
-        line: "{{ item }}"
-      loop:
-        - "[remote_storage]"
-        - "bucket_name = '{{ bucket_name }}'"
-        - "bucket_region = '{{ bucket_region }}'"
-        - "prefix_in_bucket = '{{ inventory_hostname }}'"
-      become: true
-      tags:
-      - pageserver
+    # It seems that currently S3 integration does not play well
+    # even with fresh pageserver without a burden of old data.
+    # TODO: turn this back on once the issue is solved.
+    # - name: update remote storage (s3) config
+    #   lineinfile:
+    #     path: /storage/pageserver/data/pageserver.toml
+    #     line: "{{ item }}"
+    #   loop:
+    #     - "[remote_storage]"
+    #     - "bucket_name = '{{ bucket_name }}'"
+    #     - "bucket_region = '{{ bucket_region }}'"
+    #     - "prefix_in_bucket = '{{ inventory_hostname }}'"
+    #   become: true
+    #   tags:
+    #   - pageserver

    - name: upload systemd service definition
      ansible.builtin.template:
--- a/.circleci/ansible/get_binaries.sh
+++ b/.circleci/ansible/get_binaries.sh
@@ -4,10 +4,10 @@ set -e

 RELEASE=${RELEASE:-false}

-# look at docker hub for latest tag for neon docker image
+# look at docker hub for latest tag for zenith docker image
 if [ "${RELEASE}" = "true" ]; then
    echo "search latest relase tag"
-    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
        exit 1
@@ -16,7 +16,7 @@ if [ "${RELEASE}" = "true" ]; then
    fi
 else
    echo "search latest dev tag"
-    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no any docker tags found, exiting..."
        exit 1
@@ -28,25 +28,25 @@ fi
 echo "found ${VERSION}"

 # do initial cleanup
-rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
-mkdir neon_install
+rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
+mkdir zenith_install

 # retrive binaries from docker image
 echo "getting binaries from docker image"
-docker pull --quiet neondatabase/neon:${TAG}
-ID=$(docker create neondatabase/neon:${TAG})
+docker pull --quiet zenithdb/zenith:${TAG}
+ID=$(docker create zenithdb/zenith:${TAG})
 docker cp ${ID}:/data/postgres_install.tar.gz .
-tar -xzf postgres_install.tar.gz -C neon_install
-docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
-docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
-docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
-docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
+tar -xzf postgres_install.tar.gz -C zenith_install
+docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
 docker rm -vf ${ID}

 # store version to file (for ansible playbooks) and create binaries tarball
-echo ${VERSION} > neon_install/.neon_current_version
-echo ${VERSION} > .neon_current_version
-tar -czf neon_install.tar.gz -C neon_install .
+echo ${VERSION} > zenith_install/.zenith_current_version
+echo ${VERSION} > .zenith_current_version
+tar -czf zenith_install.tar.gz -C zenith_install .

 # do final cleaup
-rm -rf neon_install postgres_install.tar.gz
+rm -rf zenith_install postgres_install.tar.gz
--- a/.circleci/ansible/neon-stress.hosts
+++ b/.circleci/ansible/neon-stress.hosts
@@ -1,19 +0,0 @@
-[pageservers]
-neon-stress-ps-1 console_region_id=1
-neon-stress-ps-2 console_region_id=1
-
-[safekeepers]
-neon-stress-sk-1 console_region_id=1
-neon-stress-sk-2 console_region_id=1
-neon-stress-sk-3 console_region_id=1
-
-[storage:children]
-pageservers
-safekeepers
-
-[storage:vars]
-console_mgmt_base_url = http://neon-stress-console.local
-bucket_name           = neon-storage-ireland
-bucket_region         = eu-west-1
-etcd_endpoints        = etcd-stress.local:2379
-safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/production.hosts
+++ b/.circleci/ansible/production.hosts
@@ -1,6 +1,5 @@
 [pageservers]
-#zenith-1-ps-1 console_region_id=1
-zenith-1-ps-2 console_region_id=1
+zenith-1-ps-1 console_region_id=1

 [safekeepers]
 zenith-1-sk-1 console_region_id=1
@@ -15,5 +14,3 @@ safekeepers
 console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
-etcd_endpoints        = etcd-release.local:2379
-safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/staging.hosts
+++ b/.circleci/ansible/staging.hosts
@@ -4,9 +4,9 @@ zenith-us-stage-ps-2 console_region_id=27

 [safekeepers]
 zenith-us-stage-sk-1 console_region_id=27
+zenith-us-stage-sk-2 console_region_id=27
+zenith-us-stage-sk-3 console_region_id=27
 zenith-us-stage-sk-4 console_region_id=27
-zenith-us-stage-sk-5 console_region_id=27
-zenith-us-stage-sk-6 console_region_id=27

 [storage:children]
 pageservers
@@ -16,5 +16,3 @@ safekeepers
 console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
-etcd_endpoints        = etcd-staging.local:2379
-safekeeper_enable_s3_offload = false
--- a/.circleci/ansible/systemd/pageserver.service
+++ b/.circleci/ansible/systemd/pageserver.service
@@ -6,7 +6,7 @@ After=network.target auditd.service
 Type=simple
 User=pageserver
 Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
+ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /storage/pageserver/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.circleci/ansible/systemd/safekeeper.service
+++ b/.circleci/ansible/systemd/safekeeper.service
@@ -6,7 +6,7 @@ After=network.target auditd.service
 Type=simple
 User=safekeeper
 Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --enable-s3-offload={{ safekeeper_enable_s3_offload }}
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,18 +1,18 @@
 version: 2.1

 executors:
-  neon-xlarge-executor:
+  zenith-xlarge-executor:
    resource_class: xlarge
    docker:
      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
-      - image: zimg/rust:1.58
-  neon-executor:
+      - image: zimg/rust:1.56
+  zenith-executor:
    docker:
-      - image: zimg/rust:1.58
+      - image: zimg/rust:1.56

 jobs:
  check-codestyle-rust:
-    executor: neon-xlarge-executor
+    executor: zenith-xlarge-executor
    steps:
      - checkout
      - run:
@@ -22,7 +22,7 @@ jobs:

  # A job to build postgres
  build-postgres:
-    executor: neon-xlarge-executor
+    executor: zenith-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -67,9 +67,9 @@ jobs:
          paths:
            - tmp_install

-  # A job to build Neon rust code
-  build-neon:
-    executor: neon-xlarge-executor
+  # A job to build zenith rust code
+  build-zenith:
+    executor: zenith-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -113,7 +113,7 @@ jobs:
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
              cov_prefix=()
-              CARGO_FLAGS="--release --features profiling"
+              CARGO_FLAGS=--release
            fi

            export CARGO_INCREMENTAL=0
@@ -121,7 +121,7 @@ jobs:
            export RUSTC_WRAPPER=cachepot
            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
-            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
+            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests
            cachepot -s

      - save_cache:
@@ -132,6 +132,20 @@ jobs:
            - ~/.cargo/git
            - target

+        # Run style checks
+        # has to run separately from cargo fmt section
+        # since needs to run with dependencies
+      - run:
+          name: cargo clippy
+          command: |
+            if [[ $BUILD_TYPE == "debug" ]]; then
+              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
+            elif [[ $BUILD_TYPE == "release" ]]; then
+              cov_prefix=()
+            fi
+
+            "${cov_prefix[@]}" ./run_clippy.sh
+
        # Run rust unit tests
      - run:
          name: cargo test
@@ -209,7 +223,7 @@ jobs:
            - "*"

  check-codestyle-python:
-    executor: neon-executor
+    executor: zenith-executor
    steps:
      - checkout
      - restore_cache:
@@ -222,12 +236,6 @@ jobs:
          key: v2-python-deps-{{ checksum "poetry.lock" }}
          paths:
            - /home/circleci/.cache/pypoetry/virtualenvs
-      - run:
-          name: Print versions
-          when: always
-          command: |
-              poetry run python --version
-              poetry show
      - run:
          name: Run yapf to ensure code format
          when: always
@@ -238,7 +246,7 @@ jobs:
          command: poetry run mypy .

  run-pytest:
-    executor: neon-executor
+    executor: zenith-executor
    parameters:
      # pytest args to specify the tests to run.
      #
@@ -293,7 +301,7 @@ jobs:
          # `Too long with no output` error, if a test is running for a long time.
          # In that case, tests should have internal timeouts that are less than
          # no_output_timeout, specified here.
-          no_output_timeout: 1m
+          no_output_timeout: 10m
          environment:
            - ZENITH_BIN: /tmp/zenith/bin
            - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
@@ -354,7 +362,6 @@ jobs:
              fi
            fi
      - run:
-          # TODO wait for processes to die in case of timeout?
          # CircleCI artifacts are preserved one file at a time, so skipping
          # this step isn't a good idea. If you want to extract the
          # pageserver state, perhaps a tarball would be a better idea.
@@ -362,7 +369,7 @@ jobs:
          when: always
          command: |
            du -sh /tmp/test_output/*
-            find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
+            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
            du -sh /tmp/test_output/*
      - store_artifacts:
          path: /tmp/test_output
@@ -383,7 +390,7 @@ jobs:
            - "*"

  coverage-report:
-    executor: neon-xlarge-executor
+    executor: zenith-xlarge-executor
    steps:
      - attach_workspace:
          at: /tmp/zenith
@@ -398,7 +405,7 @@ jobs:
      - run:
          name: Build coverage report
          command: |
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
+            COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1

            scripts/coverage \
              --dir=/tmp/zenith/coverage report \
@@ -409,11 +416,11 @@ jobs:
          name: Upload coverage report
          command: |
            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-            REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
+            REPORT_URL=https://zenithdb.github.io/zenith-coverage-data/$CIRCLE_SHA1
+            COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1

            scripts/git-upload \
-              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
+              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-coverage-data.git \
              --message="Add code coverage for $COMMIT_URL" \
              copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE

@@ -430,7 +437,7 @@ jobs:
                \"target_url\": \"$REPORT_URL\"
              }"

-  # Build neondatabase/neon:latest image and push it to Docker hub
+  # Build zenithdb/zenith:latest image and push it to Docker hub
  docker-image:
    docker:
      - image: cimg/base:2021.04
@@ -444,18 +451,18 @@ jobs:
      - run:
          name: Build and push Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
            docker build \
              --pull \
              --build-arg GIT_VERSION=${CIRCLE_SHA1} \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
-            docker push neondatabase/neon:${DOCKER_TAG}
-            docker push neondatabase/neon:latest
+              --tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:latest .
+            docker push zenithdb/zenith:${DOCKER_TAG}
+            docker push zenithdb/zenith:latest

-  # Build neondatabase/compute-node:latest image and push it to Docker hub
+  # Build zenithdb/compute-node:latest image and push it to Docker hub
  docker-image-compute:
    docker:
      - image: cimg/base:2021.04
@@ -463,31 +470,31 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:latest image and push it to Docker hub
+      # Build zenithdb/compute-tools:latest image and push it to Docker hub
      # TODO: this should probably also use versioned tag, not just :latest.
      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
-            docker push neondatabase/compute-tools:latest
+              --tag zenithdb/compute-tools:latest -f Dockerfile.compute-tools .
+            docker push zenithdb/compute-tools:latest
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push compute-node Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG=$(git log --oneline|wc -l)
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
-            docker push neondatabase/compute-node:${DOCKER_TAG}
-            docker push neondatabase/compute-node:latest
+            docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:latest vendor/postgres
+            docker push zenithdb/compute-node:${DOCKER_TAG}
+            docker push zenithdb/compute-node:latest

-  # Build production neondatabase/neon:release image and push it to Docker hub
+  # Build production zenithdb/zenith:release image and push it to Docker hub
  docker-image-release:
    docker:
      - image: cimg/base:2021.04
@@ -501,18 +508,18 @@ jobs:
      - run:
          name: Build and push Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
            docker build \
              --pull \
              --build-arg GIT_VERSION=${CIRCLE_SHA1} \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
-            docker push neondatabase/neon:${DOCKER_TAG}
-            docker push neondatabase/neon:release
+              --tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:release .
+            docker push zenithdb/zenith:${DOCKER_TAG}
+            docker push zenithdb/zenith:release

-  # Build production neondatabase/compute-node:release image and push it to Docker hub
+  # Build production zenithdb/compute-node:release image and push it to Docker hub
  docker-image-compute-release:
    docker:
      - image: cimg/base:2021.04
@@ -520,29 +527,29 @@ jobs:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
-      # Build neondatabase/compute-tools:release image and push it to Docker hub
+      # Build zenithdb/compute-tools:release image and push it to Docker hub
      # TODO: this should probably also use versioned tag, not just :latest.
      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            docker build \
              --build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
              --build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
-              --tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
-            docker push neondatabase/compute-tools:release
+              --tag zenithdb/compute-tools:release -f Dockerfile.compute-tools .
+            docker push zenithdb/compute-tools:release
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push compute-node Docker image
          command: |
-            echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
-            docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
-            docker push neondatabase/compute-node:${DOCKER_TAG}
-            docker push neondatabase/compute-node:release
+            docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:release vendor/postgres
+            docker push zenithdb/compute-node:${DOCKER_TAG}
+            docker push zenithdb/compute-node:release

  deploy-staging:
    docker:
@@ -568,7 +575,7 @@ jobs:
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i staging.hosts
-            rm -f neon_install.tar.gz .neon_current_version
+            rm -f zenith_install.tar.gz .zenith_current_version

  deploy-staging-proxy:
    docker:
@@ -586,63 +593,13 @@ jobs:
          name: Setup helm v3
          command: |
            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-            helm repo add neondatabase https://neondatabase.github.io/helm-charts
+            helm repo add zenithdb https://zenithdb.github.io/helm-charts
      - run:
          name: Re-deploy proxy
          command: |
            DOCKER_TAG=$(git log --oneline|wc -l)
-            helm upgrade neon-proxy       neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
-            helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
+            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait

-  deploy-neon-stress:
-    docker:
-      - image: cimg/python:3.10
-    steps:
-      - checkout
-      - setup_remote_docker
-      - run:
-          name: Setup ansible
-          command: |
-            pip install --progress-bar off --user ansible boto3
-      - run:
-          name: Redeploy
-          command: |
-            cd "$(pwd)/.circleci/ansible"
-
-            ./get_binaries.sh
-
-            echo "${TELEPORT_SSH_KEY}"  | tr -d '\n'| base64 --decode >ssh-key
-            echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
-            chmod 0600 ssh-key
-            ssh-add ssh-key
-            rm -f ssh-key ssh-key-cert.pub
-
-            ansible-playbook deploy.yaml -i neon-stress.hosts
-            rm -f neon_install.tar.gz .neon_current_version
-
-  deploy-neon-stress-proxy:
-    docker:
-      - image: cimg/base:2021.04
-    environment:
-      KUBECONFIG: .kubeconfig
-    steps:
-      - checkout
-      - run:
-          name: Store kubeconfig file
-          command: |
-            echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
-            chmod 0600 ${KUBECONFIG}
-      - run:
-          name: Setup helm v3
-          command: |
-            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-            helm repo add neondatabase https://neondatabase.github.io/helm-charts
-      - run:
-          name: Re-deploy proxy
-          command: |
-            DOCKER_TAG=$(git log --oneline|wc -l)
-            helm upgrade neon-stress-proxy       neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
-            helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait

  deploy-release:
    docker:
@@ -668,7 +625,7 @@ jobs:
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i production.hosts
-            rm -f neon_install.tar.gz .neon_current_version
+            rm -f zenith_install.tar.gz .zenith_current_version

  deploy-release-proxy:
    docker:
@@ -686,13 +643,12 @@ jobs:
          name: Setup helm v3
          command: |
            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-            helm repo add neondatabase https://neondatabase.github.io/helm-charts
+            helm repo add zenithdb https://zenithdb.github.io/helm-charts
      - run:
          name: Re-deploy proxy
          command: |
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
-            helm upgrade neon-proxy       neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
-            helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
+            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait

  # Trigger a new remote CI job
  remote-ci-trigger:
@@ -716,7 +672,7 @@ jobs:
            --data \
              "{
                \"state\": \"pending\",
-                \"context\": \"neon-cloud-e2e\",
+                \"context\": \"zenith-remote-ci\",
                \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
              }"
      - run:
@@ -732,7 +688,7 @@ jobs:
              "{
                \"ref\": \"main\",
                \"inputs\": {
-                  \"ci_job_name\": \"neon-cloud-e2e\",
+                  \"ci_job_name\": \"zenith-remote-ci\",
                  \"commit_hash\": \"$CIRCLE_SHA1\",
                  \"remote_repo\": \"$LOCAL_REPO\"
                }
@@ -748,8 +704,8 @@ workflows:
          matrix:
            parameters:
              build_type: ["debug", "release"]
-      - build-neon:
-          name: build-neon-<< matrix.build_type >>
+      - build-zenith:
+          name: build-zenith-<< matrix.build_type >>
          matrix:
            parameters:
              build_type: ["debug", "release"]
@@ -764,7 +720,7 @@ workflows:
          test_selection: batch_pg_regress
          needs_postgres_source: true
          requires:
-            - build-neon-<< matrix.build_type >>
+            - build-zenith-<< matrix.build_type >>
      - run-pytest:
          name: other-tests-<< matrix.build_type >>
          matrix:
@@ -772,7 +728,7 @@ workflows:
              build_type: ["debug", "release"]
          test_selection: batch_others
          requires:
-            - build-neon-<< matrix.build_type >>
+            - build-zenith-<< matrix.build_type >>
      - run-pytest:
          name: benchmarks
          context: PERF_TEST_RESULT_CONNSTR
@@ -781,7 +737,7 @@ workflows:
          run_in_parallel: false
          save_perf_report: true
          requires:
-            - build-neon-release
+            - build-zenith-release
      - coverage-report:
          # Context passes credentials for gh api
          context: CI_ACCESS_TOKEN
@@ -829,25 +785,6 @@ workflows:
          requires:
            - docker-image

-      - deploy-neon-stress:
-          # Context gives an ability to login
-          context: Docker Hub
-          # deploy only for commits to main
-          filters:
-            branches:
-              only:
-                - main
-          requires:
-            - docker-image
-      - deploy-neon-stress-proxy:
-          # deploy only for commits to main
-          filters:
-            branches:
-              only:
-                - main
-          requires:
-            - docker-image
-
      - docker-image-release:
          # Context gives an ability to login
          context: Docker Hub
@@ -891,11 +828,11 @@ workflows:
      - remote-ci-trigger:
          # Context passes credentials for gh api
          context: CI_ACCESS_TOKEN
-          remote_repo: "neondatabase/cloud"
+          remote_repo: "zenithdb/console"
          requires:
            # XXX: Successful build doesn't mean everything is OK, but
            # the job to be triggered takes so much time to complete (~22 min)
            # that it's better not to wait for the commented-out steps
-            - build-neon-release
+            - build-zenith-debug
            # - pg_regress-tests-release
            # - other-tests-release
--- a/.circleci/helm-values/neon-stress.proxy-scram.yaml
+++ b/.circleci/helm-values/neon-stress.proxy-scram.yaml
@@ -1,26 +0,0 @@
-fullnameOverride: "neon-stress-proxy-scram"
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-stress-console.local/management/api/v2"
-  domain: "*.stress.neon.tech"
-
-podLabels:
-  zenith_service: proxy-scram
-  zenith_env: staging
-  zenith_region: eu-west-1
-  zenith_region_slug: ireland
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'
-
-metrics:
-  enabled: true
-  serviceMonitor:
-    enabled: true
-    selector:
-      release: kube-prometheus-stack
--- a/.circleci/helm-values/neon-stress.proxy.yaml
+++ b/.circleci/helm-values/neon-stress.proxy.yaml
@@ -1,34 +0,0 @@
-fullnameOverride: "neon-stress-proxy"
-
-settings:
-  authEndpoint: "https://console.dev.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.dev.neon.tech/psql_session/"
-
-# -- Additional labels for zenith-proxy pods
-podLabels:
-  zenith_service: proxy
-  zenith_env: staging
-  zenith_region: eu-west-1
-  zenith_region_slug: ireland
-
-service:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
-    external-dns.alpha.kubernetes.io/hostname: neon-stress-proxy.local
-  type: LoadBalancer
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: connect.dev.neon.tech
-
-metrics:
-  enabled: true
-  serviceMonitor:
-    enabled: true
-    selector:
-      release: kube-prometheus-stack
--- a/.circleci/helm-values/production.proxy-scram.yaml
+++ b/.circleci/helm-values/production.proxy-scram.yaml
@@ -1,24 +0,0 @@
-settings:
-  authBackend: "console"
-  authEndpoint: "http://console-release.local/management/api/v2"
-  domain: "*.cloud.neon.tech"
-
-podLabels:
-  zenith_service: proxy-scram
-  zenith_env: production
-  zenith_region: us-west-2
-  zenith_region_slug: oregon
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
-
-metrics:
-  enabled: true
-  serviceMonitor:
-    enabled: true
-    selector:
-      release: kube-prometheus-stack
--- a/.circleci/helm-values/production.proxy.yaml
+++ b/.circleci/helm-values/production.proxy.yaml
@@ -1,6 +1,9 @@
+# Helm chart values for zenith-proxy.
+# This is a YAML-formatted file.
+
 settings:
-  authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.neon.tech/psql_session/"
+  authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
+  uri: "https://console.zenith.tech/psql_session/"

 # -- Additional labels for zenith-proxy pods
 podLabels:
@@ -22,7 +25,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech
+    external-dns.alpha.kubernetes.io/hostname: start.zenith.tech

 metrics:
  enabled: true
--- a/.circleci/helm-values/staging.proxy-scram.yaml
+++ b/.circleci/helm-values/staging.proxy-scram.yaml
@@ -1,31 +0,0 @@
-# Helm chart values for zenith-proxy.
-# This is a YAML-formatted file.
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://console-staging.local/management/api/v2"
-  domain: "*.cloud.stage.neon.tech"
-
-# -- Additional labels for zenith-proxy pods
-podLabels:
-  zenith_service: proxy-scram
-  zenith_env: staging
-  zenith_region: us-east-1
-  zenith_region_slug: virginia
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: cloud.stage.neon.tech
-
-metrics:
-  enabled: true
-  serviceMonitor:
-    enabled: true
-    selector:
-      release: kube-prometheus-stack
--- a/.circleci/helm-values/staging.proxy.yaml
+++ b/.circleci/helm-values/staging.proxy.yaml
@@ -1,12 +1,9 @@
 # Helm chart values for zenith-proxy.
 # This is a YAML-formatted file.

-image:
-  repository: neondatabase/neon
-
 settings:
-  authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.stage.neon.tech/psql_session/"
+  authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
+  uri: "https://console.stage.zenith.tech/psql_session/"

 # -- Additional labels for zenith-proxy pods
 podLabels:
@@ -20,7 +17,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech
+    external-dns.alpha.kubernetes.io/hostname: start.stage.zenith.tech

 metrics:
  enabled: true
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -10,8 +10,6 @@ dep-format-version = "2"
 # Hakari works much better with the new feature resolver.
 # For more about the new feature resolver, see:
 # https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
-# Have to keep the resolver still here since hakari requires this field,
-# despite it's now the default for 2021 edition & cargo.
 resolver = "2"

 # Add triples corresponding to platforms commonly used by developers here.
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -26,7 +26,7 @@ jobs:
    runs-on: [self-hosted, zenith-benchmarker]

    env:
-      POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
+      PG_BIN: "/usr/pgsql-13/bin"

    steps:
    - name: Checkout zenith repo
@@ -51,7 +51,7 @@ jobs:
        echo Poetry
        poetry --version
        echo Pgbench
-        $POSTGRES_DISTRIB_DIR/bin/pgbench --version
+        $PG_BIN/pgbench --version

    # FIXME cluster setup is skipped due to various changes in console API
    # for now pre created cluster is used. When API gain some stability
@@ -66,7 +66,7 @@ jobs:

        echo "Starting cluster"
        # wake up the cluster
-        $POSTGRES_DISTRIB_DIR/bin/psql $BENCHMARK_CONNSTR -c "SELECT 1"
+        $PG_BIN/psql $BENCHMARK_CONNSTR -c "SELECT 1"

    - name: Run benchmark
      # pgbench is installed system wide from official repo
@@ -83,11 +83,8 @@ jobs:
      # sudo yum install postgresql13-contrib
      # actual binaries are located in /usr/pgsql-13/bin/
      env:
-        # The pgbench test runs two tests of given duration against each scale.
-        # So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
-        # Plus time needed to initialize the test databases.
-        TEST_PG_BENCH_DURATIONS_MATRIX: "300"
-        TEST_PG_BENCH_SCALES_MATRIX: "10,100"
+        TEST_PG_BENCH_TRANSACTIONS_MATRIX: "5000,10000,20000"
+        TEST_PG_BENCH_SCALES_MATRIX: "10,15"
        PLATFORM: "zenith-staging"
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
        REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -1,8 +1,6 @@
 name: Build and Test

-on:
-  pull_request:
-  push:
+on: push

 jobs:
  regression-check:
@@ -38,7 +36,8 @@ jobs:

      - name: Install macOs postgres dependencies
        if: matrix.os == 'macos-latest'
-        run: brew install flex bison
+        run: |
+          brew install flex bison

      - name: Set pg revision for caching
        id: pg_ver
@@ -54,7 +53,8 @@ jobs:

      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: make postgres
+        run: |
+          make postgres

      - name: Cache cargo deps
        id: cache_cargo
@@ -64,10 +64,13 @@ jobs:
            ~/.cargo/registry
            ~/.cargo/git
            target
-          key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

-      - name: Run cargo clippy
-        run: ./run_clippy.sh
+      # Use `env CARGO_INCREMENTAL=0` to mitigate https://github.com/rust-lang/rust/issues/91696 for rustc 1.57.0
+      - name: Run cargo build
+        run: |
+          env CARGO_INCREMENTAL=0 cargo build --workspace --bins --examples --tests

      - name: Run cargo test
-        run: cargo test --all --all-targets
+        run: |
+          env CARGO_INCREMENTAL=0 cargo test -- --nocapture --test-threads=1
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,3 @@ test_output/
 # Coverage
 *.profraw
 *.profdata
-
-*.key
-*.crt
--- a/20
+++ b/20
@@ -0,0 +1,20 @@
+This software is licensed under the Apache 2.0 License:
+
+----------------------------------------------------------------------------
+Copyright 2021 Zenith Labs, Inc
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+----------------------------------------------------------------------------
+
+The PostgreSQL submodule in vendor/postgres is licensed under the
+PostgreSQL license. See vendor/postgres/COPYRIGHT.
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,19 +3,22 @@ members = [
    "compute_tools",
    "control_plane",
    "pageserver",
+    "postgres_ffi",
    "proxy",
-    "safekeeper",
+    "walkeeper",
    "workspace_hack",
-    "neon_local",
-    "libs/*",
+    "zenith",
+    "zenith_metrics",
+    "zenith_utils",
 ]
+resolver = "2"

 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
 debug = true

-# This is only needed for proxy's tests.
-# TODO: we should probably fork `tokio-postgres-rustls` instead.
+# This is only needed for proxy's tests
+# TODO: we should probably fork tokio-postgres-rustls instead
 [patch.crates-io]
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
--- a/15
+++ b/15
@@ -1,5 +1,7 @@
 # Build Postgres
-FROM zimg/rust:1.58 AS pg-build
+#
+#FROM zimg/rust:1.56 AS pg-build
+FROM zenithdb/build:buster-20220309 AS pg-build
 WORKDIR /pg

 USER root
@@ -9,26 +11,27 @@ COPY Makefile Makefile

 ENV BUILD_TYPE release
 RUN set -e \
-    && mold -run make -j $(nproc) -s postgres \
+    && make -j $(nproc) -s postgres \
    && rm -rf tmp_install/build \
    && tar -C tmp_install -czf /postgres_install.tar.gz .

 # Build zenith binaries
-FROM zimg/rust:1.58 AS build
+#
+#FROM zimg/rust:1.56 AS build
+FROM zenithdb/build:buster-20220309 AS build
 ARG GIT_VERSION=local

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
+ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot

 COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
 COPY . .

 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
-RUN set -e \
-    && sudo -E "PATH=$PATH" mold -run cargo build --release \
-    && cachepot -s
+RUN cargo build --release && /usr/local/cargo/bin/cachepot -s

 # Build final image
 #
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -0,0 +1,23 @@
+FROM rust:1.56.1-slim-buster
+WORKDIR /home/circleci/project
+
+RUN set -e \
+    && apt-get update \
+    && apt-get -yq install \
+        automake \
+        libtool \
+        build-essential \
+        bison \
+        flex \
+        libreadline-dev \
+        zlib1g-dev \
+        libxml2-dev \
+        libseccomp-dev \
+        pkg-config \
+        libssl-dev \
+        clang
+
+RUN set -e \
+    && rustup component add clippy \
+    && cargo install cargo-audit \
+    && cargo install --git https://github.com/paritytech/cachepot
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,18 +1,19 @@
 # First transient image to build compute_tools binaries
 # NB: keep in sync with rust image version in .circle/config.yml
-FROM zimg/rust:1.58 AS rust-build
+FROM zenithdb/build:buster-20220309 AS rust-build
+
+WORKDIR /zenith

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
+ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot

 COPY . .

-RUN set -e \
-    && sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
-    && cachepot -s
+RUN cargo build -p compute_tools --release && /usr/local/cargo/bin/cachepot -s

 # Final image that only has one binary
 FROM debian:buster-slim

-COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl
+COPY --from=rust-build /zenith/target/release/zenith_ctl /usr/local/bin/zenith_ctl
--- a/12
+++ b/12
@@ -12,21 +12,15 @@ endif
 #
 BUILD_TYPE ?= debug
 ifeq ($(BUILD_TYPE),release)
-	PG_CONFIGURE_OPTS = --enable-debug --with-openssl
+	PG_CONFIGURE_OPTS = --enable-debug
 	PG_CFLAGS = -O2 -g3 $(CFLAGS)
 	# Unfortunately, `--profile=...` is a nightly feature
 	CARGO_BUILD_FLAGS += --release
 else ifeq ($(BUILD_TYPE),debug)
-	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
+	PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
 	PG_CFLAGS = -O0 -g3 $(CFLAGS)
 else
-	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
-endif
-
-# macOS with brew-installed openssl requires explicit paths
-UNAME_S := $(shell uname -s)
-ifeq ($(UNAME_S),Darwin)
-    PG_CONFIGURE_OPTS += --with-includes=/usr/local/opt/openssl/include --with-libraries=/usr/local/opt/openssl/lib
+$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
 endif

 # Choose whether we should be silent or verbose
--- a/5
+++ b/5
@@ -1,5 +0,0 @@
-Neon
-Copyright 2022 Neon Inc.
-
-The PostgreSQL submodule in vendor/postgres is licensed under the
-PostgreSQL license. See vendor/postgres/COPYRIGHT.
--- a/README.md
+++ b/README.md
@@ -1,122 +1,80 @@
-# Neon
+# Zenith

-Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
-
-The project used to be called "Zenith". Many of the commands and code comments
-still refer to "zenith", but we are in the process of renaming things.
+Zenith is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.

 ## Architecture overview

-A Neon installation consists of compute nodes and Neon storage engine.
+A Zenith installation consists of compute nodes and Zenith storage engine.

-Compute nodes are stateless PostgreSQL nodes, backed by Neon storage engine.
+Compute nodes are stateless PostgreSQL nodes, backed by Zenith storage engine.

-Neon storage engine consists of two major components:
+Zenith storage engine consists of two major components:
 - Pageserver. Scalable storage backend for compute nodes.
 - WAL service. The service that receives WAL from compute node and ensures that it is stored durably.

 Pageserver consists of:
- Repository - Neon storage implementation.
+- Repository - Zenith storage implementation.
 - WAL receiver - service that receives WAL from WAL service and stores it in the repository.
 - Page service - service that communicates with compute nodes and responds with pages from the repository.
 - WAL redo - service that builds pages from base images and WAL records on Page service request.

 ## Running local installation

-
-#### building on Ubuntu/ Debian (Linux)
 1. Install build dependencies and other useful packages

 On Ubuntu or Debian this set of packages should be sufficient to build the code:
 ```text
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
-libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
+libssl-dev clang pkg-config libpq-dev
 ```

-2. [Install Rust](https://www.rust-lang.org/tools/install)
-```
-# recommended approach from https://www.rust-lang.org/tools/install
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-```
+[Rust](https://www.rust-lang.org/tools/install) 1.56.1 or later is also required.

-3. Install PostgreSQL Client
-```
-apt install postgresql-client
-```
-
-4. Build neon and patched postgres
-```sh
-git clone --recursive https://github.com/neondatabase/neon.git
-cd neon
-make -j5
-```
-
-#### building on OSX (12.3.1)
-1. Install XCode and dependencies
-```
-xcode-select --install
-brew install protobuf etcd
-```
-
-2. [Install Rust](https://www.rust-lang.org/tools/install)
-```
-# recommended approach from https://www.rust-lang.org/tools/install
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-```
-
-3. Install PostgreSQL Client
-```
-# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
-brew install libpq
-brew link --force libpq
-```
-
-4. Build neon and patched postgres
-```sh
-git clone --recursive https://github.com/neondatabase/neon.git
-cd neon
-make -j5
-```
-
-#### dependency installation notes
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
+Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.

+2. Build zenith and patched postgres
+```sh
+git clone --recursive https://github.com/zenithdb/zenith.git
+cd zenith
+make -j5
+```

-#### running neon database
-1. Start pageserver and postgres on top of it (should be called from repo root):
+3. Start pageserver and postgres on top of it (should be called from repo root):
 ```sh
 # Create repository in .zenith with proper paths to binaries and data
 # Later that would be responsibility of a package install script
-> ./target/debug/neon_local init
-initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
-created initial timeline de200bd42b49cc1814412c7e592dd6e9 timeline.lsn 0/16B5A50
-initial timeline de200bd42b49cc1814412c7e592dd6e9 created
+> ./target/debug/zenith init
+initializing tenantid c03ba6b7ad4c5e9cf556f059ade44229
+created initial timeline 5b014a9e41b4b63ce1a1febc04503636 timeline.lsn 0/169C3C8
+created main branch
 pageserver init succeeded

 # start pageserver and safekeeper
-> ./target/debug/neon_local start
-Starting pageserver at '127.0.0.1:64000' in '.zenith'
+> ./target/debug/zenith start
+Starting pageserver at 'localhost:64000' in '.zenith'
 Pageserver started
-initializing for sk 1 for 7676
-Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
+initializing for single for 7676
+Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/single'
 Safekeeper started

 # start postgres compute node
-> ./target/debug/neon_local pg start main
-Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
+> ./target/debug/zenith pg start main
+Starting new postgres main on timeline 5b014a9e41b4b63ce1a1febc04503636 ...
+Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
 Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
+waiting for server to start.... done
+server started

 # check list of running postgres instances
-> ./target/debug/neon_local pg list
- NODE  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
- main  127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
+> ./target/debug/zenith pg list
+NODE	ADDRESS	TIMELINES	BRANCH NAME	LSN		STATUS
+main	127.0.0.1:55432	5b014a9e41b4b63ce1a1febc04503636	main	0/1609610	running
 ```

-2. Now it is possible to connect to postgres and run some queries:
+4. Now it is possible to connect to postgres and run some queries:
 ```text
 > psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
 postgres=# CREATE TABLE t(key int primary key, value text);
@@ -130,28 +88,21 @@ postgres=# select * from t;
 (1 row)
 ```

-3. And create branches and run postgres on them:
+5. And create branches and run postgres on them:
 ```sh
 # create branch named migration_check
-> ./target/debug/neon_local timeline branch --branch-name migration_check
-Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'
+> ./target/debug/zenith timeline branch --branch-name migration_check
+Created timeline '0e9331cad6efbafe6a88dd73ae21a5c9' at Lsn 0/16F5830 for tenant: c03ba6b7ad4c5e9cf556f059ade44229. Ancestor timeline: 'main'

 # check branches tree
-> ./target/debug/neon_local timeline list
-(L) main [de200bd42b49cc1814412c7e592dd6e9]
-(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
+> ./target/debug/zenith timeline list
+ main [5b014a9e41b4b63ce1a1febc04503636]
+ ┗━ @0/1609610: migration_check [0e9331cad6efbafe6a88dd73ae21a5c9]

 # start postgres on that branch
-> ./target/debug/neon_local pg start migration_check --branch-name migration_check
-Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
-Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
-
-# check the new list of running postgres instances
-> ./target/debug/neon_local pg list
- NODE             ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
- main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running
- migration_check  127.0.0.1:55433  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running
+> ./target/debug/zenith pg start migration_check
+Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
+waiting for server to start.... done

 # this new postgres instance will have all the data from 'main' postgres,
 # but all modifications would not affect data in original postgres
@@ -164,26 +115,18 @@ postgres=# select * from t;

 postgres=# insert into t values(2,2);
 INSERT 0 1
-
-# check that the new change doesn't affect the 'main' postgres
-> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
-postgres=# select * from t;
- key | value
-----+-------
-   1 | 1
-(1 row)
 ```

-4. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
+6. If you want to run tests afterwards (see below), you have to stop all the running pageserver, safekeeper and postgres instances
   you have just started. You can stop them all with one command:
 ```sh
-> ./target/debug/neon_local stop
+> ./target/debug/zenith stop
 ```

 ## Running tests

 ```sh
-git clone --recursive https://github.com/neondatabase/neon.git
+git clone --recursive https://github.com/zenithdb/zenith.git
 make # builds also postgres and installs it to ./tmp_install
 ./scripts/pytest
 ```
@@ -198,14 +141,14 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d

 ### Postgres-specific terms

-Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
+Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
 Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.

 To get more familiar with this aspect, refer to:

- [Neon glossary](/docs/glossary.md)
+- [Zenith glossary](/docs/glossary.md)
 - [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
+- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))

 ## Join the development

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -11,11 +11,10 @@ clap = "3.0"
 env_logger = "0.9"
 hyper = { version = "0.14", features = ["full"] }
 log = { version = "0.4", features = ["std", "serde"] }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
 regex = "1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 tar = "0.4"
 tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -1,9 +1,9 @@
 # Compute node tools

-Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-`ExecStart` option. It will handle all the `Neon` specifics during compute node
+Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+`ExecStart` option. It will handle all the `zenith` specifics during compute node
 initialization:
- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
+- `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
 - Every start is a fresh start, so the data directory is removed and
  initialized again on each run.
 - Next it will put configuration files into the `PGDATA` directory.
@@ -13,18 +13,18 @@ initialization:
 - Check and alter/drop/create roles and databases.
 - Hang waiting on the `postmaster` process to exit.

-Also `compute_ctl` spawns two separate service threads:
+Also `zenith_ctl` spawns two separate service threads:
 - `compute-monitor` checks the last Postgres activity timestamp and saves it
-  into the shared `ComputeNode`;
+  into the shared `ComputeState`;
 - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
  last activity requests.

 Usage example:
 ```sh
-compute_ctl -D /var/db/postgres/compute \
-            -C 'postgresql://zenith_admin@localhost/postgres' \
-            -S /var/db/postgres/specs/current.json \
-            -b /usr/local/bin/postgres
+zenith_ctl -D /var/db/postgres/compute \
+           -C 'postgresql://zenith_admin@localhost/postgres' \
+           -S /var/db/postgres/specs/current.json \
+           -b /usr/local/bin/postgres
 ```

 ## Tests
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -1,174 +0,0 @@
-//!
-//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
-//! initialization:
-//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
-//! - Every start is a fresh start, so the data directory is removed and
-//!   initialized again on each run.
-//! - Next it will put configuration files into the `PGDATA` directory.
-//! - Sync safekeepers and get commit LSN.
-//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
-//! - Try to start `postgres` and wait until it is ready to accept connections.
-//! - Check and alter/drop/create roles and databases.
-//! - Hang waiting on the `postmaster` process to exit.
-//!
-//! Also `compute_ctl` spawns two separate service threads:
-//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
-//!   into the shared `ComputeNode`;
-//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
-//!   last activity requests.
-//!
-//! Usage example:
-//! ```sh
-//! compute_ctl -D /var/db/postgres/compute \
-//!             -C 'postgresql://zenith_admin@localhost/postgres' \
-//!             -S /var/db/postgres/specs/current.json \
-//!             -b /usr/local/bin/postgres
-//! ```
-//!
-use std::fs::File;
-use std::panic;
-use std::path::Path;
-use std::process::exit;
-use std::sync::{Arc, RwLock};
-use std::{thread, time::Duration};
-
-use anyhow::Result;
-use chrono::Utc;
-use clap::Arg;
-use log::{error, info};
-
-use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
-use compute_tools::http::api::launch_http_server;
-use compute_tools::logger::*;
-use compute_tools::monitor::launch_monitor;
-use compute_tools::params::*;
-use compute_tools::pg_helpers::*;
-use compute_tools::spec::*;
-
-fn main() -> Result<()> {
-    // TODO: re-use `utils::logging` later
-    init_logger(DEFAULT_LOG_LEVEL)?;
-
-    // Env variable is set by `cargo`
-    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
-    let matches = clap::App::new("compute_ctl")
-        .version(version.unwrap_or("unknown"))
-        .arg(
-            Arg::new("connstr")
-                .short('C')
-                .long("connstr")
-                .value_name("DATABASE_URL")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgdata")
-                .short('D')
-                .long("pgdata")
-                .value_name("DATADIR")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgbin")
-                .short('b')
-                .long("pgbin")
-                .value_name("POSTGRES_PATH"),
-        )
-        .arg(
-            Arg::new("spec")
-                .short('s')
-                .long("spec")
-                .value_name("SPEC_JSON"),
-        )
-        .arg(
-            Arg::new("spec-path")
-                .short('S')
-                .long("spec-path")
-                .value_name("SPEC_PATH"),
-        )
-        .get_matches();
-
-    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
-    let connstr = matches
-        .value_of("connstr")
-        .expect("Postgres connection string is required");
-    let spec = matches.value_of("spec");
-    let spec_path = matches.value_of("spec-path");
-
-    // Try to use just 'postgres' if no path is provided
-    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
-
-    let spec: ComputeSpec = match spec {
-        // First, try to get cluster spec from the cli argument
-        Some(json) => serde_json::from_str(json)?,
-        None => {
-            // Second, try to read it from the file if path is provided
-            if let Some(sp) = spec_path {
-                let path = Path::new(sp);
-                let file = File::open(path)?;
-                serde_json::from_reader(file)?
-            } else {
-                panic!("cluster spec should be provided via --spec or --spec-path argument");
-            }
-        }
-    };
-
-    let pageserver_connstr = spec
-        .cluster
-        .settings
-        .find("zenith.page_server_connstring")
-        .expect("pageserver connstr should be provided");
-    let tenant = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_tenant")
-        .expect("tenant id should be provided");
-    let timeline = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_timeline")
-        .expect("tenant id should be provided");
-
-    let compute_state = ComputeNode {
-        start_time: Utc::now(),
-        connstr: connstr.to_string(),
-        pgdata: pgdata.to_string(),
-        pgbin: pgbin.to_string(),
-        spec,
-        tenant,
-        timeline,
-        pageserver_connstr,
-        metrics: ComputeMetrics::new(),
-        state: RwLock::new(ComputeState::new()),
-    };
-    let compute = Arc::new(compute_state);
-
-    // Launch service threads first, so we were able to serve availability
-    // requests, while configuration is still in progress.
-    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
-    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
-
-    // Run compute (Postgres) and hang waiting on it.
-    match compute.prepare_and_run() {
-        Ok(ec) => {
-            let code = ec.code().unwrap_or(1);
-            info!("Postgres exited with code {}, shutting down", code);
-            exit(code)
-        }
-        Err(error) => {
-            error!("could not start the compute node: {}", error);
-
-            let mut state = compute.state.write().unwrap();
-            state.error = Some(format!("{:?}", error));
-            state.status = ComputeStatus::Failed;
-            drop(state);
-
-            // Keep serving HTTP requests, so the cloud control plane was able to
-            // get the actual error.
-            info!("giving control plane 30s to collect the error before shutdown");
-            thread::sleep(Duration::from_secs(30));
-            info!("shutting down");
-            Err(error)
-        }
-    }
-}
--- a/compute_tools/src/bin/zenith_ctl.rs
+++ b/compute_tools/src/bin/zenith_ctl.rs
@@ -0,0 +1,249 @@
+//!
+//! Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+//! `ExecStart` option. It will handle all the `zenith` specifics during compute node
+//! initialization:
+//! - `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
+//! - Every start is a fresh start, so the data directory is removed and
+//!   initialized again on each run.
+//! - Next it will put configuration files into the `PGDATA` directory.
+//! - Sync safekeepers and get commit LSN.
+//! - Get `basebackup` from pageserver using the LSN returned by the previous step.
+//! - Try to start `postgres` and wait until it is ready to accept connections.
+//! - Check and alter/drop/create roles and databases.
+//! - Hang waiting on the `postmaster` process to exit.
+//!
+//! Also `zenith_ctl` spawns two separate service threads:
+//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
+//!   into the shared `ComputeState`;
+//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
+//!   last activity requests.
+//!
+//! Usage example:
+//! ```sh
+//! zenith_ctl -D /var/db/postgres/compute \
+//!            -C 'postgresql://zenith_admin@localhost/postgres' \
+//!            -S /var/db/postgres/specs/current.json \
+//!            -b /usr/local/bin/postgres
+//! ```
+//!
+use std::fs::File;
+use std::panic;
+use std::path::Path;
+use std::process::{exit, Command, ExitStatus};
+use std::sync::{Arc, RwLock};
+
+use anyhow::{Context, Result};
+use chrono::Utc;
+use clap::Arg;
+use log::info;
+use postgres::{Client, NoTls};
+
+use compute_tools::config;
+use compute_tools::http_api::launch_http_server;
+use compute_tools::logger::*;
+use compute_tools::monitor::launch_monitor;
+use compute_tools::params::*;
+use compute_tools::pg_helpers::*;
+use compute_tools::spec::*;
+use compute_tools::zenith::*;
+
+/// Prepare PGDATA: recreate the directory, write `postgresql.conf`, sync safekeepers,
+/// fetch the basebackup at the synced LSN and update `pg_hba.conf`.
+fn prepare_pgdata(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
+    let state = state.read().unwrap();
+    let spec = &state.spec;
+    let pgdata_path = Path::new(&state.pgdata);
+    let pageserver_connstr = spec
+        .cluster
+        .settings
+        .find("zenith.page_server_connstring")
+        .expect("pageserver connstr should be provided");
+    let tenant = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_tenant")
+        .expect("tenant id should be provided");
+    let timeline = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_timeline")
+        .expect("timeline id should be provided");
+
+    info!(
+        "starting cluster #{}, operation #{}",
+        spec.cluster.cluster_id,
+        spec.operation_uuid.as_ref().unwrap()
+    );
+
+    // Remove/create an empty pgdata directory and put configuration there.
+    create_pgdata(&state.pgdata)?;
+    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
+
+    info!("starting safekeepers syncing");
+    let lsn = sync_safekeepers(&state.pgdata, &state.pgbin)
+        .with_context(|| "failed to sync safekeepers")?;
+    info!("safekeepers synced at LSN {}", lsn);
+
+    info!(
+        "getting basebackup@{} from pageserver {}",
+        lsn, pageserver_connstr
+    );
+    get_basebackup(&state.pgdata, &pageserver_connstr, &tenant, &timeline, &lsn).with_context(
+        || {
+            format!(
+                "failed to get basebackup@{} from pageserver {}",
+                lsn, pageserver_connstr
+            )
+        },
+    )?;
+
+    // Update pg_hba.conf received with basebackup.
+    update_pg_hba(pgdata_path)?;
+
+    Ok(())
+}
+
+/// Start Postgres as a child process, apply roles/databases from the spec, mark the
+/// compute as ready and then block until the postmaster process exits.
+fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
+    let read_state = state.read().unwrap();
+    let pgdata_path = Path::new(&read_state.pgdata);
+
+    // Run postgres as a child process; panics if the process cannot be spawned.
+    let mut pg = Command::new(&read_state.pgbin)
+        .args(&["-D", &read_state.pgdata])
+        .spawn()
+        .expect("cannot start postgres process");
+
+    // Fall back to the default Postgres port if the spec does not provide one.
+    let port = read_state
+        .spec
+        .cluster
+        .settings
+        .find("port")
+        .unwrap_or_else(|| "5432".to_string());
+    wait_for_postgres(&port, pgdata_path)?;
+
+    let mut client = Client::connect(&read_state.connstr, NoTls)?;
+
+    handle_roles(&read_state.spec, &mut client)?;
+    handle_databases(&read_state.spec, &mut client)?;
+
+    // 'Close' the connection by dropping the client.
+    drop(client);
+
+    info!(
+        "finished configuration of cluster #{}",
+        read_state.spec.cluster.cluster_id
+    );
+
+    // Release the read lock before taking the write lock below.
+    drop(read_state);
+
+    // Get the write lock, set `ready` and release the lock right away, so that
+    // the lock is not held while we are blocked waiting on Postgres and the
+    // HTTP API can keep serving requests.
+    let mut state = state.write().unwrap();
+    state.ready = true;
+    drop(state);
+
+    // Wait for child postgres process basically forever. In this state Ctrl+C
+    // will be propagated to postgres and it will be shut down as well.
+    let ecode = pg.wait().expect("failed to wait on postgres");
+
+    Ok(ecode)
+}
+
+fn main() -> Result<()> {
+    // TODO: re-use `zenith_utils::logging` later
+    init_logger(DEFAULT_LOG_LEVEL)?;
+
+    // Env variable is set by `cargo`
+    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
+    let matches = clap::App::new("zenith_ctl")
+        .version(version.unwrap_or("unknown"))
+        .arg(
+            Arg::new("connstr")
+                .short('C')
+                .long("connstr")
+                .value_name("DATABASE_URL")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgdata")
+                .short('D')
+                .long("pgdata")
+                .value_name("DATADIR")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgbin")
+                .short('b')
+                .long("pgbin")
+                .value_name("POSTGRES_PATH"),
+        )
+        .arg(
+            Arg::new("spec")
+                .short('s')
+                .long("spec")
+                .value_name("SPEC_JSON"),
+        )
+        .arg(
+            Arg::new("spec-path")
+                .short('S')
+                .long("spec-path")
+                .value_name("SPEC_PATH"),
+        )
+        .get_matches();
+
+    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
+    let connstr = matches
+        .value_of("connstr")
+        .expect("Postgres connection string is required");
+    let spec = matches.value_of("spec");
+    let spec_path = matches.value_of("spec-path");
+
+    // Try to use just 'postgres' if no path is provided
+    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
+
+    let spec: ClusterSpec = match spec {
+        // First, try to get cluster spec from the cli argument
+        Some(json) => serde_json::from_str(json)?,
+        None => {
+            // Second, try to read it from the file if path is provided
+            if let Some(sp) = spec_path {
+                let path = Path::new(sp);
+                let file = File::open(path)?;
+                serde_json::from_reader(file)?
+            } else {
+                panic!("cluster spec should be provided via --spec or --spec-path argument");
+            }
+        }
+    };
+
+    let compute_state = ComputeState {
+        connstr: connstr.to_string(),
+        pgdata: pgdata.to_string(),
+        pgbin: pgbin.to_string(),
+        spec,
+        ready: false,
+        last_active: Utc::now(),
+    };
+    let compute_state = Arc::new(RwLock::new(compute_state));
+
+    // Launch service threads first, so that we are able to serve availability
+    // requests while configuration is still in progress.
+    let mut _threads = vec![
+        launch_http_server(&compute_state).expect("cannot launch http endpoint thread"),
+        launch_monitor(&compute_state).expect("cannot launch compute monitor thread"),
+    ];
+
+    prepare_pgdata(&compute_state)?;
+
+    // Run compute (Postgres), hang waiting on it and propagate its exit code (1 if
+    // there is none). Panic on any error to trigger unwind and kill postmaster.
+    match run_compute(&compute_state) {
+        Ok(ec) => exit(ec.code().unwrap_or(1)),
+        Err(error) => panic!("cannot start compute node, error: {}", error),
+    }
+}
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,46 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::{anyhow, Result};
-use log::error;
-use postgres::Client;
-use tokio_postgres::NoTls;
-
-use crate::compute::ComputeNode;
-
-pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
-    let query = "
-    CREATE TABLE IF NOT EXISTS health_check (
-        id serial primary key,
-        updated_at timestamptz default now()
-    );
-    INSERT INTO health_check VALUES (1, now())
-        ON CONFLICT (id) DO UPDATE
-         SET updated_at = now();";
-    let result = client.simple_query(query)?;
-    if result.len() < 2 {
-        return Err(anyhow::format_err!("executed  {} queries", result.len()));
-    }
-    Ok(())
-}
-
-pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
-    let connstr = &compute.connstr;
-    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
-    if client.is_closed() {
-        return Err(anyhow!("connection to postgres closed"));
-    }
-    tokio::spawn(async move {
-        if let Err(e) = connection.await {
-            error!("connection error: {}", e);
-        }
-    });
-
-    let result = client
-        .simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
-        .await?;
-
-    if result.len() != 1 {
-        return Err(anyhow!("statement can't be executed"));
-    }
-    Ok(())
-}
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,321 +0,0 @@
-//
-// XXX: This starts to be scarry similar to the `PostgresNode` from `control_plane`,
-// but there are several things that makes `PostgresNode` usage inconvenient in the
-// cloud:
-// - it inherits from `LocalEnv`, which contains **all-all** the information about
-//   a complete service running
-// - it uses `PageServerNode` with information about http endpoint, which we do not
-//   need in the cloud again
-// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud
-//
-// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
-// attributes (not required for the cloud). Yet, it is still tempting to unify these
-// `PostgresNode` and `ComputeNode` and use one in both places.
-//
-// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
-//
-use std::fs;
-use std::os::unix::fs::PermissionsExt;
-use std::path::Path;
-use std::process::{Command, ExitStatus, Stdio};
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::RwLock;
-
-use anyhow::{Context, Result};
-use chrono::{DateTime, Utc};
-use log::info;
-use postgres::{Client, NoTls};
-use serde::{Serialize, Serializer};
-
-use crate::checker::create_writablity_check_data;
-use crate::config;
-use crate::pg_helpers::*;
-use crate::spec::*;
-
-/// Compute node info shared across several `compute_ctl` threads.
-pub struct ComputeNode {
-    pub start_time: DateTime<Utc>,
-    pub connstr: String,
-    pub pgdata: String,
-    pub pgbin: String,
-    pub spec: ComputeSpec,
-    pub tenant: String,
-    pub timeline: String,
-    pub pageserver_connstr: String,
-    pub metrics: ComputeMetrics,
-    /// Volatile part of the `ComputeNode` so should be used under `RwLock`
-    /// to allow HTTP API server to serve status requests, while configuration
-    /// is in progress.
-    pub state: RwLock<ComputeState>,
-}
-
-fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-{
-    x.to_rfc3339().serialize(s)
-}
-
-#[derive(Serialize)]
-#[serde(rename_all = "snake_case")]
-pub struct ComputeState {
-    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
-    pub error: Option<String>,
-}
-
-impl ComputeState {
-    pub fn new() -> Self {
-        Self {
-            status: ComputeStatus::Init,
-            last_active: Utc::now(),
-            error: None,
-        }
-    }
-}
-
-impl Default for ComputeState {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
-#[serde(rename_all = "snake_case")]
-pub enum ComputeStatus {
-    Init,
-    Running,
-    Failed,
-}
-
-#[derive(Serialize)]
-pub struct ComputeMetrics {
-    pub sync_safekeepers_ms: AtomicU64,
-    pub basebackup_ms: AtomicU64,
-    pub config_ms: AtomicU64,
-    pub total_startup_ms: AtomicU64,
-}
-
-impl ComputeMetrics {
-    pub fn new() -> Self {
-        Self {
-            sync_safekeepers_ms: AtomicU64::new(0),
-            basebackup_ms: AtomicU64::new(0),
-            config_ms: AtomicU64::new(0),
-            total_startup_ms: AtomicU64::new(0),
-        }
-    }
-}
-
-impl Default for ComputeMetrics {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl ComputeNode {
-    pub fn set_status(&self, status: ComputeStatus) {
-        self.state.write().unwrap().status = status;
-    }
-
-    pub fn get_status(&self) -> ComputeStatus {
-        self.state.read().unwrap().status
-    }
-
-    // Remove `pgdata` directory and create it again with right permissions.
-    fn create_pgdata(&self) -> Result<()> {
-        // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
-        // If it is something different then create_dir() will error out anyway.
-        let _ok = fs::remove_dir_all(&self.pgdata);
-        fs::create_dir(&self.pgdata)?;
-        fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
-
-        Ok(())
-    }
-
-    // Get basebackup from the libpq connection to pageserver using `connstr` and
-    // unarchive it to `pgdata` directory overriding all its previous content.
-    fn get_basebackup(&self, lsn: &str) -> Result<()> {
-        let start_time = Utc::now();
-
-        let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
-        let basebackup_cmd = match lsn {
-            "0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
-            _ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
-        };
-        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
-
-        // Read the archive directly from the `CopyOutReader`
-        //
-        // Set `ignore_zeros` so that unpack() reads all the Copy data and
-        // doesn't stop at the end-of-archive marker. Otherwise, if the server
-        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.pgdata)?;
-
-        self.metrics.basebackup_ms.store(
-            Utc::now()
-                .signed_duration_since(start_time)
-                .to_std()
-                .unwrap()
-                .as_millis() as u64,
-            Ordering::Relaxed,
-        );
-
-        Ok(())
-    }
-
-    // Run `postgres` in a special mode with `--sync-safekeepers` argument
-    // and return the reported LSN back to the caller.
-    fn sync_safekeepers(&self) -> Result<String> {
-        let start_time = Utc::now();
-
-        let sync_handle = Command::new(&self.pgbin)
-            .args(&["--sync-safekeepers"])
-            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
-            .stdout(Stdio::piped())
-            .spawn()
-            .expect("postgres --sync-safekeepers failed to start");
-
-        // `postgres --sync-safekeepers` will print all log output to stderr and
-        // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
-        // redirected to the caller output.
-        let sync_output = sync_handle
-            .wait_with_output()
-            .expect("postgres --sync-safekeepers failed");
-        if !sync_output.status.success() {
-            anyhow::bail!(
-                "postgres --sync-safekeepers exited with non-zero status: {}",
-                sync_output.status,
-            );
-        }
-
-        self.metrics.sync_safekeepers_ms.store(
-            Utc::now()
-                .signed_duration_since(start_time)
-                .to_std()
-                .unwrap()
-                .as_millis() as u64,
-            Ordering::Relaxed,
-        );
-
-        let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
-
-        Ok(lsn)
-    }
-
-    /// Do all the preparations like PGDATA directory creation, configuration,
-    /// safekeepers sync, basebackup, etc.
-    pub fn prepare_pgdata(&self) -> Result<()> {
-        let spec = &self.spec;
-        let pgdata_path = Path::new(&self.pgdata);
-
-        // Remove/create an empty pgdata directory and put configuration there.
-        self.create_pgdata()?;
-        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
-
-        info!("starting safekeepers syncing");
-        let lsn = self
-            .sync_safekeepers()
-            .with_context(|| "failed to sync safekeepers")?;
-        info!("safekeepers synced at LSN {}", lsn);
-
-        info!(
-            "getting basebackup@{} from pageserver {}",
-            lsn, &self.pageserver_connstr
-        );
-        self.get_basebackup(&lsn).with_context(|| {
-            format!(
-                "failed to get basebackup@{} from pageserver {}",
-                lsn, &self.pageserver_connstr
-            )
-        })?;
-
-        // Update pg_hba.conf received with basebackup.
-        update_pg_hba(pgdata_path)?;
-
-        Ok(())
-    }
-
-    /// Start Postgres as a child process and manage DBs/roles.
-    /// After that this will hang waiting on the postmaster process to exit.
-    pub fn run(&self) -> Result<ExitStatus> {
-        let start_time = Utc::now();
-
-        let pgdata_path = Path::new(&self.pgdata);
-
-        // Run postgres as a child process.
-        let mut pg = Command::new(&self.pgbin)
-            .args(&["-D", &self.pgdata])
-            .spawn()
-            .expect("cannot start postgres process");
-
-        // Try default Postgres port if it is not provided
-        let port = self
-            .spec
-            .cluster
-            .settings
-            .find("port")
-            .unwrap_or_else(|| "5432".to_string());
-        wait_for_postgres(&mut pg, &port, pgdata_path)?;
-
-        let mut client = Client::connect(&self.connstr, NoTls)?;
-
-        handle_roles(&self.spec, &mut client)?;
-        handle_databases(&self.spec, &mut client)?;
-        handle_grants(&self.spec, &mut client)?;
-        create_writablity_check_data(&mut client)?;
-
-        // 'Close' connection
-        drop(client);
-        let startup_end_time = Utc::now();
-
-        self.metrics.config_ms.store(
-            startup_end_time
-                .signed_duration_since(start_time)
-                .to_std()
-                .unwrap()
-                .as_millis() as u64,
-            Ordering::Relaxed,
-        );
-        self.metrics.total_startup_ms.store(
-            startup_end_time
-                .signed_duration_since(self.start_time)
-                .to_std()
-                .unwrap()
-                .as_millis() as u64,
-            Ordering::Relaxed,
-        );
-
-        self.set_status(ComputeStatus::Running);
-
-        info!(
-            "finished configuration of compute for project {}",
-            self.spec.cluster.cluster_id
-        );
-
-        // Wait for child Postgres process basically forever. In this state Ctrl+C
-        // will propagate to Postgres and it will be shut down as well.
-        let ecode = pg
-            .wait()
-            .expect("failed to start waiting on Postgres process");
-
-        Ok(ecode)
-    }
-
-    pub fn prepare_and_run(&self) -> Result<ExitStatus> {
-        info!(
-            "starting compute for project {}, operation {}, tenant {}, timeline {}",
-            self.spec.cluster.cluster_id,
-            self.spec.operation_uuid.as_ref().unwrap(),
-            self.tenant,
-            self.timeline,
-        );
-
-        self.prepare_pgdata()?;
-        self.run()
-    }
-}
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,7 +6,7 @@ use std::path::Path;
 use anyhow::Result;

 use crate::pg_helpers::PgOptionsSerialize;
-use crate::spec::ComputeSpec;
+use crate::zenith::ClusterSpec;

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -32,20 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 }

 /// Create or completely rewrite configuration file specified by `path`
-pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
+pub fn write_postgres_conf(path: &Path, spec: &ClusterSpec) -> Result<()> {
    // File::create() destroys the file content if it exists.
    let mut postgres_conf = File::create(path)?;

-    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
+    write_zenith_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;

    Ok(())
 }

 // Write Postgres config block wrapped with generated comment section
-fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
-    writeln!(file, "# Managed by compute_ctl: begin")?;
+fn write_zenith_managed_block(file: &mut File, buf: &str) -> Result<()> {
+    writeln!(file, "# Managed by Zenith: begin")?;
    writeln!(file, "{}", buf)?;
-    writeln!(file, "# Managed by compute_ctl: end")?;
+    writeln!(file, "# Managed by Zenith: end")?;

    Ok(())
 }
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -1,109 +0,0 @@
-use std::convert::Infallible;
-use std::net::SocketAddr;
-use std::sync::Arc;
-use std::thread;
-
-use anyhow::Result;
-use hyper::service::{make_service_fn, service_fn};
-use hyper::{Body, Method, Request, Response, Server, StatusCode};
-use log::{error, info};
-use serde_json;
-
-use crate::compute::{ComputeNode, ComputeStatus};
-
-// Service function to handle all available routes.
-async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
-    match (req.method(), req.uri().path()) {
-        // Timestamp of the last Postgres activity in the plain text.
-        // DEPRECATED in favour of /status
-        (&Method::GET, "/last_activity") => {
-            info!("serving /last_active GET request");
-            let state = compute.state.read().unwrap();
-
-            // Use RFC3339 format for consistency.
-            Response::new(Body::from(state.last_active.to_rfc3339()))
-        }
-
-        // Has compute setup process finished? -> true/false.
-        // DEPRECATED in favour of /status
-        (&Method::GET, "/ready") => {
-            info!("serving /ready GET request");
-            let status = compute.get_status();
-            Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
-        }
-
-        // Serialized compute state.
-        (&Method::GET, "/status") => {
-            info!("serving /status GET request");
-            let state = compute.state.read().unwrap();
-            Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
-        }
-
-        // Startup metrics in JSON format. Keep /metrics reserved for a possible
-        // future use for Prometheus metrics format.
-        (&Method::GET, "/metrics.json") => {
-            info!("serving /metrics.json GET request");
-            Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
-        }
-
-        // DEPRECATED, use POST instead
-        (&Method::GET, "/check_writability") => {
-            info!("serving /check_writability GET request");
-            let res = crate::checker::check_writability(&compute).await;
-            match res {
-                Ok(_) => Response::new(Body::from("true")),
-                Err(e) => Response::new(Body::from(e.to_string())),
-            }
-        }
-
-        (&Method::POST, "/check_writability") => {
-            info!("serving /check_writability POST request");
-            let res = crate::checker::check_writability(&compute).await;
-            match res {
-                Ok(_) => Response::new(Body::from("true")),
-                Err(e) => Response::new(Body::from(e.to_string())),
-            }
-        }
-
-        // Return the `404 Not Found` for any other routes.
-        _ => {
-            let mut not_found = Response::new(Body::from("404 Not Found"));
-            *not_found.status_mut() = StatusCode::NOT_FOUND;
-            not_found
-        }
-    }
-}
-
-// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
-#[tokio::main]
-async fn serve(state: Arc<ComputeNode>) {
-    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
-
-    let make_service = make_service_fn(move |_conn| {
-        let state = state.clone();
-        async move {
-            Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
-                let state = state.clone();
-                async move { Ok::<_, Infallible>(routes(req, state).await) }
-            }))
-        }
-    });
-
-    info!("starting HTTP server on {}", addr);
-
-    let server = Server::bind(&addr).serve(make_service);
-
-    // Run this server forever
-    if let Err(e) = server.await {
-        error!("server error: {}", e);
-    }
-}
-
-/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
-    let state = Arc::clone(state);
-
-    Ok(thread::Builder::new()
-        .name("http-endpoint".into())
-        .spawn(move || serve(state))?)
-}
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -1 +0,0 @@
-pub mod api;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -1,158 +0,0 @@
-openapi: "3.0.2"
-info:
-  title: Compute node control API
-  version: "1.0"
-
-servers:
-  - url: "http://localhost:3080"
-
-paths:
-  /status:
-    get:
-      tags:
-      - "info"
-      summary: Get compute node internal status
-      description: ""
-      operationId: getComputeStatus
-      responses:
-        "200":
-          description: ComputeState
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ComputeState"
-
-  /metrics.json:
-    get:
-      tags:
-      - "info"
-      summary: Get compute node startup metrics in JSON format
-      description: ""
-      operationId: getComputeMetricsJSON
-      responses:
-        "200":
-          description: ComputeMetrics
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ComputeMetrics"
-
-  /ready:
-    get:
-      deprecated: true
-      tags:
-      - "info"
-      summary: Check whether compute startup process finished successfully
-      description: ""
-      operationId: computeIsReady
-      responses:
-        "200":
-          description: Compute is ready ('true') or not ('false')
-          content:
-            text/plain:
-              schema:
-                type: string
-                example: "true"
-
-  /last_activity:
-    get:
-      deprecated: true
-      tags:
-      - "info"
-      summary: Get timestamp of the last compute activity
-      description: ""
-      operationId: getLastComputeActivityTS
-      responses:
-        "200":
-          description: Timestamp of the last compute activity
-          content:
-            text/plain:
-              schema:
-                type: string
-                example: "2022-10-12T07:20:50.52Z"
-
-  /check_writability:
-    get:
-      deprecated: true
-      tags:
-      - "check"
-      summary: Check that we can write new data on this compute
-      description: ""
-      operationId: checkComputeWritabilityDeprecated
-      responses:
-        "200":
-          description: Check result
-          content:
-            text/plain:
-              schema:
-                type: string
-                description: Error text or 'true' if check passed
-                example: "true"
-
-    post:
-      tags:
-      - "check"
-      summary: Check that we can write new data on this compute
-      description: ""
-      operationId: checkComputeWritability
-      responses:
-        "200":
-          description: Check result
-          content:
-            text/plain:
-              schema:
-                type: string
-                description: Error text or 'true' if check passed
-                example: "true"
-
-components:
-  securitySchemes:
-    JWT:
-      type: http
-      scheme: bearer
-      bearerFormat: JWT
-
-  schemas:
-    ComputeMetrics:
-      type: object
-      description: Compute startup metrics
-      required:
-        - sync_safekeepers_ms
-        - basebackup_ms
-        - config_ms
-        - total_startup_ms
-      properties:
-        sync_safekeepers_ms:
-          type: integer
-        basebackup_ms:
-          type: integer
-        config_ms:
-          type: integer
-        total_startup_ms:
-          type: integer
-
-    ComputeState:
-      type: object
-      required:
-        - status
-        - last_active
-      properties:
-        status:
-          $ref: '#/components/schemas/ComputeStatus'
-        last_active:
-          type: string
-          description: The last detected compute activity timestamp in UTC and RFC3339 format
-          example: "2022-10-12T07:20:50.52Z"
-        error:
-          type: string
-          description: Text of the error during compute startup, if any
-
-    ComputeStatus:
-      type: string
-      enum:
-        - init
-        - failed
-        - running
-
-security:
-  - JWT: []
--- a/compute_tools/src/http_api.rs
+++ b/compute_tools/src/http_api.rs
@@ -0,0 +1,73 @@
+use std::convert::Infallible;
+use std::net::SocketAddr;
+use std::sync::{Arc, RwLock};
+use std::thread;
+
+use anyhow::Result;
+use hyper::service::{make_service_fn, service_fn};
+use hyper::{Body, Method, Request, Response, Server, StatusCode};
+use log::{error, info};
+
+use crate::zenith::*;
+
+// Service function: dispatch on (method, path); returns plain-text bodies, 404 for unknown routes.
+fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
+    match (req.method(), req.uri().path()) {
+        // Timestamp of the last Postgres activity in the plain text.
+        (&Method::GET, "/last_activity") => {
+            info!("serving /last_activity GET request");
+            let state = state.read().unwrap(); // panics only if the lock is poisoned
+
+            // Use RFC3339 format for consistency.
+            Response::new(Body::from(state.last_active.to_rfc3339()))
+        }
+
+        // Has compute setup process finished? -> true/false
+        (&Method::GET, "/ready") => {
+            info!("serving /ready GET request");
+            let state = state.read().unwrap(); // panics only if the lock is poisoned
+            Response::new(Body::from(format!("{}", state.ready)))
+        }
+
+        // Return the `404 Not Found` for any other routes.
+        _ => {
+            let mut not_found = Response::new(Body::from("404 Not Found"));
+            *not_found.status_mut() = StatusCode::NOT_FOUND;
+            not_found
+        }
+    }
+}
+
+// Main Hyper HTTP server function: binds 0.0.0.0:3080, serves `routes`, and blocks until the server stops.
+#[tokio::main]
+async fn serve(state: Arc<RwLock<ComputeState>>) {
+    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
+
+    let make_service = make_service_fn(move |_conn| {
+        let state = state.clone(); // fresh `Arc` handle moved into the service built for this connection
+        async move {
+            Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
+                let state = state.clone(); // fresh `Arc` handle moved into this request's future
+                async move { Ok::<_, Infallible>(routes(req, state)) }
+            }))
+        }
+    });
+
+    info!("starting HTTP server on {}", addr);
+
+    let server = Server::bind(&addr).serve(make_service); // NOTE(review): hyper documents `bind` as panicking when binding fails — confirm
+
+    // Await the server; if it resolves with an error, log it and return instead of propagating.
+    if let Err(e) = server.await {
+        error!("server error: {}", e);
+    }
+}
+
+/// Start the HTTP API server on its own thread named "http-endpoint" and hand back the `JoinHandle`.
+pub fn launch_http_server(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
+    let shared = Arc::clone(state);
+    let builder = thread::Builder::new().name("http-endpoint".into());
+    // A failed spawn is returned to the caller as an error via `?`.
+    let handle = builder.spawn(move || serve(shared))?;
+    Ok(handle)
+}
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -2,13 +2,12 @@
 //! Various tools and helpers to handle cluster / compute node (Postgres)
 //! configuration.
 //!
-pub mod checker;
 pub mod config;
-pub mod http;
+pub mod http_api;
 #[macro_use]
 pub mod logger;
-pub mod compute;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
+pub mod zenith;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::sync::{Arc, RwLock};
 use std::{thread, time};

 use anyhow::Result;
@@ -6,16 +6,16 @@ use chrono::{DateTime, Utc};
 use log::{debug, info};
 use postgres::{Client, NoTls};

-use crate::compute::ComputeNode;
+use crate::zenith::ComputeState;

 const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds

 // Spin in a loop and figure out the last activity time in the Postgres.
 // Then update it in the shared state. This function never errors out.
 // XXX: the only expected panic is at `RwLock` unwrap().
-fn watch_compute_activity(compute: &Arc<ComputeNode>) {
+fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
    // Suppose that `connstr` doesn't change
-    let connstr = compute.connstr.clone();
+    let connstr = state.read().unwrap().connstr.clone();
    // Define `client` outside of the loop to reuse existing connection if it's active.
    let mut client = Client::connect(&connstr, NoTls);
    let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
@@ -46,7 +46,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                            AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
                        &[],
                    );
-                let mut last_active = compute.state.read().unwrap().last_active;
+                let mut last_active = state.read().unwrap().last_active;

                if let Ok(backs) = backends {
                    let mut idle_backs: Vec<DateTime<Utc>> = vec![];
@@ -83,14 +83,14 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                }

                // Update the last activity in the shared state if we got a more recent one.
-                let mut state = compute.state.write().unwrap();
+                let mut state = state.write().unwrap();
                if last_active > state.last_active {
                    state.last_active = last_active;
                    debug!("set the last compute activity time to: {}", last_active);
                }
            }
            Err(e) => {
-                debug!("cannot connect to postgres: {}, retrying", e);
+                info!("cannot connect to postgres: {}, retrying", e);

                // Establish a new connection and try again.
                client = Client::connect(&connstr, NoTls);
@@ -100,7 +100,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
 }

 /// Launch a separate compute monitor thread and return its `JoinHandle`.
-pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_monitor(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,9 +1,7 @@
-use std::fs::File;
-use std::io::{BufRead, BufReader};
 use std::net::{SocketAddr, TcpStream};
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::process::Child;
+use std::process::Command;
 use std::str::FromStr;
 use std::{fs, thread, time};

@@ -134,14 +132,7 @@ impl Role {
        let mut params: String = "LOGIN".to_string();

        if let Some(pass) = &self.encrypted_password {
-            // Some time ago we supported only md5 and treated all encrypted_password as md5.
-            // Now we also support SCRAM-SHA-256 and to preserve compatibility
-            // we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
-            if pass.starts_with("SCRAM-SHA-256") {
-                params.push_str(&format!(" PASSWORD '{}'", pass));
-            } else {
-                params.push_str(&format!(" PASSWORD 'md5{}'", pass));
-            }
+            params.push_str(&format!(" PASSWORD 'md5{}'", pass));
        } else {
            params.push_str(" PASSWORD NULL");
        }
@@ -222,12 +213,12 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
 /// Wait for Postgres to become ready to accept connections:
 /// - state should be `ready` in the `pgdata/postmaster.pid`
 /// - and we should be able to connect to 127.0.0.1:5432
-pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
+pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
    let pid_path = pgdata.join("postmaster.pid");
    let mut slept: u64 = 0; // ms
    let pause = time::Duration::from_millis(100);

-    let timeout = time::Duration::from_millis(10);
+    let timeout = time::Duration::from_millis(200);
    let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();

    loop {
@@ -238,19 +229,14 @@ pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()
            bail!("timed out while waiting for Postgres to start");
        }

-        if let Ok(Some(status)) = pg.try_wait() {
-            // Postgres exited, that is not what we expected, bail out earlier.
-            let code = status.code().unwrap_or(-1);
-            bail!("Postgres exited unexpectedly with code {}", code);
-        }
-
        if pid_path.exists() {
-            let file = BufReader::new(File::open(&pid_path)?);
-            let status = file
-                .lines()
-                .last()
-                .unwrap()
-                .unwrap_or_else(|_| "unknown".to_string());
+            // XXX: dumb and the simplest way to get the last line in a text file
+            // TODO: better use `.lines().last()` later
+            let stdout = Command::new("tail")
+                .args(&["-n1", pid_path.to_str().unwrap()])
+                .output()?
+                .stdout;
+            let status = String::from_utf8(stdout)?;
            let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();

            // Now Postgres is ready to accept connections
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -3,53 +3,16 @@ use std::path::Path;
 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
 use postgres::Client;
-use serde::Deserialize;

 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
-
-/// Cluster spec or configuration represented as an optional number of
-/// delta operations + final cluster state description.
-#[derive(Clone, Deserialize)]
-pub struct ComputeSpec {
-    pub format_version: f32,
-    pub timestamp: String,
-    pub operation_uuid: Option<String>,
-    /// Expected cluster state at the end of transition process.
-    pub cluster: Cluster,
-    pub delta_operations: Option<Vec<DeltaOp>>,
-}
-
-/// Cluster state seen from the perspective of the external tools
-/// like Rails web console.
-#[derive(Clone, Deserialize)]
-pub struct Cluster {
-    pub cluster_id: String,
-    pub name: String,
-    pub state: Option<String>,
-    pub roles: Vec<Role>,
-    pub databases: Vec<Database>,
-    pub settings: GenericOptions,
-}
-
-/// Single cluster state changing operation that could not be represented as
-/// a static `Cluster` structure. For example:
-/// - DROP DATABASE
-/// - DROP ROLE
-/// - ALTER ROLE name RENAME TO new_name
-/// - ALTER DATABASE name RENAME TO new_name
-#[derive(Clone, Deserialize)]
-pub struct DeltaOp {
-    pub action: String,
-    pub name: PgIdent,
-    pub new_name: Option<PgIdent>,
-}
+use crate::zenith::ClusterSpec;

 /// It takes cluster specification and does the following:
 /// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
 /// - Update `pg_hba.conf` to allow external connections.
-pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
+pub fn handle_configuration(spec: &ClusterSpec, pgdata_path: &Path) -> Result<()> {
    // File `postgresql.conf` is no longer included into `basebackup`, so just
    // always write all config into it creating new file.
    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
@@ -76,7 +39,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {

 /// Given a cluster spec json and open transaction it handles roles creation,
 /// deletion and update.
-pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
+pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
    let mut xact = client.transaction()?;
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

@@ -173,20 +136,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                xact.execute(query.as_str(), &[])?;
            }
        } else {
-            info!("role name: '{}'", &name);
+            info!("role name {}", &name);
            let mut query: String = format!("CREATE ROLE {} ", name.quote());
-            info!("role create query: '{}'", &query);
+            info!("role create query {}", &query);
            info_print!(" -> create");

            query.push_str(&role.to_pg_options());
            xact.execute(query.as_str(), &[])?;
-
-            let grant_query = format!(
-                "grant pg_read_all_data, pg_write_all_data to {}",
-                name.quote()
-            );
-            xact.execute(grant_query.as_str(), &[])?;
-            info!("role grant query: '{}'", &grant_query);
        }

        info_print!("\n");
@@ -202,7 +158,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
 /// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
 /// atomicity should be enough here due to the order of operations and various checks,
 /// which together provide us idempotency.
-pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
+pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    // Print a list of existing Postgres databases (only in debug mode)
@@ -288,24 +244,3 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

    Ok(())
 }
-
-// Grant CREATE ON DATABASE to the database owner
-// to allow clients create trusted extensions.
-pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
-    info!("cluster spec grants:");
-
-    for db in &spec.cluster.databases {
-        let dbname = &db.name;
-
-        let query: String = format!(
-            "GRANT CREATE ON DATABASE {} TO {}",
-            dbname.quote(),
-            db.owner.quote()
-        );
-        info!("grant query {}", &query);
-
-        client.execute(query.as_str(), &[])?;
-    }
-
-    Ok(())
-}
--- a/compute_tools/src/zenith.rs
+++ b/compute_tools/src/zenith.rs
@@ -0,0 +1,109 @@
+use std::process::{Command, Stdio};
+
+use anyhow::Result;
+use chrono::{DateTime, Utc};
+use postgres::{Client, NoTls};
+use serde::Deserialize;
+
+use crate::pg_helpers::*;
+
+/// Compute node state shared across several `zenith_ctl` threads.
+/// Should be used under `RwLock` to allow HTTP API server to serve
+/// status requests, while configuration is in progress.
+pub struct ComputeState {
+    pub connstr: String, // Postgres connection string; the activity monitor connects with it
+    pub pgdata: String, // NOTE(review): presumably the path of the Postgres data directory — confirm
+    pub pgbin: String, // NOTE(review): presumably the path of the `postgres` binary — confirm
+    pub spec: ClusterSpec,
+    /// Compute setup process has finished (reported by `GET /ready`)
+    pub ready: bool,
+    /// Timestamp of the last Postgres activity (reported by `GET /last_activity`)
+    pub last_active: DateTime<Utc>,
+}
+
+/// Cluster spec or configuration represented as an optional number of
+/// delta operations + final cluster state description.
+#[derive(Clone, Deserialize)]
+pub struct ClusterSpec {
+    pub format_version: f32,
+    pub timestamp: String, // stored as the raw string; not parsed into a date type here
+    pub operation_uuid: Option<String>, // optional
+    /// Expected cluster state at the end of transition process.
+    pub cluster: Cluster,
+    pub delta_operations: Option<Vec<DeltaOp>>, // optional list of `DeltaOp`s
+}
+
+/// Cluster state seen from the perspective of the external tools
+/// like Rails web console.
+#[derive(Clone, Deserialize)]
+pub struct Cluster {
+    pub cluster_id: String,
+    pub name: String,
+    pub state: Option<String>, // optional; NOTE(review): the set of valid values is not visible here
+    pub roles: Vec<Role>, // NOTE(review): `Role` presumably comes from `crate::pg_helpers` — confirm
+    pub databases: Vec<Database>, // tests render an entry with `to_pg_options()`
+    pub settings: GenericOptions, // tests render these with `as_pg_settings()`
+}
+
+/// Single cluster state changing operation that could not be represented as
+/// a static `Cluster` structure. For example:
+/// - DROP DATABASE
+/// - DROP ROLE
+/// - ALTER ROLE name RENAME TO new_name
+/// - ALTER DATABASE name RENAME TO new_name
+#[derive(Clone, Deserialize)]
+pub struct DeltaOp {
+    pub action: String, // NOTE(review): presumably selects one of the operations above — confirm
+    pub name: PgIdent, // identifier of the object the operation applies to
+    pub new_name: Option<PgIdent>, // optional; NOTE(review): presumably only used by RENAME — confirm
+}
+
+/// Fetch a basebackup from the pageserver over the libpq connection `connstr`
+/// and unarchive it into the `pgdata` directory, overriding its previous content.
+pub fn get_basebackup(
+    pgdata: &str,
+    connstr: &str,
+    tenant: &str,
+    timeline: &str,
+    lsn: &str,
+) -> Result<()> {
+    // LSN "0/0" means the first start of the compute: the command is sent without an LSN.
+    let command = if lsn == "0/0" {
+        format!("basebackup {} {}", tenant, timeline)
+    } else {
+        format!("basebackup {} {} {}", tenant, timeline, lsn)
+    };
+
+    let mut pageserver = Client::connect(connstr, NoTls)?;
+    let tarball = pageserver.copy_out(command.as_str())?;
+    tar::Archive::new(tarball).unpack(&pgdata)?;
+    Ok(())
+}
+
+/// Run `pgbin --sync-safekeepers` with `PGDATA` set to `pgdata` and return the trimmed LSN it prints to
+/// stdout. Returns `Err` (no panic) if spawning/waiting fails, the exit is non-zero, or stdout is not UTF-8.
+pub fn sync_safekeepers(pgdata: &str, pgbin: &str) -> Result<String> {
+    let sync_handle = Command::new(&pgbin)
+        .args(&["--sync-safekeepers"])
+        .env("PGDATA", &pgdata) // we cannot use -D in this mode
+        .stdout(Stdio::piped())
+        .spawn()
+        .map_err(|e| anyhow::anyhow!("postgres --sync-safekeepers failed to start: {}", e))?;
+
+    // `postgres --sync-safekeepers` will print all log output to stderr and
+    // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
+    // redirected to the caller output.
+    let sync_output = sync_handle
+        .wait_with_output()
+        .map_err(|e| anyhow::anyhow!("postgres --sync-safekeepers failed: {}", e))?;
+    if !sync_output.status.success() {
+        anyhow::bail!(
+            "postgres --sync-safekeepers exited with non-zero status: {}",
+            sync_output.status,
+        );
+    }
+
+    let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
+
+    Ok(lsn)
+}
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -4,12 +4,12 @@ mod pg_helpers_tests {
    use std::fs::File;

    use compute_tools::pg_helpers::*;
-    use compute_tools::spec::ComputeSpec;
+    use compute_tools::zenith::ClusterSpec;

    #[test]
    fn params_serialize() {
        let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
+        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
            spec.cluster.databases.first().unwrap().to_pg_options(),
@@ -24,7 +24,7 @@ mod pg_helpers_tests {
    #[test]
    fn settings_serialize() {
        let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
+        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -4,8 +4,8 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-tar = "0.4.38"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+tar = "0.4.33"
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
 serde = { version = "1.0", features = ["derive"] }
 serde_with = "1.12.0"
 toml = "0.5"
@@ -18,6 +18,6 @@ url = "2.2.2"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

 pageserver = { path = "../pageserver" }
-safekeeper = { path = "../safekeeper" }
-utils = { path = "../libs/utils" }
+walkeeper = { path = "../walkeeper" }
+zenith_utils = { path = "../zenith_utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/control_plane/simple.conf
+++ b/control_plane/simple.conf
@@ -9,6 +9,3 @@ auth_type = 'Trust'
 id = 1
 pg_port = 5454
 http_port = 7676
-
-[etcd_broker]
-broker_endpoints = ['http://127.0.0.1:2379']
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -11,12 +11,11 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, Result};
-use utils::{
-    connstring::connection_host_port,
-    lsn::Lsn,
-    postgres_backend::AuthType,
-    zid::{ZTenantId, ZTimelineId},
-};
+use zenith_utils::connstring::connection_host_port;
+use zenith_utils::lsn::Lsn;
+use zenith_utils::postgres_backend::AuthType;
+use zenith_utils::zid::ZTenantId;
+use zenith_utils::zid::ZTimelineId;

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
@@ -231,13 +230,8 @@ impl PostgresNode {
            .context("page server 'basebackup' command failed")?;

        // Read the archive directly from the `CopyOutReader`
-        //
-        // Set `ignore_zeros` so that unpack() reads all the Copy data and
-        // doesn't stop at the end-of-archive marker. Otherwise, if the server
-        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.pgdata())
+        tar::Archive::new(copyreader)
+            .unpack(&self.pgdata())
            .context("extracting base backup failed")?;

        Ok(())
@@ -278,9 +272,12 @@ impl PostgresNode {
        conf.append("wal_sender_timeout", "5s");
        conf.append("listen_addresses", &self.address.ip().to_string());
        conf.append("port", &self.address.port().to_string());
-        conf.append("wal_keep_size", "0");
-        // walproposer panics when basebackup is invalid, it is pointless to restart in this case.
-        conf.append("restart_after_crash", "off");
+
+        // Never clean up old WAL. TODO: We should use a replication
+        // slot or something proper, to prevent the compute node
+        // from removing WAL that hasn't been streamed to the safekeeper or
+        // page server yet. (gh issue #349)
+        conf.append("wal_keep_size", "10TB");

        // Configure the node to fetch pages from pageserver
        let pageserver_connstr = {
@@ -334,14 +331,14 @@ impl PostgresNode {
            // Configure the node to connect to the safekeepers
            conf.append("synchronous_standby_names", "walproposer");

-            let safekeepers = self
+            let wal_acceptors = self
                .env
                .safekeepers
                .iter()
                .map(|sk| format!("localhost:{}", sk.pg_port))
                .collect::<Vec<String>>()
                .join(",");
-            conf.append("wal_acceptors", &safekeepers);
+            conf.append("wal_acceptors", &wal_acceptors);
        } else {
            // We only use setup without safekeepers for tests,
            // and don't care about data durability on pageserver,
@@ -423,15 +420,10 @@ impl PostgresNode {
        if let Some(token) = auth_token {
            cmd.env("ZENITH_AUTH_TOKEN", token);
        }
+        let pg_ctl = cmd.status().context("pg_ctl failed")?;

-        let pg_ctl = cmd.output().context("pg_ctl failed")?;
-        if !pg_ctl.status.success() {
-            anyhow::bail!(
-                "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
-                pg_ctl.status,
-                String::from_utf8_lossy(&pg_ctl.stdout),
-                String::from_utf8_lossy(&pg_ctl.stderr),
-            );
+        if !pg_ctl.success() {
+            anyhow::bail!("pg_ctl failed");
        }
        Ok(())
    }
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -1,97 +0,0 @@
-use std::{
-    fs,
-    path::PathBuf,
-    process::{Command, Stdio},
-};
-
-use anyhow::Context;
-use nix::{
-    sys::signal::{kill, Signal},
-    unistd::Pid,
-};
-
-use crate::{local_env, read_pidfile};
-
-pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    let etcd_broker = &env.etcd_broker;
-    println!(
-        "Starting etcd broker using {}",
-        etcd_broker.etcd_binary_path.display()
-    );
-
-    let etcd_data_dir = env.base_data_dir.join("etcd");
-    fs::create_dir_all(&etcd_data_dir).with_context(|| {
-        format!(
-            "Failed to create etcd data dir: {}",
-            etcd_data_dir.display()
-        )
-    })?;
-
-    let etcd_stdout_file =
-        fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
-            format!(
-                "Failed to create ectd stout file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
-    let etcd_stderr_file =
-        fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
-            format!(
-                "Failed to create ectd stderr file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
-    let client_urls = etcd_broker.comma_separated_endpoints();
-
-    let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
-        .args(&[
-            format!("--data-dir={}", etcd_data_dir.display()),
-            format!("--listen-client-urls={client_urls}"),
-            format!("--advertise-client-urls={client_urls}"),
-            // Set --quota-backend-bytes to keep the etcd virtual memory
-            // size smaller. Our test etcd clusters are very small.
-            // See https://github.com/etcd-io/etcd/issues/7910
-            "--quota-backend-bytes=100000000".to_string(),
-        ])
-        .stdout(Stdio::from(etcd_stdout_file))
-        .stderr(Stdio::from(etcd_stderr_file))
-        .spawn()
-        .context("Failed to spawn etcd subprocess")?;
-    let pid = etcd_process.id();
-
-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
-        format!(
-            "Failed to create etcd pid file at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
-
-    Ok(())
-}
-
-pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    let etcd_path = &env.etcd_broker.etcd_binary_path;
-    println!("Stopping etcd broker at {}", etcd_path.display());
-
-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
-        format!(
-            "Failed to read etcd pid filea at {}",
-            etcd_pid_file_path.display()
-        )
-    })?);
-
-    kill(pid, Signal::SIGTERM).with_context(|| {
-        format!(
-            "Failed to stop etcd with pid {pid} at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
-
-    Ok(())
-}
-
-fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
-    env.base_data_dir.join("etcd.pid")
-}
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -12,7 +12,6 @@ use std::path::Path;
 use std::process::Command;

 pub mod compute;
-pub mod etcd;
 pub mod local_env;
 pub mod postgresql_conf;
 pub mod safekeeper;
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -4,7 +4,6 @@
 //! script which will use local paths.

 use anyhow::{bail, ensure, Context};
-use reqwest::Url;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use std::collections::HashMap;
@@ -12,11 +11,9 @@ use std::env;
 use std::fs;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
-use utils::{
-    auth::{encode_from_key_file, Claims, Scope},
-    postgres_backend::AuthType,
-    zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
-};
+use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
+use zenith_utils::postgres_backend::AuthType;
+use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};

 use crate::safekeeper::SafekeeperNode;

@@ -60,7 +57,9 @@ pub struct LocalEnv {
    #[serde(default)]
    pub private_key_path: PathBuf,

-    pub etcd_broker: EtcdBroker,
+    // Comma-separated broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
+    #[serde(default)]
+    pub broker_endpoints: Option<String>,

    pub pageserver: PageServerConf,

@@ -76,67 +75,11 @@ pub struct LocalEnv {
    branch_name_mappings: HashMap<String, Vec<(ZTenantId, ZTimelineId)>>,
 }

-/// Etcd broker config for cluster internal communication.
-#[serde_as]
-#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
-pub struct EtcdBroker {
-    /// A prefix to all to any key when pushing/polling etcd from a node.
-    #[serde(default)]
-    pub broker_etcd_prefix: Option<String>,
-
-    /// Broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
-    #[serde(default)]
-    #[serde_as(as = "Vec<DisplayFromStr>")]
-    pub broker_endpoints: Vec<Url>,
-
-    /// Etcd binary path to use.
-    #[serde(default)]
-    pub etcd_binary_path: PathBuf,
-}
-
-impl EtcdBroker {
-    pub fn locate_etcd() -> anyhow::Result<PathBuf> {
-        let which_output = Command::new("which")
-            .arg("etcd")
-            .output()
-            .context("Failed to run 'which etcd' command")?;
-        let stdout = String::from_utf8_lossy(&which_output.stdout);
-        ensure!(
-            which_output.status.success(),
-            "'which etcd' invocation failed. Status: {}, stdout: {stdout}, stderr: {}",
-            which_output.status,
-            String::from_utf8_lossy(&which_output.stderr)
-        );
-
-        let etcd_path = PathBuf::from(stdout.trim());
-        ensure!(
-            etcd_path.is_file(),
-            "'which etcd' invocation was successful, but the path it returned is not a file or does not exist: {}",
-            etcd_path.display()
-        );
-
-        Ok(etcd_path)
-    }
-
-    pub fn comma_separated_endpoints(&self) -> String {
-        self.broker_endpoints.iter().map(Url::as_str).fold(
-            String::new(),
-            |mut comma_separated_urls, url| {
-                if !comma_separated_urls.is_empty() {
-                    comma_separated_urls.push(',');
-                }
-                comma_separated_urls.push_str(url);
-                comma_separated_urls
-            },
-        )
-    }
-}
-
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
 pub struct PageServerConf {
    // node id
-    pub id: NodeId,
+    pub id: ZNodeId,
    // Pageserver connection settings
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
@@ -151,7 +94,7 @@ pub struct PageServerConf {
 impl Default for PageServerConf {
    fn default() -> Self {
        Self {
-            id: NodeId(0),
+            id: ZNodeId(0),
            listen_pg_addr: String::new(),
            listen_http_addr: String::new(),
            auth_type: AuthType::Trust,
@@ -163,7 +106,7 @@ impl Default for PageServerConf {
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
 pub struct SafekeeperConf {
-    pub id: NodeId,
+    pub id: ZNodeId,
    pub pg_port: u16,
    pub http_port: u16,
    pub sync: bool,
@@ -172,7 +115,7 @@ pub struct SafekeeperConf {
 impl Default for SafekeeperConf {
    fn default() -> Self {
        Self {
-            id: NodeId(0),
+            id: ZNodeId(0),
            pg_port: 0,
            http_port: 0,
            sync: true,
@@ -235,7 +178,12 @@ impl LocalEnv {
            if old_timeline_id == &timeline_id {
                Ok(())
            } else {
-                bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
+                bail!(
+                    "branch '{}' is already mapped to timeline {}, cannot map to another timeline {}",
+                    branch_name,
+                    old_timeline_id,
+                    timeline_id
+                );
            }
        } else {
            existing_values.push((tenant_id, timeline_id));
@@ -271,7 +219,7 @@ impl LocalEnv {
    ///
    /// Unlike 'load_config', this function fills in any defaults that are missing
    /// from the config file.
-    pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
+    pub fn create_config(toml: &str) -> anyhow::Result<Self> {
        let mut env: LocalEnv = toml::from_str(toml)?;

        // Find postgres binaries.
@@ -284,11 +232,26 @@ impl LocalEnv {
                env.pg_distrib_dir = cwd.join("tmp_install")
            }
        }
+        if !env.pg_distrib_dir.join("bin/postgres").exists() {
+            bail!(
+                "Can't find postgres binary at {}",
+                env.pg_distrib_dir.display()
+            );
+        }

        // Find zenith binaries.
        if env.zenith_distrib_dir == Path::new("") {
            env.zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
        }
+        for binary in ["pageserver", "safekeeper"] {
+            if !env.zenith_distrib_dir.join(binary).exists() {
+                bail!(
+                    "Can't find binary '{}' in zenith distrib dir '{}'",
+                    binary,
+                    env.zenith_distrib_dir.display()
+                );
+            }
+        }

        // If no initial tenant ID was given, generate it.
        if env.default_tenant_id.is_none() {
@@ -382,36 +345,6 @@ impl LocalEnv {
            "directory '{}' already exists. Perhaps already initialized?",
            base_path.display()
        );
-        if !self.pg_distrib_dir.join("bin/postgres").exists() {
-            bail!(
-                "Can't find postgres binary at {}",
-                self.pg_distrib_dir.display()
-            );
-        }
-        for binary in ["pageserver", "safekeeper"] {
-            if !self.zenith_distrib_dir.join(binary).exists() {
-                bail!(
-                    "Can't find binary '{}' in zenith distrib dir '{}'",
-                    binary,
-                    self.zenith_distrib_dir.display()
-                );
-            }
-        }
-
-        for binary in ["pageserver", "safekeeper"] {
-            if !self.zenith_distrib_dir.join(binary).exists() {
-                bail!(
-                    "Can't find binary '{binary}' in zenith distrib dir '{}'",
-                    self.zenith_distrib_dir.display()
-                );
-            }
-        }
-        if !self.pg_distrib_dir.join("bin/postgres").exists() {
-            bail!(
-                "Can't find postgres binary at {}",
-                self.pg_distrib_dir.display()
-            );
-        }

        fs::create_dir(&base_path)?;

@@ -469,35 +402,7 @@ impl LocalEnv {

 fn base_path() -> PathBuf {
    match std::env::var_os("ZENITH_REPO_DIR") {
-        Some(val) => PathBuf::from(val),
-        None => PathBuf::from(".zenith"),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn simple_conf_parsing() {
-        let simple_conf_toml = include_str!("../simple.conf");
-        let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
-        assert!(
-            simple_conf_parse_result.is_ok(),
-            "failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
-        );
-
-        let string_to_replace = "broker_endpoints = ['http://127.0.0.1:2379']";
-        let spoiled_url_str = "broker_endpoints = ['!@$XOXO%^&']";
-        let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
-        assert!(
-            spoiled_url_toml.contains(spoiled_url_str),
-            "Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
-        );
-        let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
-        assert!(
-            spoiled_url_parse_result.is_err(),
-            "expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
-        );
+        Some(val) => PathBuf::from(val), // OsString converts losslessly; no UTF-8 unwrap that could panic
+        None => ".zenith".into(),
    }
 }
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -13,17 +13,15 @@ use nix::unistd::Pid;
 use postgres::Config;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
-use safekeeper::http::models::TimelineCreateRequest;
 use thiserror::Error;
-use utils::{
-    connstring::connection_address,
-    http::error::HttpErrorBody,
-    zid::{NodeId, ZTenantId, ZTimelineId},
-};
+use walkeeper::http::models::TimelineCreateRequest;
+use zenith_utils::http::error::HttpErrorBody;
+use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};

 use crate::local_env::{LocalEnv, SafekeeperConf};
 use crate::storage::PageServerNode;
 use crate::{fill_rust_env_vars, read_pidfile};
+use zenith_utils::connstring::connection_address;

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
@@ -52,7 +50,7 @@ impl ResponseErrorMessageExt for Response {
        Err(SafekeeperHttpError::Response(
            match self.json::<HttpErrorBody>() {
                Ok(err_body) => format!("Error: {}", err_body.msg),
-                Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
+                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
            },
        ))
    }
@@ -65,7 +63,7 @@ impl ResponseErrorMessageExt for Response {
 //
 #[derive(Debug)]
 pub struct SafekeeperNode {
-    pub id: NodeId,
+    pub id: ZNodeId,

    pub conf: SafekeeperConf,

@@ -75,12 +73,16 @@ pub struct SafekeeperNode {
    pub http_base_url: String,

    pub pageserver: Arc<PageServerNode>,
+
+    broker_endpoints: Option<String>,
 }

 impl SafekeeperNode {
    pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
        let pageserver = Arc::new(PageServerNode::from_env(env));

+        println!("initializing for sk {} for {}", conf.id, conf.http_port);
+
        SafekeeperNode {
            id: conf.id,
            conf: conf.clone(),
@@ -89,6 +91,7 @@ impl SafekeeperNode {
            http_client: Client::new(),
            http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
            pageserver,
+            broker_endpoints: env.broker_endpoints.clone(),
        }
    }

@@ -100,7 +103,7 @@ impl SafekeeperNode {
            .unwrap()
    }

-    pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
+    pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
        env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
    }

@@ -135,13 +138,8 @@ impl SafekeeperNode {
        if !self.conf.sync {
            cmd.arg("--no-sync");
        }
-
-        let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
-        if !comma_separated_endpoints.is_empty() {
-            cmd.args(&["--broker-endpoints", &comma_separated_endpoints]);
-        }
-        if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
-            cmd.args(&["--broker-etcd-prefix", prefix]);
+        if let Some(ref ep) = self.broker_endpoints {
+            cmd.args(&["--broker-endpoints", ep]);
        }

        if !cmd.status()?.success() {
@@ -205,13 +203,12 @@ impl SafekeeperNode {
        let pid = Pid::from_raw(pid);

        let sig = if immediate {
-            print!("Stopping safekeeper {} immediately..", self.id);
+            println!("Stop safekeeper immediately");
            Signal::SIGQUIT
        } else {
-            print!("Stopping safekeeper {} gracefully..", self.id);
+            println!("Stop safekeeper gracefully");
            Signal::SIGTERM
        };
-        io::stdout().flush().unwrap();
        match kill(pid, sig) {
            Ok(_) => (),
            Err(Errno::ESRCH) => {
@@ -233,35 +230,25 @@ impl SafekeeperNode {
        // TODO Remove this "timeout" and handle it on caller side instead.
        // Shutting down may take a long time,
        // if safekeeper flushes a lot of data
-        let mut tcp_stopped = false;
        for _ in 0..100 {
-            if !tcp_stopped {
-                if let Err(err) = TcpStream::connect(&address) {
-                    tcp_stopped = true;
-                    if err.kind() != io::ErrorKind::ConnectionRefused {
-                        eprintln!("\nSafekeeper connection failed with error: {err}");
-                    }
-                }
-            }
-            if tcp_stopped {
-                // Also check status on the HTTP port
+            if let Err(_e) = TcpStream::connect(&address) {
+                println!("Safekeeper stopped receiving connections");
+
+                //Now check status
                match self.check_status() {
-                    Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
-                        println!("done!");
-                        return Ok(());
+                    Ok(_) => {
+                        println!("Safekeeper status is OK. Wait a bit.");
+                        thread::sleep(Duration::from_secs(1));
                    }
                    Err(err) => {
-                        eprintln!("\nSafekeeper status check failed with error: {err}");
+                        println!("Safekeeper status is: {}", err);
                        return Ok(());
                    }
-                    Ok(()) => {
-                        // keep waiting
-                    }
                }
+            } else {
+                println!("Safekeeper still receives connections");
+                thread::sleep(Duration::from_secs(1));
            }
-            print!(".");
-            io::stdout().flush().unwrap();
-            thread::sleep(Duration::from_secs(1));
        }

        bail!("Failed to stop safekeeper with pid {}", pid);
@@ -286,7 +273,7 @@ impl SafekeeperNode {
        &self,
        tenant_id: ZTenantId,
        timeline_id: ZTimelineId,
-        peer_ids: Vec<NodeId>,
+        peer_ids: Vec<ZNodeId>,
    ) -> Result<()> {
        Ok(self
            .http_request(
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,4 +1,3 @@
-use std::collections::HashMap;
 use std::io::Write;
 use std::net::TcpStream;
 use std::path::PathBuf;
@@ -10,23 +9,21 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
+use pageserver::http::models::{TenantCreateRequest, TimelineCreateRequest};
 use pageserver::timelines::TimelineInfo;
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-use utils::{
-    connstring::connection_address,
-    http::error::HttpErrorBody,
-    lsn::Lsn,
-    postgres_backend::AuthType,
-    zid::{ZTenantId, ZTimelineId},
-};
+use zenith_utils::http::error::HttpErrorBody;
+use zenith_utils::lsn::Lsn;
+use zenith_utils::postgres_backend::AuthType;
+use zenith_utils::zid::{ZTenantId, ZTimelineId};

 use crate::local_env::LocalEnv;
 use crate::{fill_rust_env_vars, read_pidfile};
 use pageserver::tenant_mgr::TenantInfo;
+use zenith_utils::connstring::connection_address;

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -121,16 +118,6 @@ impl PageServerNode {
        );
        let listen_pg_addr_param =
            format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
-        let broker_endpoints_param = format!(
-            "broker_endpoints=[{}]",
-            self.env
-                .etcd_broker
-                .broker_endpoints
-                .iter()
-                .map(|url| format!("'{url}'"))
-                .collect::<Vec<_>>()
-                .join(",")
-        );
        let mut args = Vec::with_capacity(20);

        args.push("--init");
@@ -139,19 +126,8 @@ impl PageServerNode {
        args.extend(["-c", &authg_type_param]);
        args.extend(["-c", &listen_http_addr_param]);
        args.extend(["-c", &listen_pg_addr_param]);
-        args.extend(["-c", &broker_endpoints_param]);
        args.extend(["-c", &id]);

-        let broker_etcd_prefix_param = self
-            .env
-            .etcd_broker
-            .broker_etcd_prefix
-            .as_ref()
-            .map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
-        if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
-            args.extend(["-c", broker_etcd_prefix_param]);
-        }
-
        for config_override in config_overrides {
            args.extend(["-c", config_override]);
        }
@@ -188,9 +164,6 @@ impl PageServerNode {
            );
        }

-        // echo the captured output of the init command
-        println!("{}", String::from_utf8_lossy(&init_output.stdout));
-
        Ok(initial_timeline_id)
    }

@@ -210,6 +183,8 @@ impl PageServerNode {
        );
        io::stdout().flush().unwrap();

+        let mut cmd = Command::new(self.env.pageserver_bin()?);
+
        let repo_path = self.repo_path();
        let mut args = vec!["-D", repo_path.to_str().unwrap()];

@@ -217,11 +192,9 @@ impl PageServerNode {
            args.extend(["-c", config_override]);
        }

-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-        let mut filled_cmd = fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
-        filled_cmd = fill_aws_secrets_vars(filled_cmd);
+        fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));

-        if !filled_cmd.status()?.success() {
+        if !cmd.status()?.success() {
            bail!(
                "Pageserver failed to start. See '{}' for details.",
                self.repo_path().join("pageserver.log").display()
@@ -281,13 +254,12 @@ impl PageServerNode {
        let pid = Pid::from_raw(read_pidfile(&pid_file)?);

        let sig = if immediate {
-            print!("Stopping pageserver immediately..");
+            println!("Stop pageserver immediately");
            Signal::SIGQUIT
        } else {
-            print!("Stopping pageserver gracefully..");
+            println!("Stop pageserver gracefully");
            Signal::SIGTERM
        };
-        io::stdout().flush().unwrap();
        match kill(pid, sig) {
            Ok(_) => (),
            Err(Errno::ESRCH) => {
@@ -309,36 +281,25 @@ impl PageServerNode {
        // TODO Remove this "timeout" and handle it on caller side instead.
        // Shutting down may take a long time,
        // if pageserver checkpoints a lot of data
-        let mut tcp_stopped = false;
        for _ in 0..100 {
-            if !tcp_stopped {
-                if let Err(err) = TcpStream::connect(&address) {
-                    tcp_stopped = true;
-                    if err.kind() != io::ErrorKind::ConnectionRefused {
-                        eprintln!("\nPageserver connection failed with error: {err}");
-                    }
-                }
-            }
-            if tcp_stopped {
-                // Also check status on the HTTP port
+            if let Err(_e) = TcpStream::connect(&address) {
+                println!("Pageserver stopped receiving connections");

+                //Now check status
                match self.check_status() {
-                    Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
-                        println!("done!");
-                        return Ok(());
+                    Ok(_) => {
+                        println!("Pageserver status is OK. Wait a bit.");
+                        thread::sleep(Duration::from_secs(1));
                    }
                    Err(err) => {
-                        eprintln!("\nPageserver status check failed with error: {err}");
+                        println!("Pageserver status is: {}", err);
                        return Ok(());
                    }
-                    Ok(()) => {
-                        // keep waiting
-                    }
                }
+            } else {
+                println!("Pageserver still receives connections");
+                thread::sleep(Duration::from_secs(1));
            }
-            print!(".");
-            io::stdout().flush().unwrap();
-            thread::sleep(Duration::from_secs(1));
        }

        bail!("Failed to stop pageserver with pid {}", pid);
@@ -381,36 +342,10 @@ impl PageServerNode {
    pub fn tenant_create(
        &self,
        new_tenant_id: Option<ZTenantId>,
-        settings: HashMap<&str, &str>,
    ) -> anyhow::Result<Option<ZTenantId>> {
        let tenant_id_string = self
            .http_request(Method::POST, format!("{}/tenant", self.http_base_url))
-            .json(&TenantCreateRequest {
-                new_tenant_id,
-                checkpoint_distance: settings
-                    .get("checkpoint_distance")
-                    .map(|x| x.parse::<u64>())
-                    .transpose()?,
-                compaction_target_size: settings
-                    .get("compaction_target_size")
-                    .map(|x| x.parse::<u64>())
-                    .transpose()?,
-                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
-                compaction_threshold: settings
-                    .get("compaction_threshold")
-                    .map(|x| x.parse::<usize>())
-                    .transpose()?,
-                gc_horizon: settings
-                    .get("gc_horizon")
-                    .map(|x| x.parse::<u64>())
-                    .transpose()?,
-                gc_period: settings.get("gc_period").map(|x| x.to_string()),
-                image_creation_threshold: settings
-                    .get("image_creation_threshold")
-                    .map(|x| x.parse::<usize>())
-                    .transpose()?,
-                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
-            })
+            .json(&TenantCreateRequest { new_tenant_id })
            .send()?
            .error_from_body()?
            .json::<Option<String>>()?;
@@ -427,35 +362,6 @@ impl PageServerNode {
            .transpose()
    }

-    pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
-        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))
-            .json(&TenantConfigRequest {
-                tenant_id,
-                checkpoint_distance: settings
-                    .get("checkpoint_distance")
-                    .map(|x| x.parse::<u64>().unwrap()),
-                compaction_target_size: settings
-                    .get("compaction_target_size")
-                    .map(|x| x.parse::<u64>().unwrap()),
-                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
-                compaction_threshold: settings
-                    .get("compaction_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
-                gc_horizon: settings
-                    .get("gc_horizon")
-                    .map(|x| x.parse::<u64>().unwrap()),
-                gc_period: settings.get("gc_period").map(|x| x.to_string()),
-                image_creation_threshold: settings
-                    .get("image_creation_threshold")
-                    .map(|x| x.parse::<usize>().unwrap()),
-                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
-            })
-            .send()?
-            .error_from_body()?;
-
-        Ok(())
-    }
-
    pub fn timeline_list(&self, tenant_id: &ZTenantId) -> anyhow::Result<Vec<TimelineInfo>> {
        let timeline_infos: Vec<TimelineInfo> = self
            .http_request(
@@ -493,12 +399,3 @@ impl PageServerNode {
        Ok(timeline_info_response)
    }
 }
-
-fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
-        if let Ok(value) = std::env::var(env_key) {
-            cmd = cmd.env(env_key, value);
-        }
-    }
-    cmd
-}
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -1,20 +1,13 @@
 #!/bin/sh
 set -eux

-broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
-if [ "$broker_endpoints_param" != "absent" ]; then
-    broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
-else
-    broker_endpoints_param=''
-fi
-
 if [ "$1" = 'pageserver' ]; then
    if [ ! -d "/data/tenants" ]; then
        echo "Initializing pageserver data directory"
-        pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
+        pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10"
    fi
    echo "Staring pageserver at 0.0.0.0:6400"
-    pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
+    pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /data
 else
    "$@"
 fi
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,8 +7,8 @@
 - [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
 - [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
 - [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
+- [pageserver/README](/pageserver/README) — pageserver overview.
+- [postgres_ffi/README](/postgres_ffi/README) — Postgres FFI overview.
 - [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
- [safekeeper/README.md](/safekeeper/README.md) — WAL service overview.
+- [walkeeper/README](/walkeeper/README) — WAL service overview.
 - [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core
--- a/docs/authentication.md
+++ b/docs/authentication.md
@@ -27,4 +27,4 @@ management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algori
 tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
 ```

-Utility functions to work with jwts in rust are located in libs/utils/src/auth.rs
+Utility functions to work with jwts in rust are located in zenith_utils/src/auth.rs
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -1,20 +1,20 @@
-# Docker images of Neon
+# Docker images of Zenith

 ## Images

 Currently we build two main images:

- [neondatabase/neon](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [neondatabase/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres).
+- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
+- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).

-And additional intermediate image:
+And an additional intermediate image:

- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
+- [zenithdb/compute-tools](https://hub.docker.com/repository/docker/zenithdb/compute-tools) — compute node configuration management tools.

 ## Building pipeline

-We build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs
+1. Image `zenithdb/compute-tools` is re-built automatically.

-1. `neondatabase/compute-tools` and `neondatabase/compute-node`
+2. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.

-2. `neondatabase/neon`
+3. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -21,7 +21,7 @@ NOTE:It has nothing to do with PostgreSQL pg_basebackup.

 ### Branch

-We can create branch at certain LSN using `neon_local timeline branch` command.
+We can create branch at certain LSN using `zenith timeline branch` command.
 Each Branch lives in a corresponding timeline[] and has an ancestor[].


@@ -29,7 +29,7 @@ Each Branch lives in a corresponding timeline[] and has an ancestor[].

 NOTE: This is an overloaded term.

-A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
+A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint; 

 ### Checkpoint (Layered repository)

@@ -91,7 +91,7 @@ The layer map tracks what layers exist in a timeline.

 ### Layered repository

-Neon repository implementation that keeps data in layers.
+Zenith repository implementation that keeps data in layers.
 ### LSN

 The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
@@ -101,23 +101,23 @@ It is printed as two hexadecimal numbers of up to 8 digits each, separated by a
 Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
 Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.

-In Postgres and Neon LSNs are used to describe certain points in WAL handling.
+In Postgres and Zenith, LSNs are used to describe certain points in WAL handling.

 PostgreSQL LSNs and functions to monitor them:
 * `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
 * `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
 * `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
 * `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
-* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
+* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically. 
 [source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

-Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
+Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
 * `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.

-Neon pageserver LSNs:
+Zenith pageserver LSNs:
 * `last_record_lsn` - the end of last processed WAL record.
 * `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
 * `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
@@ -132,7 +132,7 @@ This is the unit of data exchange between compute node and pageserver.

 ### Pageserver

-Neon storage engine: repositories + wal receiver + page service + wal redo.
+Zenith storage engine: repositories + wal receiver + page service + wal redo.

 ### Page service

@@ -184,13 +184,13 @@ relation exceeds that size, it is split into multiple segments.
 SLRUs include pg_clog, pg_multixact/members, and
 pg_multixact/offsets. There are other SLRUs in PostgreSQL, but
 they don't need to be stored permanently (e.g. pg_subtrans),
-or we do not support them in neon yet (pg_commit_ts).
+or we do not support them in zenith yet (pg_commit_ts).

 ### Tenant (Multitenancy)
-Tenant represents a single customer, interacting with Neon.
+Tenant represents a single customer, interacting with Zenith.
 Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
 One pageserver[] can serve multiple tenants at once.
-One safekeeper
+One safekeeper 

 See `docs/multitenancy.md` for more.

--- a/docs/rfcs/004-durability.md
+++ b/docs/rfcs/004-durability.md
@@ -22,7 +22,7 @@ In addition to the WAL safekeeper nodes, the WAL is archived in
 S3. WAL that has been archived to S3 can be removed from the
 safekeepers, so the safekeepers don't need a lot of disk space.

-```
+
                                +----------------+
                        +-----> | WAL safekeeper |
                        |       +----------------+
@@ -42,23 +42,23 @@ safekeepers, so the safekeepers don't need a lot of disk space.
                  \
                   \
                    \
-                     \          +--------+
-                      \         |        |
-                       +------> |   S3   |
-                                |        |
-                                +--------+
+                     \      +--------+
+                      \     |        |
+                       +--> |   S3   |
+                            |        |
+                            +--------+
+

-```
 Every WAL safekeeper holds a section of WAL, and a VCL value.
 The WAL can be divided into three portions:

-```
+
                                    VCL                   LSN
                                     |                     |
                                     V                     V
 .................ccccccccccccccccccccXXXXXXXXXXXXXXXXXXXXXXX
 Archived WAL       Completed WAL          In-flight WAL
-```
+

 Note that all this WAL kept in a safekeeper is a contiguous section.
 This is different from Aurora: In Aurora, there can be holes in the
--- a/docs/rfcs/009-snapshot-first-storage-cli.md
+++ b/docs/rfcs/009-snapshot-first-storage-cli.md
@@ -12,7 +12,7 @@ Init empty pageserver using `initdb` in temporary directory.

 `--storage_dest=FILE_PREFIX | S3_PREFIX |...` option defines object storage type, all other parameters are passed via env variables. Inspired by WAL-G style naming : https://wal-g.readthedocs.io/STORAGES/.

-Save`storage_dest` and other parameters in config.
+Save `storage_dest` and other parameters in config.
 Push snapshots to `storage_dest` in background.

 ```
@@ -21,7 +21,7 @@ zenith start
 ```

 #### 2. Restart pageserver (manually or crash-recovery).
-Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
+Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`. 
 Push snapshots to `storage_dest` in background.

 ```
@@ -32,7 +32,7 @@ zenith start
 Start pageserver from existing snapshot.
 Path to snapshot provided via `--snapshot_path=FILE_PREFIX | S3_PREFIX | ...`
 Do not save `snapshot_path` and `snapshot_format` in config, as it is a one-time operation.
-Save`storage_dest` parameters in config.
+Save `storage_dest` parameters in config.
 Push snapshots to `storage_dest` in background.
 ```
 //I.e. we want to start zenith on top of existing $PGDATA and use s3 as a persistent storage.
@@ -42,15 +42,15 @@ zenith start
 How to pass credentials needed for `snapshot_path`?

 #### 4. Export.
-Manually push snapshot to `snapshot_path` which differs from `storage_dest`
+Manually push snapshot to `snapshot_path` which differs from `storage_dest` 
 Optionally set `snapshot_format`, which can be plain pgdata format or zenith format.
 ```
 zenith export --snapshot_path=FILE_PREFIX --snapshot_format=pgdata
 ```

 #### Notes and questions
- safekeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
+- walkeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
 - Why do we need `zenith init` as a separate command? Can't we init everything at first start?
 - We can think of better names for all options.
 - Export to plain postgres format will be useless, if we are not 100% compatible on page level.
-I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
+I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
--- a/docs/rfcs/016-connection-routing.md
+++ b/docs/rfcs/016-connection-routing.md
@@ -1,151 +0,0 @@
-# Dispatching a connection
-
-For each client connection, Neon service needs to authenticate the
-connection, and route it to the right PostgreSQL instance.
-
-## Authentication
-
-There are three different ways to authenticate:
-
- anonymous; no authentication needed
- PostgreSQL authentication
- github single sign-on using browser
-
-In anonymous access, the user doesn't need to perform any
-authentication at all. This can be used e.g. in interactive PostgreSQL
-documentation, allowing you to run the examples very quickly. Similar
-to sqlfiddle.com.
-
-PostgreSQL authentication works the same as always. All the different
-PostgreSQL authentication options like SCRAM, kerberos, etc. are
-available. [1]
-
-The third option is to authenticate with github single sign-on. When
-you open the connection in psql, you get a link that you open with
-your browser. Opening the link redirects you to github authentication,
-and lets the connection to proceed. This is also known as "Link auth" [2].
-
-
-## Routing the connection
-
-When a client starts a connection, it needs to be routed to the
-correct PostgreSQL instance. Routing can be done by the proxy, acting
-as a man-in-the-middle, or the connection can be routed at the network
-level based on the hostname or IP address.
-
-Either way, Neon needs to identify which PostgreSQL instance the
-connection should be routed to. If the instance is not already
-running, it needs to be started. Some connections always require a new
-PostgreSQL instance to be created, e.g. if you want to run a one-off
-query against a particular point-in-time.
-
-The PostgreSQL instance is identified by:
- Neon account (possibly anonymous)
- cluster (known as tenant in the storage?)
- branch or snapshot name
- timestamp (PITR)
- primary or read-replica
- one-off read replica
- one-off writeable branch
-
-When you are using regular PostgreSQL authentication or anonymous
-access, the connection URL needs to contain all the information needed
-for the routing. With github single sign-on, the browser is involved
-and some details - the Neon account in particular - can be deduced
-from the authentication exchange.
-
-There are three methods for identifying the PostgreSQL instance:
-
- Browser interaction (link auth)
- Options in the connection URL and the domain name
- A pre-defined endpoint, identified by domain name or IP address
-
-### Link Auth
-
-    postgres://<username>@start.neon.tech/<dbname>
-
-This gives you a link that you open in browser. Clicking the link
-performs github authentication, and the Neon account name is
-provided to the proxy behind the scenes. The proxy routes the
-connection to the primary PostgreSQL instance in cluster called
-"main", branch "main".
-
-Further ideas:
- You could pre-define a different target for link auth
-  connections in the UI.
- You could have a drop-down in the browser, allowing you to connect
-  to any cluster you want. Link Auth can be like Teleport.
-
-### Connection URL
-
-The connection URL looks like this:
-
-    postgres://<username>@<cluster-id>.db.neon.tech/<dbname>
-
-By default, this connects you to the primary PostgreSQL instance
-running on the "main" branch in the named cluster [3]. However, you can
-change that by specifying options in the connection URL. The following
-options are supported:
-
-| option name  | Description                                                                                       | Examples                                            |
-| ---          | ---                                                                                               | ---                                                 |
-| cluster      | Cluster name                                                                                      | cluster:myproject                                   |
-| branch       | Branch name                                                                                       | branch:main                                         |
-| timestamp    | Connect to an instance at given point-in-time.                                                    | timestamp:2022-04-08 timestamp:2022-04-08T11:42:16Z |
-| lsn          | Connect to an instance at given LSN                                                               | lsn:0/12FF0420                                      |
-| read-replica | Connect to a read-replica. If the parameter is 'new', a new instance is created for this session. | read-replica read-replica:new                       |
-
-For example, to read branch 'testing' as it was on Mar 31, 2022, you could
-specify a timestamp in the connection URL [4]:
-
-    postgres://alice@cluster-1234.db.neon.tech/postgres?options=branch:testing,timestamp:2022-03-31
-
-Connecting with cluster name and options can be disabled in the UI. If
-disabled, you can only connect using a pre-defined endpoint.
-
-### Pre-defined Endpoint
-
-Instead of providing the cluster name, branch, and all those options
-in the connection URL, you can define a named endpoint with the same
-options.
-
-In the UI, click "create endpoint". Fill in the details:
-
- Cluster name
- Branch
- timestamp or LSN
- is this for the primary or for a read replica
- etc.
-
-When you click Finish, a named endpoint is created. You can now use the endpoint ID to connect:
-
-    postgres://<username>@<endpoint-id>.endpoint.neon.tech/<dbname>
-
-
-An endpoint can be assigned a static or dynamic IP address, so that
-you can connect to it with clients that don't support TLS SNI. Maybe
-bypass the proxy altogether, but that ought to be invisible to the
-user.
-
-You can limit the range of source IP addresses that are allowed to
-connect to an endpoint. An endpoint can also be exposed in an Amazon
-VPC, allowing direct connections from applications.
-
-
-# Footnotes
-
-[1] I'm not sure how feasible it is to set up configure like Kerberos
-or LDAP in a cloud environment. But in principle I think we should
-allow customers to have the full power of PostgreSQL, including all
-authentication options. However, it's up to the customer to configure
-it correctly.
-
-[2] Link is a way to both authenticate and to route the connection
-
-[3] This assumes that cluster-ids are globally unique, across all
-Neon accounts.
-
-[4] The syntax accepted in the connection URL is limited by libpq. The
-only way to pass arbitrary options to the server (or our proxy) is
-with the "options" keyword, and the options must be percent-encoded. I
-think the above would work but i haven't tested it
--- a/docs/rfcs/cluster-size-limits.md
+++ b/docs/rfcs/cluster-size-limits.md
@@ -1,79 +0,0 @@
-Cluster size limits
-==================
-
-## Summary
-
-One of the resource consumption limits for free-tier users is a cluster size limit.
-
-To enforce it, we need to calculate the timeline size and check if the limit is reached before relation create/extend operations.
-If the limit is reached, the query must fail with some meaningful error/warning.
-We may want to exempt some operations from the quota to allow users free space to fit back into the limit.
-
-The stateless compute node that performs validation is separate from the storage that calculates the usage, so we need to exchange cluster size information between those components.
-
-## Motivation
-
-Limit the maximum size of a PostgreSQL instance to limit free tier users (and other tiers in the future).
-First of all, this is needed to control our free tier production costs.
-Another reason to limit resources is risk management — we haven't (fully) tested and optimized zenith for big clusters,
-so we don't want to give users access to the functionality that we don't think is ready.
-
-## Components
-
-* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
-* safekeeper - pass feedback message from pageserver to compute.
-* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
-* console - set and update `zenith.max_cluster_size` setting
-
-## Proposed implementation
-
-First of all, it's necessary to define timeline size.
-
-The current approach is to count all data, including SLRUs. (not including WAL)
-Here we think of it as a physical disk underneath the Postgres cluster.
-This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
-
-Alternatively, we could count only relation data. As in pg_database_size().
-This approach is somewhat more user-friendly because it is the data that is really affected by the user.
-On the other hand, it puts us in a weaker position than other services, i.e., RDS.
-We will need to refactor the timeline_size counter or add another counter to implement it. 
-
-Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
-Then this size should be reported to compute node.
-
-`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
-
-(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
-
-This message is received by the safekeeper and propagated to compute node as a part of `AppendResponse`.
-
-Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
-
-And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
-(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
-
-TODO:
-We can allow autovacuum processes to bypass this check, simply checking `IsAutoVacuumWorkerProcess()`.
-It would be nice to allow manual VACUUM and VACUUM FULL to bypass the check, but it's uneasy to distinguish these operations at the low level.
-See issues https://github.com/neondatabase/neon/issues/1245
-https://github.com/zenithdb/zenith/issues/1445
-
-TODO:
-We should warn users if the limit is soon to be reached.
-
-### **Reliability, failure modes and corner cases**
-
-1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
-    
-    If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
-    
-    So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
-    
-    Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
-
-
-### **Security implications**
-
-We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
-Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
-To cover this case, we also monitor the compute node size in the console.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -6,6 +6,7 @@ If there's no such file during `init` phase of the server, it creates the file i
 There's a possibility to pass an arbitrary config value to the pageserver binary as an argument: such values override
 the values in the config file, if any are specified for the same key and get into the final config during init phase.

+
 ### Config example

 ```toml
@@ -25,22 +26,18 @@ max_file_descriptors = '100'
 # initial superuser role name to use when creating a new tenant
 initial_superuser_name = 'zenith_admin'

-broker_etcd_prefix = 'neon'
-broker_endpoints = ['some://etcd']
-
 # [remote_storage]
 ```

-The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user, 
-see the corresponding section below.
+The config above shows default values for all basic pageserver settings.
 Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
 Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.

 Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and

- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
+* either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`

- or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`
+* or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`

 ### Config values

@@ -50,17 +47,6 @@ Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage=

 Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.

-#### broker_endpoints
-
-A list of endpoints (etcd currently) to connect and pull the information from.
-Mandatory, does not have a default, since requires etcd to be started as a separate process,
-and its connection url should be specified separately. 
-
-#### broker_etcd_prefix
-
-A prefix to add for every etcd key used, to separate one group of related instances from another, in the same cluster.
-Default is `neon`.
-
 #### checkpoint_distance

 `checkpoint_distance` is the amount of incoming WAL that is held in
@@ -71,7 +57,7 @@ but it will trigger a checkpoint operation to get it back below the
 limit.

 `checkpoint_distance` also determines how much WAL needs to be kept
-durable in the safekeeper. The safekeeper must have capacity to hold
+durable in the safekeeper.  The safekeeper must have capacity to hold
 this much WAL, with some headroom, otherwise you can get stuck in a
 situation where the safekeeper is full and stops accepting new WAL,
 but the pageserver is not flushing out and releasing the space in the
@@ -86,11 +72,7 @@ The unit is # of bytes.

 Every `compaction_period` seconds, the page server checks if
 maintenance operations, like compaction, are needed on the layer
-files. Default is 1 s, which should be fine.
-
-#### compaction_target_size
-
-File sizes for L0 delta and L1 image layers. Default is 128MB.
+files.  Default is 1 s, which should be fine.

 #### gc_horizon

@@ -103,14 +85,6 @@ away.

 Interval at which garbage collection is triggered. Default is 100 s.

-#### image_creation_threshold
-
-L0 delta layer threshold for L1 iamge layer creation. Default is 3.
-
-#### pitr_interval
-
-WAL retention duration for PITR branching. Default is 30 days.
-
 #### initial_superuser_name

 Name of the initial superuser role, passed to initdb when a new tenant
@@ -177,11 +151,12 @@ bucket_region = 'eu-north-1'
 # Optional, pageserver uses entire bucket if the prefix is not specified.
 prefix_in_bucket = '/some/prefix/'

-# S3 API query limit to avoid getting errors/throttling from AWS.
-concurrency_limit = 100
-```
+# Access key to connect to the bucket ("login" part of the credentials)
+access_key_id = 'SOMEKEYAAAAASADSAH*#'

-If no IAM bucket access is used during the remote storage usage, use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables to set the access credentials.
+# Secret access key to connect to the bucket ("password" part of the credentials)
+secret_access_key = 'SOMEsEcReTsd292v'
+```

 ###### General remote storage configuration

@@ -192,13 +167,14 @@ Besides, there are parameters common for all types of remote storage that can be

 ```toml
 [remote_storage]
-# Max number of concurrent timeline synchronized (layers uploaded or downloaded) with the remote storage at the same time.
-max_concurrent_syncs = 50
+# Max number of concurrent connections to open for uploading to or downloading from the remote storage.
+max_concurrent_sync = 100

 # Max number of errors a single task can have before it's considered failed and not attempted to run anymore.
 max_sync_errors = 10
 ```

+
 ## safekeeper

 TODO
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -28,7 +28,12 @@ The pageserver has a few different duties:
 - Receive WAL from the WAL service and decode it.
 - Replay WAL that's applicable to the chunks that the Page Server maintains

-For more detailed info, see [/pageserver/README](/pageserver/README.md)
+For more detailed info, see `/pageserver/README`
+
+`/postgres_ffi`:
+
+Utility functions for interacting with PostgreSQL file formats.
+Misc constants, copied from PostgreSQL headers.

 `/proxy`:

@@ -52,12 +57,12 @@ PostgreSQL extension that implements storage manager API and network communicati

 PostgreSQL extension that contains functions needed for testing and debugging.

-`/safekeeper`:
+`/walkeeper`:

 The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
 It acts as a holding area and redistribution center for recently generated WAL.

-For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
+For more detailed info, see `/walkeeper/README`

 `/workspace_hack`:
 The workspace_hack crate exists only to pin down some dependencies.
@@ -69,21 +74,14 @@ We use [cargo-hakari](https://crates.io/crates/cargo-hakari) for automation.
 Main entry point for the 'zenith' CLI utility.
 TODO: Doesn't it belong to control_plane?

-`/libs`:
-Unites granular neon helper crates under the hood.
+`/zenith_metrics`:

-`/libs/postgres_ffi`:
-
-Utility functions for interacting with PostgreSQL file formats.
-Misc constants, copied from PostgreSQL headers.
-
-`/libs/utils`:
-Generic helpers that are shared between other crates in this repository.
-A subject for future modularization.
-
-`/libs/metrics`:
 Helpers for exposing Prometheus metrics from the server.

+`/zenith_utils`:
+
+Helpers that are shared between other crates in this repository.
+
 ## Using Python
 Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
 so manual installation of dependencies is not recommended.
@@ -91,22 +89,18 @@ so manual installation of dependencies is not recommended.
 A single virtual environment with all dependencies is described in the single `Pipfile`.

 ### Prerequisites
- Install Python 3.9 (the minimal supported version) or greater.
+- Install Python 3.7 (the minimal supported version) or greater.
    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
-    - If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
+    - If you have trouble with another version, you can resolve it by installing Python 3.7 separately, via pyenv or via the system package manager, e.g.:
      ```bash
      # In Ubuntu
      sudo add-apt-repository ppa:deadsnakes/ppa
      sudo apt update
-      sudo apt install python3.9
+      sudo apt install python3.7
      ```
 - Install `poetry`
    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation)`.
- Install dependencies via `./scripts/pysync`.
-    - Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
-      so if you have different version some linting tools can yield different result locally vs in the CI.
-    - You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
-      This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.
+- Install dependencies via `./scripts/pysync`. Note that CI uses Python 3.7, so if you have a different version, some linting tools can yield different results locally vs. in CI.

 Run `poetry shell` to activate the virtual environment.
 Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.
--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -1,17 +0,0 @@
-[package]
- name = "etcd_broker"
- version = "0.1.0"
- edition = "2021"
-
- [dependencies]
- etcd-client = "0.9.0"
- regex = "1.4.5"
- serde = { version = "1.0", features = ["derive"] }
- serde_json = "1"
- serde_with = "1.12.0"
-
- utils = { path = "../utils" }
- workspace_hack = { version = "0.1", path = "../../workspace_hack" }
- tokio = "1"
- tracing = "0.1"
- thiserror = "1"
--- a/libs/etcd_broker/src/lib.rs
+++ b/libs/etcd_broker/src/lib.rs
@@ -1,348 +0,0 @@
-//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
-//! Intended to connect services to each other, not to store their data.
-use std::{
-    collections::{hash_map, HashMap},
-    fmt::Display,
-    str::FromStr,
-};
-
-use regex::{Captures, Regex};
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
-
-pub use etcd_client::*;
-
-use tokio::{sync::mpsc, task::JoinHandle};
-use tracing::*;
-use utils::{
-    lsn::Lsn,
-    zid::{NodeId, ZTenantId, ZTenantTimelineId},
-};
-
-/// Default value to use for prefixing to all etcd keys with.
-/// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
-pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
-
-#[derive(Debug, Deserialize, Serialize)]
-struct SafekeeperTimeline {
-    safekeeper_id: NodeId,
-    info: SkTimelineInfo,
-}
-
-/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
-#[serde_as]
-#[derive(Debug, Deserialize, Serialize)]
-pub struct SkTimelineInfo {
-    /// Term of the last entry.
-    pub last_log_term: Option<u64>,
-    /// LSN of the last record.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub flush_lsn: Option<Lsn>,
-    /// Up to which LSN safekeeper regards its WAL as committed.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub commit_lsn: Option<Lsn>,
-    /// LSN up to which safekeeper offloaded WAL to s3.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub s3_wal_lsn: Option<Lsn>,
-    /// LSN of last checkpoint uploaded by pageserver.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub remote_consistent_lsn: Option<Lsn>,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub peer_horizon_lsn: Option<Lsn>,
-    #[serde(default)]
-    pub safekeeper_connection_string: Option<String>,
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum BrokerError {
-    #[error("Etcd client error: {0}. Context: {1}")]
-    EtcdClient(etcd_client::Error, String),
-    #[error("Error during parsing etcd data: {0}")]
-    ParsingError(String),
-    #[error("Internal error: {0}")]
-    InternalError(String),
-}
-
-/// A way to control the data retrieval from a certain subscription.
-pub struct SkTimelineSubscription {
-    safekeeper_timeline_updates:
-        mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
-    kind: SkTimelineSubscriptionKind,
-    watcher_handle: JoinHandle<Result<(), BrokerError>>,
-    watcher: Watcher,
-}
-
-impl SkTimelineSubscription {
-    /// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
-    pub async fn fetch_data(
-        &mut self,
-    ) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
-        self.safekeeper_timeline_updates.recv().await
-    }
-
-    /// Cancels the subscription, stopping the data poller and waiting for it to shut down.
-    pub async fn cancel(mut self) -> Result<(), BrokerError> {
-        self.watcher.cancel().await.map_err(|e| {
-            BrokerError::EtcdClient(
-                e,
-                format!(
-                    "Failed to cancel timeline subscription, kind: {:?}",
-                    self.kind
-                ),
-            )
-        })?;
-        self.watcher_handle.await.map_err(|e| {
-            BrokerError::InternalError(format!(
-                "Failed to join the timeline updates task, kind: {:?}, error: {e}",
-                self.kind
-            ))
-        })?
-    }
-}
-
-/// The subscription kind to the timeline updates from safekeeper.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct SkTimelineSubscriptionKind {
-    broker_etcd_prefix: String,
-    kind: SubscriptionKind,
-}
-
-impl SkTimelineSubscriptionKind {
-    pub fn all(broker_etcd_prefix: String) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::All,
-        }
-    }
-
-    pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Tenant(tenant),
-        }
-    }
-
-    pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Timeline(timeline),
-        }
-    }
-
-    fn watch_regex(&self) -> Regex {
-        match self.kind {
-            SubscriptionKind::All => Regex::new(&format!(
-                r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'everything' subscription"),
-            SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
-                r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'tenant' subscription"),
-            SubscriptionKind::Timeline(ZTenantTimelineId {
-                tenant_id,
-                timeline_id,
-            }) => Regex::new(&format!(
-                r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
-                self.broker_etcd_prefix
-            ))
-            .expect("wrong regex for 'timeline' subscription"),
-        }
-    }
-
-    /// Etcd key to use for watching a certain timeline updates from safekeepers.
-    pub fn watch_key(&self) -> String {
-        match self.kind {
-            SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
-            SubscriptionKind::Tenant(tenant_id) => {
-                format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
-            }
-            SubscriptionKind::Timeline(ZTenantTimelineId {
-                tenant_id,
-                timeline_id,
-            }) => format!(
-                "{}/{tenant_id}/{timeline_id}/safekeeper",
-                self.broker_etcd_prefix
-            ),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-enum SubscriptionKind {
-    /// Get every timeline update.
-    All,
-    /// Get certain tenant timelines' updates.
-    Tenant(ZTenantId),
-    /// Get certain timeline updates.
-    Timeline(ZTenantTimelineId),
-}
-
-/// Creates a background task to poll etcd for timeline updates from safekeepers.
-/// Stops and returns `Err` on any error during etcd communication.
-/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
-/// exiting normally in such cases.
-pub async fn subscribe_to_safekeeper_timeline_updates(
-    client: &mut Client,
-    subscription: SkTimelineSubscriptionKind,
-) -> Result<SkTimelineSubscription, BrokerError> {
-    info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
-
-    let (watcher, mut stream) = client
-        .watch(
-            subscription.watch_key(),
-            Some(WatchOptions::new().with_prefix()),
-        )
-        .await
-        .map_err(|e| {
-            BrokerError::EtcdClient(
-                e,
-                format!("Failed to init the watch for subscription {subscription:?}"),
-            )
-        })?;
-
-    let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
-
-    let subscription_kind = subscription.kind;
-    let regex = subscription.watch_regex();
-    let watcher_handle = tokio::spawn(async move {
-        while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
-            "Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
-        )))? {
-            if resp.canceled() {
-                info!("Watch for timeline updates subscription was canceled, exiting");
-                break;
-            }
-
-            let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
-            // Keep track that the timeline data updates from etcd arrive in the right order.
-            // https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
-            // > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
-            let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
-
-
-            let events = resp.events();
-            debug!("Processing {} events", events.len());
-
-            for event in events {
-                if EventType::Put == event.event_type() {
-                    if let Some(new_etcd_kv) = event.kv() {
-                        let new_kv_version = new_etcd_kv.version();
-
-                        match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
-                            Ok(Some((zttid, timeline))) => {
-                                match timeline_updates
-                                    .entry(zttid)
-                                    .or_default()
-                                    .entry(timeline.safekeeper_id)
-                                {
-                                    hash_map::Entry::Occupied(mut o) => {
-                                        let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
-                                        if old_etcd_kv_version < new_kv_version {
-                                            o.insert(timeline.info);
-                                            timeline_etcd_versions.insert(zttid,new_kv_version);
-                                        }
-                                    }
-                                    hash_map::Entry::Vacant(v) => {
-                                        v.insert(timeline.info);
-                                        timeline_etcd_versions.insert(zttid,new_kv_version);
-                                    }
-                                }
-                            }
-                            Ok(None) => {}
-                            Err(e) => error!("Failed to parse timeline update: {e}"),
-                        };
-                    }
-                }
-            }
-
-            if let Err(e) = timeline_updates_sender.send(timeline_updates) {
-                info!("Timeline updates sender got dropped, exiting: {e}");
-                break;
-            }
-        }
-
-        Ok(())
-    });
-
-    Ok(SkTimelineSubscription {
-        kind: subscription,
-        safekeeper_timeline_updates,
-        watcher_handle,
-        watcher,
-    })
-}
-
-fn parse_etcd_key_value(
-    subscription_kind: SubscriptionKind,
-    regex: &Regex,
-    kv: &KeyValue,
-) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
-    let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
-        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
-    })?) {
-        caps
-    } else {
-        return Ok(None);
-    };
-
-    let (zttid, safekeeper_id) = match subscription_kind {
-        SubscriptionKind::All => (
-            ZTenantTimelineId::new(
-                parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
-                parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
-            ),
-            NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
-        ),
-        SubscriptionKind::Tenant(tenant_id) => (
-            ZTenantTimelineId::new(
-                tenant_id,
-                parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
-            ),
-            NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
-        ),
-        SubscriptionKind::Timeline(zttid) => (
-            zttid,
-            NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
-        ),
-    };
-
-    let info_str = kv.value_str().map_err(|e| {
-        BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
-    })?;
-    Ok(Some((
-        zttid,
-        SafekeeperTimeline {
-            safekeeper_id,
-            info: serde_json::from_str(info_str).map_err(|e| {
-                BrokerError::ParsingError(format!(
-                    "Failed to parse '{info_str}' as safekeeper timeline info: {e}"
-                ))
-            })?,
-        },
-    )))
-}
-
-fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
-where
-    T: FromStr,
-    <T as FromStr>::Err: Display,
-{
-    let capture_match = caps
-        .get(index)
-        .ok_or_else(|| format!("Failed to get capture match at index {index}"))?
-        .as_str();
-    capture_match.parse().map_err(|e| {
-        format!(
-            "Failed to parse {} from {capture_match}: {e}",
-            std::any::type_name::<T>()
-        )
-    })
-}
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -1,11 +0,0 @@
-[package]
-name = "metrics"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
-libc = "0.2"
-lazy_static = "1.4"
-once_cell = "1.8.0"
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/postgres_ffi/wal_generate/Cargo.toml
+++ b/libs/postgres_ffi/wal_generate/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "wal_generate"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-anyhow = "1.0"
-clap = "3.0"
-env_logger = "0.9"
-log = "0.4"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
-tempfile = "3.2"
--- a/libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
+++ b/libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
@@ -1,58 +0,0 @@
-use anyhow::*;
-use clap::{App, Arg};
-use wal_generate::*;
-
-fn main() -> Result<()> {
-    env_logger::Builder::from_env(
-        env_logger::Env::default().default_filter_or("wal_generate=info"),
-    )
-    .init();
-    let arg_matches = App::new("Postgres WAL generator")
-        .about("Generates Postgres databases with specific WAL properties")
-        .arg(
-            Arg::new("datadir")
-                .short('D')
-                .long("datadir")
-                .takes_value(true)
-                .help("Data directory for the Postgres server")
-                .required(true)
-        )
-        .arg(
-            Arg::new("pg-distrib-dir")
-                .long("pg-distrib-dir")
-                .takes_value(true)
-                .help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
-                .default_value("/usr/local")
-        )
-        .arg(
-            Arg::new("type")
-                .long("type")
-                .takes_value(true)
-                .help("Type of WAL to generate")
-                .possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
-                .required(true)
-        )
-        .get_matches();
-
-    let cfg = Conf {
-        pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
-        datadir: arg_matches.value_of("datadir").unwrap().into(),
-    };
-    cfg.initdb()?;
-    let mut srv = cfg.start_server()?;
-    let lsn = match arg_matches.value_of("type").unwrap() {
-        "simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
-        "last_wal_record_crossing_segment" => {
-            generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
-        }
-        "wal_record_crossing_segment_followed_by_small_one" => {
-            generate_wal_record_crossing_segment_followed_by_small_one(
-                &mut srv.connect_with_timeout()?,
-            )?
-        }
-        a => panic!("Unknown --type argument: {}", a),
-    };
-    println!("end_of_wal = {}", lsn);
-    srv.kill();
-    Ok(())
-}
--- a/libs/postgres_ffi/wal_generate/src/lib.rs
+++ b/libs/postgres_ffi/wal_generate/src/lib.rs
@@ -1,278 +0,0 @@
-use anyhow::*;
-use core::time::Duration;
-use log::*;
-use postgres::types::PgLsn;
-use postgres::Client;
-use std::cmp::Ordering;
-use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
-use std::time::Instant;
-use tempfile::{tempdir, TempDir};
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Conf {
-    pub pg_distrib_dir: PathBuf,
-    pub datadir: PathBuf,
-}
-
-pub struct PostgresServer {
-    process: std::process::Child,
-    _unix_socket_dir: TempDir,
-    client_config: postgres::Config,
-}
-
-impl Conf {
-    fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
-    }
-
-    fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
-    }
-
-    fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
-        let path = self.pg_bin_dir().join(command);
-        ensure!(path.exists(), "Command {:?} does not exist", path);
-        let mut cmd = Command::new(path);
-        cmd.env_clear()
-            .env("LD_LIBRARY_PATH", self.pg_lib_dir())
-            .env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
-        Ok(cmd)
-    }
-
-    pub fn initdb(&self) -> Result<()> {
-        if let Some(parent) = self.datadir.parent() {
-            info!("Pre-creating parent directory {:?}", parent);
-            // Tests may be run concurrently and there may be a race to create `test_output/`.
-            // std::fs::create_dir_all is guaranteed to have no races with another thread creating directories.
-            std::fs::create_dir_all(parent)?;
-        }
-        info!(
-            "Running initdb in {:?} with user \"postgres\"",
-            self.datadir
-        );
-        let output = self
-            .new_pg_command("initdb")?
-            .arg("-D")
-            .arg(self.datadir.as_os_str())
-            .args(&["-U", "postgres", "--no-instructions", "--no-sync"])
-            .output()?;
-        debug!("initdb output: {:?}", output);
-        ensure!(
-            output.status.success(),
-            "initdb failed, stdout and stderr follow:\n{}{}",
-            String::from_utf8_lossy(&output.stdout),
-            String::from_utf8_lossy(&output.stderr),
-        );
-        Ok(())
-    }
-
-    pub fn start_server(&self) -> Result<PostgresServer> {
-        info!("Starting Postgres server in {:?}", self.datadir);
-        let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
-        let unix_socket_dir_path = unix_socket_dir.path().to_owned();
-        let server_process = self
-            .new_pg_command("postgres")?
-            .args(&["-c", "listen_addresses="])
-            .arg("-k")
-            .arg(unix_socket_dir_path.as_os_str())
-            .arg("-D")
-            .arg(self.datadir.as_os_str())
-            .args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
-            .args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
-            .args(&["-c", "shared_preload_libraries=zenith"]) // can only be loaded at startup
-            // Disable background processes as much as possible
-            .args(&["-c", "wal_writer_delay=10s"])
-            .args(&["-c", "autovacuum=off"])
-            .stderr(Stdio::null())
-            .spawn()?;
-        let server = PostgresServer {
-            process: server_process,
-            _unix_socket_dir: unix_socket_dir,
-            client_config: {
-                let mut c = postgres::Config::new();
-                c.host_path(&unix_socket_dir_path);
-                c.user("postgres");
-                c.connect_timeout(Duration::from_millis(1000));
-                c
-            },
-        };
-        Ok(server)
-    }
-
-    pub fn pg_waldump(
-        &self,
-        first_segment_name: &str,
-        last_segment_name: &str,
-    ) -> Result<std::process::Output> {
-        let first_segment_file = self.datadir.join(first_segment_name);
-        let last_segment_file = self.datadir.join(last_segment_name);
-        info!(
-            "Running pg_waldump for {} .. {}",
-            first_segment_file.display(),
-            last_segment_file.display()
-        );
-        let output = self
-            .new_pg_command("pg_waldump")?
-            .args(&[
-                &first_segment_file.as_os_str(),
-                &last_segment_file.as_os_str(),
-            ])
-            .output()?;
-        debug!("waldump output: {:?}", output);
-        Ok(output)
-    }
-}
-
-impl PostgresServer {
-    pub fn connect_with_timeout(&self) -> Result<Client> {
-        let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();
-        while Instant::now() < retry_until {
-            use std::result::Result::Ok;
-            if let Ok(client) = self.client_config.connect(postgres::NoTls) {
-                return Ok(client);
-            }
-            std::thread::sleep(Duration::from_millis(100));
-        }
-        bail!("Connection timed out");
-    }
-
-    pub fn kill(&mut self) {
-        self.process.kill().unwrap();
-        self.process.wait().unwrap();
-    }
-}
-
-impl Drop for PostgresServer {
-    fn drop(&mut self) {
-        use std::result::Result::Ok;
-        match self.process.try_wait() {
-            Ok(Some(_)) => return,
-            Ok(None) => {
-                warn!("Server was not terminated, will be killed");
-            }
-            Err(e) => {
-                error!("Unable to get status of the server: {}, will be killed", e);
-            }
-        }
-        let _ = self.process.kill();
-    }
-}
-
-pub trait PostgresClientExt: postgres::GenericClient {
-    fn pg_current_wal_insert_lsn(&mut self) -> Result<PgLsn> {
-        Ok(self
-            .query_one("SELECT pg_current_wal_insert_lsn()", &[])?
-            .get(0))
-    }
-    fn pg_current_wal_flush_lsn(&mut self) -> Result<PgLsn> {
-        Ok(self
-            .query_one("SELECT pg_current_wal_flush_lsn()", &[])?
-            .get(0))
-    }
-}
-
-impl<C: postgres::GenericClient> PostgresClientExt for C {}
-
-fn generate_internal<C: postgres::GenericClient>(
-    client: &mut C,
-    f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
-) -> Result<PgLsn> {
-    client.execute("create extension if not exists zenith_test_utils", &[])?;
-
-    let wal_segment_size = client.query_one(
-        "select cast(setting as bigint) as setting, unit \
-         from pg_settings where name = 'wal_segment_size'",
-        &[],
-    )?;
-    ensure!(
-        wal_segment_size.get::<_, String>("unit") == "B",
-        "Unexpected wal_segment_size unit"
-    );
-    ensure!(
-        wal_segment_size.get::<_, i64>("setting") == 16 * 1024 * 1024,
-        "Unexpected wal_segment_size in bytes"
-    );
-
-    let initial_lsn = client.pg_current_wal_insert_lsn()?;
-    info!("LSN initial = {}", initial_lsn);
-
-    let last_lsn = match f(client, initial_lsn)? {
-        None => client.pg_current_wal_insert_lsn()?,
-        Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
-            Ordering::Less => bail!("Some records were inserted after the generated WAL"),
-            Ordering::Equal => last_lsn,
-            Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
-        },
-    };
-
-    // Some records may be not flushed, e.g. non-transactional logical messages.
-    client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
-    match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
-        Ordering::Less => bail!("Some records were flushed after the generated WAL"),
-        Ordering::Equal => {}
-        Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
-    }
-    Ok(last_lsn)
-}
-
-pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
-    generate_internal(client, |client, _| {
-        client.execute("CREATE table t(x int)", &[])?;
-        Ok(None)
-    })
-}
-
-fn generate_single_logical_message(
-    client: &mut impl postgres::GenericClient,
-    transactional: bool,
-) -> Result<PgLsn> {
-    generate_internal(client, |client, initial_lsn| {
-        ensure!(
-            initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
-            "Initial LSN is too far in the future"
-        );
-
-        let message_lsn: PgLsn = client
-            .query_one(
-                "select pg_logical_emit_message($1, 'big-16mb-msg', \
-                 concat(repeat('abcd', 16 * 256 * 1024), 'end')) as message_lsn",
-                &[&transactional],
-            )?
-            .get("message_lsn");
-        ensure!(
-            message_lsn > PgLsn::from(0x0200_0000 + 4 * 8192),
-            "Logical message did not cross the segment boundary"
-        );
-        ensure!(
-            message_lsn < PgLsn::from(0x0400_0000),
-            "Logical message crossed two segments"
-        );
-
-        if transactional {
-            // Transactional logical messages are part of a transaction, so the one above is
-            // followed by a small COMMIT record.
-
-            let after_message_lsn = client.pg_current_wal_insert_lsn()?;
-            ensure!(
-                message_lsn < after_message_lsn,
-                "No record found after the emitted message"
-            );
-            Ok(Some(after_message_lsn))
-        } else {
-            Ok(Some(message_lsn))
-        }
-    })
-}
-
-pub fn generate_wal_record_crossing_segment_followed_by_small_one(
-    client: &mut impl postgres::GenericClient,
-) -> Result<PgLsn> {
-    generate_single_logical_message(client, true)
-}
-
-pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
-    client: &mut C,
-) -> Result<PgLsn> {
-    generate_single_logical_message(client, false)
-}
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "remote_storage"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-anyhow = { version = "1.0", features = ["backtrace"] }
-tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
-tokio-util = { version = "0.7", features = ["io"] }
-tracing = "0.1.27"
-rusoto_core = "0.48"
-rusoto_s3 = "0.48"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1"
-async-trait = "0.1"
-
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
-
-[dev-dependencies]
-tempfile = "3.2"
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -1,233 +0,0 @@
-//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.
-//! No other modules from this tree are supposed to be used directly by the external code.
-//!
-//! [`RemoteStorage`] trait a CRUD-like generic abstraction to use for adapting external storages with a few implementations:
-//!   * [`local_fs`] allows to use local file system as an external storage
-//!   * [`s3_bucket`] uses AWS S3 bucket as an external storage
-//!
-mod local_fs;
-mod s3_bucket;
-
-use std::{
-    borrow::Cow,
-    collections::HashMap,
-    ffi::OsStr,
-    num::{NonZeroU32, NonZeroUsize},
-    path::{Path, PathBuf},
-};
-
-use anyhow::Context;
-use tokio::io;
-use tracing::info;
-
-pub use self::{
-    local_fs::LocalFs,
-    s3_bucket::{S3Bucket, S3ObjectKey},
-};
-
-/// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
-/// During regular work, pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
-/// during start (where local and remote timelines are compared and initial sync tasks are scheduled) and timeline attach.
-/// Both cases may trigger timeline download, that might download a lot of layers. This concurrency is limited by the clients internally, if needed.
-pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS: usize = 50;
-pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
-/// Currently, sync happens with AWS S3, that has two limits on requests per second:
-/// ~200 RPS for IAM services
-/// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html
-/// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
-/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
-pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
-
-/// Storage (potentially remote) API to manage its state.
-/// This storage tries to be unaware of any layered repository context,
-/// providing basic CRUD operations for storage files.
-#[async_trait::async_trait]
-pub trait RemoteStorage: Send + Sync {
-    /// A way to uniquely reference a file in the remote storage.
-    type RemoteObjectId;
-
-    /// Attempts to derive the storage path out of the local path, if the latter is correct.
-    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
-
-    /// Gets the download path of the given storage file.
-    fn local_path(&self, remote_object_id: &Self::RemoteObjectId) -> anyhow::Result<PathBuf>;
-
-    /// Lists all items the storage has right now.
-    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
-
-    /// Streams the local file contents into remote into the remote storage entry.
-    async fn upload(
-        &self,
-        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
-        // S3 PUT request requires the content length to be specified,
-        // otherwise it starts to fail with the concurrent connection count increasing.
-        from_size_bytes: usize,
-        to: &Self::RemoteObjectId,
-        metadata: Option<StorageMetadata>,
-    ) -> anyhow::Result<()>;
-
-    /// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
-    /// Returns the metadata, if any was stored with the file previously.
-    async fn download(
-        &self,
-        from: &Self::RemoteObjectId,
-        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
-    ) -> anyhow::Result<Option<StorageMetadata>>;
-
-    /// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
-    /// Returns the metadata, if any was stored with the file previously.
-    async fn download_byte_range(
-        &self,
-        from: &Self::RemoteObjectId,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
-    ) -> anyhow::Result<Option<StorageMetadata>>;
-
-    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
-}
-
-/// Every storage, currently supported.
-/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
-pub enum GenericRemoteStorage {
-    Local(LocalFs),
-    S3(S3Bucket),
-}
-
-impl GenericRemoteStorage {
-    pub fn new(
-        working_directory: PathBuf,
-        storage_config: &RemoteStorageConfig,
-    ) -> anyhow::Result<Self> {
-        match &storage_config.storage {
-            RemoteStorageKind::LocalFs(root) => {
-                info!("Using fs root '{}' as a remote storage", root.display());
-                LocalFs::new(root.clone(), working_directory).map(GenericRemoteStorage::Local)
-            }
-            RemoteStorageKind::AwsS3(s3_config) => {
-                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
-                    s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
-                S3Bucket::new(s3_config, working_directory).map(GenericRemoteStorage::S3)
-            }
-        }
-    }
-}
-
-/// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
-/// Immutable, cannot be changed once the file is created.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct StorageMetadata(HashMap<String, String>);
-
-fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
-    if prefix == path {
-        anyhow::bail!(
-            "Prefix and the path are equal, cannot strip: '{}'",
-            prefix.display()
-        )
-    } else {
-        path.strip_prefix(prefix).with_context(|| {
-            format!(
-                "Path '{}' is not prefixed with '{}'",
-                path.display(),
-                prefix.display(),
-            )
-        })
-    }
-}
-
-/// External backup storage configuration, enough for creating a client for that storage.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct RemoteStorageConfig {
-    /// Max allowed number of concurrent sync operations between the API user and the remote storage.
-    pub max_concurrent_syncs: NonZeroUsize,
-    /// Max allowed errors before the sync task is considered failed and evicted.
-    pub max_sync_errors: NonZeroU32,
-    /// The storage connection configuration.
-    pub storage: RemoteStorageKind,
-}
-
-/// A kind of a remote storage to connect to, with its connection configuration.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum RemoteStorageKind {
-    /// Storage based on local file system.
-    /// Specify a root folder to place all stored files into.
-    LocalFs(PathBuf),
-    /// AWS S3 based storage, storing all files in the S3 bucket
-    /// specified by the config
-    AwsS3(S3Config),
-}
-
-/// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
-#[derive(Clone, PartialEq, Eq)]
-pub struct S3Config {
-    /// Name of the bucket to connect to.
-    pub bucket_name: String,
-    /// The region where the bucket is located at.
-    pub bucket_region: String,
-    /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
-    pub prefix_in_bucket: Option<String>,
-    /// A base URL to send S3 requests to.
-    /// By default, the endpoint is derived from a region name, assuming it's
-    /// an AWS S3 region name, erroring on wrong region name.
-    /// Endpoint provides a way to support other S3 flavors and their regions.
-    ///
-    /// Example: `http://127.0.0.1:5000`
-    pub endpoint: Option<String>,
-    /// AWS S3 has various limits on its API calls, we need not to exceed those.
-    /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
-    pub concurrency_limit: NonZeroUsize,
-}
-
-impl std::fmt::Debug for S3Config {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("S3Config")
-            .field("bucket_name", &self.bucket_name)
-            .field("bucket_region", &self.bucket_region)
-            .field("prefix_in_bucket", &self.prefix_in_bucket)
-            .field("concurrency_limit", &self.concurrency_limit)
-            .finish()
-    }
-}
-
-pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str) -> PathBuf {
-    let new_extension = match original_path
-        .as_ref()
-        .extension()
-        .map(OsStr::to_string_lossy)
-    {
-        Some(extension) => Cow::Owned(format!("{extension}.{suffix}")),
-        None => Cow::Borrowed(suffix),
-    };
-    original_path
-        .as_ref()
-        .with_extension(new_extension.as_ref())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_path_with_suffix_extension() {
-        let p = PathBuf::from("/foo/bar");
-        assert_eq!(
-            &path_with_suffix_extension(&p, "temp").to_string_lossy(),
-            "/foo/bar.temp"
-        );
-        let p = PathBuf::from("/foo/bar");
-        assert_eq!(
-            &path_with_suffix_extension(&p, "temp.temp").to_string_lossy(),
-            "/foo/bar.temp.temp"
-        );
-        let p = PathBuf::from("/foo/bar.baz");
-        assert_eq!(
-            &path_with_suffix_extension(&p, "temp.temp").to_string_lossy(),
-            "/foo/bar.baz.temp.temp"
-        );
-        let p = PathBuf::from("/foo/bar.baz");
-        assert_eq!(
-            &path_with_suffix_extension(&p, ".temp").to_string_lossy(),
-            "/foo/bar.baz..temp"
-        );
-    }
-}
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -1,105 +0,0 @@
-//! `utils` is intended to be a place to put code that is shared
-//! between other crates in this repository.
-
-#![allow(clippy::manual_range_contains)]
-
-/// `Lsn` type implements common tasks on Log Sequence Numbers
-pub mod lsn;
-/// SeqWait allows waiting for a future sequence number to arrive
-pub mod seqwait;
-
-/// append only ordered map implemented with a Vec
-pub mod vec_map;
-
-// Async version of SeqWait. Currently unused.
-// pub mod seqwait_async;
-
-pub mod bin_ser;
-pub mod postgres_backend;
-pub mod pq_proto;
-
-// dealing with connstring parsing and handy access to it's parts
-pub mod connstring;
-
-// helper functions for creating and fsyncing directories/trees
-pub mod crashsafe_dir;
-
-// common authentication routines
-pub mod auth;
-
-// utility functions and helper traits for unified unique id generation/serialization etc.
-pub mod zid;
-// http endpoint utils
-pub mod http;
-
-// socket splitting utils
-pub mod sock_split;
-
-// common log initialisation routine
-pub mod logging;
-
-// Misc
-pub mod accum;
-pub mod shutdown;
-
-// Tools for calling certain async methods in sync contexts
-pub mod sync;
-
-// Utility for binding TcpListeners with proper socket options.
-pub mod tcp_listener;
-
-// Utility for putting a raw file descriptor into non-blocking mode
-pub mod nonblock;
-
-// Default signal handling
-pub mod signals;
-
-/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
-///
-/// we have several cases:
-/// * building locally from git repo
-/// * building in CI from git repo
-/// * building in docker (either in CI or locally)
-///
-/// One thing to note is that .git is not available in docker (and it is bad to include it there).
-/// So everything becides docker build is covered by git_version crate, and docker uses a `GIT_VERSION` argument to get the value required.
-/// It takes variable from build process env and puts it to the rustc env. And then we can retrieve it here by using env! macro.
-/// Git version received from environment variable used as a fallback in git_version invokation.
-/// And to avoid running buildscript every recompilation, we use rerun-if-env-changed option.
-/// So the build script will be run only when GIT_VERSION envvar has changed.
-///
-/// Why not to use buildscript to get git commit sha directly without procmacro from different crate?
-/// Caching and workspaces complicates that. In case `utils` is not
-/// recompiled due to caching then version may become outdated.
-/// git_version crate handles that case by introducing a dependency on .git internals via include_bytes! macro,
-/// so if we changed the index state git_version will pick that up and rerun the macro.
-///
-/// Note that with git_version prefix is `git:` and in case of git version from env its `git-env:`.
-///
-/// #############################################################################################
-/// TODO this macro is not the way the library is intended to be used, see https://github.com/neondatabase/neon/issues/1565 for details.
-/// We use `cachepot` to reduce our current CI build times: https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036
-/// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains
-/// `println!("cargo:rerun-if-env-changed=GIT_VERSION");` code for cachepot cache invalidation.
-/// The problem needs further investigation and regular `const` declaration instead of a macro.
-#[macro_export]
-macro_rules! project_git_version {
-    ($const_identifier:ident) => {
-        const $const_identifier: &str = git_version::git_version!(
-            prefix = "git:",
-            fallback = concat!(
-                "git-env:",
-                env!("GIT_VERSION", "Missing GIT_VERSION envvar")
-            ),
-            args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha
-        );
-    };
-}
-
-/// Same as `assert!`, but evaluated during compilation and gets optimized out in runtime.
-#[macro_export]
-macro_rules! const_assert {
-    ($($args:tt)*) => {
-        const _: () = assert!($($args)*);
-    };
-}
--- a/monitoring/docker-compose.yml
+++ b/monitoring/docker-compose.yml
@@ -0,0 +1,25 @@
+version: "3"
+services:
+
+  prometheus:
+    container_name: prometheus
+    image: prom/prometheus:latest
+    volumes:
+      - ./prometheus.yaml:/etc/prometheus/prometheus.yml
+    # ports:
+    #   - "9090:9090"
+    # TODO: host networking replaces the port mapping above; find a portable solution
+    network_mode: "host"
+
+  grafana:
+    image: grafana/grafana:latest
+    volumes:
+      - ./grafana.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
+    environment:
+      - GF_AUTH_ANONYMOUS_ENABLED=true
+      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
+      - GF_AUTH_DISABLE_LOGIN_FORM=true
+    # ports:
+    #   - "3000:3000"
+    # TODO: host networking replaces the port mapping above; find a portable solution
+    network_mode: "host"
--- a/monitoring/grafana.yaml
+++ b/monitoring/grafana.yaml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  access: proxy
+  orgId: 1
+  url: http://localhost:9090
+  basicAuth: false
+  isDefault: false
+  version: 1
+  editable: false
--- a/monitoring/prometheus.yaml
+++ b/monitoring/prometheus.yaml
@@ -0,0 +1,5 @@
+scrape_configs:
+  - job_name: 'default'
+    scrape_interval: 10s
+    static_configs:
+      - targets: ['localhost:9898']
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -3,14 +3,6 @@ name = "pageserver"
 version = "0.1.0"
 edition = "2021"

-[features]
-# It is simpler infra-wise to have failpoints enabled by default
-# It shouldn't affect perf in any way because failpoints
-# are not placed in hot code paths
-default = ["failpoints"]
-profiling = ["pprof"]
-failpoints = ["fail/failpoints"]
-
 [dependencies]
 chrono = "0.4.19"
 rand = "0.8.3"
@@ -22,13 +14,15 @@ hex = "0.4.3"
 hyper = "0.14"
 itertools = "0.10.3"
 lazy_static = "1.4.0"
+log = "0.4.14"
 clap = "3.0"
 daemonize = "0.4.1"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
-postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+tokio-util = { version = "0.7", features = ["io"] }
+postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
 tokio-stream = "0.1.8"
 anyhow = { version = "1.0", features = ["backtrace"] }
 crc32c = "0.6.0"
@@ -38,27 +32,27 @@ humantime = "2.1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "1.12.0"
-humantime-serde = "1.1.1"
-
-pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }

 toml_edit = { version = "0.13", features = ["easy"] }
 scopeguard = "1.1.0"
 const_format = "0.2.21"
 tracing = "0.1.27"
+tracing-futures = "0.2"
 signal-hook = "0.3.10"
 url = "2"
 nix = "0.23"
 once_cell = "1.8.0"
 crossbeam-utils = "0.8.5"
 fail = "0.5.0"
-git-version = "0.3.5"

-postgres_ffi = { path = "../libs/postgres_ffi" }
-etcd_broker = { path = "../libs/etcd_broker" }
-metrics = { path = "../libs/metrics" }
-utils = { path = "../libs/utils" }
-remote_storage = { path = "../libs/remote_storage" }
+rusoto_core = "0.47"
+rusoto_s3 = "0.47"
+async-trait = "0.1"
+async-compression = {version = "0.3", features = ["zstd", "tokio"]}
+
+postgres_ffi = { path = "../postgres_ffi" }
+zenith_metrics = { path = "../zenith_metrics" }
+zenith_utils = { path = "../zenith_utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }

 [dev-dependencies]
--- a/pageserver/README.md
+++ b/pageserver/README.md
@@ -135,7 +135,7 @@ The backup service is disabled by default and can be enabled to interact with a

 CLI examples:
 * Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
-* AWS S3  : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`
+* AWS S3  : `${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/',access_key_id='SOMEKEYAAAAASADSAH*#',secret_access_key='SOMEsEcReTsd292v'}"`

 For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
 For local S3 installations, refer to the their documentation for name format and credentials.
@@ -155,9 +155,11 @@ or
 bucket_name = 'some-sample-bucket'
 bucket_region = 'eu-north-1'
 prefix_in_bucket = '/test_prefix/'
+access_key_id = 'SOMEKEYAAAAASADSAH*#'
+secret_access_key = 'SOMEsEcReTsd292v'
 ```

-`AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` env variables can be used to specify the S3 credentials if needed.
+Alternatively, the `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` environment variables can be used to specify the credentials instead of either of the ways above.

 TODO: Sharding
 --------------------
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -10,37 +10,31 @@
 //! This module is responsible for creation of such tarball
 //! from data stored in object storage.
 //!
-use anyhow::{anyhow, bail, ensure, Context, Result};
+use anyhow::{ensure, Context, Result};
 use bytes::{BufMut, BytesMut};
-use fail::fail_point;
+use log::*;
 use std::fmt::Write as FmtWrite;
 use std::io;
 use std::io::Write;
 use std::sync::Arc;
 use std::time::SystemTime;
 use tar::{Builder, EntryType, Header};
-use tracing::*;

 use crate::reltag::SlruKind;
 use crate::repository::Timeline;
 use crate::DatadirTimelineImpl;
 use postgres_ffi::xlog_utils::*;
 use postgres_ffi::*;
-use utils::lsn::Lsn;
+use zenith_utils::lsn::Lsn;

 /// This is short-living object only for the time of tarball creation,
 /// created mostly to avoid passing a lot of parameters between various functions
 /// used for constructing tarball.
-pub struct Basebackup<'a, W>
-where
-    W: Write,
-{
-    ar: Builder<AbortableWrite<W>>,
+pub struct Basebackup<'a> {
+    ar: Builder<&'a mut dyn Write>,
    timeline: &'a Arc<DatadirTimelineImpl>,
    pub lsn: Lsn,
    prev_record_lsn: Lsn,
-
-    finished: bool,
 }

 // Create basebackup with non-rel data in it. Omit relational data.
@@ -50,15 +44,12 @@ where
 //  * When working without safekeepers. In this situation it is important to match the lsn
 //    we are taking basebackup on with the lsn that is used in pageserver's walreceiver
 //    to start the replication.
-impl<'a, W> Basebackup<'a, W>
-where
-    W: Write,
-{
+impl<'a> Basebackup<'a> {
    pub fn new(
-        write: W,
+        write: &'a mut dyn Write,
        timeline: &'a Arc<DatadirTimelineImpl>,
        req_lsn: Option<Lsn>,
-    ) -> Result<Basebackup<'a, W>> {
+    ) -> Result<Basebackup<'a>> {
        // Compute postgres doesn't have any previous WAL files, but the first
        // record that it's going to write needs to include the LSN of the
        // previous record (xl_prev). We include prev_record_lsn in the
@@ -99,15 +90,14 @@ where
        );

        Ok(Basebackup {
-            ar: Builder::new(AbortableWrite::new(write)),
+            ar: Builder::new(write),
            timeline,
            lsn: backup_lsn,
            prev_record_lsn: backup_prev,
-            finished: false,
        })
    }

-    pub fn send_tarball(mut self) -> anyhow::Result<()> {
+    pub fn send_tarball(&mut self) -> anyhow::Result<()> {
        // Create pgdata subdirs structure
        for dir in pg_constants::PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(*dir)?;
@@ -145,14 +135,9 @@ where
            self.add_twophase_file(xid)?;
        }

-        fail_point!("basebackup-before-control-file", |_| {
-            bail!("failpoint basebackup-before-control-file")
-        });
-
        // Generate pg_control and bootstrap WAL segment.
        self.add_pgcontrol_file()?;
        self.ar.finish()?;
-        self.finished = true;
        debug!("all tarred up!");
        Ok(())
    }
@@ -169,17 +154,9 @@ where
            let img = self
                .timeline
                .get_slru_page_at_lsn(slru, segno, blknum, self.lsn)?;
+            ensure!(img.len() == pg_constants::BLCKSZ as usize);

-            if slru == SlruKind::Clog {
-                ensure!(
-                    img.len() == pg_constants::BLCKSZ as usize
-                        || img.len() == pg_constants::BLCKSZ as usize + 8
-                );
-            } else {
-                ensure!(img.len() == pg_constants::BLCKSZ as usize);
-            }
-
-            slru_buf.extend_from_slice(&img[..pg_constants::BLCKSZ as usize]);
+            slru_buf.extend_from_slice(&img);
        }

        let segname = format!("{}/{:>04X}", slru.to_str(), segno);
@@ -338,27 +315,13 @@ where
        let wal_file_name = XLogFileName(PG_TLI, segno, pg_constants::WAL_SEGMENT_SIZE);
        let wal_file_path = format!("pg_wal/{}", wal_file_name);
        let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?;
-        let wal_seg = generate_wal_segment(segno, pg_control.system_identifier)
-            .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
+        let wal_seg = generate_wal_segment(segno, pg_control.system_identifier);
        ensure!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE);
        self.ar.append(&header, &wal_seg[..])?;
        Ok(())
    }
 }

-impl<'a, W> Drop for Basebackup<'a, W>
-where
-    W: Write,
-{
-    /// If the basebackup was not finished, prevent the Archive::drop() from
-    /// writing the end-of-archive marker.
-    fn drop(&mut self) {
-        if !self.finished {
-            self.ar.get_mut().abort();
-        }
-    }
-}
-
 //
 // Create new tarball entry header
 //
@@ -394,49 +357,3 @@ fn new_tar_header_dir(path: &str) -> anyhow::Result<Header> {
    header.set_cksum();
    Ok(header)
 }
-
-/// A wrapper that passes through all data to the underlying Write,
-/// until abort() is called.
-///
-/// tar::Builder has an annoying habit of finishing the archive with
-/// a valid tar end-of-archive marker (two 512-byte sectors of zeros),
-/// even if an error occurs and we don't finish building the archive.
-/// We'd rather abort writing the tarball immediately than construct
-/// a seemingly valid but incomplete archive. This wrapper allows us
-/// to swallow the end-of-archive marker that Builder::drop() emits,
-/// without writing it to the underlying sink.
-///
-struct AbortableWrite<W> {
-    w: W,
-    aborted: bool,
-}
-
-impl<W> AbortableWrite<W> {
-    pub fn new(w: W) -> Self {
-        AbortableWrite { w, aborted: false }
-    }
-
-    pub fn abort(&mut self) {
-        self.aborted = true;
-    }
-}
-
-impl<W> Write for AbortableWrite<W>
-where
-    W: Write,
-{
-    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
-        if self.aborted {
-            Ok(data.len())
-        } else {
-            self.w.write(data)
-        }
-    }
-    fn flush(&mut self) -> io::Result<()> {
-        if self.aborted {
-            Ok(())
-        } else {
-            self.w.flush()
-        }
-    }
-}
--- a/pageserver/src/bin/dump_layerfile.rs
+++ b/pageserver/src/bin/dump_layerfile.rs
@@ -7,9 +7,7 @@ use pageserver::layered_repository::dump_layerfile_from_path;
 use pageserver::page_cache;
 use pageserver::virtual_file;
 use std::path::PathBuf;
-use utils::project_git_version;
-
-project_git_version!(GIT_VERSION);
+use zenith_utils::GIT_VERSION;

 fn main() -> Result<()> {
    let arg_matches = App::new("Zenith dump_layerfile utility")
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -1,46 +1,39 @@
 //! Main entry point for the Page Server executable.

-use std::{env, fs::File, path::Path, process, str::FromStr, thread::sleep, time::Duration};
+use std::{env, path::Path, str::FromStr};
 use tracing::*;
+use zenith_utils::{
+    auth::JwtAuth,
+    logging,
+    postgres_backend::AuthType,
+    tcp_listener,
+    zid::{ZTenantId, ZTimelineId},
+    GIT_VERSION,
+};

 use anyhow::{bail, Context, Result};

 use clap::{App, Arg};
 use daemonize::Daemonize;

-use fail::FailScenario;
 use pageserver::{
    config::{defaults::*, PageServerConf},
-    http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
+    http, page_cache, page_service,
+    remote_storage::{self, SyncStartupData},
+    repository::{Repository, TimelineSyncStatusUpdate},
+    tenant_mgr, thread_mgr,
    thread_mgr::ThreadKind,
    timelines, virtual_file, LOG_FILE_NAME,
 };
-use utils::{
-    auth::JwtAuth,
-    http::endpoint,
-    logging,
-    postgres_backend::AuthType,
-    project_git_version,
-    shutdown::exit_now,
-    signals::{self, Signal},
-    tcp_listener,
-    zid::{ZTenantId, ZTimelineId},
-};
-
-project_git_version!(GIT_VERSION);
-
-fn version() -> String {
-    format!(
-        "{GIT_VERSION} profiling:{} failpoints:{}",
-        cfg!(feature = "profiling"),
-        fail::has_failpoints()
-    )
-}
+use zenith_utils::http::endpoint;
+use zenith_utils::shutdown::exit_now;
+use zenith_utils::signals::{self, Signal};

 fn main() -> anyhow::Result<()> {
+    zenith_metrics::set_common_metrics_prefix("pageserver");
    let arg_matches = App::new("Zenith page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
-        .version(&*version())
+        .version(GIT_VERSION)
        .arg(
            Arg::new("daemonize")
                .short('d')
@@ -85,25 +78,8 @@ fn main() -> anyhow::Result<()> {
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
                Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
        )
-        .arg(
-            Arg::new("enabled-features")
-                .long("enabled-features")
-                .takes_value(false)
-                .help("Show enabled compile time features"),
-        )
        .get_matches();

-    if arg_matches.is_present("enabled-features") {
-        let features: &[&str] = &[
-            #[cfg(feature = "failpoints")]
-            "failpoints",
-            #[cfg(feature = "profiling")]
-            "profiling",
-        ];
-        println!("{{\"features\": {features:?} }}");
-        return Ok(());
-    }
-
    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
    let workdir = workdir
        .canonicalize()
@@ -184,9 +160,6 @@ fn main() -> anyhow::Result<()> {
    // as a ref.
    let conf: &'static PageServerConf = Box::leak(Box::new(conf));

-    // Initialize up failpoints support
-    let scenario = FailScenario::setup();
-
    // Basic initialization of things that don't change after startup
    virtual_file::init(conf.max_file_descriptors);
    page_cache::init(conf.page_cache_size);
@@ -202,19 +175,20 @@ fn main() -> anyhow::Result<()> {
                cfg_file_path.display()
            )
        })?;
+        Ok(())
    } else {
-        start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
+        start_pageserver(conf, daemonize).context("Failed to start pageserver")
    }
-
-    scenario.teardown();
-    Ok(())
 }

 fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
    // Initialize logger
    let log_file = logging::init(LOG_FILE_NAME, daemonize)?;

-    info!("version: {GIT_VERSION}");
+    // TODO init only if configured
+    pageserver::wal_metadata::init(conf).expect("wal_metadata init failed");
+
+    info!("version: {}", GIT_VERSION);

    // TODO: Check that it looks like a valid repository before going further

@@ -254,14 +228,52 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        // Otherwise, the coverage data will be damaged.
        match daemonize.exit_action(|| exit_now(0)).start() {
            Ok(_) => info!("Success, daemonized"),
-            Err(err) => bail!("{err}. could not daemonize. bailing."),
+            Err(err) => error!(%err, "could not daemonize"),
        }
    }

    let signals = signals::install_shutdown_handlers()?;

-    // start profiler (if enabled)
-    let profiler_guard = profiling::init_profiler(conf);
+    // Initialize repositories with locally available timelines.
+    // Timelines that are only partially available locally (remote storage has more data than this pageserver)
+    // are scheduled for download and added to the repository once download is completed.
+    let SyncStartupData {
+        remote_index,
+        local_timeline_init_statuses,
+    } = remote_storage::start_local_timeline_sync(conf)
+        .context("Failed to set up local files sync with external storage")?;
+
+    for (tenant_id, local_timeline_init_statuses) in local_timeline_init_statuses {
+        // Initialize the local tenant; its timelines are registered (or skipped) one by one below.
+        let repo = tenant_mgr::load_local_repo(conf, tenant_id, &remote_index);
+        for (timeline_id, init_status) in local_timeline_init_statuses {
+            match init_status {
+                remote_storage::LocalTimelineInitStatus::LocallyComplete => {
+                    debug!("timeline {} for tenant {} is locally complete, registering it in repository", timeline_id, tenant_id);
+                    // Lets fail here loudly to be on the safe side.
+                    // XXX: It may be a better api to actually distinguish between repository startup
+                    //   and processing of newly downloaded timelines.
+                    repo.apply_timeline_remote_sync_status_update(
+                        timeline_id,
+                        TimelineSyncStatusUpdate::Downloaded,
+                    )
+                    .with_context(|| {
+                        format!(
+                            "Failed to bootstrap timeline {} for tenant {}",
+                            timeline_id, tenant_id
+                        )
+                    })?
+                }
+                remote_storage::LocalTimelineInitStatus::NeedsSync => {
+                    debug!(
+                        "timeline {} for tenant {} needs sync, \
+                         so skipped for adding into repository until sync is finished",
+                        timeline_id, tenant_id
+                    );
+                }
+            }
+        }
+    }

    // initialize authentication for incoming connections
    let auth = match &conf.auth_type {
@@ -274,24 +286,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
    };
    info!("Using auth: {:#?}", conf.auth_type);

-    let remote_index = tenant_mgr::init_tenant_mgr(conf)?;
-
-    // Create file and frequently check if it's still here
-    thread_mgr::spawn(
-        ThreadKind::HttpEndpointListener,
-        None,
-        None,
-        "http_endpoint_thread",
-        true,
-        move || {
-            File::create("delete-me.txt").expect("FFFF failed creating file");
-            loop {
-                File::open("delete-me.txt").expect("FFFF cannot find file");
-                sleep(Duration::from_millis(1));
-            }
-        },
-    )?;
-
    // Spawn a new thread for the http endpoint
    // bind before launching separate thread so the error reported before startup exits
    let auth_cloned = auth.clone();
@@ -300,9 +294,9 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        None,
        None,
        "http_endpoint_thread",
-        true,
+        false,
        move || {
-            let router = http::make_router(conf, auth_cloned, remote_index)?;
+            let router = http::make_router(conf, auth_cloned, remote_index);
            endpoint::serve_thread_main(router, http_listener, thread_mgr::shutdown_watcher())
        },
    )?;
@@ -314,7 +308,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        None,
        None,
        "libpq endpoint thread",
-        true,
+        false,
        move || page_service::thread_main(conf, auth, pageserver_listener, conf.auth_type),
    )?;

@@ -324,7 +318,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
                "Got {}. Terminating in immediate shutdown mode",
                signal.name()
            );
-            profiling::exit_profiler(conf, &profiler_guard);
            std::process::exit(111);
        }

@@ -333,8 +326,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
                "Got {}. Terminating gracefully in fast shutdown mode",
                signal.name()
            );
-            profiling::exit_profiler(conf, &profiler_guard);
-            pageserver::shutdown_pageserver(0);
+            pageserver::shutdown_pageserver();
            unreachable!()
        }
    })
--- a/pageserver/src/bin/pageserver_zst.rs
+++ b/pageserver/src/bin/pageserver_zst.rs
@@ -0,0 +1,334 @@
+//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
+//! See [`compression`] for more details about the archives.
+
+use std::{collections::BTreeSet, path::Path};
+
+use anyhow::{bail, ensure, Context};
+use clap::{App, Arg};
+use pageserver::{
+    layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
+    remote_storage::compression,
+};
+use tokio::{fs, io};
+use zenith_utils::GIT_VERSION;
+
+const LIST_SUBCOMMAND: &str = "list";
+const ARCHIVE_ARG_NAME: &str = "archive";
+
+const EXTRACT_SUBCOMMAND: &str = "extract";
+const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
+
+const CREATE_SUBCOMMAND: &str = "create";
+const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
+
+/// CLI entry point: parses the command line and runs the requested archive subcommand.
+///
+/// Returns an error when no (or an unknown) subcommand is given or a required argument is missing.
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> anyhow::Result<()> {
+    // Describe every subcommand separately, then register them all on the top-level app.
+    let list_command = App::new(LIST_SUBCOMMAND)
+        .about("List the archive contents")
+        .arg(
+            Arg::new(ARCHIVE_ARG_NAME)
+                .required(true)
+                .takes_value(true)
+                .help("An archive to list the contents of"),
+        );
+    let extract_command = App::new(EXTRACT_SUBCOMMAND)
+        .about("Extracts the archive into the directory")
+        .arg(
+            Arg::new(ARCHIVE_ARG_NAME)
+                .required(true)
+                .takes_value(true)
+                .help("An archive to extract"),
+        )
+        .arg(
+            Arg::new(TARGET_DIRECTORY_ARG_NAME)
+                .required(false)
+                .takes_value(true)
+                .help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
+        );
+    let create_command = App::new(CREATE_SUBCOMMAND)
+        .about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
+        .arg(
+            Arg::new(SOURCE_DIRECTORY_ARG_NAME)
+                .required(true)
+                .takes_value(true)
+                .help("A directory to use for creating the archive"),
+        )
+        .arg(
+            Arg::new(TARGET_DIRECTORY_ARG_NAME)
+                .required(false)
+                .takes_value(true)
+                .help("A directory to create the archive in. Optional, will use the current directory if not specified"),
+        );
+
+    let arg_matches = App::new("pageserver zst blob [un]compressor utility")
+        .version(GIT_VERSION)
+        .subcommands(vec![list_command, extract_command, create_command])
+        .get_matches();
+
+    // Both lookups below report a missing value as an error instead of panicking.
+    let subcommand_name = arg_matches
+        .subcommand_name()
+        .context("No subcommand specified")?;
+    let subcommand_matches = arg_matches
+        .subcommand_matches(subcommand_name)
+        .with_context(|| {
+            format!(
+                "No subcommand arguments were recognized for subcommand '{}'",
+                subcommand_name
+            )
+        })?;
+
+    // Falls back to the current directory when no target directory is given.
+    let target_dir_arg = subcommand_matches.value_of(TARGET_DIRECTORY_ARG_NAME);
+    let target_dir = Path::new(target_dir_arg.unwrap_or("./"));
+
+    // Dispatch on the subcommand name.
+    if subcommand_name == LIST_SUBCOMMAND {
+        let archive = subcommand_matches
+            .value_of(ARCHIVE_ARG_NAME)
+            .map(Path::new)
+            .with_context(|| format!("No '{}' argument is specified", ARCHIVE_ARG_NAME))?;
+        list_archive(archive).await
+    } else if subcommand_name == EXTRACT_SUBCOMMAND {
+        let archive = subcommand_matches
+            .value_of(ARCHIVE_ARG_NAME)
+            .map(Path::new)
+            .with_context(|| format!("No '{}' argument is specified", ARCHIVE_ARG_NAME))?;
+        extract_archive(archive, target_dir).await
+    } else if subcommand_name == CREATE_SUBCOMMAND {
+        let source_dir = subcommand_matches
+            .value_of(SOURCE_DIRECTORY_ARG_NAME)
+            .map(Path::new)
+            .with_context(|| format!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME))?;
+        create_archive(source_dir, target_dir).await
+    } else {
+        bail!("Unknown subcommand {}", subcommand_name)
+    }
+}
+
+/// Prints the name and uncompressed size of every file recorded in the archive header.
+async fn list_archive(archive: &Path) -> anyhow::Result<()> {
+    let archive_path = archive.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the archive path '{}'",
+            archive.display()
+        )
+    })?;
+    ensure!(
+        archive_path.is_file(),
+        "Path '{}' is not an archive file",
+        archive_path.display()
+    );
+    println!("Listing an archive at path '{}'", archive_path.display());
+    let archive_name = archive_path
+        .file_name()
+        .and_then(|name| name.to_str())
+        .with_context(|| {
+            format!(
+                "Failed to get the archive name from the path '{}'",
+                archive_path.display()
+            )
+        })?;
+    let archive_bytes = fs::read(&archive_path)
+        .await
+        .context("Failed to read the archive bytes")?;
+    let mut archive_reader = archive_bytes.as_slice();
+    let header = compression::read_archive_header(archive_name, &mut archive_reader)
+        .await
+        .context("Failed to read the archive header")?;
+
+    let separator = "-------------------------------";
+    let root = Path::new("");
+    println!("{}", separator);
+    // Pad names to the longest UTF-8 path, but never narrower than the metadata file name.
+    let mut name_width = METADATA_FILE_NAME.len();
+    for file in &header.files {
+        if let Some(path_str) = file.subpath.as_path(root).to_str() {
+            name_width = name_width.max(path_str.len());
+        }
+    }
+    for file in &header.files {
+        println!(
+            "File: {:width$} uncompressed size: {} bytes",
+            file.subpath.as_path(root).display(),
+            file.size,
+            width = name_width,
+        )
+    }
+    println!(
+        "File: {:width$} uncompressed size: {} bytes",
+        METADATA_FILE_NAME,
+        header.metadata_file_size,
+        width = name_width,
+    );
+    println!("{}", separator);
+    Ok(())
+}
+
+/// Extracts `archive` into `target_dir` (created if missing); fails unless that directory is empty.
+async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
+    let archive = archive.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the archive path '{}'",
+            archive.display()
+        )
+    })?;
+    ensure!(
+        archive.is_file(),
+        "Path '{}' is not an archive file",
+        archive.display()
+    );
+    let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
+        Some(name) => name,
+        None => bail!(
+            "Failed to get the archive name from the path '{}'",
+            archive.display()
+        ),
+    };
+
+    if !target_dir.exists() {
+        fs::create_dir_all(target_dir).await.with_context(|| {
+            format!(
+                "Failed to create the target dir at path '{}'",
+                target_dir.display()
+            )
+        })?;
+    }
+    let target_dir = target_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the target dir path '{}'",
+            target_dir.display()
+        )
+    })?;
+    ensure!(
+        target_dir.is_dir(),
+        "Path '{}' is not a directory",
+        target_dir.display()
+    );
+    let mut dir_contents = fs::read_dir(&target_dir)
+        .await
+        .context("Failed to list the target directory contents")?;
+    let dir_entry = dir_contents
+        .next_entry()
+        .await
+        .context("Failed to list the target directory contents")?;
+    ensure!(
+        dir_entry.is_none(),
+        "Target directory '{}' is not empty",
+        target_dir.display()
+    );
+
+    println!(
+        "Extracting an archive at path '{}' into directory '{}'",
+        archive.display(),
+        target_dir.display()
+    );
+    let mut archive_file = fs::File::open(&archive).await.with_context(|| {
+        format!(
+            "Failed to open the archive file at path '{}'",
+            archive.display()
+        )
+    })?;
+    let header = compression::read_archive_header(archive_name, &mut archive_file)
+        .await
+        .context("Failed to read the archive header")?;
+    compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
+        .await
+        .context("Failed to extract the archive")
+}
+
+/// Archives the first-level files of `source_dir` (a metadata file is required) into `target_dir`.
+async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
+    let source_dir = source_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the source dir path '{}'",
+            source_dir.display()
+        )
+    })?;
+    ensure!(
+        source_dir.is_dir(),
+        "Path '{}' is not a directory",
+        source_dir.display()
+    );
+
+    if !target_dir.exists() {
+        fs::create_dir_all(target_dir).await.with_context(|| {
+            format!(
+                "Failed to create the target dir at path '{}'",
+                target_dir.display()
+            )
+        })?;
+    }
+    let target_dir = target_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the target dir path '{}'",
+            target_dir.display()
+        )
+    })?;
+    ensure!(
+        target_dir.is_dir(),
+        "Path '{}' is not a directory",
+        target_dir.display()
+    );
+
+    println!(
+        "Compressing directory '{}' and creating resulting archive in directory '{}'",
+        source_dir.display(),
+        target_dir.display()
+    );
+
+    let mut metadata_file_contents = None;
+    let mut files_to_archive = Vec::new();
+
+    let mut source_dir_contents = fs::read_dir(&source_dir)
+        .await
+        .context("Failed to read the source directory contents")?;
+
+    while let Some(source_dir_entry) = source_dir_contents
+        .next_entry()
+        .await
+        .context("Failed to read a source dir entry")?
+    {
+        let entry_path = source_dir_entry.path();
+        if entry_path.is_file() {
+            if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
+                let metadata_bytes = fs::read(entry_path)
+                    .await
+                    .context("Failed to read metadata file bytes in the source dir")?;
+                metadata_file_contents = Some(
+                    TimelineMetadata::from_bytes(&metadata_bytes)
+                        .context("Failed to parse metadata file contents in the source dir")?,
+                );
+            } else {
+                files_to_archive.push(entry_path);
+            }
+        }
+    }
+
+    let metadata = metadata_file_contents.with_context(|| {
+        format!(
+            "No metadata file found in the source dir '{}', cannot create the archive",
+            source_dir.display()
+        )
+    })?;
+
+    let _ = compression::archive_files_as_stream(
+        &source_dir,
+        files_to_archive.iter(),
+        &metadata,
+        move |mut archive_streamer, archive_name| async move {
+            let archive_target = target_dir.join(&archive_name);
+            let mut archive_file = fs::File::create(&archive_target).await?;
+            io::copy(&mut archive_streamer, &mut archive_file).await?;
+            Ok(archive_target)
+        },
+    )
+    .await
+    .context("Failed to create an archive")?;
+
+    Ok(())
+}
--- a/pageserver/src/bin/psbench.rs
+++ b/pageserver/src/bin/psbench.rs
@@ -0,0 +1,158 @@
+//! Pageserver benchmark tool
+//!
+//! Usually it's easier to write python perf tests, but here the performance
+//! of the tester matters, and the API is easier to work with from rust.
+use std::{collections::HashSet, io::{BufRead, BufReader, Cursor}, time::Duration};
+use pageserver::wal_metadata::{Page, WalEntryMetadata};
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use bytes::{BufMut, BytesMut};
+use clap::{App, Arg};
+use std::fs::File;
+use zenith_utils::{GIT_VERSION, lsn::Lsn, pq_proto::{BeMessage, FeMessage}};
+use std::time::Instant;
+
+use anyhow::Result;
+
+const BYTES_IN_PAGE: usize = 8 * 1024;
+
+/// Yields the lines of `file_name` one by one; panics if the file cannot be opened or a line cannot be read.
+pub fn read_lines_buffered(file_name: &str) -> impl Iterator<Item = String> {
+    let reader = BufReader::new(File::open(file_name).unwrap());
+    reader.lines().map(|line| line.unwrap())
+}
+
+/// Requests a single page over an established pagestream connection and returns its bytes.
+///
+/// `latest` is sent as a one-byte flag next to `lsn`. Panics unless the reply is a CopyData
+/// page response (tag 102) of `BYTES_IN_PAGE` bytes; tag 103 panics with the server's message.
+pub async fn get_page(
+    pagestream: &mut tokio::net::TcpStream,
+    lsn: &Lsn,
+    page: &Page,
+    latest: bool,
+) -> anyhow::Result<Vec<u8>> {
+    let msg = {
+        let query = {
+            let mut query = BytesMut::new();
+            query.put_u8(2); // Specifies get_page query
+            query.put_u8(u8::from(latest));
+            query.put_u64(lsn.0);
+            page.write(&mut query).await?;
+            query.freeze()
+        };
+
+        let mut buf = BytesMut::new();
+        let copy_msg = BeMessage::CopyData(&query);
+        BeMessage::write(&mut buf, &copy_msg)?;
+        buf.freeze()
+    };
+
+    // `write` may send only part of the buffer and silently truncate the request;
+    // `write_all` keeps writing until the whole message has been sent.
+    pagestream.write_all(&msg).await?;
+
+    let response = match FeMessage::read_fut(pagestream).await? {
+        Some(FeMessage::CopyData(page)) => page,
+        r => panic!("Expected CopyData message, got: {:?}", r),
+    };
+
+    let mut cursor = Cursor::new(response);
+    let tag = AsyncReadExt::read_u8(&mut cursor).await?;
+    match tag {
+        102 => {
+            let mut page = Vec::<u8>::new();
+            cursor.read_to_end(&mut page).await?;
+            if page.len() != BYTES_IN_PAGE {
+                panic!("Expected 8kb page, got: {:?}", page.len());
+            }
+            Ok(page)
+        }
+        103 => {
+            let mut bytes = Vec::<u8>::new();
+            cursor.read_to_end(&mut bytes).await?;
+            let message = String::from_utf8(bytes)?;
+            panic!("Got error message: {}", message);
+        }
+        _ => panic!("Unhandled tag {:?}", tag),
+    }
+}
+
+/// Fetches every page named in a WAL metadata file from a local pageserver and prints latency stats.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let arg_matches = App::new("psbench")
+        .about("Measures pageserver get_page latency for the pages listed in a WAL metadata file")
+        .version(GIT_VERSION)
+        .arg(
+            Arg::new("wal_metadata_file")
+                .help("Path to wal metadata file")
+                .required(true)
+                .index(1),
+        )
+        .arg(
+            Arg::new("tenant_hex")
+                .help("Hex id of the tenant to query")
+                .required(true)
+                .index(2),
+        )
+        .arg(
+            Arg::new("timeline")
+                .help("Id of the timeline to query")
+                .required(true)
+                .index(3),
+        )
+        .get_matches();
+
+    let metadata_file = arg_matches.value_of("wal_metadata_file").unwrap();
+    let tenant_hex = arg_matches.value_of("tenant_hex").unwrap();
+    let timeline = arg_matches.value_of("timeline").unwrap();
+
+    // Parse log lines
+    let wal_metadata: Vec<WalEntryMetadata> = read_lines_buffered(metadata_file)
+        .map(|line| serde_json::from_str(&line).expect("corrupt metadata file"))
+        .collect();
+
+    // Get raw TCP connection to the pageserver postgres protocol port
+    let mut socket = tokio::net::TcpStream::connect("localhost:15000").await?;
+    let (client, conn) = tokio_postgres::Config::new()
+        .host("127.0.0.1")
+        .port(15000)
+        .dbname("postgres")
+        .user("zenith_admin")
+        .connect_raw(&mut socket, tokio_postgres::NoTls)
+        .await?;
+
+    // Enter pagestream protocol
+    let init_query = format!("pagestream {} {}", tenant_hex, timeline);
+    tokio::select! {
+        _ = conn => panic!("connection terminated before the pagestream query completed"),
+        _ = client.query(init_query.as_str(), &[]) => (),
+    };
+
+    // Derive some variables
+    let total_wal_size: usize = wal_metadata.iter().map(|m| m.size).sum();
+    let affected_pages: HashSet<_> = wal_metadata.iter().flat_map(|m| m.affected_pages.clone()).collect();
+    // The statistics below index into `durations`, so an empty page set is rejected up front.
+    anyhow::ensure!(!affected_pages.is_empty(), "WAL metadata file references no pages");
+    let latest_lsn = wal_metadata.iter().map(|m| m.lsn).max().unwrap();
+
+    // Get all latest pages
+    let mut durations: Vec<Duration> = vec![];
+    for page in &affected_pages {
+        let start = Instant::now();
+        let _page_bytes = get_page(&mut socket, &latest_lsn, page, true).await?;
+        durations.push(start.elapsed());
+    }
+
+    durations.sort();
+    // Results are a space separated table of "metric_name value unit", for ease of parsing
+    println!("test_param num_pages {}", affected_pages.len());
+    println!("test_param num_wal_entries {}", wal_metadata.len());
+    println!("test_param total_wal_size {} bytes", total_wal_size);
+    println!("lower_is_better fastest {:?} microseconds", durations.first().unwrap().as_micros());
+    println!("lower_is_better median {:?} microseconds", durations[durations.len() / 2].as_micros());
+    println!("lower_is_better p99 {:?} microseconds", durations[durations.len() - 1 - durations.len() / 100].as_micros());
+    println!("lower_is_better slowest {:?} microseconds", durations.last().unwrap().as_micros());
+
+    Ok(())
+}
--- a/pageserver/src/bin/update_metadata.rs
+++ b/pageserver/src/bin/update_metadata.rs
@@ -6,9 +6,8 @@ use clap::{App, Arg};
 use pageserver::layered_repository::metadata::TimelineMetadata;
 use std::path::PathBuf;
 use std::str::FromStr;
-use utils::{lsn::Lsn, project_git_version};
-
-project_git_version!(GIT_VERSION);
+use zenith_utils::lsn::Lsn;
+use zenith_utils::GIT_VERSION;

 fn main() -> Result<()> {
    let arg_matches = App::new("Zenith update metadata utility")
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -4,26 +4,22 @@
 //! file, or on the command line.
 //! See also `settings.md` for better description on every parameter.

-use anyhow::{anyhow, bail, ensure, Context, Result};
-use remote_storage::{RemoteStorageConfig, RemoteStorageKind, S3Config};
+use anyhow::{bail, ensure, Context, Result};
+use toml_edit;
+use toml_edit::{Document, Item};
+use zenith_utils::postgres_backend::AuthType;
+use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
+
+use std::convert::TryInto;
 use std::env;
 use std::num::{NonZeroU32, NonZeroUsize};
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::time::Duration;
-use toml_edit;
-use toml_edit::{Document, Item};
-use url::Url;
-use utils::{
-    postgres_backend::AuthType,
-    zid::{NodeId, ZTenantId, ZTimelineId},
-};

 use crate::layered_repository::TIMELINES_SEGMENT_NAME;
-use crate::tenant_config::{TenantConf, TenantConfOpt};

 pub mod defaults {
-    use crate::tenant_config::defaults::*;
    use const_format::formatcp;

    pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
@@ -31,10 +27,27 @@ pub mod defaults {
    pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
    pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");

+    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
+    // would be more appropriate. But a low value forces the code to be exercised more,
+    // which is good for now to trigger bugs.
+    // This parameter actually determines L0 layer file size.
+    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
+
+    // Target file size, when creating image and delta layers.
+    // This parameter determines L1 layer file size.
+    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
+
+    pub const DEFAULT_COMPACTION_PERIOD: &str = "1 s";
+
+    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
+    pub const DEFAULT_GC_PERIOD: &str = "100 s";
+
    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

    pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
+    pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 10;
+    pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;

    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
@@ -49,6 +62,13 @@ pub mod defaults {
 #listen_pg_addr = '{DEFAULT_PG_LISTEN_ADDR}'
 #listen_http_addr = '{DEFAULT_HTTP_LISTEN_ADDR}'

+#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
+#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
+#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
+
+#gc_period = '{DEFAULT_GC_PERIOD}'
+#gc_horizon = {DEFAULT_GC_HORIZON}
+
 #wait_lsn_timeout = '{DEFAULT_WAIT_LSN_TIMEOUT}'
 #wal_redo_timeout = '{DEFAULT_WAL_REDO_TIMEOUT}'

@@ -57,17 +77,6 @@ pub mod defaults {
 # initial superuser role name to use when creating a new tenant
 #initial_superuser_name = '{DEFAULT_SUPERUSER}'

-# [tenant_config]
-#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
-#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
-#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
-#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
-
-#gc_period = '{DEFAULT_GC_PERIOD}'
-#gc_horizon = {DEFAULT_GC_HORIZON}
-#image_creation_threshold = {DEFAULT_IMAGE_CREATION_THRESHOLD}
-#pitr_interval = '{DEFAULT_PITR_INTERVAL}'
-
 # [remote_storage]

 "###
@@ -78,13 +87,29 @@ pub mod defaults {
 pub struct PageServerConf {
    // Identifier of that particular pageserver so e g safekeepers
    // can safely distinguish different pageservers
-    pub id: NodeId,
+    pub id: ZNodeId,

    /// Example (default): 127.0.0.1:64000
    pub listen_pg_addr: String,
    /// Example (default): 127.0.0.1:9898
    pub listen_http_addr: String,

+    // Flush out an in-memory layer if it's holding WAL older than this.
+    // This puts a backstop on how much WAL needs to be re-digested if the
+    // page server crashes.
+    // This parameter actually determines L0 layer file size.
+    pub checkpoint_distance: u64,
+
+    // Target file size, when creating image and delta layers.
+    // This parameter determines L1 layer file size.
+    pub compaction_target_size: u64,
+
+    // How often to check if there's compaction work to be done.
+    pub compaction_period: Duration,
+
+    pub gc_horizon: u64,
+    pub gc_period: Duration,
+
    // Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
    pub wait_lsn_timeout: Duration,
    // How long to wait for WAL redo to complete.
@@ -109,35 +134,6 @@ pub struct PageServerConf {

    pub auth_validation_public_key_path: Option<PathBuf>,
    pub remote_storage_config: Option<RemoteStorageConfig>,
-
-    pub profiling: ProfilingConfig,
-    pub default_tenant_conf: TenantConf,
-
-    /// A prefix to add in etcd brokers before every key.
-    /// Can be used for isolating different pageserver groups withing the same etcd cluster.
-    pub broker_etcd_prefix: String,
-
-    /// Etcd broker endpoints to connect to.
-    pub broker_endpoints: Vec<Url>,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum ProfilingConfig {
-    Disabled,
-    PageRequests,
-}
-
-impl FromStr for ProfilingConfig {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<ProfilingConfig, Self::Err> {
-        let result = match s {
-            "disabled"  => ProfilingConfig::Disabled,
-            "page_requests"  => ProfilingConfig::PageRequests,
-            _ => bail!("invalid value \"{s}\" for profiling option, valid values are \"disabled\" and \"page_requests\""),
-        };
-        Ok(result)
-    }
 }

 // use dedicated enum for builder to better indicate the intention
@@ -162,6 +158,14 @@ struct PageServerConfigBuilder {

    listen_http_addr: BuilderValue<String>,

+    checkpoint_distance: BuilderValue<u64>,
+
+    compaction_target_size: BuilderValue<u64>,
+    compaction_period: BuilderValue<Duration>,
+
+    gc_horizon: BuilderValue<u64>,
+    gc_period: BuilderValue<Duration>,
+
    wait_lsn_timeout: BuilderValue<Duration>,
    wal_redo_timeout: BuilderValue<Duration>,

@@ -180,11 +184,7 @@ struct PageServerConfigBuilder {
    auth_validation_public_key_path: BuilderValue<Option<PathBuf>>,
    remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,

-    id: BuilderValue<NodeId>,
-
-    profiling: BuilderValue<ProfilingConfig>,
-    broker_etcd_prefix: BuilderValue<String>,
-    broker_endpoints: BuilderValue<Vec<Url>>,
+    id: BuilderValue<ZNodeId>,
 }

 impl Default for PageServerConfigBuilder {
@@ -194,6 +194,13 @@ impl Default for PageServerConfigBuilder {
        Self {
            listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
            listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
+            checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
+            compaction_target_size: Set(DEFAULT_COMPACTION_TARGET_SIZE),
+            compaction_period: Set(humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
+                .expect("cannot parse default compaction period")),
+            gc_horizon: Set(DEFAULT_GC_HORIZON),
+            gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
+                .expect("cannot parse default gc period")),
            wait_lsn_timeout: Set(humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
                .expect("cannot parse default wait lsn timeout")),
            wal_redo_timeout: Set(humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
@@ -209,9 +216,6 @@ impl Default for PageServerConfigBuilder {
            auth_validation_public_key_path: Set(None),
            remote_storage_config: Set(None),
            id: NotSet,
-            profiling: Set(ProfilingConfig::Disabled),
-            broker_etcd_prefix: Set(etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string()),
-            broker_endpoints: Set(Vec::new()),
        }
    }
 }
@@ -225,6 +229,26 @@ impl PageServerConfigBuilder {
        self.listen_http_addr = BuilderValue::Set(listen_http_addr)
    }

+    pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
+        self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
+    }
+
+    pub fn compaction_target_size(&mut self, compaction_target_size: u64) {
+        self.compaction_target_size = BuilderValue::Set(compaction_target_size)
+    }
+
+    pub fn compaction_period(&mut self, compaction_period: Duration) {
+        self.compaction_period = BuilderValue::Set(compaction_period)
+    }
+
+    pub fn gc_horizon(&mut self, gc_horizon: u64) {
+        self.gc_horizon = BuilderValue::Set(gc_horizon)
+    }
+
+    pub fn gc_period(&mut self, gc_period: Duration) {
+        self.gc_period = BuilderValue::Set(gc_period)
+    }
+
    pub fn wait_lsn_timeout(&mut self, wait_lsn_timeout: Duration) {
        self.wait_lsn_timeout = BuilderValue::Set(wait_lsn_timeout)
    }
@@ -268,70 +292,117 @@ impl PageServerConfigBuilder {
        self.remote_storage_config = BuilderValue::Set(remote_storage_config)
    }

-    pub fn broker_endpoints(&mut self, broker_endpoints: Vec<Url>) {
-        self.broker_endpoints = BuilderValue::Set(broker_endpoints)
-    }
-
-    pub fn broker_etcd_prefix(&mut self, broker_etcd_prefix: String) {
-        self.broker_etcd_prefix = BuilderValue::Set(broker_etcd_prefix)
-    }
-
-    pub fn id(&mut self, node_id: NodeId) {
+    pub fn id(&mut self, node_id: ZNodeId) {
        self.id = BuilderValue::Set(node_id)
    }

-    pub fn profiling(&mut self, profiling: ProfilingConfig) {
-        self.profiling = BuilderValue::Set(profiling)
-    }
-
-    pub fn build(self) -> anyhow::Result<PageServerConf> {
-        let broker_endpoints = self
-            .broker_endpoints
-            .ok_or(anyhow!("No broker endpoints provided"))?;
-
+    pub fn build(self) -> Result<PageServerConf> {
        Ok(PageServerConf {
            listen_pg_addr: self
                .listen_pg_addr
-                .ok_or(anyhow!("missing listen_pg_addr"))?,
+                .ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
            listen_http_addr: self
                .listen_http_addr
-                .ok_or(anyhow!("missing listen_http_addr"))?,
+                .ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
+            checkpoint_distance: self
+                .checkpoint_distance
+                .ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
+            compaction_target_size: self
+                .compaction_target_size
+                .ok_or(anyhow::anyhow!("missing compaction_target_size"))?,
+            compaction_period: self
+                .compaction_period
+                .ok_or(anyhow::anyhow!("missing compaction_period"))?,
+            gc_horizon: self
+                .gc_horizon
+                .ok_or(anyhow::anyhow!("missing gc_horizon"))?,
+            gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
            wait_lsn_timeout: self
                .wait_lsn_timeout
-                .ok_or(anyhow!("missing wait_lsn_timeout"))?,
+                .ok_or(anyhow::anyhow!("missing wait_lsn_timeout"))?,
            wal_redo_timeout: self
                .wal_redo_timeout
-                .ok_or(anyhow!("missing wal_redo_timeout"))?,
-            superuser: self.superuser.ok_or(anyhow!("missing superuser"))?,
+                .ok_or(anyhow::anyhow!("missing wal_redo_timeout"))?,
+            superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
            page_cache_size: self
                .page_cache_size
-                .ok_or(anyhow!("missing page_cache_size"))?,
+                .ok_or(anyhow::anyhow!("missing page_cache_size"))?,
            max_file_descriptors: self
                .max_file_descriptors
-                .ok_or(anyhow!("missing max_file_descriptors"))?,
-            workdir: self.workdir.ok_or(anyhow!("missing workdir"))?,
+                .ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
+            workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
            pg_distrib_dir: self
                .pg_distrib_dir
-                .ok_or(anyhow!("missing pg_distrib_dir"))?,
-            auth_type: self.auth_type.ok_or(anyhow!("missing auth_type"))?,
+                .ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
+            auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
            auth_validation_public_key_path: self
                .auth_validation_public_key_path
-                .ok_or(anyhow!("missing auth_validation_public_key_path"))?,
+                .ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
            remote_storage_config: self
                .remote_storage_config
-                .ok_or(anyhow!("missing remote_storage_config"))?,
-            id: self.id.ok_or(anyhow!("missing id"))?,
-            profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
-            // TenantConf is handled separately
-            default_tenant_conf: TenantConf::default(),
-            broker_endpoints,
-            broker_etcd_prefix: self
-                .broker_etcd_prefix
-                .ok_or(anyhow!("missing broker_etcd_prefix"))?,
+                .ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
+            id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
        })
    }
 }

+/// External backup storage configuration, enough for creating a client for that storage.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RemoteStorageConfig {
+    /// Max allowed number of concurrent sync operations between pageserver and the remote storage.
+    pub max_concurrent_sync: NonZeroUsize,
+    /// Max allowed errors before the sync task is considered failed and evicted.
+    pub max_sync_errors: NonZeroU32,
+    /// The storage connection configuration.
+    pub storage: RemoteStorageKind,
+}
+
+/// A kind of a remote storage to connect to, with its connection configuration.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum RemoteStorageKind {
+    /// Storage based on local file system.
+    /// Specify a root folder to place all stored files into.
+    LocalFs(PathBuf),
+    /// AWS S3 based storage, storing all files in the S3 bucket
+    /// specified by the config
+    AwsS3(S3Config),
+}
+
+/// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
+#[derive(Clone, PartialEq, Eq)]
+pub struct S3Config {
+    /// Name of the bucket to connect to.
+    pub bucket_name: String,
+    /// The region where the bucket is located at.
+    pub bucket_region: String,
+    /// A "subfolder" in the bucket, to use the same bucket separately by multiple pageservers at once.
+    pub prefix_in_bucket: Option<String>,
+    /// "Login" to use when connecting to bucket.
+    /// Can be empty for cases like AWS k8s IAM
+    /// where we can allow certain pods to connect
+    /// to the bucket directly without any credentials.
+    pub access_key_id: Option<String>,
+    /// "Password" to use when connecting to bucket.
+    pub secret_access_key: Option<String>,
+    /// A base URL to send S3 requests to.
+    /// By default, the endpoint is derived from a region name, assuming it's
+    /// an AWS S3 region name, erroring on wrong region name.
+    /// Endpoint provides a way to support other S3 flavors and their regions.
+    ///
+    /// Example: `http://127.0.0.1:5000`
+    pub endpoint: Option<String>,
+}
+
+impl std::fmt::Debug for S3Config {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("S3Config")
+            .field("bucket_name", &self.bucket_name)
+            .field("bucket_region", &self.bucket_region)
+            .field("prefix_in_bucket", &self.prefix_in_bucket)
+            .finish_non_exhaustive() // prints `..` for the fields left out (credentials, endpoint)
+    }
+}
+
 impl PageServerConf {
    //
    // Repository paths, relative to workdir.
@@ -369,16 +440,21 @@ impl PageServerConf {
    /// validating the input and failing on errors.
    ///
    /// This leaves any options not present in the file in the built-in defaults.
-    pub fn parse_and_validate(toml: &Document, workdir: &Path) -> anyhow::Result<Self> {
+    pub fn parse_and_validate(toml: &Document, workdir: &Path) -> Result<Self> {
        let mut builder = PageServerConfigBuilder::default();
        builder.workdir(workdir.to_owned());

-        let mut t_conf: TenantConfOpt = Default::default();
-
        for (key, item) in toml.iter() {
            match key {
                "listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
                "listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
+                "checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
+                "compaction_target_size" => {
+                    builder.compaction_target_size(parse_toml_u64(key, item)?)
+                }
+                "compaction_period" => builder.compaction_period(parse_toml_duration(key, item)?),
+                "gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
+                "gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
                "wait_lsn_timeout" => builder.wait_lsn_timeout(parse_toml_duration(key, item)?),
                "wal_redo_timeout" => builder.wal_redo_timeout(parse_toml_duration(key, item)?),
                "initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
@@ -392,27 +468,12 @@ impl PageServerConf {
                "auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
                    PathBuf::from(parse_toml_string(key, item)?),
                )),
-                "auth_type" => builder.auth_type(parse_toml_from_str(key, item)?),
+                "auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
                "remote_storage" => {
                    builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
                }
-                "tenant_config" => {
-                    t_conf = Self::parse_toml_tenant_conf(item)?;
-                }
-                "id" => builder.id(NodeId(parse_toml_u64(key, item)?)),
-                "profiling" => builder.profiling(parse_toml_from_str(key, item)?),
-                "broker_etcd_prefix" => builder.broker_etcd_prefix(parse_toml_string(key, item)?),
-                "broker_endpoints" => builder.broker_endpoints(
-                    parse_toml_array(key, item)?
-                        .into_iter()
-                        .map(|endpoint_str| {
-                            endpoint_str.parse::<Url>().with_context(|| {
-                                format!("Array item {endpoint_str} for key {key} is not a valid url endpoint")
-                            })
-                        })
-                        .collect::<anyhow::Result<_>>()?,
-                ),
-                _ => bail!("unrecognized pageserver option '{key}'"),
+                "id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
+                _ => bail!("unrecognized pageserver option '{}'", key),
            }
        }

@@ -438,75 +499,41 @@ impl PageServerConf {
            );
        }

-        conf.default_tenant_conf = t_conf.merge(TenantConf::default());
-
        Ok(conf)
    }

-    // subroutine of parse_and_validate to parse `[tenant_conf]` section
-
-    pub fn parse_toml_tenant_conf(item: &toml_edit::Item) -> Result<TenantConfOpt> {
-        let mut t_conf: TenantConfOpt = Default::default();
-        if let Some(checkpoint_distance) = item.get("checkpoint_distance") {
-            t_conf.checkpoint_distance =
-                Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
-        }
-
-        if let Some(compaction_target_size) = item.get("compaction_target_size") {
-            t_conf.compaction_target_size = Some(parse_toml_u64(
-                "compaction_target_size",
-                compaction_target_size,
-            )?);
-        }
-
-        if let Some(compaction_period) = item.get("compaction_period") {
-            t_conf.compaction_period =
-                Some(parse_toml_duration("compaction_period", compaction_period)?);
-        }
-
-        if let Some(compaction_threshold) = item.get("compaction_threshold") {
-            t_conf.compaction_threshold =
-                Some(parse_toml_u64("compaction_threshold", compaction_threshold)?.try_into()?);
-        }
-
-        if let Some(gc_horizon) = item.get("gc_horizon") {
-            t_conf.gc_horizon = Some(parse_toml_u64("gc_horizon", gc_horizon)?);
-        }
-
-        if let Some(gc_period) = item.get("gc_period") {
-            t_conf.gc_period = Some(parse_toml_duration("gc_period", gc_period)?);
-        }
-
-        if let Some(pitr_interval) = item.get("pitr_interval") {
-            t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
-        }
-
-        Ok(t_conf)
-    }
-
    /// subroutine of parse_config(), to parse the `[remote_storage]` table.
    fn parse_remote_storage_config(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
        let local_path = toml.get("local_path");
        let bucket_name = toml.get("bucket_name");
        let bucket_region = toml.get("bucket_region");

-        let max_concurrent_syncs = NonZeroUsize::new(
-            parse_optional_integer("max_concurrent_syncs", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
-        )
-        .context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
-
-        let max_sync_errors = NonZeroU32::new(
-            parse_optional_integer("max_sync_errors", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
-        )
-        .context("Failed to parse 'max_sync_errors' as a positive integer")?;
-
-        let concurrency_limit = NonZeroUsize::new(
-            parse_optional_integer("concurrency_limit", toml)?
-                .unwrap_or(remote_storage::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
-        )
-        .context("Failed to parse 'concurrency_limit' as a positive integer")?;
+        let max_concurrent_sync: NonZeroUsize = if let Some(s) = toml.get("max_concurrent_sync") {
+            // Propagate the precise parse error (wrong type, negative value) instead of
+            // collapsing every failure into the generic "non-zero" message below.
+            let toml_u64 = parse_toml_u64("max_concurrent_sync", s)?;
+            let sync_limit: usize = toml_u64.try_into().with_context(|| {
+                format!("'max_concurrent_sync' value {} is too large", toml_u64)
+            })?;
+            // Only a literal zero can reach this error now.
+            NonZeroUsize::new(sync_limit)
+                .context("'max_concurrent_sync' must be a non-zero positive integer")?
+        } else {
+            NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
+        };
+        let max_sync_errors: NonZeroU32 = if let Some(s) = toml.get("max_sync_errors") {
+            // Propagate the precise parse error (wrong type, negative value) instead of
+            // collapsing every failure into the generic "non-zero" message below.
+            let toml_u64 = parse_toml_u64("max_sync_errors", s)?;
+            let error_limit: u32 = toml_u64.try_into().with_context(|| {
+                format!("'max_sync_errors' value {} is too large", toml_u64)
+            })?;
+            // Only a literal zero can reach this error now.
+            NonZeroU32::new(error_limit)
+                .context("'max_sync_errors' must be a non-zero positive integer")?
+        } else {
+            NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
+        };

        let storage = match (local_path, bucket_name, bucket_region) {
            (None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
@@ -519,6 +546,16 @@ impl PageServerConf {
            (None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
                bucket_name: parse_toml_string("bucket_name", bucket_name)?,
                bucket_region: parse_toml_string("bucket_region", bucket_region)?,
+                access_key_id: toml
+                    .get("access_key_id")
+                    .map(|access_key_id| parse_toml_string("access_key_id", access_key_id))
+                    .transpose()?,
+                secret_access_key: toml
+                    .get("secret_access_key")
+                    .map(|secret_access_key| {
+                        parse_toml_string("secret_access_key", secret_access_key)
+                    })
+                    .transpose()?,
                prefix_in_bucket: toml
                    .get("prefix_in_bucket")
                    .map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
@@ -527,7 +564,6 @@ impl PageServerConf {
                    .get("endpoint")
                    .map(|endpoint| parse_toml_string("endpoint", endpoint))
                    .transpose()?,
-                concurrency_limit,
            }),
            (Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
                parse_toml_string("local_path", local_path)?,
@@ -536,7 +572,7 @@ impl PageServerConf {
        };

        Ok(RemoteStorageConfig {
-            max_concurrent_syncs,
+            max_concurrent_sync,
            max_sync_errors,
            storage,
        })
@@ -544,13 +580,18 @@ impl PageServerConf {

    #[cfg(test)]
    pub fn test_repo_dir(test_name: &str) -> PathBuf {
-        PathBuf::from(format!("../tmp_check/test_{test_name}"))
+        PathBuf::from(format!("../tmp_check/test_{}", test_name))
    }

    #[cfg(test)]
    pub fn dummy_conf(repo_dir: PathBuf) -> Self {
        PageServerConf {
-            id: NodeId(0),
+            id: ZNodeId(0),
+            checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
+            compaction_target_size: 4 * 1024 * 1024,
+            compaction_period: Duration::from_secs(10),
+            gc_horizon: defaults::DEFAULT_GC_HORIZON,
+            gc_period: Duration::from_secs(10),
            wait_lsn_timeout: Duration::from_secs(60),
            wal_redo_timeout: Duration::from_secs(60),
            page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
@@ -563,10 +604,6 @@ impl PageServerConf {
            auth_type: AuthType::Trust,
            auth_validation_public_key_path: None,
            remote_storage_config: None,
-            profiling: ProfilingConfig::Disabled,
-            default_tenant_conf: TenantConf::dummy_conf(),
-            broker_endpoints: Vec::new(),
-            broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
        }
    }
 }
@@ -576,7 +613,7 @@ impl PageServerConf {
 fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
    let s = item
        .as_str()
-        .with_context(|| format!("configure option {name} is not a string"))?;
+        .with_context(|| format!("configure option {} is not a string", name))?;
    Ok(s.to_string())
 }

@@ -585,68 +622,26 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
    // for our use, though.
    let i: i64 = item
        .as_integer()
-        .with_context(|| format!("configure option {name} is not an integer"))?;
+        .with_context(|| format!("configure option {} is not an integer", name))?;
    if i < 0 {
-        bail!("configure option {name} cannot be negative");
+        bail!("configure option {} cannot be negative", name);
    }
    Ok(i as u64)
 }

-fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
-where
-    I: TryFrom<i64, Error = E>,
-    E: std::error::Error + Send + Sync + 'static,
-{
-    let toml_integer = match item.get(name) {
-        Some(item) => item
-            .as_integer()
-            .with_context(|| format!("configure option {name} is not an integer"))?,
-        None => return Ok(None),
-    };
-
-    I::try_from(toml_integer)
-        .map(Some)
-        .with_context(|| format!("configure option {name} is too large"))
-}
-
 fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
    let s = item
        .as_str()
-        .with_context(|| format!("configure option {name} is not a string"))?;
+        .with_context(|| format!("configure option {} is not a string", name))?;

    Ok(humantime::parse_duration(s)?)
 }

-fn parse_toml_from_str<T>(name: &str, item: &Item) -> anyhow::Result<T>
-where
-    T: FromStr,
-    <T as FromStr>::Err: std::fmt::Display,
-{
+fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
    let v = item
        .as_str()
-        .with_context(|| format!("configure option {name} is not a string"))?;
-    T::from_str(v).map_err(|e| {
-        anyhow!(
-            "Failed to parse string as {parse_type} for configure option {name}: {e}",
-            parse_type = stringify!(T)
-        )
-    })
-}
-
-fn parse_toml_array(name: &str, item: &Item) -> anyhow::Result<Vec<String>> {
-    let array = item
-        .as_array()
-        .with_context(|| format!("configure option {name} is not an array"))?;
-
-    array
-        .iter()
-        .map(|value| {
-            value
-                .as_str()
-                .map(str::to_string)
-                .with_context(|| format!("Array item {value:?} for key {name} is not a string"))
-        })
-        .collect()
+        .with_context(|| format!("configure option {} is not a string", name))?;
+    AuthType::from_str(v)
 }

 #[cfg(test)]
@@ -663,6 +658,14 @@ mod tests {
 listen_pg_addr = '127.0.0.1:64000'
 listen_http_addr = '127.0.0.1:9898'

+checkpoint_distance = 111 # in bytes
+
+compaction_target_size = 111 # in bytes
+compaction_period = '111 s'
+
+gc_period = '222 s'
+gc_horizon = 222
+
 wait_lsn_timeout = '111 s'
 wal_redo_timeout = '111 s'

@@ -679,23 +682,26 @@ id = 10
    fn parse_defaults() -> anyhow::Result<()> {
        let tempdir = tempdir()?;
        let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
-        let broker_endpoint = "http://127.0.0.1:7777";
-        // we have to create dummy values to overcome the validation errors
-        let config_string = format!(
-            "pg_distrib_dir='{}'\nid=10\nbroker_endpoints = ['{broker_endpoint}']",
-            pg_distrib_dir.display()
-        );
+        // we have to create dummy paths to overcome the validation errors
+        let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
        let toml = config_string.parse()?;

-        let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
-            .unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
+        let parsed_config =
+            PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
+                panic!("Failed to parse config '{}', reason: {:?}", config_string, e)
+            });

        assert_eq!(
            parsed_config,
            PageServerConf {
-                id: NodeId(10),
+                id: ZNodeId(10),
                listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
                listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
+                checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
+                compaction_target_size: defaults::DEFAULT_COMPACTION_TARGET_SIZE,
+                compaction_period: humantime::parse_duration(defaults::DEFAULT_COMPACTION_PERIOD)?,
+                gc_horizon: defaults::DEFAULT_GC_HORIZON,
+                gc_period: humantime::parse_duration(defaults::DEFAULT_GC_PERIOD)?,
                wait_lsn_timeout: humantime::parse_duration(defaults::DEFAULT_WAIT_LSN_TIMEOUT)?,
                wal_redo_timeout: humantime::parse_duration(defaults::DEFAULT_WAL_REDO_TIMEOUT)?,
                superuser: defaults::DEFAULT_SUPERUSER.to_string(),
@@ -706,12 +712,6 @@ id = 10
                auth_type: AuthType::Trust,
                auth_validation_public_key_path: None,
                remote_storage_config: None,
-                profiling: ProfilingConfig::Disabled,
-                default_tenant_conf: TenantConf::default(),
-                broker_endpoints: vec![broker_endpoint
-                    .parse()
-                    .expect("Failed to parse a valid broker endpoint URL")],
-                broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
            },
            "Correct defaults should be used when no config values are provided"
        );
@@ -723,23 +723,30 @@ id = 10
    fn parse_basic_config() -> anyhow::Result<()> {
        let tempdir = tempdir()?;
        let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
-        let broker_endpoint = "http://127.0.0.1:7777";

        let config_string = format!(
-            "{ALL_BASE_VALUES_TOML}pg_distrib_dir='{}'\nbroker_endpoints = ['{broker_endpoint}']",
+            "{}pg_distrib_dir='{}'",
+            ALL_BASE_VALUES_TOML,
            pg_distrib_dir.display()
        );
        let toml = config_string.parse()?;

-        let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
-            .unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
+        let parsed_config =
+            PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
+                panic!("Failed to parse config '{}', reason: {}", config_string, e)
+            });

        assert_eq!(
            parsed_config,
            PageServerConf {
-                id: NodeId(10),
+                id: ZNodeId(10),
                listen_pg_addr: "127.0.0.1:64000".to_string(),
                listen_http_addr: "127.0.0.1:9898".to_string(),
+                checkpoint_distance: 111,
+                compaction_target_size: 111,
+                compaction_period: Duration::from_secs(111),
+                gc_horizon: 222,
+                gc_period: Duration::from_secs(222),
                wait_lsn_timeout: Duration::from_secs(111),
                wal_redo_timeout: Duration::from_secs(111),
                superuser: "zzzz".to_string(),
@@ -750,12 +757,6 @@ id = 10
                auth_type: AuthType::Trust,
                auth_validation_public_key_path: None,
                remote_storage_config: None,
-                profiling: ProfilingConfig::Disabled,
-                default_tenant_conf: TenantConf::default(),
-                broker_endpoints: vec![broker_endpoint
-                    .parse()
-                    .expect("Failed to parse a valid broker endpoint URL")],
-                broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
            },
            "Should be able to parse all basic config values correctly"
        );
@@ -767,7 +768,6 @@ id = 10
    fn parse_remote_fs_storage_config() -> anyhow::Result<()> {
        let tempdir = tempdir()?;
        let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
-        let broker_endpoint = "http://127.0.0.1:7777";

        let local_storage_path = tempdir.path().join("local_remote_storage");

@@ -785,36 +785,37 @@ local_path = '{}'"#,

        for remote_storage_config_str in identical_toml_declarations {
            let config_string = format!(
-                r#"{ALL_BASE_VALUES_TOML}
+                r#"{}
 pg_distrib_dir='{}'
-broker_endpoints = ['{broker_endpoint}']

-{remote_storage_config_str}"#,
+{}"#,
+                ALL_BASE_VALUES_TOML,
                pg_distrib_dir.display(),
+                remote_storage_config_str,
            );

            let toml = config_string.parse()?;

            let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
                .unwrap_or_else(|e| {
-                    panic!("Failed to parse config '{config_string}', reason: {e:?}")
+                    panic!("Failed to parse config '{}', reason: {}", config_string, e)
                })
                .remote_storage_config
                .expect("Should have remote storage config for the local FS");

            assert_eq!(
-                parsed_remote_storage_config,
-                RemoteStorageConfig {
-                    max_concurrent_syncs: NonZeroUsize::new(
-                        remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS
-                    )
-                        .unwrap(),
-                    max_sync_errors: NonZeroU32::new(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
-                        .unwrap(),
-                    storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
-                },
-                "Remote storage config should correctly parse the local FS config and fill other storage defaults"
-            );
+            parsed_remote_storage_config,
+            RemoteStorageConfig {
+                max_concurrent_sync: NonZeroUsize::new(
+                    defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC
+                )
+                .unwrap(),
+                max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
+                    .unwrap(),
+                storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
+            },
+            "Remote storage config should correctly parse the local FS config and fill other storage defaults"
+        );
        }
        Ok(())
    }
@@ -827,44 +828,47 @@ broker_endpoints = ['{broker_endpoint}']
        let bucket_name = "some-sample-bucket".to_string();
        let bucket_region = "eu-north-1".to_string();
        let prefix_in_bucket = "test_prefix".to_string();
+        let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
+        let secret_access_key = "SOMEsEcReTsd292v".to_string();
        let endpoint = "http://localhost:5000".to_string();
-        let max_concurrent_syncs = NonZeroUsize::new(111).unwrap();
+        let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
        let max_sync_errors = NonZeroU32::new(222).unwrap();
-        let s3_concurrency_limit = NonZeroUsize::new(333).unwrap();
-        let broker_endpoint = "http://127.0.0.1:7777";

        let identical_toml_declarations = &[
            format!(
                r#"[remote_storage]
-max_concurrent_syncs = {max_concurrent_syncs}
-max_sync_errors = {max_sync_errors}
-bucket_name = '{bucket_name}'
-bucket_region = '{bucket_region}'
-prefix_in_bucket = '{prefix_in_bucket}'
-endpoint = '{endpoint}'
-concurrency_limit = {s3_concurrency_limit}"#
+max_concurrent_sync = {}
+max_sync_errors = {}
+bucket_name = '{}'
+bucket_region = '{}'
+prefix_in_bucket = '{}'
+access_key_id = '{}'
+secret_access_key = '{}'
+endpoint = '{}'"#,
+                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
            ),
            format!(
-                "remote_storage={{max_concurrent_syncs={max_concurrent_syncs}, max_sync_errors={max_sync_errors}, bucket_name='{bucket_name}',\
-                bucket_region='{bucket_region}', prefix_in_bucket='{prefix_in_bucket}', endpoint='{endpoint}', concurrency_limit={s3_concurrency_limit}}}",
+                "remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
+                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
            ),
        ];

        for remote_storage_config_str in identical_toml_declarations {
            let config_string = format!(
-                r#"{ALL_BASE_VALUES_TOML}
+                r#"{}
 pg_distrib_dir='{}'
-broker_endpoints = ['{broker_endpoint}']

-{remote_storage_config_str}"#,
+{}"#,
+                ALL_BASE_VALUES_TOML,
                pg_distrib_dir.display(),
+                remote_storage_config_str,
            );

            let toml = config_string.parse()?;

            let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
                .unwrap_or_else(|e| {
-                    panic!("Failed to parse config '{config_string}', reason: {e:?}")
+                    panic!("Failed to parse config '{}', reason: {}", config_string, e)
                })
                .remote_storage_config
                .expect("Should have remote storage config for S3");
@@ -872,14 +876,15 @@ broker_endpoints = ['{broker_endpoint}']
            assert_eq!(
                parsed_remote_storage_config,
                RemoteStorageConfig {
-                    max_concurrent_syncs,
+                    max_concurrent_sync,
                    max_sync_errors,
                    storage: RemoteStorageKind::AwsS3(S3Config {
                        bucket_name: bucket_name.clone(),
                        bucket_region: bucket_region.clone(),
+                        access_key_id: Some(access_key_id.clone()),
+                        secret_access_key: Some(secret_access_key.clone()),
                        prefix_in_bucket: Some(prefix_in_bucket.clone()),
-                        endpoint: Some(endpoint.clone()),
-                        concurrency_limit: s3_concurrency_limit,
+                        endpoint: Some(endpoint.clone())
                    }),
                },
                "Remote storage config should correctly parse the S3 config"
--- a/pageserver/src/http/models.rs
+++ b/pageserver/src/http/models.rs
@@ -1,8 +1,8 @@
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
-use utils::{
+use zenith_utils::{
    lsn::Lsn,
-    zid::{NodeId, ZTenantId, ZTimelineId},
+    zid::{ZNodeId, ZTenantId, ZTimelineId},
 };

 #[serde_as]
@@ -20,19 +20,11 @@ pub struct TimelineCreateRequest {
 }

 #[serde_as]
-#[derive(Serialize, Deserialize, Default)]
+#[derive(Serialize, Deserialize)]
 pub struct TenantCreateRequest {
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub new_tenant_id: Option<ZTenantId>,
-    pub checkpoint_distance: Option<u64>,
-    pub compaction_target_size: Option<u64>,
-    pub compaction_period: Option<String>,
-    pub compaction_threshold: Option<usize>,
-    pub gc_horizon: Option<u64>,
-    pub gc_period: Option<String>,
-    pub image_creation_threshold: Option<usize>,
-    pub pitr_interval: Option<String>,
 }

 #[serde_as]
@@ -42,46 +34,5 @@ pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub ZTenantId

 #[derive(Serialize)]
 pub struct StatusResponse {
-    pub id: NodeId,
-}
-
-impl TenantCreateRequest {
-    pub fn new(new_tenant_id: Option<ZTenantId>) -> TenantCreateRequest {
-        TenantCreateRequest {
-            new_tenant_id,
-            ..Default::default()
-        }
-    }
-}
-
-#[serde_as]
-#[derive(Serialize, Deserialize)]
-pub struct TenantConfigRequest {
-    pub tenant_id: ZTenantId,
-    #[serde(default)]
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub checkpoint_distance: Option<u64>,
-    pub compaction_target_size: Option<u64>,
-    pub compaction_period: Option<String>,
-    pub compaction_threshold: Option<usize>,
-    pub gc_horizon: Option<u64>,
-    pub gc_period: Option<String>,
-    pub image_creation_threshold: Option<usize>,
-    pub pitr_interval: Option<String>,
-}
-
-impl TenantConfigRequest {
-    pub fn new(tenant_id: ZTenantId) -> TenantConfigRequest {
-        TenantConfigRequest {
-            tenant_id,
-            checkpoint_distance: None,
-            compaction_target_size: None,
-            compaction_period: None,
-            compaction_threshold: None,
-            gc_horizon: None,
-            gc_period: None,
-            image_creation_threshold: None,
-            pitr_interval: None,
-        }
-    }
+    pub id: ZNodeId,
 }
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -123,53 +123,6 @@ paths:
              schema:
                $ref: "#/components/schemas/Error"

-  /v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
-    parameters:
-      - name: tenant_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-      - name: timeline_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-    get:
-      description: Get wal receiver's data attached to the timeline
-      responses:
-        "200":
-          description: WalReceiverEntry
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/WalReceiverEntry"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "404":
-          description: Error when no wal receiver is running or found
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/NotFoundError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"

  /v1/tenant/{tenant_id}/timeline/{timeline_id}/attach:
    parameters:
@@ -375,7 +328,11 @@ paths:
        content:
          application/json:
            schema:
-              $ref: "#/components/schemas/TenantCreateInfo"
+              type: object
+              properties:
+                new_tenant_id:
+                  type: string
+                  format: hex
      responses:
        "201":
          description: New tenant created successfully
@@ -414,48 +371,7 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/Error"
-  /v1/tenant/config:
-    put:
-      description: |
-        Update tenant's config.
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: "#/components/schemas/TenantConfigInfo"
-      responses:
-        "200":
-          description: OK
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  $ref: "#/components/schemas/TenantInfo"
-        "400":
-          description: Malformed tenant config request
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
+
 components:
  securitySchemes:
    JWT:
@@ -473,45 +389,6 @@ components:
          type: string
        state:
          type: string
-    TenantCreateInfo:
-      type: object
-      properties:
-        new_tenant_id:
-          type: string
-          format: hex
-        tenant_id:
-          type: string
-          format: hex
-        gc_period:
-          type: string
-        gc_horizon:
-          type: integer
-        pitr_interval:
-          type: string
-        checkpoint_distance:
-          type: integer
-        compaction_period:
-          type: string
-        compaction_threshold:
-          type: string
-    TenantConfigInfo:
-      type: object
-      properties:
-        tenant_id:
-          type: string
-          format: hex
-        gc_period:
-          type: string
-        gc_horizon:
-          type: integer
-        pitr_interval:
-          type: string
-        checkpoint_distance:
-          type: integer
-        compaction_period:
-          type: string
-        compaction_threshold:
-          type: string
    TimelineInfo:
      type: object
      required:
@@ -532,7 +409,6 @@ components:
      type: object
      required:
        - awaits_download
-        - remote_consistent_lsn
      properties:
        awaits_download:
          type: boolean
@@ -567,21 +443,6 @@ components:
          type: integer
        current_logical_size_non_incremental:
          type: integer
-    WalReceiverEntry:
-      type: object
-      required:
-        - thread_id
-        - wal_producer_connstr
-      properties:
-        thread_id:
-          type: integer
-        wal_producer_connstr:
-          type: string
-        last_received_msg_lsn:
-          type: string
-          format: hex
-        last_received_msg_ts:
-          type: integer

    Error:
      type: object
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1,39 +1,36 @@
 use std::sync::Arc;

-use anyhow::{Context, Result};
+use anyhow::Result;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
-use remote_storage::GenericRemoteStorage;
 use tracing::*;
+use zenith_utils::auth::JwtAuth;
+use zenith_utils::http::endpoint::attach_openapi_ui;
+use zenith_utils::http::endpoint::auth_middleware;
+use zenith_utils::http::endpoint::check_permission;
+use zenith_utils::http::error::ApiError;
+use zenith_utils::http::{
+    endpoint,
+    error::HttpErrorBody,
+    json::{json_request, json_response},
+    request::parse_request_param,
+};
+use zenith_utils::http::{RequestExt, RouterBuilder};
+use zenith_utils::zid::{ZTenantTimelineId, ZTimelineId};

 use super::models::{
-    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse,
-    TimelineCreateRequest,
+    StatusResponse, TenantCreateRequest, TenantCreateResponse, TimelineCreateRequest,
 };
+use crate::remote_storage::{schedule_timeline_download, RemoteIndex};
 use crate::repository::Repository;
-use crate::storage_sync;
-use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
-use crate::tenant_config::TenantConfOpt;
 use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
-use crate::{config::PageServerConf, tenant_mgr, timelines};
-use utils::{
-    auth::JwtAuth,
-    http::{
-        endpoint::{self, attach_openapi_ui, auth_middleware, check_permission},
-        error::{ApiError, HttpErrorBody},
-        json::{json_request, json_response},
-        request::parse_request_param,
-        RequestExt, RouterBuilder,
-    },
-    zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
-};
+use crate::{config::PageServerConf, tenant_mgr, timelines, ZTenantId};

 struct State {
    conf: &'static PageServerConf,
    auth: Option<Arc<JwtAuth>>,
    remote_index: RemoteIndex,
    allowlist_routes: Vec<Uri>,
-    remote_storage: Option<GenericRemoteStorage>,
 }

 impl State {
@@ -41,27 +38,17 @@ impl State {
        conf: &'static PageServerConf,
        auth: Option<Arc<JwtAuth>>,
        remote_index: RemoteIndex,
-    ) -> anyhow::Result<Self> {
+    ) -> Self {
        let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
            .iter()
            .map(|v| v.parse().unwrap())
            .collect::<Vec<_>>();
-        // Note that this remote storage is created separately from the main one in the sync_loop.
-        // It's fine since it's stateless and some code duplication saves us from bloating the code around with generics.
-        let remote_storage = conf
-            .remote_storage_config
-            .as_ref()
-            .map(|storage_config| GenericRemoteStorage::new(conf.workdir.clone(), storage_config))
-            .transpose()
-            .context("Failed to init generic remote storage")?;
-
-        Ok(Self {
+        Self {
            conf,
            auth,
            allowlist_routes,
            remote_index,
-            remote_storage,
-        })
+        }
    }
 }

@@ -135,8 +122,8 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
                    timeline_id,
                })
                .map(|remote_entry| RemoteTimelineInfo {
-                    remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
-                    awaits_download: remote_entry.awaits_download,
+                    remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
+                    awaits_download: remote_entry.get_awaits_download(),
                }),
        })
    }
@@ -166,47 +153,43 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
    let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);

-    let (local_timeline_info, remote_timeline_info) = async {
-        // any error here will render local timeline as None
-        // XXX .in_current_span does not attach messages in spawn_blocking future to current future's span
-        let local_timeline_info = tokio::task::spawn_blocking(move || {
-            let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
-            let local_timeline = {
-                repo.get_timeline(timeline_id)
-                    .as_ref()
-                    .map(|timeline| {
-                        LocalTimelineInfo::from_repo_timeline(
-                            tenant_id,
-                            timeline_id,
-                            timeline,
-                            include_non_incremental_logical_size,
-                        )
-                    })
-                    .transpose()?
-            };
-            Ok::<_, anyhow::Error>(local_timeline)
-        })
-        .await
-        .ok()
-        .and_then(|r| r.ok())
-        .flatten();
+    let span = info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id);

-        let remote_timeline_info = {
-            let remote_index_read = get_state(&request).remote_index.read().await;
-            remote_index_read
-                .timeline_entry(&ZTenantTimelineId {
-                    tenant_id,
-                    timeline_id,
-                })
-                .map(|remote_entry| RemoteTimelineInfo {
-                    remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
-                    awaits_download: remote_entry.awaits_download,
+    let (local_timeline_info, span) = tokio::task::spawn_blocking(move || {
+        let entered = span.entered();
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        let local_timeline = {
+            repo.get_timeline(timeline_id)
+                .as_ref()
+                .map(|timeline| {
+                    LocalTimelineInfo::from_repo_timeline(
+                        tenant_id,
+                        timeline_id,
+                        timeline,
+                        include_non_incremental_logical_size,
+                    )
                })
+                .transpose()?
        };
-        (local_timeline_info, remote_timeline_info)
-    }
-    .instrument(info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id))
-    .await;
+        Ok::<_, anyhow::Error>((local_timeline, entered.exit()))
+    })
+    .await
+    .map_err(ApiError::from_err)??;
+
+    let remote_timeline_info = {
+        let remote_index_read = get_state(&request).remote_index.read().await;
+        remote_index_read
+            .timeline_entry(&ZTenantTimelineId {
+                tenant_id,
+                timeline_id,
+            })
+            .map(|remote_entry| RemoteTimelineInfo {
+                remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
+                awaits_download: remote_entry.get_awaits_download(),
+            })
+    };
+
+    let _enter = span.entered();

    if local_timeline_info.is_none() && remote_timeline_info.is_none() {
        return Err(ApiError::NotFound(
@@ -224,129 +207,44 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    json_response(StatusCode::OK, timeline_info)
 }

-async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
-
-    let wal_receiver = tokio::task::spawn_blocking(move || {
-        let _enter =
-            info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id).entered();
-
-        crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
-    })
-    .await
-    .map_err(ApiError::from_err)?
-    .ok_or_else(|| {
-        ApiError::NotFound(format!(
-            "WAL receiver not found for tenant {} and timeline {}",
-            tenant_id, timeline_id
-        ))
-    })?;
-
-    json_response(StatusCode::OK, wal_receiver)
-}
-
 async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
-    info!(
-        "Handling timeline {} attach for tenant: {}",
-        timeline_id, tenant_id,
-    );
+    let span = info_span!("timeline_attach_handler", tenant = %tenant_id, timeline = %timeline_id);

-    tokio::task::spawn_blocking(move || {
-        if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
+    let span = tokio::task::spawn_blocking(move || {
+        let entered = span.entered();
+        if tenant_mgr::get_timeline_for_tenant_load(tenant_id, timeline_id).is_ok() {
            // TODO: maybe answer with 309 Not Modified here?
            anyhow::bail!("Timeline is already present locally")
        };
-        Ok(())
+        Ok(entered.exit())
    })
    .await
    .map_err(ApiError::from_err)??;

-    let sync_id = ZTenantTimelineId {
-        tenant_id,
-        timeline_id,
-    };
-    let state = get_state(&request);
-    let remote_index = &state.remote_index;
+    let mut remote_index_write = get_state(&request).remote_index.write().await;

-    let mut index_accessor = remote_index.write().await;
-    if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
-        if remote_timeline.awaits_download {
-            return Err(ApiError::Conflict(
-                "Timeline download is already in progress".to_string(),
-            ));
-        }
-
-        remote_timeline.awaits_download = true;
-        storage_sync::schedule_layer_download(tenant_id, timeline_id);
-        return json_response(StatusCode::ACCEPTED, ());
-    } else {
-        // no timeline in the index, release the lock to make the potentially lengthy download opetation
-        drop(index_accessor);
-    }
-
-    let new_timeline = match try_download_index_part_data(state, sync_id).await {
-        Ok(Some(mut new_timeline)) => {
-            tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
-                .await
-                .context("Failed to create new timeline directory")?;
-            new_timeline.awaits_download = true;
-            new_timeline
-        }
-        Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
-        Err(e) => {
-            error!("Failed to retrieve remote timeline data: {:?}", e);
-            return Err(ApiError::NotFound(
-                "Failed to retrieve remote timeline".to_string(),
-            ));
-        }
-    };
-
-    let mut index_accessor = remote_index.write().await;
-    match index_accessor.timeline_entry_mut(&sync_id) {
-        Some(remote_timeline) => {
-            if remote_timeline.awaits_download {
-                return Err(ApiError::Conflict(
-                    "Timeline download is already in progress".to_string(),
-                ));
-            }
-            remote_timeline.awaits_download = true;
-        }
-        None => index_accessor.add_timeline_entry(sync_id, new_timeline),
-    }
-    storage_sync::schedule_layer_download(tenant_id, timeline_id);
-    json_response(StatusCode::ACCEPTED, ())
-}
-
-async fn try_download_index_part_data(
-    state: &State,
-    sync_id: ZTenantTimelineId,
-) -> anyhow::Result<Option<RemoteTimeline>> {
-    let index_part = match state.remote_storage.as_ref() {
-        Some(GenericRemoteStorage::Local(local_storage)) => {
-            storage_sync::download_index_part(state.conf, local_storage, sync_id).await
-        }
-        Some(GenericRemoteStorage::S3(s3_storage)) => {
-            storage_sync::download_index_part(state.conf, s3_storage, sync_id).await
-        }
-        None => return Ok(None),
-    }
-    .with_context(|| format!("Failed to download index part for timeline {sync_id}"))?;
-
-    let timeline_path = state
-        .conf
-        .timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
-    RemoteTimeline::from_index_part(&timeline_path, index_part)
-        .map(Some)
-        .with_context(|| {
-            format!("Failed to convert index part into remote timeline for timeline {sync_id}")
+    let _enter = span.entered(); // entered guard cannot live across awaits (non Send)
+    let index_entry = remote_index_write
+        .timeline_entry_mut(&ZTenantTimelineId {
+            tenant_id,
+            timeline_id,
        })
+        .ok_or_else(|| ApiError::NotFound("Unknown remote timeline".to_string()))?;
+
+    if index_entry.get_awaits_download() {
+        return Err(ApiError::Conflict(
+            "Timeline download is already in progress".to_string(),
+        ));
+    }
+
+    index_entry.set_awaits_download(true);
+    schedule_timeline_download(tenant_id, timeline_id);
+
+    json_response(StatusCode::ACCEPTED, ())
 }

 async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -359,8 +257,8 @@ async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body
        let _enter =
            info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
                .entered();
-        let state = get_state(&request);
-        tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        repo.detach_timeline(timeline_id)
    })
    .await
    .map_err(ApiError::from_err)??;
@@ -377,7 +275,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
        crate::tenant_mgr::list_tenants()
    })
    .await
-    .map_err(ApiError::from_err)?;
+    .map_err(ApiError::from_err)??;

    json_response(StatusCode::OK, response_data)
 }
@@ -389,28 +287,6 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
    let request_data: TenantCreateRequest = json_request(&mut request).await?;
    let remote_index = get_state(&request).remote_index.clone();

-    let mut tenant_conf = TenantConfOpt::default();
-    if let Some(gc_period) = request_data.gc_period {
-        tenant_conf.gc_period =
-            Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
-    }
-    tenant_conf.gc_horizon = request_data.gc_horizon;
-    tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
-
-    if let Some(pitr_interval) = request_data.pitr_interval {
-        tenant_conf.pitr_interval =
-            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
-    }
-
-    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
-    tenant_conf.compaction_target_size = request_data.compaction_target_size;
-    tenant_conf.compaction_threshold = request_data.compaction_threshold;
-
-    if let Some(compaction_period) = request_data.compaction_period {
-        tenant_conf.compaction_period =
-            Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
-    }
-
    let target_tenant_id = request_data
        .new_tenant_id
        .map(ZTenantId::from)
@@ -418,9 +294,8 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo

    let new_tenant_id = tokio::task::spawn_blocking(move || {
        let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
-        let conf = get_config(&request);

-        tenant_mgr::create_tenant_repository(conf, tenant_conf, target_tenant_id, remote_index)
+        tenant_mgr::create_tenant_repository(get_config(&request), target_tenant_id, remote_index)
    })
    .await
    .map_err(ApiError::from_err)??;
@@ -431,45 +306,6 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
    })
 }

-async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let request_data: TenantConfigRequest = json_request(&mut request).await?;
-    let tenant_id = request_data.tenant_id;
-    // check for management permission
-    check_permission(&request, Some(tenant_id))?;
-
-    let mut tenant_conf: TenantConfOpt = Default::default();
-    if let Some(gc_period) = request_data.gc_period {
-        tenant_conf.gc_period =
-            Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
-    }
-    tenant_conf.gc_horizon = request_data.gc_horizon;
-    tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
-
-    if let Some(pitr_interval) = request_data.pitr_interval {
-        tenant_conf.pitr_interval =
-            Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
-    }
-
-    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
-    tenant_conf.compaction_target_size = request_data.compaction_target_size;
-    tenant_conf.compaction_threshold = request_data.compaction_threshold;
-
-    if let Some(compaction_period) = request_data.compaction_period {
-        tenant_conf.compaction_period =
-            Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
-    }
-
-    tokio::task::spawn_blocking(move || {
-        let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
-
-        tenant_mgr::update_tenant_config(tenant_conf, tenant_id)
-    })
-    .await
-    .map_err(ApiError::from_err)??;
-
-    json_response(StatusCode::OK, ())
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -481,7 +317,7 @@ pub fn make_router(
    conf: &'static PageServerConf,
    auth: Option<Arc<JwtAuth>>,
    remote_index: RemoteIndex,
-) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
+) -> RouterBuilder<hyper::Body, ApiError> {
    let spec = include_bytes!("openapi_spec.yml");
    let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
    if auth.is_some() {
@@ -495,24 +331,17 @@ pub fn make_router(
        }))
    }

-    Ok(router
-        .data(Arc::new(
-            State::new(conf, auth, remote_index).context("Failed to initialize router state")?,
-        ))
+    router
+        .data(Arc::new(State::new(conf, auth, remote_index)))
        .get("/v1/status", status_handler)
        .get("/v1/tenant", tenant_list_handler)
        .post("/v1/tenant", tenant_create_handler)
-        .put("/v1/tenant/config", tenant_config_handler)
        .get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
        .post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
        .get(
            "/v1/tenant/:tenant_id/timeline/:timeline_id",
            timeline_detail_handler,
        )
-        .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
-            wal_receiver_get_handler,
-        )
        .post(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/attach",
            timeline_attach_handler,
@@ -521,5 +350,5 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
            timeline_detach_handler,
        )
-        .any(handler_404))
+        .any(handler_404)
 }
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -20,7 +20,7 @@ use postgres_ffi::waldecoder::*;
 use postgres_ffi::xlog_utils::*;
 use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
 use postgres_ffi::{Oid, TransactionId};
-use utils::lsn::Lsn;
+use zenith_utils::lsn::Lsn;

 ///
 /// Import all relation data pages from local disk into the repository.
@@ -274,7 +274,7 @@ fn import_control_file<R: Repository>(

    // Extract the checkpoint record and import it separately.
    let pg_control = ControlFileData::decode(&buffer)?;
-    let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
+    let checkpoint_bytes = pg_control.checkPointCopy.encode();
    modification.put_checkpoint(checkpoint_bytes)?;

    Ok(pg_control)
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
--- a/pageserver/src/layered_repository/README.md
+++ b/pageserver/src/layered_repository/README.md
@@ -23,7 +23,6 @@ distribution depends on the workload: the updates could be totally random, or
 there could be a long stream of updates to a single relation when data is bulk
 loaded, for example, or something in between.

-```
 Cloud Storage                   Page Server                           Safekeeper
                        L1               L0             Memory            WAL

@@ -38,7 +37,6 @@ Cloud Storage                   Page Server                           Safekeeper
 +----+----+          +----+----+      |   |     |
 |EEEE|               |EEEE|EEEE|      +---+-----+
 +----+               +----+----+
-```

 In this illustration, WAL is received as a stream from the Safekeeper, from the
 right.  It is immediately captured by the page server and stored quickly in
@@ -49,7 +47,7 @@ the same page and relation close to each other.
 From the page server memory, whenever enough WAL has been accumulated, it is flushed
 to disk into a new L0 layer file, and the memory is released.

-When enough L0 files have been accumulated, they are merged together and sliced
+When enough L0 files have been accumulated, they are merged together and sliced
 per key-space, producing a new set of files where each file contains a more
 narrow key range, but larger LSN range.

@@ -123,7 +121,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
 a range of LSNs (or a single LSN, in case of image layers). You can think of it
 as a rectangle in the two-dimensional key-LSN space. The layer files for each
 timeline are stored in the timeline's subdirectory under
-`.zenith/tenants/<tenantid>/timelines`.
+`.zenith/tenants/<tenantid>/timelines`.

 There are two kind of layer files: images, and delta layers. An image file
 contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -132,11 +130,8 @@ range of LSN.

 image file:

-```
    000000067F000032BE0000400000000070B6-000000067F000032BE0000400000000080B6__00000000346BC568
              start key                          end key                           LSN
-```
-

 The first parts define the key range that the layer covers. See
 pgdatadir_mapping.rs for how the key space is used. The last part is the LSN.
@@ -145,10 +140,8 @@ delta file:

 Delta files are named similarly, but they cover a range of LSNs:

-```
    000000067F000032BE0000400000000020B6-000000067F000032BE0000400000000030B6__000000578C6B29-0000000057A50051
              start key                          end key                          start LSN     end LSN
-```

 A delta file contains all the key-values in the key-range that were updated in
 the LSN range. If a key has not been modified, there is no trace of it in the
@@ -158,9 +151,7 @@ delta layer.
 A delta layer file can cover a part of the overall key space, as in the previous
 example, or the whole key range like this:

-```
    000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000578C6B29-0000000057A50051
-```

 A file that covers the whole key range is called a L0 file (Level 0), while a
 file that covers only part of the key range is called a L1 file. The "level" of
@@ -177,9 +168,7 @@ version, and how branching and GC works is still valid.

 The full path of a delta file looks like this:

-```
    .zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
-```

 For simplicity, the examples below use a simplified notation for the
 paths.  The tenant ID is left out, the timeline ID is replaced with
@@ -188,10 +177,8 @@ with a human-readable table name. The LSNs are also shorter. For
 example, a base image file at LSN 100 and a delta file between 100-200
 for 'orders' table on 'main' branch is represented like this:

-```
    main/orders_100
    main/orders_100_200
-```


 # Creating layer files
@@ -201,14 +188,12 @@ branch called 'main' and two tables, 'orders' and 'customers'. The end
 of WAL is currently at LSN 250. In this starting situation, you would
 have these files on disk:

-```
 	main/orders_100
 	main/orders_100_200
 	main/orders_200
 	main/customers_100
 	main/customers_100_200
 	main/customers_200
-```

 In addition to those files, the recent changes between LSN 200 and the
 end of WAL at 250 are kept in memory. If the page server crashes, the
@@ -239,7 +224,6 @@ If the customers table is modified later, a new file is created for it
 at the next checkpoint. The new file will cover the "gap" from the
 last layer file, so the LSN ranges are always contiguous:

-```
 	main/orders_100
 	main/orders_100_200
 	main/orders_200
@@ -252,7 +236,6 @@ last layer file, so the LSN ranges are always contiguous:
 	main/customers_200
 	main/customers_200_500
 	main/customers_500
-```

 ## Reading page versions

@@ -276,18 +259,15 @@ involves replaying any WAL records applicable to the page between LSNs

 Imagine that a child branch is created at LSN 250:

-```
            @250
    ----main--+-------------------------->
               \
                +---child-------------->
-```


 Then, the 'orders' table is updated differently on the 'main' and
 'child' branches. You now have this situation on disk:

-```
    main/orders_100
    main/orders_100_200
    main/orders_200
@@ -302,7 +282,6 @@ Then, the 'orders' table is updated differently on the 'main' and
    child/orders_300
    child/orders_300_400
    child/orders_400
-```

 Because the 'customers' table hasn't been modified on the child
 branch, there is no file for it there. If you request a page for it on
@@ -315,7 +294,6 @@ is linear, and the request's LSN identifies unambiguously which file
 you need to look at. For example, the history for the 'orders' table
 on the 'main' branch consists of these files:

-```
    main/orders_100
    main/orders_100_200
    main/orders_200
@@ -323,12 +301,10 @@ on the 'main' branch consists of these files:
    main/orders_300
    main/orders_300_400
    main/orders_400
-```

 And from the 'child' branch's point of view, it consists of these
 files:

-```
    main/orders_100
    main/orders_100_200
    main/orders_200
@@ -337,7 +313,6 @@ files:
    child/orders_300
    child/orders_300_400
    child/orders_400
-```

 The branch metadata includes the point where the child branch was
 created, LSN 250. If a page request comes with LSN 275, we read the
@@ -370,7 +345,6 @@ Let's look at the single branch scenario again. Imagine that the end
 of the branch is LSN 525, so that the GC horizon is currently at
 525-150 = 375

-```
 	main/orders_100
 	main/orders_100_200
 	main/orders_200
@@ -383,13 +357,11 @@ of the branch is LSN 525, so that the GC horizon is currently at
 	main/customers_100
 	main/customers_100_200
 	main/customers_200
-```

 We can remove the following files because the end LSNs of those files are
 older than GC horizon 375, and there are more recent layer files for the
 table:

-```
 	main/orders_100       DELETE
 	main/orders_100_200   DELETE
 	main/orders_200       DELETE
@@ -402,9 +374,8 @@ table:
 	main/customers_100      DELETE
 	main/customers_100_200  DELETE
 	main/customers_200      KEEP, NO NEWER VERSION
-```

-'main/customers_200' is old enough, but it cannot be
+'main/customers_200' is old enough, but it cannot be
 removed because there is no newer layer file for the table.

 Things get slightly more complicated with multiple branches. All of
@@ -413,7 +384,6 @@ retain older shapshot files that are still needed by child branches.
 For example, if child branch is created at LSN 150, and the 'customers'
 table is updated on the branch, you would have these files:

-```
 	main/orders_100        KEEP, NEEDED BY child BRANCH
 	main/orders_100_200    KEEP, NEEDED BY child BRANCH
 	main/orders_200        DELETE
@@ -428,7 +398,6 @@ table is updated on the branch, you would have these files:
 	main/customers_200       KEEP, NO NEWER VERSION
 	child/customers_150_300  DELETE
 	child/customers_300      KEEP, NO NEWER VERSION
-```

 In this situation, 'main/orders_100' and 'main/orders_100_200' cannot
 be removed, even though they are older than the GC horizon, because
@@ -438,7 +407,6 @@ and 'main/orders_200_300' can still be removed.
 If 'orders' is modified later on the 'child' branch, we will create a
 new base image and delta file for it on the child:

-```
 	main/orders_100
 	main/orders_100_200

@@ -451,7 +419,6 @@ new base image and delta file for it on the child:
 	child/customers_300
 	child/orders_150_400
 	child/orders_400
-```

 After this, the 'main/orders_100' and 'main/orders_100_200' file could
 be removed. It is no longer needed by the child branch, because there
@@ -467,7 +434,6 @@ Describe GC and checkpoint interval settings.
 In principle, each relation can be checkpointed separately, i.e. the
 LSN ranges of the files don't need to line up. So this would be legal:

-```
 	main/orders_100
 	main/orders_100_200
 	main/orders_200
@@ -480,7 +446,6 @@ LSN ranges of the files don't need to line up. So this would be legal:
 	main/customers_250
 	main/customers_250_500
 	main/customers_500
-```

 However, the code currently always checkpoints all relations together.
 So that situation doesn't arise in practice.
@@ -503,13 +468,11 @@ does that.  It could be useful, however, as a transient state when
 garbage collecting around branch points, or explicit recovery
 points. For example, if we start with this:

-```
 	main/orders_100
 	main/orders_100_200
 	main/orders_200
 	main/orders_200_300
 	main/orders_300
-```

 And there is a branch or explicit recovery point at LSN 150, we could
 replace 'main/orders_100_200' with 'main/orders_150' to keep a
--- a/pageserver/src/layered_repository/blob_io.rs
+++ b/pageserver/src/layered_repository/blob_io.rs
@@ -1,20 +1,12 @@
 //!
 //! Functions for reading and writing variable-sized "blobs".
 //!
-//! Each blob begins with a 1- or 4-byte length field, followed by the
-//! actual data. If the length is smaller than 128 bytes, the length
-//! is written as a one byte. If it's larger than that, the length
-//! is written as a four-byte integer, in big-endian, with the high
-//! bit set. This way, we can detect whether it's 1- or 4-byte header
-//! by peeking at the first byte.
-//!
-//! len <  128: 0XXXXXXX
-//! len >= 128: 1XXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
+//! Each blob begins with a 4-byte length, followed by the actual data.
 //!
 use crate::layered_repository::block_io::{BlockCursor, BlockReader};
 use crate::page_cache::PAGE_SZ;
 use std::cmp::min;
-use std::io::{Error, ErrorKind};
+use std::io::Error;

 /// For reading
 pub trait BlobCursor {
@@ -48,30 +40,21 @@ where

        let mut buf = self.read_blk(blknum)?;

-        // peek at the first byte, to determine if it's a 1- or 4-byte length
-        let first_len_byte = buf[off];
-        let len: usize = if first_len_byte < 0x80 {
-            // 1-byte length header
-            off += 1;
-            first_len_byte as usize
+        // read length
+        let mut len_buf = [0u8; 4];
+        let thislen = PAGE_SZ - off;
+        if thislen < 4 {
+            // it is split across two pages
+            len_buf[..thislen].copy_from_slice(&buf[off..PAGE_SZ]);
+            blknum += 1;
+            buf = self.read_blk(blknum)?;
+            len_buf[thislen..].copy_from_slice(&buf[0..4 - thislen]);
+            off = 4 - thislen;
        } else {
-            // 4-byte length header
-            let mut len_buf = [0u8; 4];
-            let thislen = PAGE_SZ - off;
-            if thislen < 4 {
-                // it is split across two pages
-                len_buf[..thislen].copy_from_slice(&buf[off..PAGE_SZ]);
-                blknum += 1;
-                buf = self.read_blk(blknum)?;
-                len_buf[thislen..].copy_from_slice(&buf[0..4 - thislen]);
-                off = 4 - thislen;
-            } else {
-                len_buf.copy_from_slice(&buf[off..off + 4]);
-                off += 4;
-            }
-            len_buf[0] &= 0x7f;
-            u32::from_be_bytes(len_buf) as usize
-        };
+            len_buf.copy_from_slice(&buf[off..off + 4]);
+            off += 4;
+        }
+        let len = u32::from_ne_bytes(len_buf) as usize;

        dstbuf.clear();

@@ -147,27 +130,10 @@ where
 {
    fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, Error> {
        let offset = self.offset;
-
-        if srcbuf.len() < 128 {
-            // Short blob. Write a 1-byte length header
-            let len_buf = srcbuf.len() as u8;
-            self.inner.write_all(&[len_buf])?;
-            self.offset += 1;
-        } else {
-            // Write a 4-byte length header
-            if srcbuf.len() > 0x7fff_ffff {
-                return Err(Error::new(
-                    ErrorKind::Other,
-                    format!("blob too large ({} bytes)", srcbuf.len()),
-                ));
-            }
-            let mut len_buf = ((srcbuf.len()) as u32).to_be_bytes();
-            len_buf[0] |= 0x80;
-            self.inner.write_all(&len_buf)?;
-            self.offset += 4;
-        }
+        self.inner
+            .write_all(&((srcbuf.len()) as u32).to_ne_bytes())?;
        self.inner.write_all(srcbuf)?;
-        self.offset += srcbuf.len() as u64;
+        self.offset += 4 + srcbuf.len() as u64;
        Ok(offset)
    }
 }
--- a/pageserver/src/layered_repository/block_io.rs
+++ b/pageserver/src/layered_repository/block_io.rs
@@ -198,6 +198,7 @@ impl BlockWriter for BlockBuf {
        assert!(buf.len() == PAGE_SZ);
        let blknum = self.blocks.len();
        self.blocks.push(buf);
+        tracing::info!("buffered block {}", blknum);
        Ok(blknum as u32)
    }
 }
--- a/pageserver/src/layered_repository/delta_layer.rs
+++ b/pageserver/src/layered_repository/delta_layer.rs
@@ -35,10 +35,14 @@ use crate::page_cache::{PageReadGuard, PAGE_SZ};
 use crate::repository::{Key, Value, KEY_SIZE};
 use crate::virtual_file::VirtualFile;
 use crate::walrecord;
+use crate::{ZTenantId, ZTimelineId};
 use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
 use anyhow::{bail, ensure, Context, Result};
-use rand::{distributions::Alphanumeric, Rng};
+use log::*;
 use serde::{Deserialize, Serialize};
+// avoid binding to Write (conflicts with std::io::Write)
+// while being able to use std::fmt::Write's methods
+use std::fmt::Write as _;
 use std::fs;
 use std::io::{BufWriter, Write};
 use std::io::{Seek, SeekFrom};
@@ -46,13 +50,9 @@ use std::ops::Range;
 use std::os::unix::fs::FileExt;
 use std::path::{Path, PathBuf};
 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
-use tracing::*;

-use utils::{
-    bin_ser::BeSer,
-    lsn::Lsn,
-    zid::{ZTenantId, ZTimelineId},
-};
+use zenith_utils::bin_ser::BeSer;
+use zenith_utils::lsn::Lsn;

 ///
 /// Header stored in the beginning of the file
@@ -216,17 +216,12 @@ impl Layer for DeltaLayer {
        PathBuf::from(self.layer_name().to_string())
    }

-    fn local_path(&self) -> Option<PathBuf> {
-        Some(self.path())
-    }
-
    fn get_value_reconstruct_data(
        &self,
        key: Key,
        lsn_range: Range<Lsn>,
        reconstruct_state: &mut ValueReconstructState,
    ) -> anyhow::Result<ValueReconstructResult> {
-        ensure!(lsn_range.start >= self.lsn_range.start);
        let mut need_image = true;

        ensure!(self.key_range.contains(&key));
@@ -252,9 +247,6 @@ impl Layer for DeltaLayer {
                    return false;
                }
                let entry_lsn = DeltaKey::extract_lsn_from_buf(key);
-                if entry_lsn < lsn_range.start {
-                    return false;
-                }
                offsets.push((entry_lsn, blob_ref.pos()));

                !blob_ref.will_init()
@@ -263,18 +255,8 @@ impl Layer for DeltaLayer {
            // Ok, 'offsets' now contains the offsets of all the entries we need to read
            let mut cursor = file.block_cursor();
            for (entry_lsn, pos) in offsets {
-                let buf = cursor.read_blob(pos).with_context(|| {
-                    format!(
-                        "Failed to read blob from virtual file {}",
-                        file.file.path.display()
-                    )
-                })?;
-                let val = Value::des(&buf).with_context(|| {
-                    format!(
-                        "Failed to deserialize file blob from virtual file {}",
-                        file.file.path.display()
-                    )
-                })?;
+                let buf = cursor.read_blob(pos)?;
+                let val = Value::des(&buf)?;
                match val {
                    Value::Image(img) => {
                        reconstruct_state.img = Some((entry_lsn, img));
@@ -305,10 +287,7 @@ impl Layer for DeltaLayer {
    }

    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = anyhow::Result<(Key, Lsn, Value)>> + 'a> {
-        let inner = match self.load() {
-            Ok(inner) => inner,
-            Err(e) => panic!("Failed to load a delta layer: {e:?}"),
-        };
+        let inner = self.load().unwrap();

        match DeltaValueIter::new(inner) {
            Ok(iter) => Box::new(iter),
@@ -363,28 +342,6 @@ impl Layer for DeltaLayer {
        tree_reader.dump()?;

        let mut cursor = file.block_cursor();
-
-        // A subroutine to dump a single blob
-        let mut dump_blob = |blob_ref: BlobRef| -> anyhow::Result<String> {
-            let buf = cursor.read_blob(blob_ref.pos())?;
-            let val = Value::des(&buf)?;
-            let desc = match val {
-                Value::Image(img) => {
-                    format!(" img {} bytes", img.len())
-                }
-                Value::WalRecord(rec) => {
-                    let wal_desc = walrecord::describe_wal_record(&rec)?;
-                    format!(
-                        " rec {} bytes will_init: {} {}",
-                        buf.len(),
-                        rec.will_init(),
-                        wal_desc
-                    )
-                }
-            };
-            Ok(desc)
-        };
-
        tree_reader.visit(
            &[0u8; DELTA_KEY_SIZE],
            VisitDirection::Forwards,
@@ -393,10 +350,34 @@ impl Layer for DeltaLayer {
                let key = DeltaKey::extract_key_from_buf(delta_key);
                let lsn = DeltaKey::extract_lsn_from_buf(delta_key);

-                let desc = match dump_blob(blob_ref) {
-                    Ok(desc) => desc,
-                    Err(err) => format!("ERROR: {}", err),
-                };
+                let mut desc = String::new();
+                match cursor.read_blob(blob_ref.pos()) {
+                    Ok(buf) => {
+                        let val = Value::des(&buf);
+                        match val {
+                            Ok(Value::Image(img)) => {
+                                write!(&mut desc, " img {} bytes", img.len()).unwrap();
+                            }
+                            Ok(Value::WalRecord(rec)) => {
+                                let wal_desc = walrecord::describe_wal_record(&rec);
+                                write!(
+                                    &mut desc,
+                                    " rec {} bytes will_init: {} {}",
+                                    buf.len(),
+                                    rec.will_init(),
+                                    wal_desc
+                                )
+                                .unwrap();
+                            }
+                            Err(err) => {
+                                write!(&mut desc, " DESERIALIZATION ERROR: {}", err).unwrap();
+                            }
+                        }
+                    }
+                    Err(err) => {
+                        write!(&mut desc, " READ ERROR: {}", err).unwrap();
+                    }
+                }
                println!("  key {} at {}: {}", key, lsn, desc);
                true
            },
@@ -421,28 +402,6 @@ impl DeltaLayer {
        }
    }

-    fn temp_path_for(
-        conf: &PageServerConf,
-        timelineid: ZTimelineId,
-        tenantid: ZTenantId,
-        key_start: Key,
-        lsn_range: &Range<Lsn>,
-    ) -> PathBuf {
-        let rand_string: String = rand::thread_rng()
-            .sample_iter(&Alphanumeric)
-            .take(8)
-            .map(char::from)
-            .collect();
-
-        conf.timeline_path(&timelineid, &tenantid).join(format!(
-            "{}-XXX__{:016X}-{:016X}.{}.temp",
-            key_start,
-            u64::from(lsn_range.start),
-            u64::from(lsn_range.end),
-            rand_string
-        ))
-    }
-
    ///
    /// Open the underlying file and read the metadata into memory, if it's
    /// not loaded already.
@@ -460,9 +419,7 @@ impl DeltaLayer {
            drop(inner);
            let inner = self.inner.write().unwrap();
            if !inner.loaded {
-                self.load_inner(inner).with_context(|| {
-                    format!("Failed to load delta layer {}", self.path().display())
-                })?;
+                self.load_inner(inner)?;
            } else {
                // Another thread loaded it while we were not holding the lock.
            }
@@ -630,8 +587,12 @@ impl DeltaLayerWriter {
        //
        // Note: This overwrites any existing file. There shouldn't be any.
        // FIXME: throw an error instead?
-        let path = DeltaLayer::temp_path_for(conf, timelineid, tenantid, key_start, &lsn_range);
-
+        let path = conf.timeline_path(&timelineid, &tenantid).join(format!(
+            "{}-XXX__{:016X}-{:016X}.temp",
+            key_start,
+            u64::from(lsn_range.start),
+            u64::from(lsn_range.end)
+        ));
        let mut file = VirtualFile::create(&path)?;
        // make room for the header block
        file.seek(SeekFrom::Start(PAGE_SZ as u64))?;
@@ -724,8 +685,6 @@ impl DeltaLayerWriter {
            }),
        };

-        // fsync the file
-        file.sync_all()?;
        // Rename the file to its final name
        //
        // Note: This overwrites any existing file. There shouldn't be any.
--- a/pageserver/src/layered_repository/disk_btree.rs
+++ b/pageserver/src/layered_repository/disk_btree.rs
@@ -11,6 +11,7 @@
 //! - page-oriented
 //!
 //! TODO:
+//! - better errors (e.g. with thiserror?)
 //! - maybe something like an Adaptive Radix Tree would be more efficient?
 //! - the values stored by image and delta layers are offsets into the file,
 //!   and they are in monotonically increasing order. Prefix compression would
@@ -18,12 +19,11 @@
 //! - An Iterator interface would be more convenient for the callers than the
 //!   'visit' function
 //!
+use anyhow;
 use byteorder::{ReadBytesExt, BE};
 use bytes::{BufMut, Bytes, BytesMut};
 use hex;
-use std::{cmp::Ordering, io, result};
-use thiserror::Error;
-use tracing::error;
+use std::cmp::Ordering;

 use crate::layered_repository::block_io::{BlockReader, BlockWriter};

@@ -86,23 +86,6 @@ impl Value {
    }
 }

-#[derive(Error, Debug)]
-pub enum DiskBtreeError {
-    #[error("Attempt to append a value that is too large {0} > {}", MAX_VALUE)]
-    AppendOverflow(u64),
-
-    #[error("Unsorted input: key {key:?} is <= last_key {last_key:?}")]
-    UnsortedInput { key: Box<[u8]>, last_key: Box<[u8]> },
-
-    #[error("Could not push to new leaf node")]
-    FailedToPushToNewLeafNode,
-
-    #[error("IoError: {0}")]
-    Io(#[from] io::Error),
-}
-
-pub type Result<T> = result::Result<T, DiskBtreeError>;
-
 /// This is the on-disk representation.
 struct OnDiskNode<'a, const L: usize> {
    // Fixed-width fields
@@ -123,12 +106,12 @@ impl<'a, const L: usize> OnDiskNode<'a, L> {
    ///
    /// Interpret a PAGE_SZ page as a node.
    ///
-    fn deparse(buf: &[u8]) -> Result<OnDiskNode<L>> {
+    fn deparse(buf: &[u8]) -> OnDiskNode<L> {
        let mut cursor = std::io::Cursor::new(buf);
-        let num_children = cursor.read_u16::<BE>()?;
-        let level = cursor.read_u8()?;
-        let prefix_len = cursor.read_u8()?;
-        let suffix_len = cursor.read_u8()?;
+        let num_children = cursor.read_u16::<BE>().unwrap();
+        let level = cursor.read_u8().unwrap();
+        let prefix_len = cursor.read_u8().unwrap();
+        let suffix_len = cursor.read_u8().unwrap();

        let mut off = cursor.position();
        let prefix_off = off as usize;
@@ -146,7 +129,7 @@ impl<'a, const L: usize> OnDiskNode<'a, L> {
        let keys = &buf[keys_off..keys_off + keys_len];
        let values = &buf[values_off..values_off + values_len];

-        Ok(OnDiskNode {
+        OnDiskNode {
            num_children,
            level,
            prefix_len,
@@ -154,7 +137,7 @@ impl<'a, const L: usize> OnDiskNode<'a, L> {
            prefix,
            keys,
            values,
-        })
+        }
    }

    ///
@@ -166,11 +149,7 @@ impl<'a, const L: usize> OnDiskNode<'a, L> {
        Value::from_slice(value_slice)
    }

-    fn binary_search(
-        &self,
-        search_key: &[u8; L],
-        keybuf: &mut [u8],
-    ) -> result::Result<usize, usize> {
+    fn binary_search(&self, search_key: &[u8; L], keybuf: &mut [u8]) -> Result<usize, usize> {
        let mut size = self.num_children as usize;
        let mut low = 0;
        let mut high = size;
@@ -230,7 +209,7 @@ where
    ///
    /// Read the value for given key. Returns the value, or None if it doesn't exist.
    ///
-    pub fn get(&self, search_key: &[u8; L]) -> Result<Option<u64>> {
+    pub fn get(&self, search_key: &[u8; L]) -> anyhow::Result<Option<u64>> {
        let mut result: Option<u64> = None;
        self.visit(search_key, VisitDirection::Forwards, |key, value| {
            if key == search_key {
@@ -251,7 +230,7 @@ where
        search_key: &[u8; L],
        dir: VisitDirection,
        mut visitor: V,
-    ) -> Result<bool>
+    ) -> anyhow::Result<bool>
    where
        V: FnMut(&[u8], u64) -> bool,
    {
@@ -264,7 +243,7 @@ where
        search_key: &[u8; L],
        dir: VisitDirection,
        visitor: &mut V,
-    ) -> Result<bool>
+    ) -> anyhow::Result<bool>
    where
        V: FnMut(&[u8], u64) -> bool,
    {
@@ -281,11 +260,11 @@ where
        search_key: &[u8; L],
        dir: VisitDirection,
        visitor: &mut V,
-    ) -> Result<bool>
+    ) -> anyhow::Result<bool>
    where
        V: FnMut(&[u8], u64) -> bool,
    {
-        let node = OnDiskNode::deparse(node_buf)?;
+        let node = OnDiskNode::deparse(node_buf);
        let prefix_len = node.prefix_len as usize;
        let suffix_len = node.suffix_len as usize;

@@ -390,15 +369,15 @@ where
    }

    #[allow(dead_code)]
-    pub fn dump(&self) -> Result<()> {
+    pub fn dump(&self) -> anyhow::Result<()> {
        self.dump_recurse(self.root_blk, &[], 0)
    }

-    fn dump_recurse(&self, blknum: u32, path: &[u8], depth: usize) -> Result<()> {
+    fn dump_recurse(&self, blknum: u32, path: &[u8], depth: usize) -> anyhow::Result<()> {
        let blk = self.reader.read_blk(self.start_blk + blknum)?;
        let buf: &[u8] = blk.as_ref();

-        let node = OnDiskNode::<L>::deparse(buf)?;
+        let node = OnDiskNode::<L>::deparse(buf);

        print!("{:indent$}", "", indent = depth * 2);
        println!(
@@ -444,13 +423,6 @@ where
    ///
    /// stack[0] is the current root page, stack.last() is the leaf.
    ///
-    /// We maintain the length of the stack to be always greater than zero.
-    /// Two exceptions are:
-    /// 1. `Self::flush_node`. The method will push the new node if it extracted the last one.
-    ///   So because other methods cannot see the intermediate state invariant still holds.
-    /// 2. `Self::finish`. It consumes self and does not return it back,
-    ///  which means that this is where the structure is destroyed.
-    ///  Thus stack of zero length cannot be observed by other methods.
    stack: Vec<BuildNode<L>>,

    /// Last key that was appended to the tree. Used to sanity check that append
@@ -470,29 +442,19 @@ where
        }
    }

-    pub fn append(&mut self, key: &[u8; L], value: u64) -> Result<()> {
-        if value > MAX_VALUE {
-            return Err(DiskBtreeError::AppendOverflow(value));
-        }
+    pub fn append(&mut self, key: &[u8; L], value: u64) -> Result<(), anyhow::Error> {
+        assert!(value <= MAX_VALUE);
        if let Some(last_key) = &self.last_key {
-            if key <= last_key {
-                return Err(DiskBtreeError::UnsortedInput {
-                    key: key.as_slice().into(),
-                    last_key: last_key.as_slice().into(),
-                });
-            }
+            assert!(key > last_key, "unsorted input");
        }
        self.last_key = Some(*key);

-        self.append_internal(key, Value::from_u64(value))
+        Ok(self.append_internal(key, Value::from_u64(value))?)
    }

-    fn append_internal(&mut self, key: &[u8; L], value: Value) -> Result<()> {
+    fn append_internal(&mut self, key: &[u8; L], value: Value) -> Result<(), std::io::Error> {
        // Try to append to the current leaf buffer
-        let last = self
-            .stack
-            .last_mut()
-            .expect("should always have at least one item");
+        let last = self.stack.last_mut().unwrap();
        let level = last.level;
        if last.push(key, value) {
            return Ok(());
@@ -514,33 +476,26 @@ where
        // key to it.
        let mut last = BuildNode::new(level);
        if !last.push(key, value) {
-            return Err(DiskBtreeError::FailedToPushToNewLeafNode);
+            panic!("could not push to new leaf node");
        }
-
        self.stack.push(last);

        Ok(())
    }

-    /// Flush the bottommost node in the stack to disk. Appends a downlink to its parent,
-    /// and recursively flushes the parent too, if it becomes full. If the root page becomes full,
-    /// creates a new root page, increasing the height of the tree.
-    fn flush_node(&mut self) -> Result<()> {
-        // Get the current bottommost node in the stack and flush it to disk.
-        let last = self
-            .stack
-            .pop()
-            .expect("should always have at least one item");
+    fn flush_node(&mut self) -> Result<(), std::io::Error> {
+        let last = self.stack.pop().unwrap();
        let buf = last.pack();
        let downlink_key = last.first_key();
        let downlink_ptr = self.writer.write_blk(buf)?;

-        // Append the downlink to the parent. If there is no parent, ie. this was the root page,
-        // create a new root page, increasing the height of the tree.
+        // Append the downlink to the parent
        if self.stack.is_empty() {
            self.stack.push(BuildNode::new(last.level + 1));
        }
-        self.append_internal(&downlink_key, Value::from_blknum(downlink_ptr))
+        self.append_internal(&downlink_key, Value::from_blknum(downlink_ptr))?;
+
+        Ok(())
    }

    ///
@@ -550,16 +505,13 @@ where
    /// (In the image and delta layers, it is stored in the beginning of the file,
    /// in the summary header)
    ///
-    pub fn finish(mut self) -> Result<(u32, W)> {
+    pub fn finish(mut self) -> Result<(u32, W), std::io::Error> {
        // flush all levels, except the root.
        while self.stack.len() > 1 {
            self.flush_node()?;
        }

-        let root = self
-            .stack
-            .first()
-            .expect("by the check above we left one item there");
+        let root = self.stack.first().unwrap();
        let buf = root.pack();
        let root_blknum = self.writer.write_blk(buf)?;

@@ -740,14 +692,14 @@ mod tests {
    impl BlockReader for TestDisk {
        type BlockLease = std::rc::Rc<[u8; PAGE_SZ]>;

-        fn read_blk(&self, blknum: u32) -> io::Result<Self::BlockLease> {
+        fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, std::io::Error> {
            let mut buf = [0u8; PAGE_SZ];
            buf.copy_from_slice(&self.blocks[blknum as usize]);
            Ok(std::rc::Rc::new(buf))
        }
    }
    impl BlockWriter for &mut TestDisk {
-        fn write_blk(&mut self, buf: Bytes) -> io::Result<u32> {
+        fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error> {
            let blknum = self.blocks.len();
            self.blocks.push(buf);
            Ok(blknum as u32)
@@ -755,7 +707,7 @@ mod tests {
    }

    #[test]
-    fn basic() -> Result<()> {
+    fn basic() -> anyhow::Result<()> {
        let mut disk = TestDisk::new();
        let mut writer = DiskBtreeBuilder::<_, 6>::new(&mut disk);

@@ -836,7 +788,7 @@ mod tests {
    }

    #[test]
-    fn lots_of_keys() -> Result<()> {
+    fn lots_of_keys() -> anyhow::Result<()> {
        let mut disk = TestDisk::new();
        let mut writer = DiskBtreeBuilder::<_, 8>::new(&mut disk);

@@ -930,7 +882,7 @@ mod tests {
    }

    #[test]
-    fn random_data() -> Result<()> {
+    fn random_data() -> anyhow::Result<()> {
        // Generate random keys with exponential distribution, to
        // exercise the prefix compression
        const NUM_KEYS: usize = 100000;
@@ -975,27 +927,21 @@ mod tests {
    }

    #[test]
+    #[should_panic(expected = "unsorted input")]
    fn unsorted_input() {
        let mut disk = TestDisk::new();
        let mut writer = DiskBtreeBuilder::<_, 2>::new(&mut disk);

        let _ = writer.append(b"ba", 1);
        let _ = writer.append(b"bb", 2);
-        let err = writer.append(b"aa", 3).expect_err("should've failed");
-        match err {
-            DiskBtreeError::UnsortedInput { key, last_key } => {
-                assert_eq!(key.as_ref(), b"aa".as_slice());
-                assert_eq!(last_key.as_ref(), b"bb".as_slice());
-            }
-            _ => panic!("unexpected error variant, expected DiskBtreeError::UnsortedInput"),
-        }
+        let _ = writer.append(b"aa", 3);
    }

    ///
    /// This test contains a particular data set, see disk_btree_test_data.rs
    ///
    #[test]
-    fn particular_data() -> Result<()> {
+    fn particular_data() -> anyhow::Result<()> {
        // Build a tree from it
        let mut disk = TestDisk::new();
        let mut writer = DiskBtreeBuilder::<_, 26>::new(&mut disk);
--- a/pageserver/src/layered_repository/ephemeral_file.rs
+++ b/pageserver/src/layered_repository/ephemeral_file.rs
@@ -16,8 +16,8 @@ use std::io::{Error, ErrorKind};
 use std::ops::DerefMut;
 use std::path::PathBuf;
 use std::sync::{Arc, RwLock};
-use tracing::*;
-use utils::zid::{ZTenantId, ZTimelineId};
+use zenith_utils::zid::ZTenantId;
+use zenith_utils::zid::ZTimelineId;

 use std::os::unix::fs::FileExt;

@@ -199,24 +199,18 @@ impl BlobWriter for EphemeralFile {
        let mut buf = self.get_buf_for_write(blknum)?;

        // Write the length field
-        if srcbuf.len() < 0x80 {
-            buf[off] = srcbuf.len() as u8;
-            off += 1;
+        let len_buf = u32::to_ne_bytes(srcbuf.len() as u32);
+        let thislen = PAGE_SZ - off;
+        if thislen < 4 {
+            // it needs to be split across pages
+            buf[off..(off + thislen)].copy_from_slice(&len_buf[..thislen]);
+            blknum += 1;
+            buf = self.get_buf_for_write(blknum)?;
+            buf[0..4 - thislen].copy_from_slice(&len_buf[thislen..]);
+            off = 4 - thislen;
        } else {
-            let mut len_buf = u32::to_be_bytes(srcbuf.len() as u32);
-            len_buf[0] |= 0x80;
-            let thislen = PAGE_SZ - off;
-            if thislen < 4 {
-                // it needs to be split across pages
-                buf[off..(off + thislen)].copy_from_slice(&len_buf[..thislen]);
-                blknum += 1;
-                buf = self.get_buf_for_write(blknum)?;
-                buf[0..4 - thislen].copy_from_slice(&len_buf[thislen..]);
-                off = 4 - thislen;
-            } else {
-                buf[off..off + 4].copy_from_slice(&len_buf);
-                off += 4;
-            }
+            buf[off..off + 4].copy_from_slice(&len_buf);
+            off += 4;
        }

        // Write the payload
@@ -235,13 +229,7 @@ impl BlobWriter for EphemeralFile {
            buf_remain = &buf_remain[this_blk_len..];
        }
        drop(buf);
-
-        if srcbuf.len() < 0x80 {
-            self.size += 1;
-        } else {
-            self.size += 4;
-        }
-        self.size += srcbuf.len() as u64;
+        self.size += 4 + srcbuf.len() as u64;

        Ok(pos)
    }
@@ -256,31 +244,16 @@ impl Drop for EphemeralFile {
        // remove entry from the hash map
        EPHEMERAL_FILES.write().unwrap().files.remove(&self.file_id);

-        // unlink the file
-        let res = std::fs::remove_file(&self.file.path);
-        if let Err(e) = res {
-            warn!(
-                "could not remove ephemeral file '{}': {}",
-                self.file.path.display(),
-                e
-            );
-        }
+        // unlink file
+        // FIXME: print error
+        let _ = std::fs::remove_file(&self.file.path);
    }
 }

 pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Error> {
    if let Some(file) = EPHEMERAL_FILES.read().unwrap().files.get(&file_id) {
-        match file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64) {
-            Ok(_) => Ok(()),
-            Err(e) => Err(std::io::Error::new(
-                ErrorKind::Other,
-                format!(
-                    "failed to write back to ephemeral file at {} error: {}",
-                    file.path.display(),
-                    e
-                ),
-            )),
-        }
+        file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64)?;
+        Ok(())
    } else {
        Err(std::io::Error::new(
            ErrorKind::Other,
@@ -399,12 +372,6 @@ mod tests {
            let pos = file.write_blob(&data)?;
            blobs.push((pos, data));
        }
-        // also test with a large blobs
-        for i in 0..100 {
-            let data = format!("blob{}", i).as_bytes().repeat(100);
-            let pos = file.write_blob(&data)?;
-            blobs.push((pos, data));
-        }

        let mut cursor = BlockCursor::new(&file);
        for (pos, expected) in blobs {
--- a/pageserver/src/layered_repository/filename.rs
+++ b/pageserver/src/layered_repository/filename.rs
@@ -8,7 +8,7 @@ use std::fmt;
 use std::ops::Range;
 use std::path::PathBuf;

-use utils::lsn::Lsn;
+use zenith_utils::lsn::Lsn;

 // Note: LayeredTimeline::load_layer_map() relies on this sort order
 #[derive(Debug, PartialEq, Eq, Clone)]
--- a/pageserver/src/layered_repository/image_layer.rs
+++ b/pageserver/src/layered_repository/image_layer.rs
@@ -30,11 +30,12 @@ use crate::layered_repository::storage_layer::{
 use crate::page_cache::PAGE_SZ;
 use crate::repository::{Key, Value, KEY_SIZE};
 use crate::virtual_file::VirtualFile;
+use crate::{ZTenantId, ZTimelineId};
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION};
 use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
 use hex;
-use rand::{distributions::Alphanumeric, Rng};
+use log::*;
 use serde::{Deserialize, Serialize};
 use std::fs;
 use std::io::Write;
@@ -42,13 +43,9 @@ use std::io::{Seek, SeekFrom};
 use std::ops::Range;
 use std::path::{Path, PathBuf};
 use std::sync::{RwLock, RwLockReadGuard};
-use tracing::*;

-use utils::{
-    bin_ser::BeSer,
-    lsn::Lsn,
-    zid::{ZTenantId, ZTimelineId},
-};
+use zenith_utils::bin_ser::BeSer;
+use zenith_utils::lsn::Lsn;

 ///
 /// Header stored in the beginning of the file
@@ -126,10 +123,6 @@ impl Layer for ImageLayer {
        PathBuf::from(self.layer_name().to_string())
    }

-    fn local_path(&self) -> Option<PathBuf> {
-        Some(self.path())
-    }
-
    fn get_tenant_id(&self) -> ZTenantId {
        self.tenantid
    }
@@ -155,7 +148,6 @@ impl Layer for ImageLayer {
        reconstruct_state: &mut ValueReconstructState,
    ) -> anyhow::Result<ValueReconstructResult> {
        assert!(self.key_range.contains(&key));
-        assert!(lsn_range.start >= self.lsn);
        assert!(lsn_range.end >= self.lsn);

        let inner = self.load()?;
@@ -242,22 +234,6 @@ impl ImageLayer {
        }
    }

-    fn temp_path_for(
-        conf: &PageServerConf,
-        timelineid: ZTimelineId,
-        tenantid: ZTenantId,
-        fname: &ImageFileName,
-    ) -> PathBuf {
-        let rand_string: String = rand::thread_rng()
-            .sample_iter(&Alphanumeric)
-            .take(8)
-            .map(char::from)
-            .collect();
-
-        conf.timeline_path(&timelineid, &tenantid)
-            .join(format!("{}.{}.temp", fname, rand_string))
-    }
-
    ///
    /// Open the underlying file and read the metadata into memory, if it's
    /// not loaded already.
@@ -275,9 +251,7 @@ impl ImageLayer {
            drop(inner);
            let mut inner = self.inner.write().unwrap();
            if !inner.loaded {
-                self.load_inner(&mut inner).with_context(|| {
-                    format!("Failed to load image layer {}", self.path().display())
-                })?
+                self.load_inner(&mut inner)?;
            } else {
                // Another thread loaded it while we were not holding the lock.
            }
@@ -415,7 +389,7 @@ impl ImageLayer {
 ///
 pub struct ImageLayerWriter {
    conf: &'static PageServerConf,
-    path: PathBuf,
+    _path: PathBuf,
    timelineid: ZTimelineId,
    tenantid: ZTenantId,
    key_range: Range<Key>,
@@ -433,10 +407,12 @@ impl ImageLayerWriter {
        key_range: &Range<Key>,
        lsn: Lsn,
    ) -> anyhow::Result<ImageLayerWriter> {
-        // Create the file initially with a temporary filename.
-        // We'll atomically rename it to the final name when we're done.
-        let path = ImageLayer::temp_path_for(
-            conf,
+        // Create the file
+        //
+        // Note: This overwrites any existing file. There shouldn't be any.
+        // FIXME: throw an error instead?
+        let path = ImageLayer::path_for(
+            &PathOrConf::Conf(conf),
            timelineid,
            tenantid,
            &ImageFileName {
@@ -456,7 +432,7 @@ impl ImageLayerWriter {

        let writer = ImageLayerWriter {
            conf,
-            path,
+            _path: path,
            timelineid,
            tenantid,
            key_range: key_range.clone(),
@@ -527,25 +503,6 @@ impl ImageLayerWriter {
                index_root_blk,
            }),
        };
-
-        // fsync the file
-        file.sync_all()?;
-
-        // Rename the file to its final name
-        //
-        // Note: This overwrites any existing file. There shouldn't be any.
-        // FIXME: throw an error instead?
-        let final_path = ImageLayer::path_for(
-            &PathOrConf::Conf(self.conf),
-            self.timelineid,
-            self.tenantid,
-            &ImageFileName {
-                key_range: self.key_range.clone(),
-                lsn: self.lsn,
-            },
-        );
-        std::fs::rename(self.path, &final_path)?;
-
        trace!("created image layer {}", layer.path().display());

        Ok(layer)
--- a/pageserver/src/layered_repository/inmemory_layer.rs
+++ b/pageserver/src/layered_repository/inmemory_layer.rs
@@ -14,21 +14,19 @@ use crate::layered_repository::storage_layer::{
 };
 use crate::repository::{Key, Value};
 use crate::walrecord;
+use crate::{ZTenantId, ZTimelineId};
 use anyhow::{bail, ensure, Result};
+use log::*;
 use std::collections::HashMap;
-use tracing::*;
-use utils::{
-    bin_ser::BeSer,
-    lsn::Lsn,
-    vec_map::VecMap,
-    zid::{ZTenantId, ZTimelineId},
-};
 // avoid binding to Write (conflicts with std::io::Write)
 // while being able to use std::fmt::Write's methods
 use std::fmt::Write as _;
 use std::ops::Range;
 use std::path::PathBuf;
 use std::sync::RwLock;
+use zenith_utils::bin_ser::BeSer;
+use zenith_utils::lsn::Lsn;
+use zenith_utils::vec_map::VecMap;

 pub struct InMemoryLayer {
    conf: &'static PageServerConf,
@@ -85,10 +83,6 @@ impl Layer for InMemoryLayer {
        ))
    }

-    fn local_path(&self) -> Option<PathBuf> {
-        None
-    }
-
    fn get_tenant_id(&self) -> ZTenantId {
        self.tenantid
    }
@@ -119,7 +113,7 @@ impl Layer for InMemoryLayer {
        lsn_range: Range<Lsn>,
        reconstruct_state: &mut ValueReconstructState,
    ) -> anyhow::Result<ValueReconstructResult> {
-        ensure!(lsn_range.start >= self.start_lsn);
+        ensure!(lsn_range.start <= self.start_lsn);
        let mut need_image = true;

        let inner = self.inner.read().unwrap();
@@ -130,6 +124,13 @@ impl Layer for InMemoryLayer {
        if let Some(vec_map) = inner.index.get(&key) {
            let slice = vec_map.slice_range(lsn_range);
            for (entry_lsn, pos) in slice.iter().rev() {
+                match &reconstruct_state.img {
+                    Some((cached_lsn, _)) if entry_lsn <= cached_lsn => {
+                        return Ok(ValueReconstructResult::Complete)
+                    }
+                    _ => {}
+                }
+
                let buf = reader.read_blob(*pos)?;
                let value = Value::des(&buf)?;
                match value {
@@ -211,7 +212,7 @@ impl Layer for InMemoryLayer {
                        write!(&mut desc, " img {} bytes", img.len())?;
                    }
                    Ok(Value::WalRecord(rec)) => {
-                        let wal_desc = walrecord::describe_wal_record(&rec).unwrap();
+                        let wal_desc = walrecord::describe_wal_record(&rec);
                        write!(
                            &mut desc,
                            " rec {} bytes will_init: {} {}",
--- a/pageserver/src/layered_repository/layer_map.rs
+++ b/pageserver/src/layered_repository/layer_map.rs
@@ -16,12 +16,12 @@ use crate::layered_repository::InMemoryLayer;
 use crate::repository::Key;
 use anyhow::Result;
 use lazy_static::lazy_static;
-use metrics::{register_int_gauge, IntGauge};
 use std::collections::VecDeque;
 use std::ops::Range;
 use std::sync::Arc;
 use tracing::*;
-use utils::lsn::Lsn;
+use zenith_metrics::{register_int_gauge, IntGauge};
+use zenith_utils::lsn::Lsn;

 lazy_static! {
    static ref NUM_ONDISK_LAYERS: IntGauge =
@@ -43,13 +43,10 @@ pub struct LayerMap {
    pub next_open_layer_at: Option<Lsn>,

    ///
-    /// Frozen layers, if any. Frozen layers are in-memory layers that
-    /// are no longer added to, but haven't been written out to disk
-    /// yet. They contain WAL older than the current 'open_layer' or
-    /// 'next_open_layer_at', but newer than any historic layer.
-    /// The frozen layers are in order from oldest to newest, so that
-    /// the newest one is in the 'back' of the VecDeque, and the oldest
-    /// in the 'front'.
+    /// The frozen layer, if any, contains WAL older than the current 'open_layer'
+    /// or 'next_open_layer_at', but newer than any historic layer. The frozen
+    /// layer is used during checkpointing, when an InMemoryLayer is being written
+    /// out to disk.
    ///
    pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,

@@ -132,15 +129,17 @@ impl LayerMap {
                // this layer contains the requested point in the key/lsn space.
                // No need to search any further
                trace!(
-                    "found layer {} for request on {key} at {end_lsn}",
+                    "found layer {} for request on {} at {}",
                    l.filename().display(),
+                    key,
+                    end_lsn
                );
                latest_delta.replace(Arc::clone(l));
                break;
            }
            // this layer's end LSN is smaller than the requested point. If there's
            // nothing newer, this is what we need to return. Remember this.
-            if let Some(old_candidate) = &latest_delta {
+            if let Some(ref old_candidate) = latest_delta {
                if l.get_lsn_range().end > old_candidate.get_lsn_range().end {
                    latest_delta.replace(Arc::clone(l));
                }
@@ -150,8 +149,10 @@ impl LayerMap {
        }
        if let Some(l) = latest_delta {
            trace!(
-                "found (old) layer {} for request on {key} at {end_lsn}",
+                "found (old) layer {} for request on {} at {}",
                l.filename().display(),
+                key,
+                end_lsn
            );
            let lsn_floor = std::cmp::max(
                Lsn(latest_img_lsn.unwrap_or(Lsn(0)).0 + 1),
@@ -162,13 +163,17 @@ impl LayerMap {
                layer: l,
            }))
        } else if let Some(l) = latest_img {
-            trace!("found img layer and no deltas for request on {key} at {end_lsn}");
+            trace!(
+                "found img layer and no deltas for request on {} at {}",
+                key,
+                end_lsn
+            );
            Ok(Some(SearchResult {
                lsn_floor: latest_img_lsn.unwrap(),
                layer: l,
            }))
        } else {
-            trace!("no layer found for request on {key} at {end_lsn}");
+            trace!("no layer found for request on {} at {}", key, end_lsn);
            Ok(None)
        }
    }
@@ -186,6 +191,7 @@ impl LayerMap {
    ///
    /// This should be called when the corresponding file on disk has been deleted.
    ///
+    #[allow(dead_code)]
    pub fn remove_historic(&mut self, layer: Arc<dyn Layer>) {
        let len_before = self.historic_layers.len();

@@ -201,14 +207,18 @@ impl LayerMap {
        NUM_ONDISK_LAYERS.dec();
    }

-    /// Is there a newer image layer for given key- and LSN-range?
+    /// Is there a newer image layer for the given key range?
    ///
    /// This is used for garbage collection, to determine if an old layer can
    /// be deleted.
-    pub fn image_layer_exists(
+    /// We ignore layers newer than disk_consistent_lsn, because they will be removed at restart.
+    /// We also only look at historic layers.
+    //#[allow(dead_code)]
+    pub fn newer_image_layer_exists(
        &self,
        key_range: &Range<Key>,
-        lsn_range: &Range<Lsn>,
+        lsn: Lsn,
+        disk_consistent_lsn: Lsn,
    ) -> Result<bool> {
        let mut range_remain = key_range.clone();

@@ -221,7 +231,8 @@ impl LayerMap {
                let img_lsn = l.get_lsn_range().start;
                if !l.is_incremental()
                    && l.get_key_range().contains(&range_remain.start)
-                    && lsn_range.contains(&img_lsn)
+                    && img_lsn > lsn
+                    && img_lsn < disk_consistent_lsn
                {
                    made_progress = true;
                    let img_key_end = l.get_key_range().end;
@@ -239,7 +250,7 @@ impl LayerMap {
        }
    }

-    pub fn iter_historic_layers(&self) -> impl Iterator<Item = &Arc<dyn Layer>> {
+    pub fn iter_historic_layers(&self) -> std::slice::Iter<Arc<dyn Layer>> {
        self.historic_layers.iter()
    }

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bojan Serafimov	20923e70f5	Merge branch 'main' into bojan-psbench-over-kvstore	2022-04-12 13:04:59 -04:00
Bojan Serafimov	98149f1a08	Parse output	2022-04-12 10:53:14 -04:00
Bojan Serafimov	fbc4206f2d	Simplify	2022-04-11 21:50:50 -04:00
Bojan Serafimov	d614291c44	Test latest pages	2022-04-05 20:33:57 -04:00
Bojan Serafimov	35d8167f68	Measure that materializing all versions bloats by 0.3	2022-03-28 15:42:16 -04:00
Bojan Serafimov	2f7d9d2dd5	Count modified bits per wal (WIP)	2022-03-27 18:41:14 -04:00
Bojan Serafimov	21f9774ea4	Merge branch 'heikki-kvstore' into bojan-psbench-over-kvstore	2022-03-18 16:27:18 -04:00
Bojan Serafimov	098d7046f8	Improve test	2022-03-18 14:37:01 -04:00
Konstantin Knizhnik	a39de2997f	Optimize reading versions for delta_layer Store blob size in layer metadata for all layers types Heikki: This is a squashed version of PR #1369	2022-03-18 14:35:13 +02:00
Heikki Linnakangas	d756921220	RFC fixes, per comments in the PR	2022-03-18 14:18:25 +02:00
Heikki Linnakangas	2bc9ed164f	Merge remote-tracking branch 'origin/main' into heikki-kvstore	2022-03-18 12:08:17 +02:00
Heikki Linnakangas	8c4d270cde	Fix InMemoryLayer::dump	2022-03-18 11:57:19 +02:00
Heikki Linnakangas	35584f7242	Bump magic IDs, to distinguish old file format from new	2022-03-18 11:57:19 +02:00
Heikki Linnakangas	12141523f6	Improve comments	2022-03-18 11:57:19 +02:00
Heikki Linnakangas	d383ed4e68	Add missing fsyncs	2022-03-18 11:57:19 +02:00
Heikki Linnakangas	13ec0ce7b2	fix formatting	2022-03-17 19:40:08 +02:00
Heikki Linnakangas	80fc133833	Add sequential scan tests	2022-03-17 17:12:30 +02:00
Heikki Linnakangas	3da14d56f2	Fix materialized page caching.	2022-03-17 17:12:30 +02:00
Heikki Linnakangas	b0b2093d00	Improve comments and tidy up the code in pgdatadir_mapping.rs.	2022-03-17 13:14:33 +02:00
Bojan Serafimov	02aa7c023a	Add todos	2022-03-16 22:51:39 -04:00
Bojan Serafimov	180631da1f	Remove todos	2022-03-16 22:40:36 -04:00
Bojan Serafimov	811d46f070	Fix	2022-03-16 22:39:55 -04:00
Bojan Serafimov	728f299641	Add hot page workload	2022-03-16 19:18:28 -04:00
Bojan Serafimov	fb49418e7f	Trim dead code	2022-03-16 19:05:40 -04:00
Bojan Serafimov	887dc8f112	Print some stats	2022-03-16 19:01:54 -04:00
Bojan Serafimov	aa7b32d892	Simplify	2022-03-16 18:42:58 -04:00
Bojan Serafimov	d7ed9d8e01	cleanup	2022-03-16 17:40:19 -04:00
Bojan Serafimov	96c2b3a80a	WIP working pageserver get_page client	2022-03-16 14:31:42 -04:00
Heikki Linnakangas	7560854370	Rename things in KeyPartition, per Bojan's suggestions.	2022-03-16 19:29:07 +02:00
Heikki Linnakangas	adbbb0a4c8	Merge remote-tracking branch 'origin/main' into heikki-kvstore	2022-03-16 19:08:45 +02:00
Heikki Linnakangas	6a264aaca3	Stopgap "fix" for test_parallel_copy failure in debug mode.	2022-03-14 19:54:38 +02:00
Heikki Linnakangas	60ed6b3710	Shave some CPU cycles from reading blobs from files. This shows up in 'perf' profile when running in debug mode. Not so significant in release mode, but still.	2022-03-14 19:53:00 +02:00
Heikki Linnakangas	89690d7349	Prevent compaction from running at same time as GC. For same reasons as we prohibited concurrent checkpointing and GC previously.	2022-03-14 14:22:04 +02:00
Heikki Linnakangas	09f2dff537	Refactor the checkpoint and compaction functions. The concept of a "checkpoint" had become quite muddled. This tries to clarify it again.	2022-03-14 13:22:46 +02:00
Heikki Linnakangas	2d8587f67d	Separate flushing in-memory layer to disk from checkpoints. When 'checkpoint_distance' is reached, freeze the current in-memory layer directly in the WAL receiver thread. And to flush the frozen layer to disk, launch a separate "layer flushing thread". This leaves only the compaction duty to the checkpoint thread.	2022-03-14 11:37:22 +02:00
Heikki Linnakangas	c559c72ede	Merge remote-tracking branch 'origin/main' into HEAD	2022-03-14 10:26:05 +02:00
Heikki Linnakangas	f06707badc	Bugfix: a few constant keys were missing from collect_keyspace As a result, you got "could not find data for key" errors.	2022-03-13 01:15:32 +02:00
Heikki Linnakangas	64cdd6064d	Don't ClearVisibilityMapFlags records for non-existent blocks. We create a ClearVisibilityMapFlags record for the VM page, when a heap WAL record indicates that the VM bit needs to be cleared. However, sometimes the VM block would not exist. It seems that PostgreSQL sometimes sets the clear-VM bit on WAL records, even though the corresponding VM page hasn't been initialized yet. There's no point in trying to clear a bit on a non-existent bit, so just skip emitting the record if the VM page doesn't exist. I'm not entirely sure why we're only seeing this bug with this PR, I think it existed before. Maybe we were more sloppy and returned an all-zeros page?	2022-03-13 01:14:58 +02:00
Heikki Linnakangas	4bd557ca61	Move RFC doc here. From https://github.com/zenithdb/rfcs/pull/17	2022-03-12 11:35:28 +02:00
Heikki Linnakangas	ee40297758	Refactor keyspace code Have separate classes for the KeySpace, a partitioning of the KeySpace (KeyPartitioning), and a builder object used to construct the KeySpace. Previously, KeyPartitioning did all those things, and it was a bit confusing.	2022-03-11 16:24:13 +02:00
Heikki Linnakangas	d5b8380dae	Improve comments on image layer. Make it more explicit that if a key doesn't exist in an image layer, it doesn't exist.	2022-03-11 09:47:09 +02:00
Heikki Linnakangas	bce2da4e55	Another 'tablespace' test fix.	2022-03-11 00:53:46 +02:00
Heikki Linnakangas	3948956e87	Fix pg_table_size() on a view	2022-03-10 23:35:24 +02:00
Heikki Linnakangas	a726b555fb	Handle tablespaces gracefully. We don't really support tablespaces. But this makes the 'tablespace' Postgres regression test pass, like it did previously.	2022-03-10 22:56:37 +02:00
Heikki Linnakangas	0e3512aad0	Crank down logging again	2022-03-10 18:50:12 +02:00
Heikki Linnakangas	6fb566b46f	Bump vendor/postgres to fix a bug with smgrnblocks() on newly created rel	2022-03-10 16:05:21 +02:00
Heikki Linnakangas	dd56eeefbf	Crank up logging	2022-03-10 15:45:50 +02:00
Heikki Linnakangas	d19a293e7e	Add a test for branching	2022-03-10 14:56:13 +02:00
Heikki Linnakangas	be4aebd7e9	silence clippy	2022-03-10 13:36:28 +02:00
Heikki Linnakangas	dac73328ba	Fix bug where reldir was not written to image layer.	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	fb79c7f1f0	Make compaction more concurrent	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	e7bd74d558	Tidy up	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	da8beffc95	Fix logical timeline size tracking	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	98ec8418c4	Fix bug with the partitioning and GC	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	92d1322cd5	comments, other cleanup	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	2896d35a8b	rustfmt and clippy fixes	2022-03-10 13:20:08 +02:00
Heikki Linnakangas	e096c62494	Misc fixes and stuff	2022-03-09 11:36:39 +02:00
Heikki Linnakangas	356f716d39	Fixes	2022-03-09 11:36:39 +02:00
Heikki Linnakangas	798ff26fb0	More work on compaction, and resurrect some unit tests	2022-03-09 11:36:39 +02:00
Heikki Linnakangas	28045890eb	Work on compaction.	2022-03-09 11:36:39 +02:00
Heikki Linnakangas	6127b6638b	Major storage format rewrite Major changes and new concepts: Simplify Repository to a value-store ------------------------------------ Move the responsibility of tracking relation metadata, like which relations exist and what are their sizes, from Repository to a new module, pgdatadir_mapping.rs. The interface to Repository is now a simple key-value PUT/GET operations. It's still not any old key-value store though. A Repository is still responsible from handling branching, and every GET operation comes with an LSN. Key --- The key to the Repository key-value store is a Key struct, which consists of a few integer fields. It's wide enough to store a full RelFileNode, fork and block number, and to distinguish those from metadata keys. See pgdatadir_mapping.rs for how relation blocks and metadata keys are mapped to the Key struct. Store arbitrary key-ranges in the layer files --------------------------------------------- The concept of a "segment" is gone. Each layer file can store an arbitrary range of Keys. TODO: - Deleting keys, to reclaim space. This isn't visible to Postgres, dropping or truncating a relation works as you would expect if you look at it from the compute node. If you drop a relation, for example, the relation is removed from the metadata entry, so that it appears to be gone. However, the layered repository implementation never reclaims the storage. - Tracking "logical database size", for disk space quotas. That ought to be reimplemented now in pgdatadir_mapping.rs, or perhaps in walingest.rs. - LSM compaction. The logic for checkpointing and creating image layers is very dumb. AFAIK the read code could deal with a full-fledged LSM tree now consisting of the delta and image layers. But there's no code to take a bunch of delta layers and compact them, and the heuristics for when to create image layers is pretty dumb. - The code to track the layers is inefficient. 
All layers are just stored in a vector, and whenever we need to find a layer, we do a linear search in it.	2022-03-09 11:36:39 +02:00
Heikki Linnakangas	c7c1e19667	Use more generics, less dyn	2022-03-09 11:36:38 +02:00