Compare commits

...

1122 Commits

Author SHA1 Message Date
Arseny Sher
23644ed251 set pageserver id in dockerfile 2022-02-23 09:17:45 +03:00
Dmitry Rodionov
99e0f07a1d review adjustments, fancy enum for builder, minor cleanups 2022-02-23 08:33:50 +03:00
Dmitry Rodionov
5d490babf8 add node id to pageserver
This adds node id parameter to pageserver configuration. Also I use a
simple builder to construct pageserver config struct to avoid setting
node id to some temporary invalid value. Some of the changes in test
fixtures are needed to split init and start operations for the environment.
2022-02-23 08:33:50 +03:00
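For illustration, a minimal sketch of that builder idea (type and field names here are hypothetical, not the actual pageserver code): the config can only be built once a node id has been supplied, so no placeholder value ever leaks out.

```rust
struct PageServerConfigBuilder {
    listen_addr: String,
    node_id: Option<u64>,
}

struct PageServerConfig {
    listen_addr: String,
    node_id: u64,
}

impl PageServerConfigBuilder {
    fn new(listen_addr: impl Into<String>) -> Self {
        Self { listen_addr: listen_addr.into(), node_id: None }
    }

    fn node_id(mut self, id: u64) -> Self {
        self.node_id = Some(id);
        self
    }

    // Building fails if the node id was never provided, so no temporary
    // invalid value ever reaches the rest of the pageserver.
    fn build(self) -> Result<PageServerConfig, &'static str> {
        Ok(PageServerConfig {
            listen_addr: self.listen_addr,
            node_id: self.node_id.ok_or("node id must be set")?,
        })
    }
}

fn main() {
    let config = PageServerConfigBuilder::new("127.0.0.1:6400")
        .node_id(1)
        .build()
        .expect("valid config");
    println!("node {} listening on {}", config.node_id, config.listen_addr);
}
```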
Arseny Sher
5865f85ae2 Add --id argument to safekeeper setting its unique u64 id.
In preparation for storage node messaging. IDs are supposed to be monotonically
assigned by the console. In tests they are issued by ZenithEnv; at the zenith cli
level and in fixtures, the string name is completely replaced by an integer id. Example
TOML configs are adjusted accordingly.

Sequential ids are chosen over Zid mainly because they are compact and easy to
type/remember.
2022-02-23 08:33:50 +03:00
Dhammika Pathirana
b815f5fb9f Add no_sync check in storage
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-02-22 12:01:12 -08:00
anastasia
74a0942a77 Fix zenith feedback processing at compute node.
Add test for backpressure
2022-02-22 13:56:21 +03:00
anastasia
1a4682a04a Add 'walreceiver-after-ingest' failpoint. Use sleep at this point to imitate slow walreceiver. 2022-02-22 13:56:21 +03:00
Heikki Linnakangas
993b544ad0 Change default parameters for back pressure
Fixes issue #1238 and #1189. Extracted from PR #1194, with some comment
editorialization by me.

Author: Konstantin Knizhnik <knizhnik@zenith.tech>
2022-02-22 13:56:21 +03:00
Arthur Petukhovsky
dba1d36a4a Refactor WAL utils in safekeeper (#1290)
wal_storage.rs was split off from timeline.rs, safekeeper.rs and send_wal.rs,
and now contains all WAL-related code from the safekeeper. There is now a
PhysicalStorage for persisting WAL to disk and a WalReader for reading it.
This allows optimizing PhysicalStorage without affecting too much other
code.

Also there is a separate structure for persisting control file now in
control_file.rs.
2022-02-21 17:20:53 +03:00
Bojan Serafimov
ca81a550ef Fmt 2022-02-21 16:43:28 +03:00
Bojan Serafimov
65a0b2736b Add static router 2022-02-21 16:43:28 +03:00
Bojan Serafimov
cca886682b Undo cplane change 2022-02-21 16:43:28 +03:00
Bojan Serafimov
c8f47cd38e Fix param name 2022-02-21 16:43:28 +03:00
Bojan Serafimov
92787159f7 Add client auth method option 2022-02-21 16:43:28 +03:00
anastasia
abb422d5de Fix SafekeeperMetrics parsing in python tests 2022-02-21 13:45:22 +03:00
bojanserafimov
fdc15de8b2 Add perf test: test_random_writes (#1292) 2022-02-18 15:46:29 -05:00
Stas Kelvich
207286f2b8 Actualize branching parts of openapi spec.
The previous version of the spec caused parsing errors in generated clients,
as the return type is an object, not an array; one field was also missing. In
passing, set `format: hex` on ancestor_id too, as the value conforms to
that format.
2022-02-18 20:22:21 +02:00
Dhammika Pathirana
d2b896381a Add safekeeper tenant tags in lsn/wal metrics
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>

Add tenant_id in lsn/wal metrics (#1234)
2022-02-18 08:26:37 -08:00
Dhammika Pathirana
009f6d4ae8 Fix safekeeper metric tags
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>

Use separate tags in sk storage file histo (#1234)
2022-02-18 08:26:37 -08:00
Kirill Bulatov
1b31379456 Log postgres errors with ERROR level 2022-02-17 13:42:09 +02:00
Bojan Serafimov
4c64b10aec Revert removal of ignore hint 2022-02-17 13:41:49 +02:00
Bojan Serafimov
ad262a46ad Remove redundant pytest_plugins assignment 2022-02-17 13:41:49 +02:00
Kirill Bulatov
ce533835e5 Use uuid.UUID types for tenants and timelines more 2022-02-17 13:41:19 +02:00
Kirill Bulatov
e5bf520b18 Use types in zenith cli invocations in Python tests 2022-02-17 13:41:19 +02:00
Dmitry Rodionov
9512e21b9e fix python formatting 2022-02-17 13:22:14 +03:00
Dmitry Ivanov
a26d565282 [proxy] Replace private static map with a public CancelMap
This is a cleaner approach which might facilitate testing.
2022-02-17 11:54:27 +03:00
Dmitry Ivanov
a47dade622 [proxy] Migrate to async
This change makes most parts of the code asynchronous, except
for the `mgmt` subsystem (we're going to drop it anyway).

Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>
2022-02-17 11:54:27 +03:00
Dmitry Rodionov
9cce430430 remove several obsolete management api commands from pageserver's libpq
api

these commands are now available via http api
2022-02-17 11:26:28 +03:00
Dhammika Pathirana
4bf4bacf01 Add cli start/stop test
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>

Add a test for #1260
2022-02-16 13:19:12 -08:00
bojanserafimov
335abfcc28 Add slow seqscan perf test (#1283) 2022-02-16 10:59:51 -05:00
bojanserafimov
afb3342e46 Add vanilla pg baseline tests (#1275) 2022-02-15 13:44:22 -05:00
Kirill Bulatov
5563ff123f Reuse tenant-timeline id struct from utils 2022-02-15 17:45:23 +02:00
Dhammika Pathirana
0a557b2fa9 Add cli v4 loopback listener ports test
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>

Add a test for #1247
2022-02-15 17:01:22 +02:00
Heikki Linnakangas
9632c352ab Avoid having multiple records for the same page and LSN.
If a heap UPDATE record modified two pages, and both pages needed to have
their VM bits cleared, and the VM bits were located on the same VM page,
we would emit two ZenithWalRecord::ClearVisibilityMapFlags records for
the same VM page. That produced warnings like this in the pageserver log:

    Page version Wal(ClearVisibilityMapFlags { heap_blkno: 18, flags: 3 }) of rel 1663/13949/2619_vm blk 0 at 2A/346046A0 already exists

To fix, change ClearVisibilityMapFlags so that it can update the bits
for both pages as one operation.

This was already covered by several python tests, so no need to add a
new one. Fixes #1125.

Co-authored-by: Konstantin Knizhnik <knizhnik@zenith.tech>
2022-02-15 14:26:16 +02:00
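As a rough illustration of the shape of the fix (the payload and bit layout here are made up, not the actual pageserver definitions): one record carries the VM bits to clear for both heap blocks, so only a single record per VM page is emitted.

```rust
// Illustrative only: a single record can clear VM bits for both heap blocks.
enum ZenithWalRecord {
    ClearVisibilityMapFlags {
        // None means this half of the UPDATE did not touch this VM page.
        new_heap_blkno: Option<u32>,
        old_heap_blkno: Option<u32>,
        flags: u8,
    },
}

fn apply_clear_vm_flags(vm_page: &mut [u8], rec: &ZenithWalRecord) {
    match rec {
        ZenithWalRecord::ClearVisibilityMapFlags { new_heap_blkno, old_heap_blkno, flags } => {
            // Both halves of the update are handled in one pass over the same
            // VM page, so two records for the same (page, LSN) are not needed.
            for blkno in [*new_heap_blkno, *old_heap_blkno].into_iter().flatten() {
                clear_vm_bits_for_block(vm_page, blkno, *flags);
            }
        }
    }
}

// Placeholder for the real bit-clearing logic: each heap block owns a couple
// of bits in the visibility map page.
fn clear_vm_bits_for_block(vm_page: &mut [u8], heap_blkno: u32, flags: u8) {
    let byte = (heap_blkno as usize / 4) % vm_page.len();
    vm_page[byte] &= !(flags << ((heap_blkno % 4) * 2));
}

fn main() {
    let mut vm_page = vec![0xffu8; 8192];
    let rec = ZenithWalRecord::ClearVisibilityMapFlags {
        new_heap_blkno: Some(18),
        old_heap_blkno: Some(19),
        flags: 0x03,
    };
    apply_clear_vm_flags(&mut vm_page, &rec);
    println!("byte 4 after clearing: {:#04x}", vm_page[4]);
}
```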
Arseny Sher
328e3b4189 bump vendor/postgres to fix compiler warnings 2022-02-15 06:51:16 +03:00
Arseny Sher
47f6a1f9a8 Add -Werror to CI builds. 2022-02-15 06:51:16 +03:00
Dmitry Rodionov
a4829712f4 merge directories in git-upload instead of removing existing files for perf test result uploads 2022-02-15 03:47:06 +03:00
Arseny Sher
d4d26f619d bump vendor/postgres to fix compilation warning 2022-02-14 21:00:11 +03:00
Arseny Sher
36481f3374 bump vendor/postgres to init pgxactoff in walproposer
ref #1244
2022-02-14 15:57:38 +03:00
Dhammika Pathirana
d951dd8977 Fix cli start (#1260)
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-02-10 18:36:02 -05:00
bojanserafimov
ea13838be7 Add pgbench baseline test (#1204)
Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>
2022-02-10 15:33:36 -05:00
Dmitry Rodionov
b51f23cdf0 pass perf test cluster connstr to circle ci jobs 2022-02-10 17:49:54 +03:00
Kirill Bulatov
3cfcdb92ed Fix tokio features in zenith utils to enable its standalone compilation 2022-02-10 08:33:22 -05:00
Kirill Bulatov
d7af965982 Do not leak decoding_key in JwtAuth's Debug representation 2022-02-10 08:33:22 -05:00
Kirill Bulatov
7c1c7702d2 Code review fixes 2022-02-10 08:33:22 -05:00
Kirill Bulatov
6eef401602 Move routerify behind zenith_utils 2022-02-10 08:33:22 -05:00
Kirill Bulatov
c5b5905ed3 Remove parking_lot dependency from workspace 2022-02-10 08:33:22 -05:00
Kirill Bulatov
76b74349cb Bump pageserver dependencies 2022-02-10 08:33:22 -05:00
Dmitry Rodionov
b08e340f60 point perf results back from testing to master 2022-02-10 14:18:34 +03:00
Dmitry Rodionov
a25fa29bc9 modify git-upload for generate_and_push_perf_report.sh needs 2022-02-10 13:12:19 +03:00
Dmitry Rodionov
ccf3c8cc30 store performance test results in our staging cluster to be able to
visualize them in grafana
2022-02-10 13:12:19 +03:00
Heikki Linnakangas
c45ee13b4e Bump vendor/postgres, to fix memory leak.
See https://github.com/zenithdb/postgres/pull/129
2022-02-10 11:29:38 +02:00
anastasia
f1e7db9d0d Bump vendor/postgres rebased to 14.2 2022-02-10 11:19:10 +03:00
Heikki Linnakangas
fa8a6c0e94 Reduce logging of walkeeper normal operations.
It was printing a lot of stuff to the log with INFO level, for routine
things like receiving or sending messages. Reduce the noise. The amount
of logging was excessive, and it was also consuming a fair amount of CPU
(about 20% of safekeeper's CPU usage in a little test I ran).
2022-02-10 08:34:30 +02:00
Dhammika Pathirana
1e8ca497e0 Fix safekeeper loopback addr (#1247)
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-02-10 09:23:53 +03:00
Heikki Linnakangas
a504cc87ab Bump vendor/postgres for "Make getpage requests interruptible"
See https://github.com/zenithdb/zenith/issues/1224
2022-02-09 16:13:46 +02:00
Heikki Linnakangas
5268bbc840 Bump vendor/postgres for fixes to cluster size limit.
See https://github.com/zenithdb/postgres/pull/126
2022-02-09 15:52:21 +02:00
Arseny Sher
e1d770939b Bump vendor/postgres to fix recent CI failure.
See zenithdb/postgres#127
2022-02-09 08:50:45 -05:00
Egor Suvorov
2866a9e82e Fix safekeeper LSN metrics (#1216)
* Always initialize flush_lsn/commit_lsn metrics on a specific timeline, no more `n/a`
* Update flush_lsn metrics missing from cba4da3f4d
* Ensure that flush_lsn found on load is >= than both commit_lsn and truncate_lsn
* Add some debug logging
2022-02-07 20:05:16 +03:00
Kirill Bulatov
b67cddb303 Implement EphemeralFile flush in a least dangerous way 2022-02-05 22:02:59 -05:00
anastasia
cb1d84d980 Make test_timeline_size_quota more deterministic 2022-02-06 02:16:36 +03:00
anastasia
642797b69e Implement cluster size quota for zenith compute node.
Use GUC zenith.max_cluster_size to set the limit.

If the limit is reached, extend requests will throw an out-of-space error.
When the current size is too close to the limit, throw a warning.

Add new test: test_timeline_size_quota.
2022-02-06 02:16:36 +03:00
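The real check lives in the compute node's C code; as a rough Rust sketch of the rule described above (the 95% warning threshold is an assumption for the example):

```rust
// Error out when an extension would exceed the limit, warn when close to it.
fn check_cluster_size(current_bytes: u64, requested_bytes: u64, max_bytes: u64) -> Result<(), String> {
    let new_size = current_bytes + requested_bytes;
    if new_size > max_bytes {
        return Err(format!(
            "could not extend: cluster size {} would exceed zenith.max_cluster_size {}",
            new_size, max_bytes
        ));
    }
    // Warn when within ~5% of the limit (threshold chosen for the sketch).
    if new_size as f64 > max_bytes as f64 * 0.95 {
        eprintln!("WARNING: cluster size {} is close to the limit {}", new_size, max_bytes);
    }
    Ok(())
}

fn main() {
    match check_cluster_size(9_500_000_000, 600_000_000, 10_000_000_000) {
        Ok(()) => println!("extend allowed"),
        Err(e) => println!("extend rejected: {}", e),
    }
}
```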
Kirill Bulatov
3ed156a5b6 Add a CLI tool to manipulate remote storage blob files 2022-02-05 15:48:08 -05:00
Heikki Linnakangas
2d93b129a0 Avoid eprintln() in pageserver and walkeeper.
Use log::error!() instead. I spotted a few of these "connection error"
lines in the logs, without timestamps and the other stuff we print for
all other log messages.
2022-02-05 17:59:31 +02:00
Arseny Sher
32c7859659 bump vendor/postgres 2022-02-05 01:27:31 +03:00
Arseny Sher
729ac38ea8 Centralize suspending/resuming timeline activity on safekeepers.
A timeline is active whenever there is at least one connection from compute or the
pageserver is not caught up. Currently 'active' means callmemaybes are being
sent.

Fixes race: now suspend condition checking and callmemaybe unsubscribe happen
under the same lock.
2022-02-03 02:34:10 +03:00
Andrey Taranik
d69b0539ba proxy chart staging values update for labels (#1202) 2022-02-01 13:31:05 +03:00
Dmitry Ivanov
ec78babad2 Use mold instead of default linker 2022-01-28 20:40:50 +03:00
Dmitry Ivanov
9350dfb215 [CI] Merge *.profraw files prior to uploading workspace
Hopefully, this will make CI pipeline a bit faster.
2022-01-28 19:56:28 +03:00
Dmitry Ivanov
8ac8be5206 [scripts/coverage] Implement merge command
This will drastically decrease the size of CI workspace uploads.
2022-01-28 19:56:28 +03:00
Dmitry Ivanov
c2927353a5 Enable async deserialization of FeMessage
Now it's possible to call Fe{Startup,}Message in both
sync and async contexts, which is good for proxy.

Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>
2022-01-28 19:40:37 +03:00
Kirill Bulatov
33251a9d8f Disable failing remote storage tests for now 2022-01-28 18:35:46 +03:00
Konstantin Knizhnik
c045ae7a9b Fix random range for keys in test_gc_aggressive.py (#1199) 2022-01-28 16:29:55 +03:00
Dmitry Rodionov
602ccb7d5f distinguish failures for pre-initdb lsn and pre-ancestor lsn branching in test_branch_behind 2022-01-28 12:31:15 +03:00
Dmitry Rodionov
5df21e1058 remove Timeline::start_lsn in favor of ancestor_lsn 2022-01-28 12:31:15 +03:00
Konstantin Knizhnik
08135910a5 Fix checkpoint.nextXid update (#1166)
* Fix checkpoint.nextXid update

* Add test for checkpoint.nextXid

* Fix indentation of test_next_xid.py

* Fix mypy error in test_next_xid.py

* Tidy up the test case.

* Add a unit test

Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>
2022-01-27 18:21:51 +03:00
Konstantin Knizhnik
f58a22d07e Freeze layers at the same end LSN (#1182)
* Freeze vectors at the same end LSN

* Fix calculation of last LSN for inmem layer

* Do not advance disk_consistent_lsn if no open layer was evicted

* Fix calculation of freeze_end_lsn

* Let start_lsn be larger than oldest_pending_lsn

* Rename 'oldest_pending_lsn' and 'last_lsn', add comments.

* Fix future_layerfiles test

* Update comments concerning oldest_lsn

Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>
2022-01-27 18:21:00 +03:00
Arthur Petukhovsky
cedde559b8 Add test for replacement of the failed safekeeper (#1179)
* Add test to replace failed safekeeper

* Restart safekeepers in test_replace_safekeeper

* Update vendor/postgres
2022-01-27 17:26:55 +03:00
Arthur Petukhovsky
49d1d1ddf9 Don't call adjust_for_wal_acceptors after pg create (#1178)
Now zenith_cli handles the wal_acceptors config internally, and if we
also append wal_acceptors to postgresql.conf in python tests, it will
contain a duplicate wal_acceptors setting.
2022-01-27 17:23:14 +03:00
Arseny Sher
86045ac36c Prefix per-cluster directory with ztenant_id in safekeeper.
Currently ztimelineids are unique, but all APIs accept the pair, so let's keep
it everywhere for uniformity.

Carry around ZTTId containing both ZTenantId and ZTimelineId for simplicity.

(existing clusters on staging ought to be preprocessed for that)
2022-01-27 17:22:07 +03:00
Konstantin Knizhnik
79f0e44a20 Gc cutoff rwlock (#1139)
* Reproduce github issue #1047.

* Use RwLock to protect gc_cutoff_lsn

* Reduce number of updates in test_gc_aggressive

* Change test_prohibit_get_page_at_lsn_for_garbage_collected_pages test

* Change test_prohibit_get_page_at_lsn_for_garbage_collected_pages

* Lock latest_gc_cutoff_lsn in all operations accessing storage to prevent race conditions with GC

* Remove random sleep between wait_for_lsn and get_page_at_lsn

* Initialize latest_gc_cutoff with initdb_lsn and remove separate check that lsn >= initdb_lsn

* Update test_prohibit_branch_creation_on_pre_initdb_lsn test

Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>
2022-01-27 14:41:16 +03:00
anastasia
c44695f34b bump vendor/postgres 2022-01-27 11:20:45 +03:00
anastasia
5abe2129c6 Extend replication protocol with ZenithFeedback message
to pass current_timeline_size to compute node

Put standby_status_update fields into ZenithFeedback and send them as one message.
Pass value sizes together with keys in the ZenithFeedback message.
2022-01-27 11:20:45 +03:00
Dmitry Rodionov
63dd7bce7e bandaid to avoid concurrent timeline downloading until proper refactoring/fix 2022-01-26 19:54:09 +03:00
Dmitry Rodionov
f3c73f5797 cache python deps in circle ci 2022-01-26 13:01:12 +03:00
Dmitry Rodionov
e6f2d70517 use 2021 rust edition 2022-01-25 18:48:49 +03:00
Andrey Taranik
be6d1cc360 Use zimg as builders (#1165)
* try use own builder images

* add postgres headers before build zenith

* checkout submodule before zenith build

* circleci cleanup
2022-01-25 00:58:37 +03:00
Dmitry Ivanov
703716228e Use &str instead of String in BeMessage::ErrorResponse
There's no need to allocate string literals on the heap.
2022-01-24 18:49:05 +03:00
Dmitry Rodionov
458bc0c838 walkeeper: use named type as a key in callmemaybe subscriptions hashmap 2022-01-24 17:20:15 +03:00
Dmitry Rodionov
39591ef627 reduce flakiness 2022-01-24 17:20:15 +03:00
Dmitry Rodionov
37c440c5d3 Introduce first version of tenant migration between pageservers
This patch adds attach/detach http endpoints to the pageserver, some
changes to callmemaybe handling inside the safekeeper, and an integration
test to check migration with and without load. There are still some
rough edges that will be addressed in follow-up patches
2022-01-24 17:20:15 +03:00
anastasia
81e94d1897 Add LSN and Backpressure descriptions to glossary.md 2022-01-24 12:52:30 +03:00
Konstantin Knizhnik
7bc1274a03 Fix comparison with disk_consistent_lsn in newer_image_layer_exists (#1167) 2022-01-24 12:19:18 +03:00
Dmitry Rodionov
5f5a11525c Switch our python package management solution to poetry.
Mainly because it has better support for installing packages across
different python versions.

It also has a better dependency resolver than Pipenv, and supports the
modern standard for python dependency management. This includes using
pyproject.toml for project-specific configuration instead of per-tool
conf files. See the following links for details:
 https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/
 https://www.python.org/dev/peps/pep-0518/
2022-01-24 11:33:47 +03:00
Konstantin Knizhnik
e209764877 Do not delete layers beyond cutoff LSN (#1128)
* Do not delete layers beyond cutoff LSN

* Update pageserver/src/layered_repository/layer_map.rs

Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>

Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>
2022-01-24 10:42:40 +03:00
Kirill Bulatov
65290b2e96 Ensure every submodule compiles on its own 2022-01-21 17:34:15 +03:00
Dmitry Ivanov
127df96635 [proxy] Make NUM_BYTES_PROXIED_COUNTER more precise 2022-01-21 17:31:19 +03:00
Kirill Bulatov
924d8d489a Allow enabling S3 mock in all existing tests with an env var 2022-01-20 18:42:47 +02:00
Dmitry Rodionov
026eb64a83 Use python lib to mock s3 2022-01-20 18:42:47 +02:00
Kirill Bulatov
45124856b1 Better S3 remote storage logging 2022-01-20 18:42:47 +02:00
Kirill Bulatov
38c6f6ce16 Allow specifying custom endpoint in s3 2022-01-20 18:42:47 +02:00
Heikki Linnakangas
caa62eff2a Fix description of proxy --auth-endpoint option. 2022-01-20 14:50:27 +03:00
Dmitry Ivanov
d3542c34f1 Refactoring: use anyhow::Context's methods where possible 2022-01-19 16:33:48 +03:00
Kirill Bulatov
7fb62fc849 Fix macos compilation 2022-01-18 23:01:04 +02:00
Andrey Taranik
9d6ae06663 monitoring turn on for proxy (#1146) 2022-01-18 19:23:53 +03:00
Alexey Kondratov
06c28174c2 Integrate compute_tools into zenith workspace and improve logging (zenithdb/console#487) 2022-01-18 18:47:31 +03:00
bojanserafimov
8af1b43074 proxy: Add new metrics (#1132) 2022-01-14 19:12:43 -05:00
Heikki Linnakangas
17b7caddcb Update vendor/postgres: silence excessive logging from walproposer. 2022-01-14 20:51:02 +02:00
Heikki Linnakangas
dab30c27b6 Refactor thread management and shutdown
This introduces a new module to handle thread creation and shutdown.
All page server threads are now registered in a global hash map, and
there's a function to request individual threads to shut down gracefully.

A thread shutdown request is signalled to the thread with a flag, as well
as a Future that can be used to wake up async operations if shutdown is
requested. Use that facility to have the libpq listener thread respond
to pageserver shutdown, based on Kirill's earlier prototype
(https://github.com/zenithdb/zenith/pull/1088). That addresses
https://github.com/zenithdb/zenith/issues/1036; previously the libpq
listener thread would not exit until one more connection arrived.

This also eliminates a resource leak in the accept() loop. Previously,
we added the JoinHandle of each new thread to a vector, but old handles
for threads that had already exited were never removed.
2022-01-14 18:36:10 +02:00
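A minimal sketch of the register-then-signal pattern described above, with hypothetical names and without the async Future half:

```rust
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread::{self, JoinHandle};

#[derive(Default)]
struct ThreadRegistry {
    next_id: u64,
    threads: HashMap<u64, (Arc<AtomicBool>, JoinHandle<()>)>,
}

impl ThreadRegistry {
    // Spawn a thread and register it together with its shutdown flag.
    fn spawn<F>(&mut self, f: F) -> u64
    where
        F: FnOnce(Arc<AtomicBool>) + Send + 'static,
    {
        let flag = Arc::new(AtomicBool::new(false));
        let flag_for_thread = Arc::clone(&flag);
        let handle = thread::spawn(move || f(flag_for_thread));
        let id = self.next_id;
        self.next_id += 1;
        self.threads.insert(id, (flag, handle));
        id
    }

    // Ask one thread to stop and wait for it, removing its entry so the
    // JoinHandle does not leak (the accept() loop problem mentioned above).
    fn shutdown(&mut self, id: u64) {
        if let Some((flag, handle)) = self.threads.remove(&id) {
            flag.store(true, Ordering::Relaxed);
            let _ = handle.join();
        }
    }
}

fn main() {
    let mut registry = ThreadRegistry::default();
    let id = registry.spawn(|shutdown| {
        while !shutdown.load(Ordering::Relaxed) {
            thread::sleep(std::time::Duration::from_millis(10));
        }
    });
    registry.shutdown(id);
    println!("worker {} stopped", id);
}
```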
Heikki Linnakangas
bad1dd9759 Don't panic if spawning a new WAL receiver thread fails.
The panic would kill the page service thread. That's not too bad, but
still let's try to handle it more gracefully.
2022-01-14 18:02:34 +02:00
Heikki Linnakangas
d29836d0d5 Don't panic if spawning a thread to handle a connection fails.
Log the error and continue. Hopefully it's a transient failure.

This might have been happening in staging earlier, when the safekeeper
had a problem where it opened connections very frequently to issue
"callmemaybe" commands. If you launch too many threads too fast, you might
run out of file descriptors or something. It's not totally clear what
happened, but with this commit, at least the page server will continue to run
and accept new connections if a transient error happens.
2022-01-14 18:02:30 +02:00
Heikki Linnakangas
adb0b3dada Include backtrace in error messages in the log.
The 'anyhow' crate can include a backtrace in all errors when the
'backtrace' feature is enabled. Enable it, and change the places that used
'{:#}' or '{}' to '{:?}', so that the backtrace is printed.
2022-01-14 10:10:17 +02:00
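For illustration, roughly what the change amounts to (the `backtrace` feature flag is anyhow's documented feature; the error chain here is a made-up example):

```rust
// Cargo.toml:
// anyhow = { version = "1", features = ["backtrace"] }
use anyhow::{Context, Result};

fn read_metadata() -> Result<String> {
    std::fs::read_to_string("metadata")
        .context("failed to read timeline metadata")
}

fn main() {
    if let Err(err) = read_metadata() {
        // `{}` prints only the outermost message and `{:#}` the flattened
        // chain, while `{:?}` prints the chain plus a backtrace when the
        // `backtrace` feature is enabled and RUST_BACKTRACE=1 is set.
        eprintln!("{:?}", err);
    }
}
```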
bojanserafimov
5e0f39cc9e Add proxy metrics (#1093) 2022-01-13 20:34:30 -05:00
Arthur Petukhovsky
0a34a592d5 Bump vendor/postgres (#1120) 2022-01-13 20:28:37 +03:00
Heikki Linnakangas
19aaa91f6d Timeline IDs are not globally unique, fix some code that assumed that.
A timeline ID is only guaranteed to be unique for a particular tenant,
so you need to use tenant ID + timeline ID as the key, rather than just
timeline ID.

The safekeeper currently makes the same assumption, and we should fix that
too, but this commit just addresses this one case in the page server.

In passing, reorder some function arguments to be more consistent.
2022-01-13 18:45:30 +02:00
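A minimal sketch of the keying change, with simplified stand-ins for the real ZTenantId/ZTimelineId types:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct TenantId(u128);
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct TimelineId(u128);

struct Timeline;

fn main() {
    // A timeline must be looked up by (tenant id, timeline id),
    // never by timeline id alone.
    let mut timelines: HashMap<(TenantId, TimelineId), Timeline> = HashMap::new();
    // Two tenants may legitimately use the same timeline id; with a
    // composite key they no longer collide.
    timelines.insert((TenantId(1), TimelineId(42)), Timeline);
    timelines.insert((TenantId(2), TimelineId(42)), Timeline);
    assert_eq!(timelines.len(), 2);
    println!("ok");
}
```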
Konstantin Knizhnik
404aab9373 Use mutex to prevent concurrent checkpoints (#1115)
* Use mutex to prevent concurrent checkpoints

* Fix comment
2022-01-13 17:48:24 +03:00
Konstantin Knizhnik
bc6db2c10e Implement IO metrics in VirtualFile (#1112)
* Implement IO metrics in VirtualFile

* Do not group virtual file close statistics by tenantid/timelineid

* Add comments concerning close metrics
2022-01-13 17:36:53 +03:00
Heikki Linnakangas
772d853dcf Fix race condition leading to panic in walkeeper.
The walkeeper launches two threads for each connection, and uses a guard
object to remove the entry from the 'replicas' array when it finishes. But only
the background thread held onto the guard object, so if the background
thread finished before the other thread, the array entry would be
removed prematurely, which led to a panic in the check_stop_streaming()
call.

Fixes https://github.com/zenithdb/zenith/issues/1103
2022-01-13 11:21:11 +02:00
Arseny Sher
ab4d272149 Add safekeeper --dump-control-file option.
Hex-encode zids there for better output; since Serde doesn't support several
formats for one struct, the on-disk representation is changed as well; make
upgrade.rs cope with it.
2022-01-12 19:47:24 +03:00
Konstantin Knizhnik
f70a5cad61 Fix releasing of timelines lock (#1100)
refer #1087
2022-01-12 15:05:08 +03:00
anastasia
7aba299dbd Use safekeeper in test_branch_behind (#1068)
to avoid a subtle race condition.

Without a safekeeper, walreceiver reconnection can get stuck
because of an IO deadlock between walsender auth and a regular backend.
2022-01-12 14:38:04 +03:00
Kirill Bulatov
4b3b19f444 Support prefixes when working with s3 buckets 2022-01-11 15:44:50 +02:00
Kirill Bulatov
8ab4c8a050 Code review fixes 2022-01-11 15:44:23 +02:00
Kirill Bulatov
7c4a653230 Propagate Zenith CLI's RUST_LOG env var to subprocesses 2022-01-11 15:44:23 +02:00
Kirill Bulatov
a3cd8f0e6d Add the remote storage test 2022-01-11 15:44:23 +02:00
Kirill Bulatov
65c851a451 Test pageserver's timeline http methods
2022-01-11 15:44:23 +02:00
Kirill Bulatov
23cf2fa984 Properly shutdown storage sync loop 2022-01-11 15:44:23 +02:00
Kirill Bulatov
ce8d6ae958 Allow using remote storage in tests 2022-01-11 15:44:23 +02:00
Kirill Bulatov
384b2a91fa Pass generic pageserver params through zenith cli 2022-01-11 15:44:23 +02:00
Arseny Sher
233c4811db Fix default safekeeper http port. 2022-01-11 10:13:27 +03:00
Konstantin Knizhnik
2fd4c390cb Do not hold timelines lock during GC (#1089)
* Do not hold timelines lock during GC
refer #1087

* Add gc_cs mutex for preventing creation of new timelines during GC

* Make clippy happy

* Use Mutex<()> instead of Mutex<i32> for GC critical section
2022-01-10 14:41:15 +03:00
bojanserafimov
5b9391b51d Support "query cancel" in proxy (#1052) 2022-01-05 17:27:12 -05:00
Arthur Petukhovsky
5a6405848d Bump vendor/postgres (#1086) 2022-01-05 14:27:51 +03:00
Patrick Insinger
191d9d2b74 par_fsync - use VirtualFile 2022-01-04 20:40:57 -08:00
Patrick Insinger
24c8dab86f pageserver - parallelize checkpoint fsyncs 2022-01-04 20:40:57 -08:00
Heikki Linnakangas
55a4cf64a1 Refactor WAL record handling.
Introduce the concept of a "ZenithWalRecord", which can be a Postgres WAL
record that is replayed with the Postgres WAL redo process, or a built-in
type that is handled entirely by pageserver code.

Replace the special code to replay Postgres XACT commit/abort records
with new Zenith WAL records. A separate zenith WAL record is created for
each modified CLOG page. This allows removing the 'main_data_offset'
field from stored PostgreSQL WAL records, which saves some memory and
some disk space in delta layers.

Introduce zenith WAL records for updating bits in the visibility map.
Previously, when e.g. a heap insert cleared the VM bit, we duplicated the
heap insert WAL record for the affected VM page. That was very wasteful.
The heap WAL record could be massive, containing a full page image in
the worst case. This addresses github issue #941.
2022-01-04 11:26:37 +02:00
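A hypothetical sketch of the dispatch this enables (variant names and payloads are illustrative, not the actual pageserver types): Postgres records are shipped to the WAL redo process, while built-in records are applied directly by pageserver code.

```rust
enum ZenithWalRecord {
    // Raw Postgres WAL record bytes, replayed by the Postgres redo process.
    Postgres { rec: Vec<u8> },
    // Built-in record types, applied natively without invoking Postgres.
    ClogSetCommitted { xids: Vec<u32> },
    ClogSetAborted { xids: Vec<u32> },
}

fn apply_record(page: &mut [u8; 8192], record: &ZenithWalRecord) {
    match record {
        ZenithWalRecord::Postgres { rec } => {
            // hand `rec` to the WAL redo process (elided)
            let _ = rec;
        }
        ZenithWalRecord::ClogSetCommitted { xids } | ZenithWalRecord::ClogSetAborted { xids } => {
            // set the status bits for each xid on this CLOG page (elided)
            let _ = (page, xids);
        }
    }
}

fn main() {
    let mut page = [0u8; 8192];
    apply_record(&mut page, &ZenithWalRecord::ClogSetCommitted { xids: vec![1234] });
    println!("applied");
}
```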
Heikki Linnakangas
722667f189 Add test case for performance issue #941.
The first COPY generates about 230 MB of write I/O, but the second
COPY, after deleting most of the rows and vacuuming the rows away,
generates 370 MB of writes. Both COPYs insert the same amount of data,
so they should generate roughly the same amount of I/O. This commit
doesn't try to fix the issue, just adds a test case to demonstrate it.

Add a new 'checkpoint' command to the pageserver API. Previously,
we've used 'do_gc' for that, but many tests, including this new one,
really only want to perform a checkpoint and don't care about GC. For
now, I only used the command in the new test, though, and didn't
convert any existing tests to use it.
2022-01-04 11:26:37 +02:00
Arseny Sher
25a515b968 Don't call immediately on resume in callmemaybe.
It creates a busy loop if the pageserver <-> safekeeper connection fails after it was
established (e.g. currently due to a 'segment checkpoint not found' error on
the pageserver).

Also wake up the callmemaybe thread regularly, once per recall_period, regardless of
channel activity.
2022-01-03 20:44:36 +03:00
Konstantin Knizhnik
1c47fbae81 Do not write image layers during enforced checkpoint (#1057)
* Do not write image layers during enforced checkpoint
refer #1056

* Add Flush option to CheckpointConfig

refer #1057
2022-01-01 19:08:09 +03:00
Alexey Kondratov
8f0cd7fb9f [compute_tools] Switch cluster_id in spec to string (zenithdb/console#72) 2021-12-29 16:35:29 +03:00
Dmitry Rodionov
c910132d4b Fix wal receiver shutdown
This patch allows shutting down the wal receiver when there are no messages
and the wal receiver is blocked inside tokio-postgres; in that case it
cannot check the shutdown flag.

The patch switches to using the async interface of tokio-postgres directly,
without sync wrappers. This opens the possibility of using tokio::select!
between physical_stream.next() and a shutdown channel to interrupt the
replication process.

This also allows shutting down only a particular wal receiver, without
using the global shutdown_requested flag.
2021-12-29 14:42:29 +03:00
Arthur Petukhovsky
70778058d9 Add test for safekeeper setup without pageserver (#1000) 2021-12-29 12:58:27 +03:00
nikitashamgunov
a379b45257 Update README.md 2021-12-28 14:26:42 -08:00
bojanserafimov
24eca8d58b Parse cancel message in pq_proto (#1060) 2021-12-28 16:43:44 -05:00
Bojan Serafimov
1e3ddd43bc Add struct for key data 2021-12-28 22:40:22 +03:00
Bojan Serafimov
989371493b Add BeMessage::BackendKeyData variant 2021-12-28 22:40:22 +03:00
Alexey Kondratov
f64074c609 Move compute_tools from console repo (zenithdb/console#383)
Currently it's included with minimal changes and lives outside the main
workspace. Later we may re-use and combine common parts with zenith
control_plane.

This change is mostly needed to unify cloud deployment pipeline:
1.1. build compute-tools image
1.2. build compute-node image based on the freshly built compute-tools
2. build zenith image

So we can roll out a new compute image and the new storage it requires to
operate properly. It also becomes easier to test the console against some
specific version of compute-node/-tools.
2021-12-28 20:17:29 +03:00
anastasia
eba897ffe7 Send a CallmeEvent::Unsubscribe request only when the pageserver is caught up with the safekeeper and it's time to stop streaming 2021-12-28 17:50:48 +03:00
anastasia
5ef2b1baf7 Add new test illustrating issue with sync-safekeepers.
If safekeepers sync fast enough, the callmemaybe thread may never make a call before receiving the Unsubscribe request. This leads to a situation where the pageserver lacks data that exists on the safekeepers.
2021-12-28 17:50:48 +03:00
Kirill Bulatov
f0afd08667 Fix zenith init defaults 2021-12-28 00:21:48 +02:00
Kirill Bulatov
b494ac1ea0 Remove redundant pageserver cli params 2021-12-27 18:38:54 +02:00
Arseny Sher
a163650a99 Refactor Postgres command parsing in safekeeper.
Do it separately, producing a SafekeeperPostgresCommand enum as the result. Since the query is
always a C string, switch the postgres_backend process_query argument from Bytes to
&str.

Make passing the ztli/ztenant id in the safekeeper connection string optional; this is
needed for the upcoming intra-safekeeper heartbeat command, which is not bound to any
timeline.
2021-12-24 15:48:13 +03:00
anastasia
980f5f8440 Propagate remote_consistent_lsn to safekeepers.
Change meaning of lsns in HOT_STANDBY_FEEDBACK:
flush_lsn = disk_consistent_lsn,
apply_lsn = remote_consistent_lsn
Update compute node backpressure configuration accordingly.

Update compute node configuration:
set 'synchronous_commit=remote_write' in the setup without safekeepers.
This way the compute node doesn't have to wait for a data checkpoint on the pageserver.
This doesn't guarantee data durability, but we only use this setup for tests, so that's fine.
2021-12-24 15:32:54 +03:00
Kirill Bulatov
42647f606e Use correct pageserver CLI parameters in docker entrypoint 2021-12-24 03:41:45 +02:00
bojanserafimov
b807570f46 Use parking_lot::Mutex instead of std::Mutex in walreceiver (#1045) 2021-12-23 14:25:44 -05:00
Kirill Bulatov
114a757d1c Use generic config parameters in pageserver cli
Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>
2021-12-23 18:58:28 +02:00
Andrey Taranik
9854ded56b Feature/proxy deploy (#1046)
* zenith proxy deployment

* proxy deploy ci fix

* ci cleanup or zenith proxy deploy
2021-12-23 15:53:28 +03:00
Heikki Linnakangas
fdd987c3ad Refactor the way Image- and DeltaLayers are created
Introduce builder objects, DeltaLayerWriter and ImageLayerWriter.
This gives more flexibility, as the DeltaLayer::create and
ImageLayer::create functions don't need to know about the details of
the format of where the page versions are coming from. This allows us
to change the format used in InMemoryLayer more easily, without having
to modify Delta- and ImageLayer code.

Also refactor the code in InMemoryLayer::write_to_disk for clarity.
2021-12-23 00:33:16 +02:00
Heikki Linnakangas
da62407fce Change the meaning of 'blknum' argument in Layer trait
Previously, the 'blknum' argument of various Layer functions was the
block number within the overall relation. That was pretty confusing,
because an individual layer only holds data from a one segment of the
relation. Furthermore, the 'put_truncation' function already dealt
with per-segment size, not overall relation size, adding to the
confusion.

Change the meaning of the 'blknum' argument to mean the block number
within the segment, not the overall relation.
2021-12-22 16:55:37 +02:00
Heikki Linnakangas
1cc181ca32 Fix WAL redo of commit records with subtransactions.
If a commit record contains XIDs that are stored on different CLOG pages,
we duplicate the commit record for each affected CLOG page. In the redo
routine, we must only apply the parts of the record that apply to the
CLOG page being restored. We got that right in the loop that handles the
sub-XIDs, but incorrectly always set the bit that corresponds to the main
XID.
2021-12-21 23:08:01 +02:00
Heikki Linnakangas
927587cec8 Fix comments in tests 2021-12-21 22:38:33 +02:00
Heikki Linnakangas
bcf80eaa95 Fix multixacts members WAL redo.
The logic to compute the page number was broken, and as a result, only
the first page of multixact members was updated correctly. All the
rest were left as zeros. Improve test_multixact.py to generate more
multixacts, to cover this case.

Also fix the check that the restored PG data directory matches the
original one. Previously, the test compared the 'pg_new' cluster,
which is a bit silly because the test restored the 'pg_new' cluster
only a few lines earlier, so if the multixact WAL redo is somehow
broken, the comparison will just compare two broken data directories
and report success. Change it to compare the original datadir, the one
where the multixacts were originally created, with a restored image of
the same.
2021-12-21 17:50:06 +02:00
Arthur Petukhovsky
f56db3da68 Bump vendor/postgres (#996) 2021-12-21 16:53:08 +03:00
Konstantin Knizhnik
68aa9d2715 Set utf8 encoding in initdb (#993)
refer #992
2021-12-21 15:43:34 +03:00
Konstantin Knizhnik
76777f5812 Add utility for dumping/editing metadata file (#1031) 2021-12-21 15:43:15 +03:00
Arseny Sher
56312522f9 Make safekeeper namings more consistent with reality.
s/send_wal.rs/handler.rs
s/SendWalHandler/SafekeeperPostgresHandler
s/replication.rs/send_wal.rs
2021-12-21 13:24:23 +03:00
Dmitry Rodionov
2d9d0658e8 adjust benchmarking script for go console 2021-12-20 13:54:10 +03:00
anastasia
3b61f364f7 Stop WAL streaming threads, when compute node is shut down.
WAL streaming uses 2 connections:
1. Compute node (walproposer) -> Safekeeper (ReceiveWalConn module)

When the compute node is shut down, the safekeeper needs to stop the respective receiving thread.
Prior to this PR it didn't work, because PostgresBackend hadn't handled disconnection properly.

2. Safekeeper (ReplicationConn module) -> pageserver (walreceiver thread)

When the incoming WAL stream is gone, the safekeeper can stop streaming WAL and cancel the connection as soon as the replica is caught up.
Note that WAL can be streamed to multiple replicas simultaneously; only disconnect the ones that are caught up to the last_received_lsn.
2021-12-20 12:34:28 +03:00
anastasia
90e5b6f983 Don't try to reconnect a failed walreceiver. If necessary, the wal service will send a new callmemaybe request 2021-12-20 12:34:28 +03:00
Heikki Linnakangas
75cbaafb96 Remove old ephemeral files on pageserver restart.
The ephemeral files are not usable after restart, so just delete them.
Before this, you got "unrecognized filename in timeline dir" warnings
about them, as Konstantin noted at:
https://github.com/zenithdb/zenith/issues/906#issuecomment-995530870.

While we're at it, refactor away the list_files() function, moving the
logic fully into the caller. Seems more straightforward.
2021-12-17 00:00:02 +02:00
Andrey Taranik
5d5c2738a6 staging deployment flow fix (#1029) 2021-12-16 22:54:01 +03:00
Andrey Taranik
cbe155ff48 storage CI flow for staging environment (#1003)
* storage CI flow for staging environment

* prevent deploying a version older than the one already deployed
2021-12-16 17:05:20 +03:00
Kirill Bulatov
29143b018e Disable rustc incremental compilation to avoid ICEs 2021-12-15 21:57:34 +03:00
Heikki Linnakangas
d8a367dd32 Remove dead code, fix typos. 2021-12-15 19:58:03 +02:00
Kirill Bulatov
ca60561a01 Propagate disk consistent lsn in timeline sync statuses 2021-12-15 15:13:21 +02:00
Andrey Taranik
86a409a174 cleanup circleci config after test 2021-12-15 16:08:31 +03:00
Andrey Taranik
66242f0d0e tag docker image by commit sha and add docker build for compute 2021-12-15 16:08:31 +03:00
Heikki Linnakangas
7f78e80c51 Refactor WAL ingestion code.
Rename save_decoded_record() to ingest_record(), and move the
responsibility for decoding the record into ingest_record().

Also move the responsibility of updating the CheckPoint relish to
ingest_record(). Put it in a new WalIngest struct, to help with tracking
that.
2021-12-14 20:24:03 +02:00
Heikki Linnakangas
f8f88154d5 Split restore_local_repo.rs into two files, with more descriptive names. 2021-12-14 20:24:03 +02:00
Kirill Bulatov
5cff7d1de9 Use proper download order 2021-12-14 15:32:22 +02:00
Arseny Sher
8f0cafd508 Grab safekeeper.lock on the whole directory instead of per tli.
closes #976
2021-12-13 22:11:04 +03:00
Heikki Linnakangas
e0d41ac6a3 Move constants related to metadata file to metadata.rs.
They're not used anywhere else, so seems like a better place.
2021-12-13 16:57:16 +02:00
Heikki Linnakangas
72ef59c378 Fix small typos in comments, add a comment.
The introductory paragraph of the README could use some more love, but let's at
least fix the typos.
2021-12-13 13:44:08 +02:00
Kirill Bulatov
673c297949 Download timelines on demand 2021-12-10 17:23:35 +02:00
Kirill Bulatov
e61732ca7c Compress checkpoint files before streaming into S3 2021-12-10 17:23:35 +02:00
Heikki Linnakangas
cb4a8396fb Use rustls rather than native-tls in all dependencies.
We depend on rustls in postgres_backend anyway, so we might as well use it
for all TLS stuff. It seems better to depend on only one library, both from a
security point of view and because fewer dependencies mean less code to
compile. With this commit, we no longer depend on OpenSSL.
2021-12-10 15:14:27 +02:00
Heikki Linnakangas
c77e30116e Split waldecoder.rs into two source files.
Move the code for decoding a WAL stream into WAL records into
'postgres_ffi', and keep the code to parse the WAL records deeper in
'pageserver' crate, renamed to walrecord.rs.

This tidies up the dependencies a bit. 'walkeeper' reuses the same
waldecoder routines, and it used to depend on 'pageserver' because of
that. Now it only depends on 'postgres_ffi'.

(The comment in walkeeper/Cargo.toml that claimed that the dependency was
needed for ZTimelineId was obsolete. ZTimelineId is defined in
'zenith_utils', the dependency was actually needed for the waldecoder.)
2021-12-10 15:14:13 +02:00
Heikki Linnakangas
9d369f158c Update rust-s3 to version 0.28.0
0.28.0 includes two changes I submitted to upstream:

- Add support for older ListObjects API, needed to use rust-s3 with Google
  Cloud Storage: https://github.com/durch/rust-s3/pull/229

- If file is smaller than one chunk, don't initiate multi-part upload.
  https://github.com/durch/rust-s3/pull/228

These are not critical for Zenith right now, but let's stay up-to-date.
2021-12-10 14:52:08 +02:00
Heikki Linnakangas
6ecd442fb9 Remove a bunch of unnecessary dependencies. 2021-12-10 14:24:33 +02:00
Heikki Linnakangas
f3f059c1f8 Fix a few cases where request beyond end of rel would error out.
Currently, we return an all-zeros page if you request a block beyond end of
a relation. That has been implemented in LayeredTimeline::materialize_page,
so that if Layer::get_page_reconstruct_data returns Missing, it returns
an all-zeros page.

However InMemoryLayer and DeltaLayer would return Continue, not Missing,
in that case, and materialize_page would try to find the predecessor
layer. If there was a preceding image layer, then everything would still
work, but if there wasn't, it would return a "could not find predecessor
of layer" error. Fix that in InMemoryLayer and DeltaLayer, making them
check the size of the relation and return Missing in that case.

This is hard to reproduce at the moment, but it happened quickly with
pgbench when I modified InMemoryLayer::write_to_disk so that it didn't
always create a new ImageLayer.
2021-12-09 17:46:48 +02:00
Dmitry Ivanov
8388e14bbd [scripts/git-upload] Fix logic of --forbid-overwrite 2021-12-09 14:06:17 +03:00
anastasia
5293e183c5 callmemaybe. review code cleanup 2021-12-09 13:31:49 +03:00
anastasia
93ff5f7ff0 Add default value for safekeeper --recall option. DEFAULT_RECALL_PERIOD is 1 second. 2021-12-09 13:31:49 +03:00
anastasia
41dce68bdd callmemaybe refactoring
- Don't spawn a separate thread for each connection.
Instead, use one thread per safekeeper that iterates over all connections and sends callback requests for them.

- Use tokio postgres to connect to the pageserver, to avoid spawning a new thread for each connection.

callmemaybe review fixes:
- Spawn all request_callback tasks separately.
- Remember 'last_call_time' and only send request_callback if 'recall_period' has passed.
- If a task hasn't finished by the next recall, abort it and try again.
- Add pause/resume CallmeEvents to avoid spamming pageserver when connection already established.
2021-12-09 13:31:49 +03:00
Dmitry Rodionov
7dece8e4a0 skip temporary table files when comparing directories in regress tests 2021-12-09 12:53:26 +03:00
Arseny Sher
37c85d5fd9 Switch safekeeper from log to tracing logging.
Add context to wal acceptor and wal sender threads showing timeline id and
unique id differentiating them.
2021-12-09 06:57:46 +03:00
nikitashamgunov
6094236171 Update README.md 2021-12-08 11:55:54 -08:00
anastasia
bb5aba42eb bump vendor/postgres to use correct backpressure commit 2021-12-08 18:57:18 +03:00
Arthur Petukhovsky
450fb9eafe Don't persist control file without sync (#966) 2021-12-07 15:02:44 +03:00
Dmitry Rodionov
557e3024cd Forward pageserver connection string from compute to safekeeper
This is needed for the implementation of tenant rebalancing. With this
change the safekeeper becomes aware of which pageserver is supposed to be
used for replication from this particular compute.
2021-12-06 21:28:49 +03:00
Arseny Sher
bd34d7ecfc Bump safekeeper control file version and allow reading the previous one.
Should have been part of cba4da3f4d, to provide an upgrade path for previously
existing clusters. Separates the version-independent header (magic + version) out of
SafeKeeperState to choose what to deserialize.
2021-12-06 19:47:55 +03:00
Dmitry Ivanov
0a8c672630 [CI] Fix benchmarks
Too bad we don't have a --dry-run in PRs :(
2021-12-06 13:52:28 +03:00
Dmitry Ivanov
b87ab17d05 Bump rust version to 1.56.1
Apparently, code coverage doesn't work that well in 1.55.
2021-12-06 13:27:52 +03:00
Dmitry Ivanov
d874675955 Collect coverage in CI 2021-12-06 13:27:52 +03:00
Dmitry Ivanov
5d37560308 Add bespoke glue script leveraging LLVM coverage tools 2021-12-06 13:27:52 +03:00
Dmitry Ivanov
7cec13d1df Improve shutdown story for code coverage
This patch introduces fixes for several problems affecting
LLVM-based code coverage:

* Daemonizing parent processes should call _exit() to prevent
coverage data file corruption (*.profraw) due to concurrent writes.

* Implement proper shutdown handlers in safekeeper.
2021-12-06 13:27:52 +03:00
anastasia
b7685eb6ba Enable backpressure 2021-12-06 12:49:42 +03:00
anastasia
c7f3b4e62c Clarify the meaning of StandbyReply LSNs:
write_lsn - The last LSN received and processed by pageserver's walreceiver.
flush_lsn - same as write_lsn. At the pageserver it doesn't guarantee data persistence, but that's fine; we rely on safekeepers.
apply_lsn - The LSN up to which the pageserver guarantees persistence of all received data (disk_consistent_lsn).
2021-12-06 12:49:42 +03:00
Heikki Linnakangas
5bad2deff8 Don't hold 'timelines' lock over checkpoint.
It was very noticeable that while the checkpointer was busy, you
could not e.g. open a new connection.
2021-12-03 07:42:10 -05:00
Arseny Sher
d39608c367 Fix passing start_offset to find_end_of_wal_segment. 2021-12-03 12:43:57 +03:00
Arseny Sher
cba4da3f4d Add term history to safekeepers.
Persist the full history of term switches on safekeepers instead of storing only the
single term of the highest entry (called the epoch). This allows easily and
correctly finding the divergence point of two logs and truncating the obsolete part
before overwriting it with entries of the newer proposer(s).

The full history of the proposer is transferred in a separate message before the proposer
starts streaming; it is immediately persisted by the safekeeper, though it might not
yet have entries for some older terms there. That's because we can't atomically
append to WAL and update the control file anyway, so locally available WAL must
be taken into account when looking at the history.

We should sometimes purge term history entries beyond truncate_lsn; this is not
done here.

Per https://github.com/zenithdb/rfcs/pull/12

Closes #296.

Bumps vendor/postgres.
2021-12-03 12:43:57 +03:00
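A rough sketch of the idea with made-up types (not the safekeeper's actual control file format): the history is a list of (term, start LSN) switch points, and the last point two histories share bounds the WAL that can be kept before truncation.

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
struct TermSwitch {
    term: u64,
    lsn: u64, // LSN at which this term starts
}

// Returns the last switch point shared by both histories; WAL the safekeeper
// holds past that point may belong to obsolete terms and must be truncated
// before accepting entries from the newer proposer.
fn last_common_point(ours: &[TermSwitch], theirs: &[TermSwitch]) -> Option<TermSwitch> {
    ours.iter()
        .zip(theirs.iter())
        .take_while(|(a, b)| a == b)
        .map(|(a, _)| *a)
        .last()
}

fn main() {
    let ours = [
        TermSwitch { term: 1, lsn: 0 },
        TermSwitch { term: 2, lsn: 0x1000 },
        TermSwitch { term: 4, lsn: 0x2000 },
    ];
    let theirs = [
        TermSwitch { term: 1, lsn: 0 },
        TermSwitch { term: 2, lsn: 0x1000 },
        TermSwitch { term: 3, lsn: 0x1800 },
    ];
    // The logs diverge somewhere after the term-2 switch at LSN 0x1000.
    println!("{:?}", last_common_point(&ours, &theirs));
}
```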
Dmitry Rodionov
2669d140f8 use full commit sha for version info
for builds in docker this is not needed, since environment variable
with commit sha already contains full version
2021-12-01 17:35:57 +03:00
Heikki Linnakangas
f49ad33f1b Initialize 'loaded' correctly in DeltaLayer.
While we're at it, reuse the Book and the VirtualFile that's backing
it even over unload() calls. Previously, we would keep the Book open,
but on load(), we would re-open it anyway, which didn't make much
sense. Now we reuse it. Alternatively, perhaps we should close it
on unload() to save some memory, but I'm not going to think too
hard about that right now, as the whole load/unload thing is a bit of a
hack and needs to be rewritten.

This is hard to reproduce ATM, because the incorrect state would get
fixed by an unload(). A checkpoint creates the DeltaLayer, and it also
calls unload() afterwards, so the window is not very large. I hit it
occasionally with a scale 1000 pgbench test, after I had modified
InMemoryLayer::write_to_disk() to not write an image layer every time,
which made the DeltaLayers be accessed more often.
2021-11-30 22:23:59 +02:00
Kirill Bulatov
670205e17a Evict excessively failing sync tasks, improve processing for the rest of
the tasks
2021-11-30 13:58:49 +02:00
Konstantin Knizhnik
f72d4814b1 Extract page images from FPI WAL records (#949)
* Extract page images from FPI WAL records

* Fix issues reported in review
2021-11-30 12:57:26 +03:00
Heikki Linnakangas
5ecf0664cc Fix off-by-one error in check for future delta layers.
This doesn't show up at the moment, because we never create a delta
layer with end-LSN equal to the last LSN. We always create an image
layer at that LSN instead. For example, if the latest processed LSN is
100, we would create a delta layer with end LSN 100 (exclusive), and
an image layer at 100. But that's just how InMemoryLayer::write_to_disk
happens to work at the moment, there's no fundamental reason it needs
to always create that image layer. I noticed this bug when I tried to
change the logic in InMemoryLayer::write_to_disk to only create an
image layer after a few delta layers.
2021-11-29 14:35:24 +02:00
Heikki Linnakangas
7cae265447 Fix dump_layerfile.
The VirtualFile machinery panics if it's not initialized
2021-11-29 11:26:54 +02:00
Heikki Linnakangas
5aa969a588 Replace in-memory layers and OOM-triggered eviction with temp files.
The "in-memory layer" is misnomer now, each in-memory layer is now actually
backed by a file. The files are ephemeral, in that they don't survive page
server crash or shutdown.

To avoid reading the file for every operation,
"ephemeral files" are cached in a page cache.

This includes changes from the 'inmemory-layer-chunks' branch to serialize
the page versions when they are added to the open layer. The difference is
that they are not serialized to the expandable in-memory "chunk buffer", but
written out to the file.
2021-11-26 17:25:17 +03:00
Arthur Petukhovsky
93cc40584d Shutdown socket on CopyFail (#938)
Fixes #935
2021-11-26 16:48:27 +03:00
Dmitry Rodionov
130184fee9 Prohibit branch creation and basebackup at out of scope lsns
Out of scope LSNs include pre-initdb LSNs and LSNs prior to
latest_gc_cutoff.

Getting there also involved two cleanups:
* Fix error handling in the Execute message handler. This fixes the behaviour
  when basebackup returned an error; previously the pageserver thread just
  died.
* Remove the "ancestor" file, which previously contained the ancestor id and
  branch lsn. The same data can now be obtained from the metadata file.
  Also, the way we handled the ancestor file in the code meant that when
  branching failed, a timeline directory was created with no data in it
  except the ancestor file, which confused gc because it scans
  directories. So it is better to just remove the ancestor file and clean up
  the timeline directory creation so it happens after all validity
  checks have passed.
2021-11-25 15:27:16 +03:00
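A minimal sketch of the validity check described above (names are illustrative):

```rust
// Branch creation and basebackup are refused for LSNs that predate initdb
// or that GC may already have removed.
fn check_lsn_is_in_scope(lsn: u64, initdb_lsn: u64, latest_gc_cutoff_lsn: u64) -> Result<(), String> {
    if lsn < initdb_lsn {
        return Err(format!("LSN {:X} is earlier than initdb LSN {:X}", lsn, initdb_lsn));
    }
    if lsn < latest_gc_cutoff_lsn {
        return Err(format!(
            "LSN {:X} has already been garbage collected (cutoff {:X})",
            lsn, latest_gc_cutoff_lsn
        ));
    }
    Ok(())
}

fn main() {
    // e.g. a branch requested at an already-collected LSN is rejected here
    println!("{:?}", check_lsn_is_in_scope(0x1000, 0x2000, 0x8000));
}
```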
Heikki Linnakangas
d47f610606 Fix pageserver CLI parameter names and document them 2021-11-25 13:31:52 +02:00
Dmitry Rodionov
0650e51f0b add test one more case for layer visibility 2021-11-22 11:39:20 +03:00
Dmitry Rodionov
737a557f09 add check to python tests that after gc the number of rows is unchanged in all branches 2021-11-22 11:39:20 +03:00
Dmitry Rodionov
6f7ebe6e01 preserve data in parent branch that might be referenced in child branch 2021-11-22 11:39:20 +03:00
Dmitry Rodionov
70ab0d5b1f add missing script 2021-11-19 00:10:40 +03:00
Dmitry Rodionov
6ac76248cf Save performance test results from performance test suite runs.
Also render reports for both staging and local runs.
2021-11-19 00:00:19 +03:00
Kirill Bulatov
b32da3b42e Use less pageserver-specific method in RemoteStorage trait 2021-11-18 22:53:40 +02:00
Dmitry Ivanov
0ccfc62e88 [proxy] Pass PostgreSQL version to client
Fixes #779
2021-11-17 16:28:44 +03:00
Dmitry Ivanov
b55cf773a8 [proxy] Streamline control- and dataflow 2021-11-17 16:28:44 +03:00
Dmitry Ivanov
43ded1c54b [proxy] Minor cleanup 2021-11-17 16:28:44 +03:00
Heikki Linnakangas
f8702d4625 Fix checking for whether segment exists on a frozen in-memory layer.
Ever since we've had frozen in-memory layers, having an 'end_lsn' no
longer means that the layer has been dropped. Need to check the 'dropped'
flag explicitly.

This was reliably causing a failure on the new 'test_parallel_copy' test
in https://github.com/zenithdb/zenith/pull/864. I'm not sure why it
doesn't happen on main branch, but the bug is pretty straightforward when
you see it.
2021-11-15 20:19:15 +02:00
Dmitry Rodionov
44111e3ba3 Prohibit branch creation at lsn that was already garbage collected.
This introduces a new timeline field, latest_gc_cutoff. It is updated
before each gc iteration. A new check is added to branch_timelines to
prevent branch creation with a start point less than latest_gc_cutoff.
This also adds a check to get_page_at_lsn which asserts that the lsn at
which the page is requested has not been garbage collected. This check
is currently triggered for readonly nodes, which are pinned to a specific
lsn; because they are not tracked, pageserver garbage collection
can remove data that might still be referenced. This is a bug and will
be fixed separately.
2021-11-15 20:03:16 +03:00
Patrick Insinger
298bc588f9 pageserver - don't try to GC InMemoryLayers 2021-11-15 09:01:45 -08:00
Heikki Linnakangas
4ba521f53f Add performance test case for parallel COPY TO 2021-11-15 14:49:53 +02:00
Heikki Linnakangas
431d32756b Add a buffer cache, and use it to store materialized pages.
The buffer cache is shared across all tenants, allowing memory to be
dynamically allocated where it's needed the most. The cache works on 8 kB
pages, and uses the clock algorithm for replacement policy; same as the
PostgreSQL buffer cache.

One peculiarity is that the materialized page versions can be looked up
by an inexact LSN, to find the latest page version with an LSN >= the
search key.

The code is structured to support caching other kinds of pages in the same
cache in the future, but with a different mapping key.

Co-authored-by: Patrick Insinger <patrick@zenith.tech>
2021-11-12 11:02:12 -08:00
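A compact sketch of clock replacement over fixed-size slots, the policy the commit describes; the real cache also has the (key, LSN) mapping, pinning, and so on, which are elided here.

```rust
struct Slot {
    occupied: bool,
    usage: bool, // "referenced" bit: set on access, cleared by the clock hand
}

struct ClockCache {
    slots: Vec<Slot>,
    hand: usize,
}

impl ClockCache {
    fn new(n: usize) -> Self {
        Self { slots: (0..n).map(|_| Slot { occupied: false, usage: false }).collect(), hand: 0 }
    }

    // Pick a victim slot: sweep the hand, giving each recently-used slot a
    // second chance by clearing its usage bit.
    fn evict(&mut self) -> usize {
        loop {
            let i = self.hand;
            self.hand = (self.hand + 1) % self.slots.len();
            let slot = &mut self.slots[i];
            if !slot.occupied || !slot.usage {
                slot.occupied = true;
                slot.usage = true;
                return i;
            }
            slot.usage = false;
        }
    }

    fn touch(&mut self, i: usize) {
        self.slots[i].usage = true;
    }
}

fn main() {
    let mut cache = ClockCache::new(4);
    let a = cache.evict();
    let b = cache.evict();
    cache.touch(a);
    println!("filled slots {a} and {b}, next slot handed out: {}", cache.evict());
}
```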
Heikki Linnakangas
3d172d98a3 Improve layered repo README.
Add an informal overview of how it works.
2021-11-12 19:59:31 +02:00
Heikki Linnakangas
849ac791a6 Bandaid fix for "page not found" errors, when a table is loaded.
During parallel load of a table, Postgres sometimes requests a page from
the page server for which no WAL has been generated yet. That's normal;
Postgres expects the page to be full of zeros. There was a special case
for that in LayeredTimeline::materialize_page, but the problem remained
when you're crossing a segment boundary, so that there's no layer for
the segment at all.

It would be nice to have a more robust cross-check for this case. That
might need help from the Postgres side. But this extends the bandaid fix
we had in materialize_page() to the case where we cross a segment boundary.

Fixes https://github.com/zenithdb/zenith/issues/841
2021-11-12 18:47:39 +02:00
Alexey Kondratov
de5e6a15ae Set LD_LIBRARY_PATH in the check_restored_datadir_content() psql call
Otherwise we may use outdated system libpq.
Also print stdout/stderr if basebackup failed in check_restored_datadir_content()
2021-11-12 16:27:43 +03:00
Alexey Kondratov
0d6bf14ecb Use vendor/postgres rebased on top of REL_14_1 2021-11-12 16:27:43 +03:00
Heikki Linnakangas
d1e79c4af3 Fix locking issues in VirtualFile machinery.
There were two separate locking issues that could lead to a deadlock,
both related to holding a lock for longer than necessary:

1. In the loop in `VirtualFile::with_file`, the "handle_guard" was
held across iterations of the loop. Because of that, if the handle was
changed by a concurrent thread, the loop would try to acquire the
handle lock, when it was still holding the lock from previous
iteration. To fix, release the lock earlier. There was no need to hold
it across iterations, it was just accidental.

2. In the same function, we also held the "slot_guard" longer than
necessary. It's only needed in the first part of the loop, where we
check if the current handle is valid. If it's not, the slot lock can
be immediately released. But it was not, it was kept over the
acquisition of the handle lock. I'm not sure if that alone could cause
problems, but let's release the lock as soon as possible anyway.

Add a test case, based on Konstantin's test program to demonstrate the
deadlock.
2021-11-11 20:12:59 +02:00
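A schematic illustration of the first fix (not the real VirtualFile code): drop the guard that is no longer needed before taking the next lock, instead of carrying it across the loop body.

```rust
use std::sync::Mutex;

struct Slot {
    generation: u64,
}

fn with_file(slot: &Mutex<Slot>, handle: &Mutex<u64>) {
    loop {
        let current_gen = {
            // Take the slot lock only to read what we need, then release it
            // immediately instead of holding it while waiting for `handle`.
            let slot_guard = slot.lock().unwrap();
            slot_guard.generation
        }; // slot_guard dropped here

        let mut handle_guard = handle.lock().unwrap();
        if *handle_guard == current_gen {
            // handle is still valid, use it
            return;
        }
        // handle is stale: refresh it and retry; `handle_guard` is dropped at
        // the end of this iteration, not carried into the next one.
        *handle_guard += 1;
    }
}

fn main() {
    let slot = Mutex::new(Slot { generation: 1 });
    let handle = Mutex::new(0);
    with_file(&slot, &handle);
    println!("done");
}
```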
Kirill Bulatov
abb2ac5246 Better context when erroring 2021-11-11 19:22:05 +02:00
Kirill Bulatov
99dbbe5f18 Allow downloading remote files partially 2021-11-11 18:51:34 +02:00
Arseny Sher
e7ca8ef5a8 Use PG timelineid 1 everywhere.
As changing it doesn't have useful meaning in Zenith.

ref #824
2021-11-11 13:53:39 +03:00
Patrick Insinger
1ce4976e36 pageserver - track size of VecMaps 2021-11-10 11:09:34 -08:00
Heikki Linnakangas
9300107cdf Cache Book objects, use virtual files to avoid running out of fds.
Currently, whenever a page version is needed from an image or delta
layer, we open the file and read and parse the bookfile headers. That's
pretty expensive. To reduce the overhead, introduce a cache of open file
descriptors, and use that to cache the Book objects so that we don't need
to read the metadata on every access.
2021-11-10 17:19:37 +02:00
Arthur Petukhovsky
9aaa02bc9a Fix high CPU usage in walproposer (#860)
* Bump vendor/postgres

* Update time limits for test_restarts_under_load
2021-11-10 17:18:07 +03:00
Arseny Sher
5603259c53 In wal_proposer_recovery, don't wait for outgoing WAL to be committed.
Otherwise we're deadlocking ourselves. Oversight of 33007cc.
2021-11-10 01:38:25 +03:00
Arseny Sher
ce15c62f35 Fix 'send WAL up to' debug logging. 2021-11-10 01:38:25 +03:00
Egor Suvorov
eaff0cd568 Check python for the whole repository and improve docs (#813) 2021-11-09 22:23:29 +03:00
Egor Suvorov
587935ebed Add Safekeeper metrics tests (#746)
* zenith_fixtures.py: add SafekeeperHttpClient.get_metrics()
* Ensure that `collect_lsn` and `flush_lsn`'s reported values look reasonable in `test_many_timelines`
2021-11-09 22:18:59 +03:00
Dmitry Rodionov
07dddfed28 Use more robust way to persist safekeeper control file.
Now the safekeeper control file is updated in the following way:
1. Write data to a temp file
2. Fsync the temporary file (if the sync option is specified)
3. Rename the temporary file to the actual control file
4. Fsync the containing directory (if the sync option is specified)
5. Fsync the file after the rename (if the sync option is specified)

Note that step 5 is not documented anywhere as required, but it is done
this way in postgres (see durable_rename).

Also, because of the rename machinery, switch to using a dedicated lock file
to prevent running several safekeepers concurrently on the same data.
2021-11-09 17:51:46 +03:00
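A sketch of that write-temp-then-rename sequence (simplified: no lock file, all fsyncs unconditional, Unix-style directory fsync):

```rust
use std::fs::{self, File, OpenOptions};
use std::io::Write;
use std::path::Path;

fn persist_control_file(dir: &Path, name: &str, data: &[u8]) -> std::io::Result<()> {
    let tmp_path = dir.join(format!("{name}.tmp"));
    let final_path = dir.join(name);

    // 1. Write data to a temporary file and 2. fsync it.
    let mut tmp = OpenOptions::new().create(true).truncate(true).write(true).open(&tmp_path)?;
    tmp.write_all(data)?;
    tmp.sync_all()?;
    drop(tmp);

    // 3. Atomically rename the temporary file over the control file.
    fs::rename(&tmp_path, &final_path)?;

    // 4. Fsync the containing directory so the rename itself is durable
    //    (works on Unix, where a directory can be opened and fsynced).
    File::open(dir)?.sync_all()?;

    // 5. Fsync the file under its new name, matching Postgres' durable_rename.
    File::open(&final_path)?.sync_all()?;
    Ok(())
}

fn main() -> std::io::Result<()> {
    persist_control_file(Path::new("."), "safekeeper.control", b"example state")?;
    println!("control file persisted");
    Ok(())
}
```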
Arseny Sher
229dc7704f Bump vendor/postgres. 2021-11-08 17:32:13 +03:00
Dmitry Rodionov
067f2ac814 fix perf repo branch name 2021-11-08 13:27:23 +03:00
Dmitry Rodionov
865870a8e5 Follow up staging benchmarking
* change zenith-perf-data checkout ref to be main
* set cluster id through secrets so there are no code changes required
  when we wipe out clusters on staging
* display full pgbench output on error
2021-11-05 14:07:11 +03:00
Arthur Petukhovsky
d19263aec8 Adjust timeouts for test_restarts_under_load (#830)
* Adjust timeouts for test_restarts_under_load

* Add test timeout for test_restarts_under_load
2021-11-04 19:58:40 +03:00
Heikki Linnakangas
6d742719a1 Fix infinite loop in looking up predecessor layer
Commit 960c7d69a8 changed the LSN returned in the Continue case in
InMemoryLayer::get_page_reconstruct_data(), but neglected to make the
same change in DeltaLayer.

Also add an escape hatch to the loop in materialize_page() to avoid
getting stuck in an infinite loop, if a bug like this reoccurs.
2021-11-04 16:07:12 +02:00
Dmitry Rodionov
c75bc9b8b0 Change benchmark plugin layout so pytest loads it properly when running
all tests (not only performance ones)

resolves #837
2021-11-04 16:33:31 +03:00
Egor Suvorov
33007cc0bb Safekeeper's START_REPLICATION handler: remove stop_point, do not handle start_point == 0 (#777) 2021-11-04 14:50:33 +03:00
Dmitry Rodionov
987833e0b9 Propagate git SHA to zenith binaries
Git commit sha is displayed when --version flag is used and is written
to logs during service startup. Uses git_version crate when git is
available, and GIT_VERSION environment variable otherwise which is the case for docker
builds.
2021-11-04 14:22:29 +03:00
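One way this can be wired up (a sketch assuming the git_version crate's `fallback` argument and that docker builds export a GIT_VERSION build-time env var; not the exact zenith code):

```rust
use git_version::git_version;

// Prefer the SHA reported by `git` at build time; fall back to the GIT_VERSION
// env var, which non-git (docker) builds are assumed to export.
const GIT_VERSION: &str =
    git_version!(fallback = env!("GIT_VERSION", "set GIT_VERSION when git is unavailable"));

fn main() {
    // Printed for --version and logged at service startup.
    println!("pageserver (git: {GIT_VERSION})");
}
```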
Kirill Bulatov
f36acf00de Reduce "relish" word usages in remote storage 2021-11-04 12:53:42 +02:00
Kirill Bulatov
956fc3dec9 Tidy up and make the remote storage API consistent 2021-11-04 12:53:42 +02:00
Heikki Linnakangas
b38e841f2d Use poll() in communication with WAL redo process.
The tokio futures added some overhead, so switch to plain non-blocking
I/O with poll(). In a simple pgbench test on my laptop (select-only
queries, scale-factor 1 `pgbench -P1 -T50 -S`), this gives about 10%
improvement, from about 4300 TPS to 4800 TPS.
2021-11-04 10:39:04 +02:00
Heikki Linnakangas
3a0111c75e Refactor functions for constructing WAL redo messages.
Instead of building a separate Vec<u8> to hold each message, serialize all
the messages to one big Vec<u8>. This eliminates some Vec allocation and
memcpy() overhead. The downside is that if there are a lot of records to
replay, we have to serialize them all into one big chunk of memory.
That shouldn't be a problem in practice. If you need to replay millions
of records to reconstruct a page, we should've materialized a new image
of that page earlier already.
2021-11-04 10:39:00 +02:00
Heikki Linnakangas
086a02ab92 Add performance test for simple seq scans.
Fixes https://github.com/zenithdb/zenith/issues/831
2021-11-04 10:36:45 +02:00
Heikki Linnakangas
7ed39655dc Bump vendor/postgres 2021-11-04 10:35:50 +02:00
Dmitry Rodionov
c6172dae47 implement performance tests against our staging environment
The tests are based on a self-hosted runner which is physically close
to our staging deployment in AWS; currently the tests consist of
various configurations of pgbench runs.

These changes also rework the benchmark fixture by removing globals and
allowing reports with the desired metrics to be collected and dumped to json
for further analysis. This also applies to the usual performance tests
which use local zenith binaries.
2021-11-04 02:15:46 +03:00
Heikki Linnakangas
4ba783d0af Remove a couple of unused functions.
We might want to have custom serialize/deserialize functions for
WALRecords and PageVersions for performance reasons, see github issue 832.
But that would probably look a bit different from this, and currently
these functions are just dead.
2021-11-03 19:10:23 +02:00
Patrick Insinger
0457fe81a9 pageserver - make PageVersion an enum 2021-11-03 09:28:49 -07:00
Heikki Linnakangas
fb524dd973 Put a global limit on memory used by in-memory layers.
Adds simple global tracking of memory used by the in-memory layers. It's
very approximate: it doesn't take into account allocator overhead, memory
fragmentation, or many other things, but it's a good first step.

After storing a WAL record in the repository, the WAL receiver checks
the global memory usage. If it's above a configurable threshold (hard
coded at 128 MB at the moment), it evicts a layer. The victim layer is
chosen by GClock algorithm, similar to that used in the Postgres buffer
cache.

This stops the page server from using an unbounded amount of memory. It's
pretty crude: the eviction, materializing, and writing of a layer to disk
currently happen in the WAL receiver thread. It would be nice to move that
to a background thread, and it would be nice to have a smarter policy on
when to materialize a new image layer and when to just write out a delta
layer, and it would be nice to have more accurate accounting of memory.
But this should fix the most pressing OOM issues, and is a step in the
right direction.

Co-authored-by: Patrick Insinger <patrickinsinger@gmail.com>
2021-11-02 15:49:39 +02:00
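A very rough sketch of the accounting described above, with an atomic global counter and the hard-coded 128 MB threshold; the names and the eviction callback are illustrative, and the real victim selection uses GClock:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

static GLOBAL_INMEM_BYTES: AtomicUsize = AtomicUsize::new(0);
const GLOBAL_LIMIT: usize = 128 * 1024 * 1024; // hard-coded 128 MB for now

// Called after putting a WAL record into an in-memory layer.
fn account_put(record_len: usize) {
    GLOBAL_INMEM_BYTES.fetch_add(record_len, Ordering::Relaxed);
}

// Called by the WAL receiver: while over the limit, pick a victim layer
// (GClock in the real code), write it out, and subtract the bytes it held.
fn maybe_evict(evict_one: impl Fn() -> usize) {
    while GLOBAL_INMEM_BYTES.load(Ordering::Relaxed) > GLOBAL_LIMIT {
        let freed = evict_one();
        if freed == 0 {
            break; // nothing evictable right now
        }
        GLOBAL_INMEM_BYTES.fetch_sub(freed, Ordering::Relaxed);
    }
}
```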
Heikki Linnakangas
8c6d2664c0 Support removing arbitrary open layers, not just the oldest one 2021-11-02 15:43:16 +02:00
Patrick Insinger
cdbbd15eb9 pageserver - add InMemoryLayer global map (#817) 2021-11-01 12:20:24 -07:00
anastasia
85f8bf97f5 Name walkeeper threads to make debugging more convenient 2021-11-01 19:09:57 +03:00
anastasia
83ed930bc2 WIP. Launch and shutdown tenant threads together with walreceiver.
TODO: now walreceiver only disconnects if the safekeeper was shut down. Implement proper walreceiver disconnection.
2021-11-01 18:04:00 +03:00
anastasia
071e30cc53 Expose TENANT_THREADS_COUNT metric to observe number of currently active checkpointer and GC threads 2021-11-01 18:04:00 +03:00
Kirill Bulatov
e6ef27637b Better API to handle timeline metadata properly 2021-10-29 23:51:40 +03:00
Patrick Insinger
b532470792 Set SO_REUSEADDR for all TCP listeners 2021-10-29 12:45:26 -07:00
Heikki Linnakangas
e0d7ecf91c Refactor 'zenith' CLI subcommand handling
Also fixes 'zenith safekeeper restart -m immediate'. The stop-mode was
previously ignored.
2021-10-29 19:01:01 +03:00
Kirill Bulatov
edba2e9744 Use a proper extension for the readme file 2021-10-28 18:55:14 +03:00
Egor Suvorov
7e552b645f Add disk write/sync metrics to Safekeeper (#745) 2021-10-28 18:38:36 +03:00
anastasia
ea5900f155 Refactoring of checkpointer and GC.
Move them to a separate tenant_threads module to detangle thread management from LayeredRepository implementation.
2021-10-27 20:50:26 +03:00
anastasia
28ab40c8b7 fix init_repo() call in register_relish_download() 2021-10-27 20:50:26 +03:00
Alexey Kondratov
d423142623 Proxy: wait for kick on .pgpass connection (zenithdb/console#227) 2021-10-27 20:24:23 +03:00
Dmitry Rodionov
1c0e85f9a0 review cleanups 2021-10-27 13:30:34 +03:00
Dmitry Rodionov
5bc09074ea add a flag to avoid non incremental size calculation in pageserver http api
This calculation is not that heavy, but it is needed only in tests, and
when the number of tenants/timelines is high it can take
noticeable time.

Resolves https://github.com/zenithdb/zenith/issues/804
2021-10-27 13:30:34 +03:00
Heikki Linnakangas
1fac4a3c91 Fix a few messages.
Pointed out by Egor in https://github.com/zenithdb/zenith/pull/788,
but I accidentally pushed that before fixing these.
2021-10-27 10:58:21 +03:00
Heikki Linnakangas
1bc917324d Use -m immediate for 'immediate' shutdown 2021-10-27 10:49:38 +03:00
Heikki Linnakangas
af429fb401 Improve 'zenith' CLI utility for safekeepers and a config file.
The 'zenith' CLI utility can now be used to launch safekeepers. By
default, one safekeeper is configured. There are new 'safekeeper
start/stop' subcommands to manage the safekeepers. Each safekeeper is
given a name that can be used to identify the safekeeper to start/stop
with the 'zenith start/stop' commands. The safekeeper data is stored
in '.zenith/safekeepers/<name>'.

The 'zenith start' command now starts the pageserver and also all
safekeepers. 'zenith stop' stops pageserver, all safekeepers, and all
postgres nodes.

Introduce new 'zenith pageserver start/stop' subcommands for
starting/stopping just the page server.

The biggest change here is to the 'zenith init' command. This adds a
new 'zenith init --config=<path to toml file>' option. It takes a toml
config file that describes the environment. In the config file, you
can specify options for the pageserver, like the pg and http ports,
and authentication. For each safekeeper, you can define a name and the
pg and http ports. If you don't use the --config option, you get a
default configuration with a pageserver and one safekeeper. Note that
that's different from the previous default of no safekeepers.  Any
fields that are omitted in the configuration file are filled with
defaults. You can also specify the initial tenant ID in the config
file. A couple of sample config files are added in the control_plane/
directory.

The --pageserver-pg-port, --pageserver-http-port, and
--pageserver-auth options to 'zenith init' are removed. Use a config
file instead.

Finally, change the python test fixtures to use the new 'zenith'
commands and the config file to describe the environment.
2021-10-27 10:49:38 +03:00
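For a rough idea of what parsing such a config could look like, here is a sketch using serde and toml; the field names are illustrative and not the actual control_plane format (see the sample files under control_plane/):

```rust
use serde::Deserialize;

// Illustrative config model: a pageserver section, a list of safekeepers,
// and an optional initial tenant id. Omitted fields fall back to defaults.
#[derive(Deserialize)]
struct PageServerConf {
    pg_port: Option<u16>,
    http_port: Option<u16>,
    auth_type: Option<String>,
}

#[derive(Deserialize)]
struct SafekeeperConf {
    name: String,
    pg_port: Option<u16>,
    http_port: Option<u16>,
}

#[derive(Deserialize)]
struct EnvConf {
    default_tenantid: Option<String>,
    pageserver: Option<PageServerConf>,
    #[serde(default)]
    safekeepers: Vec<SafekeeperConf>,
}

fn parse(s: &str) -> Result<EnvConf, toml::de::Error> {
    toml::from_str(s)
}
```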
Heikki Linnakangas
710fe02d0b Return success on 'zenith stop' if the page server is already stopped. 2021-10-27 01:10:24 +03:00
Heikki Linnakangas
de87aad990 Remove a few unused functions 2021-10-27 01:10:24 +03:00
Heikki Linnakangas
41d48719e1 In python tests, skip ports that are already in use.
We've seen some failures with "Address already in use" errors in the
tests. It's not clear why, perhaps some server processes are not cleaned
up properly after test, or maybe the socket is still in TIME_WAIT state.
In any case, let's make the tests more robust by checking that the port
is free, before trying to use it.
2021-10-27 00:46:24 +03:00
Kirill Bulatov
d88377f9f0 Remove log from zenith_utils 2021-10-26 23:24:11 +03:00
Kirill Bulatov
ecd577c934 Simplify tracing declarations 2021-10-26 23:24:11 +03:00
anastasia
f43f8401ee Don't wait for wal-redo process for non-relational records replay 2021-10-26 19:30:28 +03:00
Arseny Sher
1877bbc7cb bump vendor/postgres to fix reconnection busy loop 2021-10-26 15:43:19 +03:00
Heikki Linnakangas
a064ebb64c Cope with missing 'tenantid' in '.zenith/config' file.
We generate the initial tenantid and store it in the file, so it shouldn't
be missing. But let's cope with it anyway. (This comes in handy with the bigger
changes I'm working on at https://github.com/zenithdb/zenith/pull/788)
2021-10-25 21:24:11 +03:00
Heikki Linnakangas
4726870e8d Remove obsolete comment.
We store the pageserver port in the .zenith/config file.
2021-10-25 21:16:58 +03:00
Heikki Linnakangas
3bbc106c70 Prefer long CLI option name for clarity. 2021-10-25 21:16:58 +03:00
Heikki Linnakangas
66eb081876 Improve comment on 'base_dir' 2021-10-25 21:16:58 +03:00
Kirill Bulatov
f291ab2b87 Do not panic on missing tenant 2021-10-25 18:36:30 +03:00
Heikki Linnakangas
66ec135676 Refactor pytest fixtures
Instead of having a lot of separate fixtures for setting up the page
server, the compute nodes, the safekeepers etc., have one big ZenithEnv
object that encapsulates the whole environment. Every test uses either
a shared "zenith_simple_env" fixture, which contains the default setup
of a pageserver with no authentication and no safekeepers, or a custom
test-specific ZenithEnv fixture if it wants to use safekeepers or
authentication.

Gathering information about the whole environment into one object makes
some things simpler. For example, when a new compute node is created,
you no longer need to pass the 'wal_acceptors' connection string as
argument to the 'postgres.create_start' function. The 'create_start'
function fetches that information directly from the ZenithEnv object.
2021-10-25 14:14:47 +03:00
Heikki Linnakangas
28af3e5008 Remove some unnecessary fixture arguments 2021-10-25 14:14:45 +03:00
Heikki Linnakangas
f337d73a6c Rearrange output dirs a bit
Each test now gets its own test output directory, like
'test_output/test_foobar', even when TEST_SHARED_FIXTURES is used.
When TEST_SHARED_FIXTURES is not used, the zenith repo for each test
is created under a 'repo' subdir inside the test output dir, e.g.
'test_output/test_foobar/repo'
2021-10-25 14:14:43 +03:00
Heikki Linnakangas
57ce541521 Remove unnecessary 'pg_bin' object from 'postgres' fixture.
It was only used in check_restored_datadir_content(), and that function
can construct it easily from the other information it has.
2021-10-25 14:14:41 +03:00
Heikki Linnakangas
e14f24034f Turn a few path-fixtures to global variables
This way, they're readily accessible from the classes and functions
that are not themselves fixtures
2021-10-25 14:14:38 +03:00
Kirill Bulatov
04fb0a0342 Add core relish backup and restore functionality 2021-10-22 22:22:38 +03:00
Heikki Linnakangas
8c42dcc041 Fix safekeeper -D option.
The -D option to specify working directory was broken:

    $ mkdir foobar
    $ ./target/debug/safekeeper -D foobar
    Error: failed to open "foobar/safekeeper.log"

    Caused by:
        No such file or directory (os error 2)

This was because we both chdir'd into the specified directory, and also
prepended the directory to all the paths. So in the above example, it
actually tried to create the log file in "foobar/foobar/safekeeper.log".
Change it to work the same way as in the pageserver: chdir to the
specified directory, and leave 'workdir' always set to ".".

We wouldn't necessarily need the 'workdir' variable in the config at all,
and could assume that the current working directory is always the
safekeeper data directory, but I'd like to keep this consistent with the
pageserver. The page server doesn't assume that, for the sake of unit
tests. We don't currently have unit tests in the safekeeper that write
to disk but we might want to in the future.
2021-10-22 08:39:58 +03:00
Alexey Kondratov
9070a4dc02 Turn off back pressure by default 2021-10-22 01:40:43 +03:00
Egor Suvorov
86a28458c6 test_runner: use Python 3.7 in CI and improve its support (#775)
* We actually need Python 3.7 because of dataclasses
* Rerun 'pipenv lock' under Python 3.7 and add 'pipenv' to dev deps
* Update docs on developing for Python 3.7
* CircleCI: use Python 3.7 via Docker image instead of Orb
2021-10-21 20:01:29 +03:00
Egor Suvorov
c058d04250 Rename WalAcceptor to Safekeeper in most places (#741) 2021-10-21 18:26:43 +03:00
Konstantin Knizhnik
c310932121 Implement backpressure for compute node to avoid WAL overflow
Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
Co-authored-by: Alexey Kondratov <kondratov.aleksey@gmail.com>
2021-10-21 18:15:50 +03:00
Egor Suvorov
ff563ff080 test_runner: fix mypy errors and force it on CI (#774)
* Fix bugs found by mypy
* Add some missing types and runtime checks, remove unused code
* Make ZenithPageserver start right away for better type safety
* Add `types-*` packages to Pipfile
* Pin mypy version and run it on CircleCI
2021-10-21 13:51:54 +03:00
anastasia
7f9d2a7d05 Change 'zenith tenant list' API to return tenant state added in 0dc7a3fc 2021-10-21 11:04:22 +03:00
Arthur Petukhovsky
13f4e173c9 Wait for safekeepers to catch up in test_restarts_under_load (#776) 2021-10-20 14:42:53 +03:00
Dmitry Ivanov
85116a8375 [proxy] Prevent TLS stream from hanging
This change causes writer halves of a TLS stream to always flush after a
portion of bytes has been written by `std::io::copy`. Furthermore, some
cosmetic and minor functional changes are made to facilitate debugging.
2021-10-20 14:15:49 +03:00
Egor Suvorov
e42c884c2b test_runner/README: add note on capturing logs (#778)
Became relevant after #674
2021-10-20 01:55:49 +03:00
Egor Suvorov
eb706bc9f4 Force yapf (Python code formatter) in CI (#772)
* Add yapf run to CircleCI
* Pin yapf version
* Enable `SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES` setting
* Reformat all existing code with slight manual adjustments
* test_runner/README: note that yapf is forced
2021-10-19 20:13:47 +03:00
Dmitry Rodionov
798df756de suppress FileNotFound exception instead of missing_ok=True because the latter is added in python 3.8 and we claim to support >3.6 2021-10-19 17:13:42 +03:00
Dmitry Rodionov
732d13fe06 use cached-property package because python<3.8 doesnt have cached_property in functools 2021-10-19 17:13:42 +03:00
Heikki Linnakangas
feae7f39c1 Support read-only nodes
Change 'zenith.signal' file to a human-readable format, similar to
backup_label. It can contain a "PREV LSN: %X/%X" line, or a special
value to indicate that it's OK to start with invalid LSN ('none'), or
that it's a read-only node and generating WAL is forbidden
('invalid').

The 'zenith pg create' and 'zenith pg start' commands now take a node
name parameter, separate from the branch name. If the node name is not
given, it defaults to the branch name, so this doesn't break existing
scripts.

If you pass "foo@<lsn>" as the branch name, a read-only node anchored
at that LSN is created. The anchoring is performed by setting the
'recovery_target_lsn' option in the postgresql.conf file, and putting
the server into standby mode with 'standby.signal'.

We no longer store the synthetic checkpoint record in the WAL segment.
The postgres startup code has been changed to use the copy of the
checkpoint record in the pg_control file, when starting in zenith
mode.
2021-10-19 09:48:12 +03:00
Heikki Linnakangas
c2b468c958 Separate node name from the branch name in ComputeControlPlane
This is in preparation for supporting read-only nodes. You can launch
multiple read-only nodes on the same branch, so we need an identifier
for each node, separate from the branch name.
2021-10-19 09:48:10 +03:00
Heikki Linnakangas
e272a380b4 On new repo, start writing WAL only after the initial checkpoint record.
Previously, the first WAL record on the 'main' branch overwrote the
initial checkpoint record, with invalid 'xl_prev'. That's harmless, but
also pretty ugly. I bumped into this while I was trying to tighten up the
checks for when a valid 'prev_lsn' is required. With this patch, the
first WAL record gets a valid 'xl_prev' value. It doesn't matter much
currently, but let's be tidy.
2021-10-19 09:48:04 +03:00
anastasia
0dc7a3fc15 Change tenant_mgr to use TenantState.
It avoids locking the entire TENANTS list while one tenant is bootstrapping,
and prepares the code for remote storage integration.
2021-10-18 15:40:06 +03:00
Egor Suvorov
a1bc0ada59 Dockerfile: remove wal_acceptor alias for safekeeper (#743) 2021-10-18 14:56:30 +03:00
Kirill Bulatov
e9b5224a8a Fix toml serde gotchas 2021-10-18 14:14:27 +03:00
Heikki Linnakangas
bdd039a9ee S3 DELETE call returns 204, not 200.
According to the S3 API docs, the DELETE call returns code "204 No content"
on success.
2021-10-17 16:21:58 +03:00
Heikki Linnakangas
b405eef324 Avoid writing the metadata file when it hasn't changed. 2021-10-17 14:54:39 +03:00
Kirill Bulatov
ba557d126b React on sigint 2021-10-15 21:24:24 +03:00
Patrick Insinger
2dde20a227 Bump MSRV to 1.55 2021-10-15 09:10:08 -07:00
Kirill Bulatov
4ade0bb41c Refactor upload/download_relish function signatures.
This makes them more generic, by taking any Read / Write trait
implementation, instead of operating directly on a file.
2021-10-15 11:34:15 +03:00
Stas Kelvich
100da024b6 expose pageserver http socket in docker 2021-10-15 00:26:38 +03:00
Arseny Sher
de744a44dd Add /timeline http request to safekeeper returning its status.
Which is mainly generational state (terms) and useful LSNs.

Also add a basic /status healthcheck request, which is now used in tests to
determine that the safekeeper is up; this fixes #726.

ref #115
2021-10-14 19:02:38 +03:00
Heikki Linnakangas
0e026371ec Optimize WAL decoding slightly.
This adds a fast-path for the common case that the record doesn't
cross a page boundary. We now split off a new Bytes directly from the
original input buffer in that case, instead of copying the record to a
new BytesMut. Shaves about 5% of the page server's CPU time on my
laptop, in the 'test_bulk_insert' test.
2021-10-14 14:21:23 +03:00
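A sketch of that fast path, assuming the bytes crate and illustrative function and parameter names:

```rust
use bytes::{Bytes, BytesMut};

// When the record does not cross a page boundary, slice it off the input
// buffer without copying; otherwise fall back to gathering it in a scratch buffer.
fn take_record(input: &mut Bytes, rec_len: usize, bytes_left_on_page: usize) -> Bytes {
    if rec_len <= bytes_left_on_page {
        // Fast path: zero-copy view into the original buffer.
        input.split_to(rec_len)
    } else {
        // Slow path: the record continues on the next page; collect the pieces
        // (page-header skipping elided in this sketch).
        let mut buf = BytesMut::with_capacity(rec_len);
        buf.extend_from_slice(&input.split_to(bytes_left_on_page));
        // ... keep appending from subsequent pages until `rec_len` bytes ...
        buf.freeze()
    }
}
```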
Arthur Petukhovsky
4b87acb1f6 Use logging in python tests (#674)
* Use logging in python tests

* Use f-strings for logs

* Don't log test output while running

* Use only pytest logging handler

* Add more info about pytest logging
2021-10-14 13:10:09 +03:00
Dmitry Ivanov
43957f4401 [cross-repo-ci] Use solely commit hash to test PRs in CI
See #744 for the discussion.
2021-10-13 17:16:02 +03:00
Heikki Linnakangas
8a4f092e82 Skip syncing the temp initdb installation.
Doesn't make much difference on my laptop with SSD, but every little
helps, and with a slower disk it might be noticeable.
2021-10-13 16:59:00 +03:00
Egor Suvorov
6b6b3f68be Safekeeper metrics refactor (#747) 2021-10-13 16:28:24 +03:00
Arseny Sher
96f1175a80 Cleanup hardcoded oids. 2021-10-13 10:52:47 +03:00
Patrick Insinger
1c29de81de pageserver - remove lsn from WALRecord 2021-10-13 00:03:42 -07:00
Egor Suvorov
f658263543 Revert "Dockerfile: remove wal_acceptor alias for safekeeper"
This reverts commit 64ca947722.
2021-10-12 19:05:58 +00:00
Egor Suvorov
64ca947722 Dockerfile: remove wal_acceptor alias for safekeeper 2021-10-12 19:05:16 +00:00
Egor Suvorov
23f4c0a742 Rename wal_acceptor binary to safekeeper (#740), stage 1/2
* Rename wal_acceptor binary to safekeeper
* Rename wal_acceptor.pid and wal_acceptor.log to safekeeper.pid and safekeeper.log
* Change some mentions of WAL acceptor to safekeeper
* Dockerfile: alias wal_acceptor to safekeeper temporarily until internal scripts are updated
2021-10-12 22:03:06 +03:00
Dmitry Ivanov
7c5b99683c Speed up builds by passing make jobserver to cargo
This change brings the following improvements to our build system:

* Now BUILD_TYPE also affects rust apps.
* From now on, cargo will respect `-jN` passed via `make`. However, note
  that `rustc` may spawn multiple threads depending on compile flags.
* Cargo is able to cooperate with make to better schedule parallel jobs,
  which leads to better build times (-20s in release mode on my machine).
2021-10-12 21:02:39 +03:00
Patrick Insinger
160c4aff61 pageserver - use write guard for checkpointing 2021-10-12 10:02:15 -07:00
Patrick Insinger
6e5ca5dc5c pageserver - create TimelineWriter 2021-10-12 10:02:15 -07:00
Egor Suvorov
f3445949d1 Wal acceptor: report socket bind errors better when daemonizing (#738)
Fixes #664
2021-10-12 16:51:28 +03:00
Heikki Linnakangas
95a85312f5 Simplify code to build walredo messages.
No need to use BytesMut in these functions. Plain Vec is simpler. And
should be marginally faster too; I saw BytesMut functions previously
in 'perf' profile, consuming around 5% of the overall pageserver CPU
time. That's gone with this patch, although I don't see any discernible
difference in the overall performance test results.
2021-10-12 10:16:26 +03:00
Heikki Linnakangas
934fb8592f Detect when a checkpoint is modified in a smarter way.
Previously, in the WAL receiver, we would make a decoded copy of the current
Checkpoint before each WAL record, and compare it with the Checkpoint
after the record has been processed. If it has changed, the checkpoint
relish is updated in the repository. That's somewhat expensive, the
Checkpoint::encode() function is visible in 'perf' profile. Change that
so that we set a flag whenever the Checkpoint struct is modified, so that
we don't need to compare the whole struct anymore.
2021-10-12 09:09:10 +03:00
Dmitry Ivanov
bb239b4f69 [Makefile] Set default build type to debug 2021-10-11 17:08:31 +03:00
Dmitry Ivanov
1cd7900790 [Makefile] Make build type detection more precise
Previously, typos like `BUILD_TYPE=rlease` would silently
lead to building debug binaries. The current approach is also
more future-proof, since we might add `profile`, `valgrind`
as well as other build types.
2021-10-11 17:03:51 +03:00
Arseny Sher
8c61c3e54e Minor safekeeper readme fix. 2021-10-11 16:31:44 +03:00
anastasia
d7c9dd06f4 Implement graceful shutdown at 'pageserver stop':
- perform checkpoint for each tenant repository.
- wait for the completion of all threads.

Add new option 'immediate' to 'pageserver stop' command to terminate the pageserver immediately.
2021-10-11 13:35:01 +03:00
Heikki Linnakangas
b9119f11bf Add perf test case for buffering GiST build.
When a WAL record affects multiple pages, we currently duplicate the
record for each affected page. That's a bit wasteful, but not too bad
for b-tree splits and non-hot heap updates that affect two pages. But
buffering GiST index build WAL-logs the whole relation in 32 page chunks,
with one giant WAL record for each 32-page chunk. Currently we duplicate
that giant record for each of the 32 pages, which is really wasteful.

Github issue https://github.com/zenithdb/zenith/issues/720 tracks the
problem. This commit adds a test case for it to demonstrate it.
2021-10-11 11:10:58 +03:00
Heikki Linnakangas
7216f22609 Use tracing crate to have more context in log messages.
Whenever we start processing a request, we now enter a tracing "span"
that includes context information like the tenant and timeline ID, and
the operation we're performing. That context information gets attached
to every log message we create within the span. That way, we don't need
to include basic context information like that in every log message, and
it also becomes easier to filter the logs programmatically.

This removes the explicit timeline and tenant IDs from most log messages,
as you get that information from the enclosing span now.

Also improve log messages in general, dialing down the level of some
messages that are not very useful, and adding information to others.

We now obey the RUST_LOG env variable, if it's set.

The 'tracing' crate allows for different log formatters, like JSON or
bunyan output. The one we use now is human-readable multi-line format,
which is nice when reading the log directly, but hard for
post-processing.  For production, we'll probably want JSON output and
some tools for working with it, but that's left as a TODO. The log
format is easy to change.
2021-10-11 08:59:06 +03:00
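A minimal sketch of the span pattern with the tracing crate (handler name illustrative):

```rust
use tracing::{info, info_span};

// Enter a span carrying the tenant/timeline IDs; every log line emitted
// inside it gets that context attached automatically.
fn handle_get_page(tenant_id: &str, timeline_id: &str) {
    let span = info_span!("get_page", %tenant_id, %timeline_id);
    let _enter = span.enter();

    // No need to repeat the IDs here; the enclosing span supplies them.
    info!("handling GetPage request");
}
```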
Kirill Bulatov
bf58f7f649 Expose certain layered repository structs to reuse in relish storage (#688) 2021-10-09 19:23:57 +03:00
Patrick Insinger
3f0ebc6a40 pageserver - move early File::open call 2021-10-09 08:45:52 -07:00
Patrick Insinger
0baf4bc796 fix cargo doc complaints 2021-10-09 08:45:46 -07:00
Patrick Insinger
c356030660 pageserver - use VecMap for delta metadata & sizes 2021-10-08 15:05:22 -07:00
Patrick Insinger
c4bb6d78d4 pageserver - use VecMap for in memory segsizes 2021-10-08 14:37:32 -07:00
Patrick Insinger
3b82e806f2 pageserver - use VecMap for in-memory PageVersions 2021-10-08 14:11:07 -07:00
Egor Suvorov
403d9779d9 safekeeper: add initial metrics and HTTP handler (#699, #541)
* `wal_acceptor`: add HTTP handler, /metrics endpoint only, no authentication
* Two gauges are currently reported: `flush_lsn` and `commit_lsn`
* Add `DEFAULT_PG_LISTEN_PORT` and `DEFAULT_PG_LISTEN_PORT` consts for uniformity
2021-10-08 18:55:41 +03:00
Patrick Insinger
b3b8f18f61 tests - fix get_timeline_size signature 2021-10-07 15:38:22 -07:00
Heikki Linnakangas
960c7d69a8 Remove 'predecessor' reference from in-memory and delta layers.
The caller is now responsible for looking up the predecessor layer,
instead. This makes the code simpler, as you don't need to update the
predecessor reference when a layer is frozen or written to disk.

There was a bug in that, as Konstantin noted on discord:

    Assume that freeze doesn't create new inmem layer
    (maybe_new_open=None). Then we temporary place in historics frozen
    layer. Assume that now new put_wal_record request arrives. There is
    no open in-mem layer, so it has to create new one. It is looking for
    previous layer for read and set it as new in-mem layer
    predecessor. But as far as I understand, prev layer should be our
    temporary frozen layer. Which will be then removed from
    historics.

That leaves the predecessor field of the new in-memory layer pointing
at the frozen in-memory layer that has been removed from the layer map,
preventing it from being removed from memory.

This makes two subtle changes:

1. When the first new layer is created on a branch for a segment that
   existed on the ancestor branch, the start_lsn of the new layer is now
   the branch point + 1. We were previously slightly confused on what
   the branch point LSN meant. It means that all the WAL up to and
   *including* the LSN on the old branch is visible to the new branch.
   If we mark the start LSN of the new layer as equal to the branch point,
   that's wrong, because if there is a WAL record with that LSN on the
   predecessor layer, the new layer would hide it. This bug was hidden
   when the layer on the new branch contained a direct reference to the
   layer in the old branch, as get_page_reconstruct_data() followed that
   reference directly when it didn't find the page version in the new
   layer. But now that the caller performs the lookup, it will look up
   the new layer that doesn't contain the record, and you get an error.

2. InMemoryLayer now always stores the segment size at the beginning
   of the layer's LSN range. Previously, get_seg_size() might have
   recursed into the predecessor layer to get the size, but now we
   avoid that by always copying over the last size from the previous
   layer, when a new layer is created.
2021-10-08 00:54:13 +03:00
Heikki Linnakangas
60dae0b4ac Add test case that demonstrates Write Amplification. 2021-10-08 00:34:29 +03:00
Heikki Linnakangas
c660926a06 Refactor duplicated code to get on-disk timeline size in tests.
Move it to a common function. In passing, remove the obsolete check
to exclude the 'wal' directory. The 'wal' directory is no more.
2021-10-08 00:34:26 +03:00
Egor Suvorov
7fa04e2d14 zenith_metrics: exit process on config errors (#706) 2021-10-08 00:14:56 +03:00
Heikki Linnakangas
db4059cd6d Measure peak memory usage in perf test.
Another useful metric to keep an eye on.
2021-10-07 18:03:20 +03:00
Heikki Linnakangas
fdb19fdb92 Remove unused function.
The caller was removed in commit acc0f41985.
2021-10-07 11:24:27 +03:00
Heikki Linnakangas
53b4dc944d Don't create unused "wal" directory
It hasn't been used since commit ca9af37478.
2021-10-07 10:36:26 +03:00
MMeent
a03e1b3895 Docker build now also uses BUILD_TYPE=release. (#712)
The .dockerignore and Dockerfile have also been excluded from the docker build
context, avoiding docker layer cache busts when only those files change.
2021-10-06 23:42:00 +02:00
Heikki Linnakangas
15f1bcc9c2 Remove obsolete code, now that we don't load WAL from local disk anymore.
Commit ca9af37478 removed the import_timeline_wal() call from here.
After that, the info!() message is bogus, as we no longer load the WAL
from local disk. Also, the logical size assertion is pointless now.
2021-10-06 15:59:28 +03:00
MMeent
24580f2493 Improve build system: (#703)
- Build postgresql with -O2 for releases
 - Make `make` build postgresql with 8 parallel threads
   The node is xlarge, so it has 8 vCPUs available
2021-10-06 14:37:27 +02:00
Heikki Linnakangas
e3945d94fd Store unlogged tables locally, and replace PD_WAL_LOGGED.
All the changes are in the vendor/postgres side. However, because we now
generate fewer Full Page Writes, the 'branch_behind' test needs to be
modified so that it still generates enough WAL to consume a few WAL
segments.
2021-10-06 10:58:15 +03:00
Heikki Linnakangas
d806c3a47e pageserver - serialize PageVersion as it is
Removes the need for PageVersionMeta struct.
2021-10-05 11:07:50 -07:00
Egor Suvorov
05fe39088b Readme updates based on a fresher Ubuntu installation experience (#627) 2021-10-05 19:19:25 +03:00
Egor Suvorov
530d3eaf09 Add more details to pageserver and safekeeper docs (#680) 2021-10-05 19:10:50 +03:00
Egor Suvorov
7e190d72a5 Make pageserver_ prefix for common metric names configurable (#681) 2021-10-05 19:06:44 +03:00
Patrick Insinger
9c936034b6 pageserver - fix newer clippy lints 2021-10-05 00:28:14 -07:00
Kirill Bulatov
5719f13cb2 Rework the relish thread model (#689) 2021-10-05 10:15:56 +03:00
Patrick Insinger
d134a9856e pageserver - introduce RepoHarness for testing 2021-10-04 08:36:35 -07:00
Patrick Insinger
664b99b5ac pageserver - use constant TIMELINE_ID for tests 2021-10-04 08:36:35 -07:00
Arseny Sher
4256231eb7 Enable test_start_compute with safekeepers.
It should work now.
2021-10-04 16:50:46 +03:00
Andrey Taranik
ae27490281 wal_acceptors added to tenant creation tests 2021-10-04 08:58:49 +03:00
Andrey Taranik
fbd8ca2ff4 minor code beautification 2021-10-04 08:58:49 +03:00
Andrey Taranik
ec673a5d67 bulk tenant create test added 2021-10-04 08:58:49 +03:00
Max Sharnoff
7fab38c51e Use threadlocal for walreceiver check (#692) 2021-10-01 15:47:45 -07:00
Max Sharnoff
84f7dcd052 Fix clippy errors on nightly (2021-09-29) (#691)
Most of the changes are for the new if-then-panic lint added in
https://github.com/rust-lang/rust-clippy/pull/7669.
2021-10-01 15:45:42 -07:00
Patrick Insinger
7095a5d551 pageserver - reject and backup future layer files
If a layer file is found with LSN after the disk_consistent_lsn, it is
renamed (to avoid conflicts with new layer files) and a warning is logged.
2021-10-01 11:41:39 -07:00
Patrick Insinger
538c2a2a3e pageserver - store timeline metadata durably
The metadata file is now always 512 bytes. The last 4 bytes are a
crc32c checksum of the previous 508 bytes. Padding zeroes are added
between the serde serialization and the start of the checksum.

A single write call is used, and the file is fsyncd after.
On file creation, the parent directory is fsyncd as well.
2021-10-01 11:41:39 -07:00
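A rough sketch of writing such a file, assuming the crc32c crate; the sizes come from the commit message, everything else is illustrative:

```rust
use std::fs::File;
use std::io::Write;
use std::path::Path;

const METADATA_FILE_SIZE: usize = 512;
const BODY_SIZE: usize = METADATA_FILE_SIZE - 4; // last 4 bytes hold the crc32c

fn write_metadata(path: &Path, serialized: &[u8]) -> std::io::Result<()> {
    assert!(serialized.len() <= BODY_SIZE);
    // Serialized body followed by zero padding up to the checksum.
    let mut buf = vec![0u8; METADATA_FILE_SIZE];
    buf[..serialized.len()].copy_from_slice(serialized);
    let crc = crc32c::crc32c(&buf[..BODY_SIZE]);
    buf[BODY_SIZE..].copy_from_slice(&crc.to_le_bytes());

    // Single write call, then fsync the file; the parent directory is fsynced
    // separately when the file is first created.
    let mut file = File::create(path)?;
    file.write_all(&buf)?;
    file.sync_all()
}
```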
Patrick Insinger
62f83869f1 pageserver - fsync image/delta layers
Ensure image and delta layer files are durable.
Also, fsync the parent directory to ensure the directory entries are
durable.
2021-10-01 11:41:39 -07:00
Patrick Insinger
69670b61c4 pageserver - use crashsafe_dir utility
Replace usage of std::fs::create_dir/create_dir_all with crashsafe
equivalents.
2021-10-01 11:41:39 -07:00
Patrick Insinger
0a8aaa2c24 zenith_utils - add crashsafe_dir
Utility for creating directories and directory trees in a crash-safe
manner.

Minimizes calls to fsync for trees.
2021-10-01 11:41:39 -07:00
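A sketch of what a crash-safe directory creation helper can look like on a Unix-like filesystem (illustrative, not the actual zenith_utils code):

```rust
use std::fs::{self, File};
use std::io;
use std::path::Path;

// Create the directory, fsync it, then fsync the parent so the new
// directory entry itself is durable. For whole trees, each created
// directory is synced and the pre-existing parent only once.
fn crashsafe_create_dir(path: &Path) -> io::Result<()> {
    fs::create_dir(path)?;
    File::open(path)?.sync_all()?;
    if let Some(parent) = path.parent() {
        File::open(parent)?.sync_all()?;
    }
    Ok(())
}
```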
Heikki Linnakangas
e474790400 Print more details on errors to log
Fixes https://github.com/zenithdb/zenith/issues/661
2021-10-01 17:57:41 +03:00
Alexey Kondratov
2c99e2461a Allow usage of the compute hostname in the proxy 2021-10-01 16:24:35 +03:00
Stas Kelvich
cf8e27a554 Proxy: pass database name in console too 2021-10-01 14:27:52 +03:00
Kirill Bulatov
287ea2e5e3 Limit concurrent relish storage sync operations 2021-10-01 08:37:09 +03:00
Heikki Linnakangas
86e14f2f1a Bump vendor/postgres 2021-09-30 20:36:57 +03:00
Arseny Sher
adbae62281 Rename SharedState.commit_lsn to notified_commit_lsn.
ref #682
2021-09-30 17:29:15 +03:00
Egor Suvorov
3127a4a13b Safekeeper::Storage::write_wal: clarify behavior (#679)
It previously took &SafeKeeperState similar to persist(), but only for its
`server` member.
Now it takes &ServerInfo only, so it's clear the state is not persisted.
Also added a comment about sync.
2021-09-29 19:58:30 +03:00
Egor Suvorov
6d993410c9 docs/README: fix link to walkeeper's README (#677) 2021-09-29 14:40:16 +03:00
Kirill Bulatov
fb05e4cb0b Show better error messages on pageserver failures 2021-09-29 01:55:41 +03:00
Egor Suvorov
b0a7234759 pageserver: fix stale default listen addrs
* In command line help
* In dummy_conf
2021-09-28 20:57:51 +03:00
Egor Suvorov
ddf4b15ebc pageserver: use const_format crate to generate default listen addrs 2021-09-28 20:57:51 +03:00
Egor Suvorov
3065532f15 pageserver: fix mistype in listen-http arg help 2021-09-28 20:57:51 +03:00
Arthur Petukhovsky
d6fc74a412 Various fixes for test_sync_safekeepers (#668)
* Send ProposerGreeting manually in tests

* Move test_sync_safekeepers to test_wal_acceptor.py

* Capture test_sync_safekeepers output

* Add comment for handle_json_ctrl

* Save captured output in CI
2021-09-28 19:25:05 +03:00
Arseny Sher
7a370394a7 Wait till previous victim recovers in run_restarts_under_load.
Fixes test flakiness, as recovery easily might take the whole iteration.
2021-09-28 19:15:41 +03:00
Stas Kelvich
0f3cf8ac94 Cleanup Dockerfile.
* make .dockerignore `ncdu -X` compatible to easily inspect build context
* remove cargo-chef as it was introducing more problems than it was solving
* remove rocksdb packages
* add ca-certs in the resulting image. We need that to be able to make https
  connections from container with proxy to the console.
2021-09-28 18:26:20 +03:00
Heikki Linnakangas
014be8b230 Use Iterator, to avoid making one copy of page_versions BTreeMap
Reduces the CPU time spent in checkpointing, in the write_to_disk()
function.
2021-09-27 19:28:02 +03:00
Heikki Linnakangas
08978458be Refactor write_to_disk, handling dropped segment as a special case.
Similar to what commit 7fb7f67b did to 'freeze', dealing with the
dropped segment separately from the rest of the logic makes the code
easier to follow. It is also needed by the next commit that replaces
the code to build new BTreeMap with an iterator; we cannot pass one
of two kinds of closures as argument, it has to always be the same one.
Having separate DeltaLayer::create() calls for the case of dropped
segment and the other cases works around that.
2021-09-27 19:23:32 +03:00
Heikki Linnakangas
2252d9faa8 Switch to RwLock in InMemoryLayer
Allows more parallelism basically for free.
2021-09-27 19:15:40 +03:00
Arthur Petukhovsky
22e15844ae Fix clippy errors (#673) 2021-09-27 18:59:30 +03:00
Konstantin Knizhnik
ca9af37478 Do not write WAL at pageserver (#645)
* Do not write WAL at pageserver

* Remove import_timeline_wal function
2021-09-27 14:15:55 +03:00
Stas Kelvich
aae41e8661 Proxy pass for existing users.
Ask console to check per-cluster auth info.
2021-09-27 11:56:43 +03:00
Stas Kelvich
8331ce865c Intercept and log errors in the mgmt interface.
That PostgresBackend would be better replaced with the http server or a redis
subscription. For now let's improve logging and move on.
2021-09-27 11:56:43 +03:00
Stas Kelvich
3bac4d485d Fix EncryptionResponse message in pq_proto.rs
Positive EncryptionResponse should set 'S' byte, not 'Y'. With that
fix it is possible to connect to the proxy with SSL enabled and read
the deciphered notice text. But after the first query everything gets stuck.
2021-09-27 11:56:43 +03:00
Stas Kelvich
f84eaf4f05 Leave only pkcs8 keys support for proxy.
The rsa_private_keys() function returns an empty vector when it tries to read
a pkcs8-encoded file, instead of returning an error. So the previous check was
failing on pkcs8. Leave only pkcs8 for now.
2021-09-27 11:56:43 +03:00
Arseny Sher
70b08923ed Disable new safekeepers tests as not stable enough. 2021-09-26 22:33:58 +03:00
Heikki Linnakangas
c846a824de Bump vendor/postgres, to use buffered I/O in WAL redo process.
Greatly reduces the CPU overhead in the WAL redo process.
2021-09-24 21:48:30 +03:00
Heikki Linnakangas
b71e3a40e2 Add more details to the log, when an error happens in GetPage request. 2021-09-24 21:44:22 +03:00
Heikki Linnakangas
41dfc117e7 Buffer the writes to the WAL redo process pipe.
Reduces the CPU time spent in the write() syscalls. I noticed that we were
spending a lot of CPU time in libc::write, coming from request_redo(), in
the 'bulk_insert' test. According to some quick profiling with 'perf',
this reduces the CPU time spent in request_redo() from about 30% to 15%.

For some reason, it doesn't reduce the overall runtime of the 'bulk_insert'
test much, maybe by one second if you squint (from about 37s to 36s), so
there must be some other bottleneck, like I/O. But this is surely still
a good idea, just based on the reduced CPU cycles.
2021-09-24 21:12:38 +03:00
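The change amounts to the standard BufWriter pattern; a sketch with illustrative names:

```rust
use std::io::{BufWriter, Write};
use std::process::ChildStdin;

// Wrap the redo process's stdin pipe in a BufWriter so many small message
// writes become one write() syscall per flush, instead of one per message.
fn send_records(stdin: ChildStdin, messages: &[Vec<u8>]) -> std::io::Result<()> {
    let mut pipe = BufWriter::new(stdin);
    for msg in messages {
        pipe.write_all(msg)?;
    }
    pipe.flush()
}
```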
sharnoff
a72707b8cb Redo #655 with fix: Allow LeSer/BeSer impls missing either Serialize or Deserialize
Commit message copied below:

* Allow LeSer/BeSer impls missing Serialize/Deserialize

Currently, using `LeSer` or `BeSer` requires that the type implements
both `Serialize` and `DeserializeOwned`, even if we're only using the
trait for one of those functionalities.

Moving the bounds to the methods gives the convenience of the traits
without requiring unnecessary derives.

* Remove unused #[derive(Serialize/Deserialize)]

This should hopefully reduce compile times - if only by a little bit.

Some of these were already unused (we weren't using LeSer/BeSer for the
types), but most have *become* unused with the change to
LeSer/BeSer.
2021-09-24 10:58:01 -07:00
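A sketch of the pattern, using plain bincode defaults rather than the actual zenith_utils implementation: the bounds live on the methods, so the blanket impl applies to every type and each method is usable only when its own bound is met:

```rust
use serde::{de::DeserializeOwned, Serialize};

trait LeSer {
    // Only requires Serialize, so deserialize-only types can still implement the trait.
    fn ser(&self) -> bincode::Result<Vec<u8>>
    where
        Self: Serialize,
    {
        bincode::serialize(self) // little-endian options elided in this sketch
    }

    // Only requires DeserializeOwned, and only when actually called.
    fn des(bytes: &[u8]) -> bincode::Result<Self>
    where
        Self: DeserializeOwned + Sized,
    {
        bincode::deserialize(bytes)
    }
}

// Blanket impl: no bounds needed here, they are checked per method call.
impl<T> LeSer for T {}
```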
Max Sharnoff
0f770967b4 Revert "Allow LeSer/BeSer impls missing either Serialize or Deserialize (#655)
This reverts commit bd9f4794d9.
2021-09-24 10:18:36 -07:00
Max Sharnoff
bd9f4794d9 Allow LeSer/BeSer impls missing either Serialize or Deserialize (#655)
* Allow LeSer/BeSer impls missing Serialize/Deserialize

Currently, using `LeSer` or `BeSer` requires that the type implements
both `Serialize` and `DeserializeOwned`, even if we're only using the
trait for one of those functionalities.

Moving the bounds to the methods gives the convenience of the traits
without requiring unnecessary derives.

* Remove unused #[derive(Serialize/Deserialize)]

This should hopefully reduce compile times - if only by a little bit.

Some of these were already unused (we weren't using LeSer/BeSer for the
types), but most have *become* unused with the change to
LeSer/BeSer.
2021-09-24 10:06:03 -07:00
Heikki Linnakangas
ff5cbe2694 Support overlapping and nested Layers in the layer map.
This introduces a new tree data structure for holding intervals, and
queries of the form "which intervals contain the given point?". It then
uses that to store the Layers in the layer map, instead of the BTreeMap.

While we don't currently create overlapping layers in the page server,
that situation might arise in the future if we start to create extra
layers for performance purposes, or as part of some multi-stage
garbage collection operation that creates new layers in some interval
and then removes old ones. The situation might also arise if you have
multiple page servers running on the same timeline, freezing layers at
different points, and both uploading them to S3.

So even though overlapping layers might not happen currently, let's
avoid getting confused if it does happen for some reason.

Fixes https://github.com/zenithdb/zenith/issues/517.
2021-09-24 14:10:52 +03:00
Heikki Linnakangas
2319e0ec8f Define a layer's start and end bounds more precisely.
After this, a layer's start bound is always defined to be inclusive, and
end bound exclusive.

For example, if you have a layer in the range 100-200, that layer can be
used for GetPage@LSN requests at LSN 100, 199, or anything in between.
But for LSN 200, you need to look at the next layer (if one exists).

This is one part of a fix for https://github.com/zenithdb/zenith/issues/517.
After this, the page server shouldn't create layers for the same segment
with the same LSN, which avoids the issue. However, the same thing would
still happen, if you managed to create layers with same start LSN again.
That could happen e.g. if you had two page servers running, or in some
weird crash/restart scenario, or due to bugs or features added later. The
next commit makes the layer map more robust, so that it tolerates that
situation without deleting wrong files.
2021-09-24 14:10:49 +03:00
Arthur Petukhovsky
d4e037f1e7 Support for --sync-safekeepers in tests (#647)
A new command has been added to append specially crafted records to the safekeeper WAL. It takes json describing the append, encodes a LogicalMessage based on the json fields, and processes a new AppendRequest to append and commit WAL in the safekeeper.

The Python test starts up walkeepers and creates a config for walproposer, then appends WAL and checks that --sync-safekeepers works without errors. This test is the simplest one; more useful test cases (like in #545) for different setups will be added soon.
2021-09-24 13:19:59 +03:00
Max Sharnoff
139936197a bump vendor/postgres: Catch walkeeper ErrorResponse (#650)
Postgres commit message:

PQgetCopyData can sometimes indicate that the copy is done if the
backend returns an error response. So while we still expect that the
walkeeper never sends CopyDone, we can't expect it to never produce
errors.
2021-09-23 14:55:38 -07:00
Heikki Linnakangas
d4eed61f57 Refactor code for parsing and creating postgresql.conf.
There's surely more that could be done, but this makes it a bit more
readable at least.
2021-09-23 19:34:27 +03:00
Patrick Insinger
7db3a9e7d9 walredo - don't use RefCell on stdin/stdout 2021-09-23 08:42:58 -07:00
Patrick Insinger
c81ee3bd5b Add some comments to the checkpoint process 2021-09-23 13:19:45 +03:00
anastasia
7fb7f67bb4 Fix relish extension after it was dropped or truncated.
- Turn dropped layers into non-writeable in get_layer_for_write().

- Handle non-writeable dropped layers in the checkpointer. They don't need freezing, so just remove them from the list of open_segs and write them out to disk.

- Remove code that handles dropped layers in freeze() function. It is not used anymore.
2021-09-23 13:19:45 +03:00
anastasia
86164c8b33 Add unit tests for drop_lsn.
test_drop_extend and test_truncate_extend illustrate what happens if we dropped a segment and then created it again within the same layer.
2021-09-23 13:19:45 +03:00
Arseny Sher
97c4cd4434 bump vendor/postgres 2021-09-23 12:22:53 +03:00
anastasia
a4fc6da57b Fix gc_internal to treat dropped layers.
Some dropped layers serve as tombstones for earlier layers and thus cannot be garbage collected.
Add new fields to GcResult for layers that are preserved as tombstones
2021-09-23 12:21:47 +03:00
anastasia
c934e724a8 Enable test_list_rels_drop test 2021-09-23 12:21:47 +03:00
anastasia
e554f9514f gc refactoring
- rename 'compact' argument of GC to 'checkpoint_before_gc'.
- gc_iteration_internal() refactoring
2021-09-23 12:21:47 +03:00
Max Sharnoff
d7cff8fbaf Show more detailed query errors from postgres_backend (#651)
anyhow uses the alternate formatting style ("{:#}") to display all of
the causes of an error instead of the outermost context.

Without this, there's less information available to figure out what's
going on. It's probably too much to display in the compute node logs
though, so it's better to leave that formatting as-is.
2021-09-22 14:51:14 -07:00
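A small example of the difference, assuming the anyhow crate (file name illustrative):

```rust
use anyhow::{Context, Result};

fn read_config() -> Result<String> {
    std::fs::read_to_string("pageserver.toml").context("failed to read config")
}

fn main() {
    if let Err(err) = read_config() {
        // "{:#}" prints the whole cause chain on one line, e.g.
        //   failed to read config: No such file or directory (os error 2)
        // while plain "{}" would only show the outermost context.
        eprintln!("query failed: {:#}", err);
    }
}
```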
Max Sharnoff
90ef661673 Fix rustc & clippy warnings for nightly (2021-09-19) (#629)
Fix clippy warnings for nightly (2021-09-19)
2021-09-22 11:24:43 -07:00
Dmitry Rodionov
579b5ee944 exclude labels formatting for every operation in LOGICAL_TIMELINE_SIZE gauge metric 2021-09-22 18:03:48 +03:00
Arthur Petukhovsky
8ebf2fe550 Add test for acceptor restarts under load (#591)
In this test safekeepers are restarted one by one, while bank transactions
are executed and validated in the background. Bank transactions consist of
balance transfers and log writes. In the end balance sum should remain the
same and there should be progress from every client, when 2 of 3 safekeeper
nodes are up.
2021-09-22 11:59:20 +03:00
Dmitry Rodionov
16d3dc821a disable parallelization for benchmarks 2021-09-21 23:08:22 +03:00
Heikki Linnakangas
a91eeb1c65 Buffer the writes when writing a layer to disk.
Significantly reduces the CPU time spent on libc::write.
2021-09-21 16:54:29 +03:00
Heikki Linnakangas
49c8c03465 Add performance test for bulk INSERT 2021-09-21 13:25:46 +03:00
Dmitry Rodionov
5344ffc3de try to reenable parallel test runs in CI 2021-09-20 21:43:09 +03:00
Heikki Linnakangas
296586b7ce bump vendor/postgres 2021-09-20 18:52:55 +03:00
Dmitry Rodionov
b7aac87ec1 fix port distribution so services do not use ephemeral ports 2021-09-20 18:44:42 +03:00
Patrick Insinger
ea4c3639e3 Include layer metadata in layer summary chapters
Include all data stored in layer filenames and the tenant+timeline IDs
inside a summary chapter. Use this chapter in the `dump_layerfile`
utility.
2021-09-20 07:57:51 -07:00
Heikki Linnakangas
745627c8ca Remove unused FE/BE ControlFile message.
It's a remnant of some old tests in Zenith, but isn't used anymore. It
doesn't exist in PostgreSQL.
2021-09-17 20:06:04 +03:00
Heikki Linnakangas
c2af6d98db Don't print 'pg_controldata' output after every startup in tests.
It's not interesting for most tests, and clutters the output. If there
are individual tests where it is worthwhile, let's add pg_controldata calls
to those tests, but I don't think it's needed for now.
2021-09-17 20:04:29 +03:00
Heikki Linnakangas
540973eac4 Don't get confused on request of latest page version with very old LSN.
If the 'latest' flag in the client request is true, the client wants the
latest page version regardless of the LSN in the request. The LSN is just
a hint in that case, indicating that the page hasn't been modified since
since that LSN. The LSN can be very old, so it's possible that the page
server has already garbage collected away the layer at that LSN. We tried
to fetch the old layer and errored out if that happened. To fix, always
fetch the data as of last-record-LSN, if 'latest' is set in the client
request. We now only use the LSN to wait if the requested LSN hasn't been
received and processed yet.

Fixes https://github.com/zenithdb/zenith/issues/567
2021-09-17 18:56:05 +03:00
Heikki Linnakangas
ad5f16f724 Improve the protocol between Postgres and page server.
- Use different message formats for different kinds of response messages.

- Add an Error message, for passing errors from page server to Postgres.
  Previously, we would respond to 'exists' request with 'false', and
  to 'nblocks' request with 0, if an error happened. Fix those to return
  an error message to the client. GetPage requests had a mechanism to
  return an error, but it was just a flag with no error message.

- Add a flag to requests, to indicate that we actually want the latest
  page version on the timeline, and the LSN is just a hint that we know
  that there haven't been any modifications since that LSN. The flag isn't
  used for anything yet, but I'm planning to use it to fix
  https://github.com/zenithdb/zenith/issues/567
2021-09-17 16:38:14 +03:00
Kirill Bulatov
1aa7218fd6 Show underlying pageserver error details 2021-09-17 16:16:05 +03:00
Kirill Bulatov
1d5abf1253 Initial version of the relish storage 2021-09-17 15:30:22 +03:00
Dmitry Ivanov
7b3fb760fa [test_runner] psql should be oblivious to user's preferences
This makes psql ignore $HOME/.psqlrc
2021-09-17 14:16:23 +03:00
Max Sharnoff
3743344e64 Add get_timeline_for_tenant() to tenant_mgr (#615)
Most of the previous usages of get_repository_for_tenant were followed
by immediately getting a timeline in that repository, without keeping it
around for longer.

The new `get_timeline_for_tenant` function implements that same
behavior, but in one line.
2021-09-16 10:38:21 -07:00
Max Sharnoff
bbe4f39790 walkeeper: Add parsing check for hot standby tag (#597) 2021-09-16 09:04:35 -07:00
Kirill Bulatov
7dda9f2894 Fix clippy lints and enable clippy checking in CI 2021-09-16 15:09:16 +03:00
anastasia
8de41f1d70 Change checkpoint_distance type to u64 2021-09-16 12:33:50 +03:00
anastasia
6984d33b4e Run GC and checkpointer separate threads.
Add checkpoint_period configuration parameter
2021-09-16 12:33:50 +03:00
anastasia
98d4f9cea5 Add checkpoint_distance config parameter.
- Change hardcoded OLDEST_INMEM_DISTANCE value to pageserver config option checkpoint_distance.
- Get rid of 'force' flag in checkpoint_internal(). Use checkpoint_distance=0 instead.
2021-09-16 12:33:50 +03:00
Arseny Sher
87bc18972f bump vendor/postgres 2021-09-16 11:41:29 +03:00
Patrick Insinger
25b7d424ab Prevent frozen InMemoryLayer races
Instead of panicking when a race happens, retry the operation after
getting a new layer.
2021-09-15 20:50:51 -07:00
Patrick Insinger
a5bd306db9 Ensure InMemoryLayer predecessor updated correctly
When the new open InMemoryLayer predecessor is updated, ensure it was
pointing to the old frozen layer.
2021-09-15 16:04:49 -07:00
Patrick Insinger
0cbee4a416 Don't hold lock on LayerMap while writing to disk 2021-09-15 16:04:49 -07:00
Patrick Insinger
91ff09151d Remove disk IO from InMemoryLayer::freeze
Move the creation of Image and Delta layers from
`InMemoryLayer::freeze()` to `InMemoryLayer::write_to_disk`.
2021-09-15 16:04:49 -07:00
Patrick Insinger
fea5954b18 Change filling gap println! to trace! 2021-09-15 14:22:04 -07:00
Max Sharnoff
b11b0bb088 bin_ser: reject trailing bytes by default (#587)
Changes `LeSer`/`BeSer::des`. Also adds a new `des_prefix` function to
keep a way to allow trailing bytes.
2021-09-15 11:48:19 -07:00
Dmitry Rodionov
0ede933719 temporarily disable parallel test runs as they seem to misbehave when there
are several concurrent CI runs
2021-09-15 18:59:59 +03:00
Kirill Bulatov
3ab60ce76f Unify tokio deps and bump cargo resolver version 2021-09-15 16:00:08 +03:00
Dmitry Rodionov
01ef2baef0 show more context for zenith cli run errors 2021-09-15 14:02:15 +03:00
Dmitry Rodionov
6a2e4bfdd9 use parallel test execution in ci 2021-09-15 14:02:15 +03:00
Dmitry Rodionov
9563336d9a Bring back check for interfering processes, add more comments and
descriptive errors
2021-09-15 14:02:15 +03:00
Dmitry Rodionov
4ebe643d0c Support parallel test running for python tests
Support is done via pytest-xdist plugin.
To use the feature add -n<concurrency> to pytest invocation
e.g. pytest -n8 to run 8 tests in parallel.

Changes in the code are mostly about port assignment. Previously the port for
the pageserver was hardcoded, with no way to override it through the zenith
cli, and ports for started compute nodes were calculated twice, in the zenith
cli and in the test code. Now the zenith cli supports passing port arguments for
the pageserver and compute nodes explicitly.

Tests are modified in such a way that each worker gets a non-overlapping
port range which can be configured and now contains 100 ports. These
ports are distributed to test services (pageserver, wal acceptors,
compute nodes) so they can work independently.
2021-09-15 14:02:15 +03:00
Dmitry Rodionov
dc897fb864 remove pageserver remotes support since we do not have tests for that and feature itself is delayed (#136) 2021-09-15 13:24:35 +03:00
Max Sharnoff
a2498f3e67 Improve walkeeper replication error messages & context (#585) 2021-09-14 11:59:14 -07:00
Patrick Insinger
d150f3ce8c Detect writes on frozen InMemoryLayers
Data written to frozen layers is lost. It will not appear in on-disk
structures or in successor InMemoryLayers. Here we detect this race, and
fail. I think this race is rare, but this should make it easier to track
down when it happens.
2021-09-14 11:44:48 -07:00
Patrick Insinger
cff4572774 Avoid race in get_layer_for_write
Implement the changes suggested in a comment, create
`get_layer_for_read_locked` so that `get_layer_for_write` doesn't have
to drop the LayerMap lock when searching for the predecessor.
2021-09-14 11:24:24 -07:00
Dmitry Rodionov
84008a2560 factor out common logging initialisation routine
This contains the lowest common denominator of the pageserver and safekeeper log
initialisation routines. It uses the daemonize flag to decide where to
stream log messages: when daemonize is true, log messages are
forwarded to a file; otherwise they are streamed to stdout. Logging to
stdout is the default on the docker side of things, so this makes it easier
to browse our logs via builtin docker commands.
2021-09-14 18:09:14 +03:00
Dmitry Ivanov
6b7f3bc78c Add inter-repo CI job to CircleCI configuration
This job will be responsible for triggering remote CI pipeline in
zenithdb/console repository. That way, we'll always know when
a PR to zenithdb/zenith breaks the cloud console app.
2021-09-14 16:56:04 +03:00
Arseny Sher
a68c23448a Skip the bootstrap hole in safekeeper's find_end_of_wal.
Otherwise, restarting a safekeeper before the first segment is filled makes it
report 0 as the flushed LSN. To this end, tweak find_end_of_wal_segment to allow
starting from a given LSN, not only from the start of the segment. While here,
make it less panicky.
2021-09-13 22:46:04 +03:00
Dmitry Rodionov
9043f45489 removes protobuf dependency (brought by prometheus default features) 2021-09-13 15:57:41 +03:00
Heikki Linnakangas
6afd99c73f Fix misc typos in comments. 2021-09-13 12:31:04 +03:00
nkotlyarov
18b5165b22 Update README.md
typo
2021-09-12 15:35:18 +03:00
Arseny Sher
6dc66eefb6 bump vendor/postgres 2021-09-11 06:10:10 +03:00
Arseny Sher
0aec60938a Make flush_lsn reported by safekeepers point to record boundary.
Otherwise we produce corrupted record holes in the WAL during compute node restart
when there was an unfinished record from the old compute, as these reports
advance commit_lsn -- the reliably persisted part of the WAL.

ref #549.

Mostly by @knizhnik. I adjusted it to make sure the proposer always starts streaming
from the record beginning, so we don't need special quirks for decoding in the
safekeeper.
2021-09-11 06:10:10 +03:00
Patrick Insinger
7c62a57e54 initialize tenant_mgr after daemonizing
Ran into problems launching the WAL redo process on OS X after 4b73ad.
Launching the `initdb` process was met with "bad file descriptor" errors.
Using dtrace, I found shortly after calling `posix_spawn` for `initdb`,
`kevent` was returning this error.

I haven't dug super deep to see if the daemonization itself is the
problem, but this commit fixes it for me. My hunch is that some file
descriptors used when the Tokio runtime is initialized become invalid
in the daemon process.
2021-09-10 13:00:39 +03:00
Heikki Linnakangas
59e7ca585d Minor fixes 2021-09-10 12:43:11 +03:00
anastasia
3dea06b825 Update layered_repository/README.md 2021-09-10 12:43:11 +03:00
Heikki Linnakangas
ab33614ab1 Forbid adding WAL to the repository after advancing last record LSN.
When you advance last record LSN, *all* changes up to that LSN should be
imported into repository. We have been a bit sloppy about that when it
comes to the checkpoint information that we also store in the repository.
In WAL receiver, for example, we would receive a WAL record, advance
last record LSN, and only then update the checkpoint relish at the same
LSN. Reorder that so that you advance the last record LSN only after
updating the checkpoint relish. It hasn't apparently caused any problems
so far, but let's be tidy.

Tighten the check for that in get_layer_for_write(), so that it checks for
'lsn > last_record_lsn' rather than 'lsn >= last_record_lsn'.
2021-09-10 10:59:09 +03:00
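A minimal sketch of the tightened ordering check (Lsn simplified to u64; assumed shape, not the actual code):

    // Sketch: refuse to open a write layer at or below the last record LSN.
    fn check_layer_for_write(lsn: u64, last_record_lsn: u64) -> Result<(), String> {
        if lsn <= last_record_lsn {
            return Err(format!(
                "cannot add WAL at {} after advancing last_record_lsn to {}",
                lsn, last_record_lsn
            ));
        }
        Ok(())
    }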
Heikki Linnakangas
03dff207db Remove start_lsn arg from create_empty_repository.
Always use lsn(0) as the initial last_record_lsn. It is updated soon after
creating the timeline anyway, after loading the bootstrap data, so it
doesn't stay long in that state. I was a bit worried about using a special
value like 0, but it's actually nice that you can distinguish it from any
real LSN value. The unit tests have been using Lsn(0) as the initial start
LSN all along.
2021-09-10 10:24:35 +03:00
Heikki Linnakangas
6a8785379a Add explicit 'wait_lsn' calls before get_page_at_lsn and such calls.
Move the responsibility to wait for the WAL to arrive to the callers, and
remove the wait_lsn() calls from the Timeline::get_page_at_lsn() and
friends. We were not totally consistent before, list_rels() was missing the
wait_lsn() call for example.

Closes https://github.com/zenithdb/zenith/issues/521
2021-09-10 09:56:11 +03:00
Heikki Linnakangas
507177b42e Refactor code to handle incoming page requests. 2021-09-09 18:48:46 +03:00
anastasia
b79754d06e list_rels() and list_nonrels() refactoring:
move shared code to list_relishes() function.
2021-09-09 16:05:32 +03:00
anastasia
674807eee1 Add test for dropped reltaions. Fix list_rels() and list_nonrels() functions 2021-09-09 16:05:32 +03:00
Konstantin Knizhnik
30c0343727 Use layer start_lsn instead of *entry_lsn as LSN to continue WAL record traversal at next layer (#573)
refer #532
2021-09-09 15:15:50 +03:00
Dmitry Rodionov
4fae115dc2 propagate pageserver http error messages to zenith cli 2021-09-08 17:32:59 +03:00
anastasia
3d17255400 Add comment to 'pg stop' changes 2021-09-08 14:12:00 +03:00
anastasia
5488ce8834 Change CLI command 'pg stop' to avoid races in tests.
Stop postgres immediately only when destroy option is used. Otherwise, use default shutdown mode (fast).
2021-09-08 14:12:00 +03:00
Max Sharnoff
d7313bb85c Switch tokio-postgres dependency to git repo
The other crates in this repository use zenithdb/rust-postgres as a
dependency for the related items, instead of the crates.io versions.

Switching to using that for the proxy as well removes an additional
three dependencies when we compile. (319 -> 316)
2021-09-07 19:49:03 -07:00
Dmitry Rodionov
4b73ada26e fix connection error appeared on zenith start
by binding sockets before daemonization

also use less annoying error reporting by not printing full error
messages for connect errors in first several connection retries

closes #507
2021-09-07 20:50:27 +03:00
Dmitry Rodionov
b4ecae33e4 add incremental tracking of logical timeline size
In order to exclude problems with synchronizing disk and memory, the logical
size is not stored in metadata on disk. It is calculated on timeline
"start" by scanning the contents of the layered repo, and then the size is
maintained via an atomic variable.

This patch also adds new endpoint to pageserver http api: branch detail.
It allows retrieval of a particular branch info by its name. Size info
is also added to the response of the endpoint and used in tests.
2021-09-07 18:25:15 +03:00
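Conceptually (a sketch with assumed names), the size is computed once at timeline start and then maintained with an atomic counter:

    use std::sync::atomic::{AtomicU64, Ordering};

    // Sketch: logical timeline size is not persisted; it is rebuilt on start
    // and then updated incrementally as relations grow or shrink.
    struct TimelineSize {
        logical_size: AtomicU64,
    }

    impl TimelineSize {
        fn init(initial: u64) -> Self {
            Self { logical_size: AtomicU64::new(initial) }
        }

        fn add(&self, bytes: u64) {
            self.logical_size.fetch_add(bytes, Ordering::SeqCst);
        }

        fn current(&self) -> u64 {
            self.logical_size.load(Ordering::SeqCst)
        }
    }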
Patrick Insinger
1b9e49eb60 pageserver - update unload() comment
Update comment to reflect changes made in 5ac4a2 and 98f496
2021-09-07 08:19:42 -07:00
Heikki Linnakangas
7a03e32dd5 Use Rust shorthand range syntax 2021-09-07 18:10:07 +03:00
Heikki Linnakangas
018a606987 Refactor code in LayerMap, for readability
- Reorder the structs and functions
- Delegate many of the operations in LayerMap to SegEntry. For example,
  `LayerMap::insert_open` now looks up the right SegEntry struct, and
  then calls `SegEntry::insert_open` on it.
- Use HashMap::entry() function with or_default() to implement the lookups
  with less code
2021-09-07 18:10:07 +03:00
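The entry()/or_default() pattern mentioned above looks roughly like this (sketch with simplified types, not the actual LayerMap):

    use std::collections::HashMap;

    #[derive(Default)]
    struct SegEntry {
        open_layers: Vec<String>, // placeholder for the real layer type
    }

    struct LayerMap {
        segs: HashMap<u32, SegEntry>, // keyed by a simplified segment id
    }

    impl LayerMap {
        // Sketch: look up (or create) the SegEntry and delegate to it.
        fn insert_open(&mut self, seg: u32, layer: String) {
            self.segs.entry(seg).or_default().open_layers.push(layer);
        }
    }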
Heikki Linnakangas
26782851a9 Rename OpenSegEntry to OpenLayerEntry
That's more appropriate: it's a struct that holds a Layer, not a segment.
2021-09-07 18:10:07 +03:00
Heikki Linnakangas
04ee1d5977 Add test for managing old open segments in binary heap.
I thought this test would trigger the bug fixed in the previous commit, but
it did not. More tests are nice in any case.
2021-09-07 18:10:07 +03:00
Heikki Linnakangas
6245702c7c Comment fixes 2021-09-07 18:10:07 +03:00
Heikki Linnakangas
9098f2159d Fix comparison routines of OpenSegEntry
Commit 66929ad6fb added a 'generation' number to open segments stored
in the layer map, to distinguish old layers from layers that were
added to the map during checkpoint processing. But it neglected the
OpenSegEntry::cmp() function.

It seems that the cmp() function is never used by BinaryHeap, so this
didn't cause any user-visible bugs (I tried adding a panic() to the
cmp() function and it didn't fire). But it's clearly wrong and we need
to fix it, anyway.
2021-09-07 18:10:07 +03:00
Kirill Bulatov
292bdaa6a7 Update documentation to note some Postgres specifics 2021-09-07 17:48:41 +03:00
anastasia
6f0c065743 preserve filediff artifacts in CI 2021-09-07 16:58:21 +03:00
anastasia
94c50e3e90 Fix check_restored_datadir_content(). Call 'basebackup' command directly, instead of relying on CLI 2021-09-07 16:58:21 +03:00
Konstantin Knizhnik
f83108002b Revert "Bump postgres version"
This reverts commit 511873aaed.
2021-09-07 15:06:43 +03:00
Konstantin Knizhnik
511873aaed Bump postgres version 2021-09-07 15:05:08 +03:00
anastasia
eb3fd7a8da print diff for mismatching files in check_restored_datadir_content() 2021-09-06 18:21:23 +03:00
Konstantin Knizhnik
a3214e982d Transaction commit redo handler should set TRANSACTION_STATUS_COMMITTED status for subtransactions, not TRANSACTION_STATUS_SUB_COMMITTED
Closes #535
2021-09-06 18:21:23 +03:00
anastasia
1e172230ce Add test function to compare files in compute nodes to catch bugs in SLRU replay.
Compare files in an existing compute node's pgdata with a fresh basebackup at the same lsn. We expect the content to be identical, except for tmp files.
Use it after some tests.
2021-09-06 18:21:23 +03:00
Arseny Sher
51d36b9930 bump vendor/postgres 2021-09-06 13:06:20 +03:00
Arseny Sher
d1f0b1eda4 Adapt safekeepers to --sync-safekeepers walproposer mode.
1) Do epoch switch without record from new epoch, immediately after recovery --
--sync-safekeepers mode doesn't generate new records.
2) Fix commit_lsn advancement by taking into account wal we have locally --
   setting it further is incorrect.
3) Report it back to walproposer so he knows when sync is done.
4) Remove system id check as it is unknown in sync mode.

And make logging slightly better.

ref #439
2021-09-06 13:06:20 +03:00
Stas Kelvich
ed4eed0a19 Make use of postgres --sync-safekeepers in tests and CLI.
Change control plane code to call `postgres --sync-safekeepers` before
compute node start when safekeepers are enabled. Now `pg create` will
create an empty data directory with the proper config file. Subsequent
`pg start` will run `sync-safekeepers` and will call basebackup with
the resulting LSN. Also change a few tests to accommodate this new behavior.
2021-09-06 13:06:20 +03:00
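As a rough sketch (assumed invocation details, not the actual control plane code), the start sequence is: run `postgres --sync-safekeepers`, read the resulting LSN, then take a basebackup at that LSN.

    use std::process::Command;

    // Sketch: run sync-safekeepers and capture the LSN it reports.
    fn sync_safekeepers(pgbin: &str, datadir: &str) -> std::io::Result<String> {
        let output = Command::new(pgbin)
            .args(["--sync-safekeepers", "-D", datadir])
            .output()?;
        // Assumption: the synced LSN is printed on stdout.
        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
    }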
Konstantin Knizhnik
2cf3a70be5 Add description of Zenith changes in Postgres core (#533)
* Add description of Zenith changes in Postgres core

* Update README.md
2021-09-03 19:48:26 +03:00
Kirill Bulatov
6d42ea47bf Check rusage return code 2021-09-03 17:29:23 +03:00
Konstantin Knizhnik
b227c63edf Set proper xl_prev in basebackup, when possible.
In passing, fix two minor issues with basebackup:
* check that we can't create branches with pre-initdb LSN's
* normalize branch LSN's that are pointing to the segment boundary

patch by @knizhnik
closes #506
2021-09-03 14:58:59 +03:00
anastasia
45c09c1cdd Add LayerMap.dump() function for debugging.
Print timelineid in layer dumps
2021-09-03 11:00:38 +03:00
anastasia
66dcaa4e01 Rename put_unlink() to drop_relish() in Timeline trait.
Rename put_unlink() to drop_segment() in Layer trait.
2021-09-03 11:00:38 +03:00
anastasia
a7de53d4c4 Improve comments for Layer trait. 2021-09-03 11:00:38 +03:00
anastasia
fabf5ec664 Don't use term 'snapshot' to describe layers 2021-09-03 11:00:38 +03:00
Heikki Linnakangas
c6678c5dea Include # of bytes written in pgbench benchmark result
Now that the page server collects this metric (since commit 212920e47e),
let's include it in the performance test results

The new metric looks like this:

    performance/test_perf_pgbench.py .         [100%]
    --------------- Benchmark results ----------------
    test_pgbench.init: 6.784 s
    test_pgbench.pageserver_writes: 466 MB    <---- THIS IS NEW
    test_pgbench.5000_xacts: 8.196 s
    test_pgbench.size: 163 MB

    =============== 1 passed in 21.00s ===============
2021-09-03 09:00:26 +03:00
Heikki Linnakangas
1686715ad0 Partial fix for issue with extending relation with a gap.
This should fix the sporadic regression test failures we've been seeing
lately with "no base img found" errors.

This fixes the common case, but one corner case is still not handled:
If a relation is extended across a segment boundary, leaving a gap block
in the segment preceding the segment containing the target block, the
preceding segment will not be padded with zeros correctly. This adds
a test case for that, but it's commented out.

See github issue https://github.com/zenithdb/zenith/issues/500
2021-09-02 22:01:46 +03:00
Patrick Insinger
7507f4b309 zenith_utils - box BidiStream::Tls variant
Clippy warns that one variant is 40 bytes and the other is 568 bytes.
Box the larger variant to avoid this warning
2021-09-02 09:16:03 -07:00
Dmitry Rodionov
bc709561b6 fix clippy warnings 2021-09-02 18:54:44 +03:00
Kirill Bulatov
0e4cbe0165 Fix some typos 2021-09-02 17:27:18 +03:00
Heikki Linnakangas
66929ad6fb Fix infinite loop with forced repository checkpoint.
To fix, break out of the loop when you reach an in-memory layer that was
created after the checkpoint started. To do that, add a "generation"
counter into the layer map.

Fixes https://github.com/zenithdb/zenith/issues/494
2021-09-02 15:41:40 +03:00
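In sketch form (assumed field names, not the actual checkpoint code), the loop now stops once it reaches a layer created after the checkpoint started:

    struct InMemoryLayer {
        generation: u64,
    }

    // Sketch: layers created after the checkpoint started carry a newer
    // generation number, so the checkpoint loop can stop at them.
    fn checkpoint_layers(layers: &[InMemoryLayer], checkpoint_generation: u64) {
        for layer in layers {
            if layer.generation > checkpoint_generation {
                // This layer was added while the checkpoint was running;
                // stop here instead of looping forever.
                break;
            }
            // ... flush `layer` to disk ...
        }
    }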
Heikki Linnakangas
c3cbb56ff8 Refactor Layer::get_page_reconstruct_data function
Previously, the InMemoryLayer and DeltaLayer implementations of
get_page_reconstruct_data would recursively call the predecessor layer's
get_page_reconstruct_data function. Refactor so that we iterate in the
caller instead. Make get_page_reconstruct_data() return the predecessor
layer along with the continuation LSN, so that the caller can iterate.

IMO this makes the logic more clear, although this is more lines of code.
2021-09-02 14:22:29 +03:00
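The reshaped control flow, roughly (a sketch with assumed types): the caller keeps asking layers for data until one reports that reconstruction is complete.

    enum PageReconstructResult {
        Complete,
        // Not enough data in this layer: continue from the predecessor
        // layer at the given continuation LSN.
        Continue(Box<Layer>, u64),
    }

    struct Layer;

    impl Layer {
        fn get_page_reconstruct_data(&self, _lsn: u64) -> PageReconstructResult {
            PageReconstructResult::Complete // placeholder
        }
    }

    // Sketch: iterate in the caller instead of recursing inside each layer.
    fn collect_reconstruct_data(mut layer: Box<Layer>, mut lsn: u64) {
        loop {
            match layer.get_page_reconstruct_data(lsn) {
                PageReconstructResult::Complete => break,
                PageReconstructResult::Continue(prev, cont_lsn) => {
                    layer = prev;
                    lsn = cont_lsn;
                }
            }
        }
    }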
Heikki Linnakangas
81479b0218 Rename 'InMemoryLayer::img_layer' field.
DeltaLayer uses the name `predecessor` for the same thing. Use the
same name in InMemoryLayer. The 'img_layer' name was misleading, as
the predecessor layer is not necessarily an image layer. Currently,
the 'freeze' function always creates a new image layer, but it
wouldn't have to be that way. Also, when you create a new branch, at
the branch point the predecessor layer can be a delta layer on the
ancestor branch.
2021-09-02 14:22:26 +03:00
Dmitry Rodionov
3c5452da88 add tenant id tracking to safekeeper
Previously timelines were namespaced only by ZTimelineId, so this patch
adds ZTenant id to the key of a hashtable

closes #381
2021-09-02 12:57:39 +03:00
Stas Kelvich
59c19d6e18 Rework basebackup.
* add lsn argument
* do not expose wait_lsn, wait inside list_nonrels()
* fix parameters parsing
* expose get_last_record_rlsn() to atomically read (last,prev) pair

More work is needed to correctly handle basebackup@old_lsn, but the current
approach already allows us to fix test_restart_compute
2021-09-02 12:06:12 +03:00
Stas Kelvich
8c07a36fda Remove last_valid_lsn tracking in wal_receiver.
There are two main reasons for that:

a) The latest unfinished record may disappear after a compute node restart, so let's
    try not to leak the volatile part of the WAL into the repository. Always use
    last_valid_record instead.

    That change requires different getPage@LSN logic in postgres -- we need
    to ask LSN's that point to some complete record instead of GetFlushRecPtr()
    that can point in the middle of the record. That was already done by @knizhnik
    to deal with the same problem during the work on `postgres --sync-safekeepers`.

    Postgres will use LSN's aligned on 0x8 boundary in get_page requests, so we
    also need to be sure that last_valid_record is aligned.

b) Switch to get_last_record_lsn() in basebackup@no_lsn. When compute node
    is running without safekeepers and streams WAL directly
    to pageserver it is important to match basebackup LSN and LSN of replication
    start. Before this commit basebackup@no_lsn was waiting for last_valid_lsn
    and walreceiver started replication with last_record_lsn, which can be less.
    So replication was failing since compute node doesn't have requested WAL.
2021-09-02 12:06:12 +03:00
Stas Kelvich
ddd2c83c64 Change test_restart_compute to expose safekeeper problems.
Make this test look like 'test_compute_restart.sh' by @ololobus, which
was surprisingly good for checking safekeepers behavior. This test adds
an intermediate compute node start with bulk select that causes a lot of
FPI's and select itself wouldn't wait for all that WAL to be replicated.
So if we kill compute node right after that we end up with lagging safekeepers
with VCL != flush_lsn. And starting new node from that state takes special
care.

Also, run and print `pg_controldata` output after each compute node start
to eyeball lsn/checkpoint info of basebackup.

This commit only adds test without fixing the problem.
2021-09-02 12:06:12 +03:00
Kirill Bulatov
212920e47e Collect and expose I/O disk write metrics 2021-09-02 11:33:00 +03:00
Kirill Bulatov
291c2c9a1b Test readme typo fix 2021-09-02 11:33:00 +03:00
Heikki Linnakangas
d7bebd8074 Add 'dump_layerfile' utility for debugging.
Seems handy for getting a quick idea of what's stored in an
image or delta layer file.

Example output on a file after running pgbench for a while:

    % ./target/debug/dump_layerfile pgbench_layers/pg_control_checkpoint_0_00000000016B914A
    ----- image layer for checkpoint.0 at 0/16B914A ----
    non-blocky (88 bytes)
    % ./target/debug/dump_layerfile pgbench_layers/pg_xact_0000_0_000000000412FD40
    ----- image layer for pg_xact/0000.0 at 0/412FD40 ----
    (1) blocks
    % ./target/debug/dump_layerfile pgbench_layers/rel_1663_14236_1247_0_0_00000000016B914A_000000000412FD40 | head -n 20
    ----- delta layer for 1663/14236/1247.0 0/16B914A-0/412FD40 ----
    --- relsizes ---
      0/16B914A: 14
      0/16CA559: 15
    --- page versions ---
      blk 13 at 0/16BB1D2:  rec 8162 bytes will_init: true HEAP INSERT
      blk 14 at 0/16CA559:  rec 8241 bytes will_init: true XLOG FPI
      blk 14 at 0/16CA637:  rec 215 bytes will_init: true HEAP INSERT
      blk 14 at 0/16DF14F:  rec 215 bytes will_init: false HEAP INSERT
      blk 14 at 0/16DF3A7:  rec 215 bytes will_init: false HEAP INSERT
      blk 14 at 0/16E0637:  rec 215 bytes will_init: false HEAP INSERT
      blk 14 at 0/16E088F:  rec 215 bytes will_init: false HEAP INSERT
      blk 14 at 0/16E5F9F:  rec 215 bytes will_init: false HEAP INSERT
      blk 14 at 0/16E620F:  rec 215 bytes will_init: false HEAP INSERT
2021-09-01 12:20:16 -07:00
Patrick Insinger
5ac3cb1c72 TLS for postgres_backend and proxy
Add TLS support to `postgres_backend`.
Implement this support in `proxy`.
Other applications must opt-in and provide a `rustls::ServerConfig`.
2021-09-01 10:29:19 -07:00
Dmitry Rodionov
812160ba16 fix XLOG_MULTIXACT_ZERO_MEM_PAGE wal parsing
closes #453
2021-09-01 17:02:14 +03:00
Stas Kelvich
91d605f781 Revert accidental commit: "[refer #506] Enforce that xl_prev<curr_lsn for created branch"
This reverts commit aae39ecf57.
2021-09-01 16:30:09 +03:00
Konstantin Knizhnik
aae39ecf57 [refer #506] Enforce that xl_prev<curr_lsn for created branch 2021-09-01 16:23:42 +03:00
anastasia
8b3a293bb0 Use postgres_ffi bindings instead of custom type definitions.
Move several functions to postgres_ffi crate
2021-09-01 16:11:44 +03:00
Dmitry Rodionov
989ab7e883 move several functions which replicate ones from postgresql to postgres_ffi crate 2021-09-01 16:11:44 +03:00
anastasia
e9d2181e17 Remove obsolete comment 2021-09-01 15:02:37 +03:00
anastasia
8a05d6dde0 Fix 'unrecognized filename in timeline' warning 2021-09-01 15:02:32 +03:00
Heikki Linnakangas
b45d5368b0 Don't create an image layer for dropped relations.
I noticed that the timeline directory contained files like this:

    pg_xact_0000_0_000000000169C3C2_00000000016BB399
    pg_xact_0000_0_00000000016BB399
    pg_xact_0000_0_00000000016BB399_00000000016BDD06
    pg_xact_0000_0_00000000016BDD06
    pg_xact_0000_0_00000000016BDD06_00000000016C63AA
    pg_xact_0000_0_00000000016C63AA
    pg_xact_0000_0_00000000016C63AA_0000000001765226_DROPPED
    pg_xact_0000_0_0000000001765226
    pg_xact_0001_0_00000000016BB77E_00000000016BDD06
    pg_xact_0001_0_00000000016BDD06
    pg_xact_0001_0_00000000016BDD06_0000000001765226_DROPPED
    pg_xact_0001_0_0000000001765226

Note how there is an image file after each DROPPED file. It's a waste of
time and space to materialize an image of the file at the point where it's
dropped, no one is going to request pages on a dropped relation. And it's
a correctness issue too: list_rels() and list_nonrels() will not consider
the relation as unlinked, unless the latest layer indicates so, and there
is no concept of a dropped image layer. That was causing test_clog_truncate
test to fail, when I adjusted the checkpointer to force a checkpoint more
aggressively.

There are a bunch more issues related to dropped rels and branching,
see https://github.com/zenithdb/zenith/issues/502. Hence this doesn't
completely fix the issue I saw with test_clog_truncate either. But it's
a start.
2021-09-01 09:42:18 +03:00
Max Sharnoff
625abf3c52 Bump vendor/postgres for walproposer cleanup
ref zenithdb/postgres#69
2021-08-31 13:09:16 -07:00
anastasia
c0ace1efff Bump vendor/postgres to use relsize cache. 2021-08-31 14:10:50 +03:00
Kirill Bulatov
03a09b7827 Replace old git urls with the current ones 2021-08-30 23:51:47 +03:00
Heikki Linnakangas
63d0a865f4 Update and move comment.
The comment talked about the WAL redo thread, but commit 6e22a8f709
refactored that away. The problem the comment describes probably still
exists, so keep the comment, but update the wording.
2021-08-30 20:35:08 +03:00
Patrick Insinger
5ac4a27042 image_layer - read images directly from disk
Avoid slurping entire image files into memory.

For blocky segments, we write the bytes directly to a bookfile chapter.
The blocks are a fixed size, which allows for random access.
2021-08-30 10:34:36 -07:00
Patrick Insinger
7c7e89e2ea layered_repo - atomic last/prev record_lsn
Make a new type that stores both Lsns. Use an RwLock for thread safety.
2021-08-30 09:40:13 -07:00
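A sketch of the idea (Lsn simplified to u64): store both values behind one lock so they are read and advanced together.

    use std::sync::RwLock;

    #[derive(Clone, Copy)]
    struct RecordLsn {
        last: u64,
        prev: u64,
    }

    // Sketch: one RwLock guards both LSNs so readers always see a consistent pair.
    struct LastRecordLsn(RwLock<RecordLsn>);

    impl LastRecordLsn {
        fn advance(&self, new_lsn: u64) {
            let mut guard = self.0.write().unwrap();
            guard.prev = guard.last;
            guard.last = new_lsn;
        }

        fn load(&self) -> RecordLsn {
            *self.0.read().unwrap()
        }
    }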
Patrick Insinger
561bf2c510 circleci - fix test summary 2021-08-30 09:18:49 -07:00
Patrick Insinger
98f49671c1 delta_layer - read page versions from disk
split the page versions into two chapters:
PAGE_VERSION_METAS - a rust BTreeMap from (block #, lsn) -> page & WAL
byte ranges in PAGE_VERSIONS_CHAPTER
PAGE_VERSIONS_CHAPTER - raw page images and serialized WAL records
2021-08-30 09:12:38 -07:00
anastasia
78963ad104 Issue #411. Support drop database in pageserver.
Use put_unlink for FileNodeMap relishes.
Always store FileNodeMap as materialized page images (no wal records).
2021-08-30 17:29:29 +03:00
anastasia
27442c3daa Add test for DROP DATABASE command 2021-08-30 17:29:29 +03:00
anastasia
e29bfa09b2 Fix list_rels and list_nonrels in layeredRepository - return only visible objects 2021-08-30 17:29:29 +03:00
Heikki Linnakangas
b949127b06 Rename page_cache.rs to tenant_mgr.rs.
Once upon a time, 'page_cache.rs' contained an actual page cache, but
it hasn't for a very long time. Rename to reflect what it actually does
these days.
2021-08-30 15:17:30 +03:00
Heikki Linnakangas
a3f3d46016 Misc doc updates 2021-08-30 14:29:21 +03:00
Heikki Linnakangas
c5fc4e6905 Fix instructions in README.md on how to start psql
Commit c4b2bf7ebd changed the bootstrap superuser name.
2021-08-30 14:29:21 +03:00
Heikki Linnakangas
9dfee8a3b5 Add Gauge for # of layers
Seems like a useful metric
2021-08-30 12:58:15 +03:00
sharnoff
263e03f4b8 Improve code & text formatting in proxy welcome
Adds some named formatting variables to make things a little more
clear. Also adds some words & commas to the message itself.
2021-08-30 12:41:47 +03:00
Heikki Linnakangas
074bd3bb12 Add basic performance test framework.
This provides a pytest fixture to record metrics from pytest tests. The
recorded metrics are printed out at the end of the tests.

As a starter, this includes one small test, using pgbench. It prints out
three metrics: the initialization time, runtime of 5000 xacts, and the
repository size after the tests.
2021-08-27 21:00:45 +03:00
Alexey Kondratov
e1d8f97b9e Mention pipenv run as an option to run pytest 2021-08-27 19:46:51 +03:00
Alexey Kondratov
7e7b31a626 Extract basebackup directly from the CopyOutReader
Do not fetch it into the intermediate buffer.
2021-08-27 19:46:51 +03:00
Heikki Linnakangas
787806285d Remove unused 'update_meta' argument.
It was used by the object repository code, but now that that's gone, it's
dead.
2021-08-27 15:45:45 +03:00
Arseny Sher
7474cfac08 Rename VCL to epochStartLsn and restart_lsn to truncate_lsn.
epochStartLsn is the LSN since which new proposer writes its WAL in its epoch,
let's be more explicit here.

truncate_lsn is LSN still needed by the most lagging safekeeper. restart_lsn is
terminology from pg_replication_slots, but here we don't really have 'restart';
hopefully truncate word makes it clearer.
2021-08-27 15:22:10 +03:00
Arseny Sher
6cbc08f1fb bump pg version 2021-08-27 15:22:10 +03:00
Arseny Sher
8d3450f4c6 Basic safekeeper refactoring and bug fixing.
1) Extract consensus logic to safekeeper.rs.
2) Change the voting flow so that acceptor tells his epoch along with giving
   the vote, not before it; otherwise it might get immediately stale. #294
3) Process messages from compute atomically and sync state properly. #270
4) Use separate structs for disk and network.

ref #315
2021-08-27 15:22:10 +03:00
Heikki Linnakangas
4902d1daa8 Store base images in separate ImageLayers
Previously, a SnapshotLayer and corresponding file on disk contained the
base image of every page in the segment at the start LSN, and all the
changes (= WAL records) in the range between start and end LSN. That was
a bit awkward, because we had to keep the base image of every page in
memory until we had accumulated enough WAL after the base image to write
out the layer. When it's time to write out a layer, we would really want
to replay the WAL to reconstruct the most recent version of each page, to
save the effort later. That's on the assumption that the client will
usually request the most recent version, not some older one.

Split the SnapshotLayer into two structs: ImageLayer and DeltaLayer. An
image layer contains a "snapshot" of the segment at one specific LSN, and
no WAL records, whereas a delta layer contains WAL records in a range of
LSNs. In order to reconstruct a page version in the delta layer, by
performing WAL redo, you also need the previous image layer. So the delta
layers are "incremental" against the previous layer.

So where previously we would create snapshot files like this:

    rel_100_200
    rel_200_300
    rel_300_400

We now create image and delta files like this:

    rel_100      # image
    rel_100_200  # delta
    rel_200
    rel_200_300
    rel_300
    rel_300_400
    rel_400

That's more files, but as discussed above, this allows storing more
up-to-date page versions on disk, which should reduce the latency of
responding to a GetPage request. It also allows more fine-grained garbage
collection. In the above example, after the old page version are no longer
needed and if the relation is not modified anymore, we only need to keep
the latest image file, 'rel_400', and everything else can be removed.

Implements https://github.com/zenithdb/zenith/issues/339
2021-08-27 02:35:16 +03:00
Heikki Linnakangas
40c79988a8 Move code to handle snapshot filenames
This isn't very useful yet, but the next commit will add more code
related to handling the filenames.
2021-08-27 02:35:16 +03:00
Patrick Insinger
d265b4cdd3 waldecoder - check for trailing bytes
When we parse the main data in a WAL record, ensure we consume all bytes.
2021-08-26 10:24:33 -07:00
Konstantin Knizhnik
beaa2cd0a2 Handle COPY error 2021-08-26 13:53:10 +03:00
Arseny Sher
c4450907e5 Don't hide exact error of get_timeline.
ref #470
2021-08-25 20:46:31 +03:00
Heikki Linnakangas
de9d5e0aa4 Remove unnecessary dependencies.
Found by "cargo udeps"
2021-08-25 18:51:15 +03:00
Heikki Linnakangas
4046530160 Remove remnants of choosing between repository formats.
Now that we only have one Repository implementation, no need for the
command-line options to choose it either. I'm removing these as a separate
commit to show what we will need to do if we add another Repository
implementation in the future (even though I don't foresee us doing that
any time soon)
2021-08-25 18:37:22 +03:00
Heikki Linnakangas
5998744bcc Remove rocksdb implementation.
The layered storage format is good enough that we don't need the rocksdb
implementation anymore. There are a lot of known issues but we'll keep
working on them.
2021-08-25 18:37:22 +03:00
Heikki Linnakangas
250ae643a8 Remove 'zenith push' feature.
Now that the new storage format is based on immutable files, we want to
implement push/pull in terms of these immutable files as well. Similarly
to how those files will be transferred between S3 and the page server.
The implementation we had was fairly tightly coupled with the object
repository implementation, but I'm about to remove the object / rocksdb
storage format soon. That would leave the current "zenith push" command
completely broken.

It seemed like a good idea at the time, but in hindsight, it was premature
to implement push/pull yet. It's a nice feature and I'd like to see it
reimplemented in the future, but in the meanwhile, let's remove the code
we had. We can dig the parts of it that might be useful in the future
from the git history.
2021-08-25 18:37:22 +03:00
Dmitry Ivanov
3edad463fb Adjust docker container for console's CI pipeline 2021-08-25 17:28:42 +03:00
Heikki Linnakangas
19fcea99da If too much memory is being used for in-memory layers, flush oldest one.
The old policy was to flush all in-memory layers to disk every 10 seconds.
That was a pretty dumb policy, unnecessarily aggressive. This commit
changes the policy so that we only flush layers where the oldest WAL
record is older than 16 MB from the last valid LSN on the timeline. That's
still pretty aggressive, but it's a step in the right direction. We do
need a limit on how old the oldest in-memory layer is allowed to be,
because that determines how much WAL the safekeepers need to hold onto,
and how much WAL we need to reprocess in case of a page server crash.
16 MB is surely still too aggressive for that, but it's easy to change
the setting later.

To support that, keep all in-memory layers in a binary heap, so that we
can easily find the one with the oldest LSN.

This adds a new LSN value to the metadata file: 'disk_consistent_lsn'.
Before, on page server restart we restarted the WAL processing from the
'last_record_lsn' value, but now that we don't flush everything to disk in
one go, the 'last_record_lsn' tracked in memory is usually ahead of the
last record that's been flushed to disk. Even though we track that oldest
LSN now, the crash recovery story isn't really complete. We don't do
fsync()s anywhere, and things will break if a snapshot file isn't complete,
as there's no CRC on them. That's not new, and it's a TODO.
2021-08-25 11:20:47 +03:00
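Sketched in simplified Rust (assumed names; the 16 MB threshold is the one from the commit message):

    const MAX_LAYER_DISTANCE: u64 = 16 * 1024 * 1024; // 16 MB of WAL

    struct OpenLayerInfo {
        oldest_lsn: u64,
    }

    // Sketch: flush in-memory layers whose oldest WAL record lags the last
    // valid LSN by more than the threshold, oldest first.
    fn maybe_flush(mut layers: Vec<OpenLayerInfo>, last_valid_lsn: u64) {
        layers.sort_by_key(|l| l.oldest_lsn);
        for layer in layers {
            if last_valid_lsn.saturating_sub(layer.oldest_lsn) <= MAX_LAYER_DISTANCE {
                break; // everything from here on is recent enough
            }
            // ... write this layer to disk and advance disk_consistent_lsn ...
        }
    }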
Dmitry Rodionov
f2f02a8af0 apply transformation (Arc<Option> -> Option<Arc>) suggested by @funbringer 2021-08-24 19:05:00 +03:00
Dmitry Rodionov
b135723994 review adjustments 2021-08-24 19:05:00 +03:00
Dmitry Rodionov
23b5249512 translate pageserver api to http 2021-08-24 19:05:00 +03:00
Eric Seppanen
41fa02f82b Replace transmute with serde
Upgrade to bindgen 0.59, which has two new abilities:
- specify arbitrary #[derive] attributes to attach to generated structs
- request explicit padding fields

These two features are enough to replace transmute with serde/bincode.
2021-08-24 16:32:37 +03:00
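Illustratively (a hypothetical struct standing in for the generated bindings), deserializing a C struct with bincode instead of transmuting raw bytes:

    use serde::Deserialize;

    // Hypothetical stand-in for a bindgen-generated struct, with an explicit
    // padding field and a serde derive attached via bindgen 0.59 options.
    #[derive(Deserialize)]
    struct ControlFileData {
        system_identifier: u64,
        pg_control_version: u32,
        _padding: u32,
    }

    fn parse(bytes: &[u8]) -> Result<ControlFileData, bincode::Error> {
        // Safe decoding instead of `std::mem::transmute` on raw bytes.
        bincode::deserialize(bytes)
    }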
Heikki Linnakangas
81dd4bc41e Fix decoding XLOG_HEAP_DELETE and XLOG_HEAP_UPDATE records.
Because the t_cid field was missing from the XlHeapDelete struct that
corresponds to the PostgreSQL xl_heap_delete struct, the check for the
XLH_DELETE_ALL_VISIBLE_CLEARED flag did not work correctly.

Decoding XlHeapUpdate struct was also missing the t_cid field, but that
didn't cause any immediate problems because in that struct, the t_cid
field is after all the fields that the page server cares about. But fix
that too, as it was an accident waiting to happen.

The bug was mostly hidden by the VM page handling in zenith_wallog_page,
where it forcibly generates a FPW record whenever a VM page is evicted:

    else if (forknum == VISIBILITYMAP_FORKNUM && !RecoveryInProgress())
    {
        /*
         * Always WAL-log vm.
         * We should never miss clearing visibility map bits.
         *
         * TODO Is it too bad for performance?
         * Hopefully we do not evict actively used vm too often.
         */
        XLogRecPtr recptr;
        recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
        XLogFlush(recptr);
        lsn = recptr;

But that was just hiding the issue: it's still visible if you had a
read-only node relying on the data in the page server, or you killed and
restarted the primary node, or you started a branch. In the included test
case, I used a new branch to expose this.

Fixes https://github.com/zenithdb/zenith/issues/461
2021-08-24 15:59:25 +03:00
anastasia
ad8b5c3845 use updated vendor/postgres 2021-08-23 18:19:59 +03:00
Dmitry Rodionov
dcaa2126f1 fix code format after main rebase 2021-08-23 18:01:59 +03:00
Dmitry Rodionov
b29ca232d6 add ability to disable colors, use argparse for arguments 2021-08-23 17:28:45 +03:00
Dmitry Rodionov
8c62b11bd5 adjust for review 2021-08-23 17:28:45 +03:00
Dmitry Rodionov
35b60d509f Add support for code format checking using rustfmt in optional
pre-commit hook and in ci pipeline. Found issues can be fixed
automatically via make fmt.
2021-08-23 17:28:45 +03:00
Dmitry Rodionov
d989580c1c remove small code duplication involving InMemoryLayer::get_seg_size, and remove redundant Option around new snapshot layer in InMemoryLayer::freeze 2021-08-23 13:00:05 +03:00
anastasia
798160544c Update zenith readmes:
- Move source tree overview into separate docs/sourcetree.md and update it.
- Add glossary: docs/glossary.md
- Add a draft of Architecture overview to main Readme.md
2021-08-23 10:21:10 +03:00
Max Sharnoff
39bb6fb19c Marginally improve walkeeper error visibility (#440)
Adds a warning if a postgres query fails, and some additional context to
errors generated inside `ReceiveWalConn::run`
2021-08-19 08:46:18 -07:00
Dmitry Rodionov
82725725fd update README to match required Rust version and new python package installation process 2021-08-19 17:42:52 +03:00
Alexey Kondratov
1c3d51ed92 Add Docker images building doc and refactor the overall docs reference 2021-08-19 15:12:35 +03:00
Alexey Kondratov
04a309f562 Build zenithdb/zenith:latest in CI (zenithdb/console#18) 2021-08-19 15:12:35 +03:00
anastasia
20e6cd7724 Update test_twophase - check that we correctly restore files at compute node start. 2021-08-19 12:15:09 +03:00
Heikki Linnakangas
9fed5c8fb7 Add test for page server restart. 2021-08-18 20:19:07 +03:00
Dmitry Rodionov
4bce65ff9a bump rust version in ci to 1.52.1 2021-08-17 20:31:28 +03:00
Heikki Linnakangas
3319befc30 Revert a bunch of commits that I pushed by accident
This reverts commits:
  e35a5aa550
  a389c2ed7f
  11ebcb531f
  8d2b61f4d1
  882f549236
  ddb7155bbe

Those were follow-up work on top of PR
https://github.com/zenithdb/zenith/pull/430, but they were still very
much not ready.
2021-08-17 19:20:27 +03:00
Heikki Linnakangas
ddb7155bbe WIP Store base images in separate ImageLayers 2021-08-17 18:55:04 +03:00
Heikki Linnakangas
882f549236 WIP: store base images separately 2021-08-17 18:54:53 +03:00
Heikki Linnakangas
8d2b61f4d1 Move code to handle snapshot filenames 2021-08-17 18:54:53 +03:00
Heikki Linnakangas
11ebcb531f Add Gauge for # of layers 2021-08-17 18:54:53 +03:00
Heikki Linnakangas
a389c2ed7f WIP: Track oldest open layer 2021-08-17 18:54:53 +03:00
Heikki Linnakangas
e35a5aa550 WIP: track mem usage 2021-08-17 18:54:53 +03:00
Heikki Linnakangas
45f641cabb Handle last "open" layer specially in LayerMap.
There can be only one "open" layer for each segment. That's the last one,
implemented by InMemoryLayer. That's the only one to which new records can
be appended. Much of the code needed to distinguish between the last
open layer and other layers anyway, so make the distinction explicit
in LayerMap.
2021-08-17 18:54:51 +03:00
Heikki Linnakangas
48f4a7b886 Refactor get_page_at_lsn() logic to layered_repository.rs
There was a lot of duplicated code between the get_page_at_lsn()
implementations in InMemoryLayer and SnapshotLayer. Move the code for
requesting WAL redo from the Layer trait into LayeredTimeline. The
get-function in Layer now just returns the WAL records and base image
to the caller, and the caller is responsible for performing the WAL
redo on them.
2021-08-17 18:54:48 +03:00
Heikki Linnakangas
91f72fabc9 Work with smaller segments.
Split each relish into fixed-sized 10 MB segments. Separate layers are
created for each segment. This reduces the write amplification if you
have a large relation and update only parts of it; the downside is
that you have a lot more files. The 10 MB is just a guess, we should
do some modeling and testing in the future to figure out the optimal
size.

Each segment tracks the size of the segment separately. To figure out
the total size of a relish, you need to loop through the segments to
find the highest segment that's in use. That's a bit inefficient, but
will do for now. We might want to add a cache or something later.
2021-08-17 18:54:41 +03:00
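The size lookup described above, as a sketch (assumed constants and names; not the actual implementation):

    const SEGMENT_SIZE_BLOCKS: u32 = 10 * 1024 * 1024 / 8192; // 10 MB of 8k blocks

    // Sketch: total relish size = index of the highest used segment times the
    // segment size, plus the size of that last (possibly partial) segment.
    fn get_relish_size(segment_sizes: &[u32]) -> Option<u32> {
        let (last_seg, last_size) = segment_sizes
            .iter()
            .enumerate()
            .rev()
            .find(|(_, &size)| size > 0)?;
        Some(last_seg as u32 * SEGMENT_SIZE_BLOCKS + *last_size)
    }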
anastasia
cbeb67067c Issue #367.
Change the CLI so that we always create the node from scratch at 'pg start'.
This operation preserves the previously existing config.

Add new flag '--config-only' to 'pg create'.
If this flag is passed, don't perform basebackup, just fill initial postgresql.conf for the node.
2021-08-17 18:12:31 +03:00
anastasia
921ec390bc cargo fmt 2021-08-16 19:41:07 +03:00
Heikki Linnakangas
f37cb21305 Update Cargo.lock for addition of 'bincode'
Commit 5eb1738e8b added a dependency to the 'bincode' crate. 'cargo build'
adds it to Cargo.lock automatically, so let's remember it.
2021-08-16 19:24:26 +03:00
Heikki Linnakangas
7ee8de3725 Add metrics to WAL redo.
Track the time spent on replaying WAL records by the special Postgres
process, the time spent waiting for access to the Postgres process (since
there is only one per tenant), and the number of records replayed.
2021-08-16 15:49:17 +03:00
Heikki Linnakangas
047a05efb2 Minor formatting and comment fixes. 2021-08-16 15:48:59 +03:00
Dmitry Rodionov
0c4ab80eac try to be more intelligent in WalAcceptor.start, added a bunch of typing sugar to wal acceptor fixtures 2021-08-16 14:27:44 +03:00
Heikki Linnakangas
2450f82de5 Introduce a new "layered" repository implementation.
This replaces the RocksDB based implementation with an approach using
"snapshot files" on disk, and in-memory btreemaps to hold the recent
changes.

This make the repository implementation a configuration option. You can
choose 'layered' or 'rocksdb' with "zenith init --repository-format=<format>"
The unit tests have been refactored to exercise both implementations.
'layered' is now the default.

Push/pull is not implemented. The 'test_history_inmemory' test has been
commented out accordingly. It's not clear how we will implement that
functionality; probably by copying the snapshot files directly.
2021-08-16 10:06:48 +03:00
Max Sharnoff
5eb1738e8b Rework walkeeper protocol to use libpq (#366)
Most of the work here was done on the postgres side. There's more
information in the commit message there.
 (see: 04cfa326a5)

On the WAL acceptor side, we're now expecting 'START_WAL_PUSH' to
initialize the WAL keeper protocol. Everything else is mostly the same,
with the only real difference being that protocol messages are now
discrete CopyData messages sent over the postgres protocol.

For the sake of documentation, the full set of these messages is:

  <- recv: START_WAL_PUSH query
  <- recv: server info from postgres   (type `ServerInfo`)
  -> send: walkeeper info              (type `SafeKeeperInfo`)
  <- recv: vote info                   (type `RequestVote`)

  if node id mismatch:
    -> send: self node id (type `NodeId`); exit

  -> send: confirm vote (with node id) (type `NodeId`)

  loop:
    <- recv: info and maybe WAL block  (type `SafeKeeperRequest` + bytes)
         (break loop if done)
    -> send: confirm receipt           (type `SafeKeeperResponse`)
2021-08-13 11:25:16 -07:00
Heikki Linnakangas
6e22a8f709 Refactor WAL redo to not use a separate thread.
My main motivation is to make it easier to attribute time spent in WAL
redo to the request that needed the WAL redo. With this patch, the WAL
redo is performed by the requester thread, so it shows up in stack traces
and in 'perf' report as part of the requester's call stack. This is also
slightly simpler (less lines of code) and should be a bit faster too.
2021-08-13 17:23:36 +03:00
Heikki Linnakangas
f8de71eab0 Update vendor/postgres to fix race condition leading to CRC errors.
Fixes https://github.com/zenithdb/zenith/issues/413
2021-08-13 14:02:26 +03:00
Heikki Linnakangas
8517d9696d Move gc_iteration() function to Repository trait.
The upcoming layered storage implementation handles GC as a
repository-wide operation because it needs to pay attention to the branch
points of all timelines.
2021-08-12 23:46:01 +03:00
Heikki Linnakangas
97f9021c88 Fix JWT token encoding issue in test.
On my laptop, the server was receiving the token as a string with extra
b'...' escaping, e.g as "b'eyJ0....0ifQA'" instead of just "eyJ0....0ifQA".
That was causing the test to fail.

I'm using Python 3.9, while the CI is using Python 3.8. I suspect that's
why. My version of pyjwt might be different too.

See also https://github.com/jpadilla/pyjwt/issues/391.
2021-08-12 20:46:14 +03:00
Heikki Linnakangas
0a92b31496 If a pg_regress test fails in CI, save regression.diffs 2021-08-12 18:39:23 +03:00
anastasia
6c3726913f Introduce check for physical relishes.
They represent files and use RelationSizeEntry to track existing and dropped files.
They can be both blocky and non-blocky.
get_relish_size() and get_rel_exists() functions work with physical relishes, not only with blocky ones.
2021-08-12 14:42:21 +03:00
anastasia
1bfade8adc Issue #330. Use put_unlink for twophase relishes.
Follow PostgreSQL logic: remove Twophase files when prepared transaction is committed/aborted.

Always store Twophase segments as materialized page images (no wal records).
2021-08-12 14:42:21 +03:00
anastasia
4eebe22fbb cargo fmt 2021-08-12 14:42:21 +03:00
Heikki Linnakangas
20d5e757ca Remove now-unused get_next_tag function.
The only caller was removed by commit c99a211b01.
2021-08-11 22:16:38 +03:00
Heikki Linnakangas
70cb399d59 Add convenience function to create a RowDescriptor message for an int8 col.
Makes the code to construct a result set a bit more terse and readable.
2021-08-11 20:17:33 +03:00
Dmitry Rodionov
ce5333656f Introduce authentication v0.1.
Current state with authentication.
Page server validates JWT token passed as a password during connection
phase and later when performing an action such as create branch tenant
parameter of an operation is validated to match one submitted in token.
To allow access from console there is dedicated scope: PageServerApi,
this scope allows access to all tenants. See code for access validation in:
PageServerHandler::check_permission.

Because we are in the process of refactoring the communication layer
involving the wal proposer protocol and safekeeper<->pageserver, the safekeeper
now doesn't check the token passed from compute, and uses a "hardcoded" token
passed via an environment variable to communicate with the pageserver.

Compute postgres now takes the token from an environment variable and passes it
as the password field in the pageserver connection. It is not passed through
settings because then the user would be able to retrieve it using pg_settings
or SHOW.

I’ve added basic test in test_auth.py. Probably after we add
authentication to remaining network paths we should enable it by default
and switch all existing tests to use it.
2021-08-11 20:05:54 +03:00
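Schematically (claim field names and the tenant scope are assumed; only the PageServerApi scope is named in the commit message), the permission check boils down to:

    // Sketch of the token claims and the per-request permission check.
    enum Scope {
        Tenant,        // token is tied to a single tenant (assumed name)
        PageServerApi, // console-level token: access to all tenants
    }

    struct Claims {
        scope: Scope,
        tenant_id: Option<String>,
    }

    fn check_permission(claims: &Claims, requested_tenant: &str) -> Result<(), String> {
        match claims.scope {
            Scope::PageServerApi => Ok(()),
            Scope::Tenant if claims.tenant_id.as_deref() == Some(requested_tenant) => Ok(()),
            _ => Err("tenant id mismatch in JWT token".to_string()),
        }
    }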
Arseny Sher
5f0fd093d7 Revert "Walkeeper safe info (#408)"
Temporary revert commit 0ee2e16b17 as it leads to
safekeeper state deserialization failure. Let's sort that out and get it back.
2021-08-11 16:26:35 +03:00
Konstantin Knizhnik
0ee2e16b17 Walkeeper safe info (#408)
* Align prev record CRC on an 8-byte boundary

* Update safekeeper in-memory status on receiving a message from WAL proposer
2021-08-11 09:14:05 +03:00
Konstantin Knizhnik
b607f0fd8e Align prev record CRC on an 8-byte boundary (#407) 2021-08-11 08:56:37 +03:00
anastasia
c99a211b01 Fix CLOG truncate handling in case of wraparound. 2021-08-11 05:49:24 +03:00
anastasia
949ac54401 Add test of clog (pg_xact) truncation 2021-08-11 05:49:24 +03:00
anastasia
e406811375 Fixes for handling SLRU relishes:
replace get_tx_status() with self.get_tx_is_in_progress() to handle xacts in truncated SLRU segments correctly
2021-08-11 05:49:24 +03:00
anastasia
590ace104a Fixes for handling SLRU relishes:
- don't return ZERO_PAGE from get_page_at_lsn_nowait() for truncated SLRU segments;
2021-08-11 05:49:24 +03:00
anastasia
e475f82ff1 Rename get_rel_size() to get_relish_size(). Don't bail if the relish is not found, just return None and let the caller decide how to handle this 2021-08-11 05:49:24 +03:00
anastasia
a368642790 cargo fmt 2021-08-10 14:26:52 +03:00
anastasia
8c7983797b Remove unused SLRUTruncate ObjectValue 2021-08-10 14:26:32 +03:00
anastasia
5dd9a66f9e Move postgres backend messages to trace level 2021-08-10 14:26:28 +03:00
anastasia
cc877f1980 Add unit test for find_end_of_wal().
Based on previous attempt to add same test by @lubennikovaav
Now WAL files are generated by initdb command.
2021-08-10 12:30:21 +03:00
anastasia
a5d57ca10b list_nonrels() returns elements in arbitrary order.
Remove incorrect comments that say otherwise.
2021-08-06 15:23:46 +03:00
Konstantin Knizhnik
3ca3394170 [refer #395] Check WAL record CRC in waldecoder (#396) 2021-08-05 16:57:57 +03:00
Heikki Linnakangas
e59e0ae2dc Clarify the terms "WAL service", "safekeeper", "proposer" 2021-08-05 10:27:56 +03:00
Stas Kelvich
ec07acfb12 fix typo in run_initdb() 2021-08-04 23:57:17 +03:00
Stas Kelvich
fa04096733 cargo fmt pass 2021-08-04 23:51:02 +03:00
Dmitry Ivanov
754892402c Enable full feature set for hyper in zenith_utils
Server functionality requires not only the "server" feature flag, but
also either "http1" or "http2" (or both). To make things simpler
(and prevent analogous problems), enable all features.
2021-08-04 21:41:17 +03:00
Stas Kelvich
02b9be488b Disable GC test.
Current GC test is flaky and overly strict. Since we are migrating to the layered repo format
with different GC implementation let's just silence this test for now.
2021-08-04 18:33:33 +03:00
Arseny Sher
cc3ac2b74c Allow safekeeper to stream till real end of wal.
Otherwise it prematurely terminates, e.g. in test_compute_restart.

ref #388
2021-08-04 18:03:43 +03:00
Arseny Sher
1dc2ae6968 Point vendor/postgres to main. 2021-08-04 14:21:01 +03:00
Stas Kelvich
04ae63a5c4 use proper postgres version 2021-08-04 14:15:07 +03:00
Arseny Sher
b77fade7b8 Look up wal directory properly in all find_end_of_wal callers.
ref #388
2021-08-04 14:15:07 +03:00
Stas Kelvich
56565c0f58 look up WAL in right directory 2021-08-04 14:15:07 +03:00
Dmitry Ivanov
ed634ec320 Extract message processing function from PostgresBackend's event loop
This patch has been extracted from #348, where it became unnecessary
after we had decided that we didn't want to measure anything inside
PostgresBackend.

IMO the change is good enough to make its way into the codebase,
even though it brings nothing "new" to the code.
2021-08-04 10:49:02 +03:00
Alexey Kondratov
bcaa59c0b9 Test compute restart with AND without safekeepers 2021-08-04 00:05:19 +03:00
Dmitry Ivanov
cb1b4a12a6 Add some prometheus metrics to pageserver
The metrics are served by an http endpoint, which
is meant to be spawned in a new thread.

In the future the endpoint will provide more APIs,
but for the time being, we won't bother with proper routing.
2021-08-03 21:42:24 +03:00
Heikki Linnakangas
9ff122835f Refactor ObjectTags, introducing a new concept called "relish"
This clarifies - I hope - the abstractions between Repository and
ObjectRepository. The ObjectTag struct was a mix of objects that could
be accessed directly through the public Timeline interface, and also
objects that were created and used internally by the ObjectRepository
implementation and not supposed to be accessed directly by the
callers.  With the RelishTag separate from ObjectTag, the distinction
is more clear: RelishTag is used in the public interface, and
ObjectTag is used internally between object_repository.rs and
object_store.rs, and it contains the internal metadata object types.

One awkward thing with the ObjectTag struct was that the Repository
implementation had to distinguish between ObjectTags for relations,
and track the size of the relation, while others were used to store
"blobs".  With the RelishTags, some relishes are considered
"non-blocky", and the Repository implementation is expected to track
their sizes, while others are stored as blobs. I'm not 100% happy with
how RelishTag captures that either: it just knows that some relish
kinds are blocky and some non-blocky, and there's an is_block()
function to check that.  But this does enable size-tracking for SLRUs,
allowing us to treat them more like relations.

This changes the way SLRUs are stored in the repository. Each SLRU
segment, e.g. "pg_clog/0000", "pg_clog/0001", are now handled as a
separate relish.  This removes the need for the SLRU-specific
put_slru_truncate() function in the Timeline trait. SLRU truncation is
now handled by calling put_unlink() on the segment. This is more in
line with how PostgreSQL stores SLRUs and handles their truncation.

The SLRUs are "blocky", so they are accessed one 8k page at a time,
and repository tracks their size. I considered an alternative design
where we would treat each SLRU segment as non-blocky, and just store
the whole file as one blob. Each SLRU segment is up to 256 kB in size,
which isn't that large, so that might've worked fine, too. One reason
I didn't do that is that it seems better to have the WAL redo
routines be as close as possible to the PostgreSQL routines. It
doesn't matter much in the repository, though; we have to track the
size for relations anyway, so there's not much difference in whether
we also do it for SLRUs.

While working on this, I noticed that the CLOG and MultiXact redo code
did not handle wraparound correctly. We need to fix that, but for now,
I just commented them out with a FIXME comment.
2021-08-03 14:01:05 +03:00
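A condensed sketch of the blocky/non-blocky distinction (variants abbreviated and their exact shapes assumed; the commit only guarantees there is an is_block() check):

    // Sketch: relishes are either "blocky" (accessed as 8k blocks, size
    // tracked by the repository) or stored as a single blob.
    enum RelishTag {
        Relation { spcnode: u32, dbnode: u32, relnode: u32, forknum: u8 },
        Slru { kind: SlruKind, segno: u32 },
        ControlFile,
        Checkpoint,
    }

    enum SlruKind {
        Clog,
        MultiXactMembers,
        MultiXactOffsets,
    }

    impl RelishTag {
        fn is_block(&self) -> bool {
            match self {
                RelishTag::Relation { .. } | RelishTag::Slru { .. } => true,
                RelishTag::ControlFile | RelishTag::Checkpoint => false,
            }
        }
    }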
Heikki Linnakangas
f0030ae003 Handle SLRU ZERO records directly by storing an all-zeros page image.
It's simpler than storing the original WAL record.
2021-08-03 13:59:51 +03:00
Heikki Linnakangas
acc0f41985 Don't try to launch duplicate WAL redo thread if tenant already exists.
The codepath for tenant_create command first launched the WAL redo
thread, and then called branches::create_repo() which checked if the
tenant's directory already exists. That's problematic, because
launching the WAL redo thread will run initdb if the directory doesn't
already exist. Race condition: If the tenant already exists, it will
have a WAL redo thread already running, and the old and new WAL redo
thread might try to run initdb at the same time, causing all kinds of
weird failures.

The test_pageserver_api test was failing 100% repeatably on my laptop
because of this. I'm not sure why this doesn't occur on the CI:

    Jul 31 18:05:48.877 INFO running initdb in "./tenants/5227e4eb90894775ac6b8a8c76f24b2e/wal-redo-datadir", location: pageserver::walredo, pageserver/src/walredo.rs:483
    thread 'WAL redo thread' panicked at 'initdb failed: The files belonging to this database system will be owned by user "heikki".
    This user must also own the server process.

    The database cluster will be initialized with locale "C".
    The default database encoding has accordingly been set to "SQL_ASCII".
    The default text search configuration will be set to "english".

    Data page checksums are disabled.

    creating directory ./tenants/0305b1326f3ea33add0929d516da7cb6/wal-redo-datadir ... ok
    creating subdirectories ... ok
    selecting dynamic shared memory implementation ... posix
    selecting default max_connections ... 100
    selecting default shared_buffers ... 128MB
    selecting default time zone ... Europe/Helsinki
    creating configuration files ... ok
    running bootstrap script ...
    stderr:
    2021-07-31 15:05:48.875 GMT [282569] LOG:  could not open configuration file "/home/heikki/git-sandbox/zenith/test_output/test_tenant_list/repo/./tenants/0305b1326f3ea33add0929d516da7cb6/wal-redo-datadir/postgresql.conf": No such file or directory
    2021-07-31 15:05:48.875 GMT [282569] FATAL:  configuration file "/home/heikki/git-sandbox/zenith/test_output/test_tenant_list/repo/./tenants/0305b1326f3ea33add0929d516da7cb6/wal-redo-datadir/postgresql.conf" contains errors
    child process exited with exit code 1
    initdb: removing data directory "./tenants/0305b1326f3ea33add0929d516da7cb6/wal-redo-datadir"
2021-07-31 18:13:21 +03:00
Alexey Kondratov
bd7d811921 Add libseccomp-dev as a dep to Dockerfile 2021-07-25 17:46:47 +03:00
anastasia
14b6796915 Send pgdata subdirs with basebackup. Fix for 1e6267a. 2021-07-25 17:46:47 +03:00
Max Sharnoff
3f4815efa2 Correct LeSer doc: "Big Endian" -> "Little Endian" (#362) 2021-07-23 12:38:37 -07:00
anastasia
ec03848d2f Fix pageserver.log destination for zenith init.
The problem was caused by merge conflict in 767590b
2021-07-23 16:22:01 +03:00
anastasia
1e6267a35f Get rid of snapshot directory + related code cleanup and refactoring.
- Add new subdir postgres_ffi/samples/ for config file samples.
- Don't copy wal to the new branch on zenith init or zenith branch.
- Import_timeline_wal on zenith init.
2021-07-23 13:21:45 +03:00
Heikki Linnakangas
47824c5fca Remove page server interactive mode.
It was pretty cool, but no one used it, and it had gotten badly out of
date. The main interesting thing with it was to see some basic metrics
on the fly, while the page server is running, but the metrics collection
had been broken for a long time, too. Best to just remove it.
2021-07-23 12:21:21 +03:00
Dmitry Rodionov
767590bbd5 support tenants
this patch adds support for tenants. This touches mostly pageserver.
Directory layout on disk is changed to contain a new layer of indirection.
Now the path to a particular repository has the following structure: <pageserver workdir>/tenants/<tenant
id>. Tenant id has the same format as timeline id. Tenant id is included in
pageserver commands when needed. Also new commands are available in
pageserver: tenant_list, tenant_create. This is also reflected in the CLI.
During init a default tenant is created and its id is saved in the CLI config,
so the following commands can use it without extra options. Tenant id is also included in
compute postgres configuration, so it can be passed via ServerInfo to
safekeeper and in connection string to pageserver.
For more info see docs/multitenancy.md.
2021-07-22 20:54:20 +03:00
Stas Kelvich
d210ba5fdb Update README.md 2021-07-22 20:33:34 +03:00
Dmitry Ivanov
8b656bad5f Add a missing [cfg(test)]
We don't always need to compile tests.
2021-07-22 16:46:27 +03:00
Dmitry Ivanov
97329d4906 Add a test for EOF in walkeeper's background thread
It would be nice to have a proper Timeline mock api,
but this time we'll get by with what we have.
2021-07-22 12:12:55 +03:00
Dmitry Ivanov
6a3b9b1d46 Fix accidental busyloop in walkeeper's background thread
It used to be the case that walkeeper's background thread
failed to recognize the end of stream (EOF) signaled by the
`Ok(None)` result of `FeMessage::read`.
2021-07-22 12:12:55 +03:00
anastasia
c913404739 Redirect log to pageserver.log during zenith init.
Add new module logger.rs that contains shared code to init logging
2021-07-21 18:56:34 +03:00
anastasia
8e42af9b1d Remove unused 'identify_system' pageserver query 2021-07-21 18:55:41 +03:00
Arseny Sher
fe17188464 Alternative way to truncate behind-the-vcl part of log.
Which is important to do before bumping epoch.
2021-07-21 17:27:05 +03:00
Arseny Sher
51b50f5cf5 Fix truncating the wal after VCL. 2021-07-21 17:27:05 +03:00
Arseny Sher
9e3fe2b4d4 Truncate not matching part of log.
ref #296
2021-07-21 17:27:05 +03:00
Arseny Sher
eb1618f2ed TLA+ specification of proposer-acceptor consensus protocol.
And .cfg file for running TLC.

ref #293
2021-07-21 17:27:05 +03:00
Stas Kelvich
791312824d set superuser name in python tests too 2021-07-21 17:22:22 +03:00
Stas Kelvich
a17b2a4364 reflect postgres superuser changes in pageserver->compute connstring 2021-07-21 17:22:22 +03:00
sharnoff
c4b2bf7ebd Use 'zenith_admin' as superuser name in initdb 2021-07-21 17:22:22 +03:00
Konstantin Knizhnik
0723d49e0b Object push (#276)
* Introducing common enum ObjectVal for all values

* Rewrite push mechanism to use raw object copy

* Fix history unit test

* Add skip_nonrel_objects functions for history unit tests
2021-07-21 00:41:57 +03:00
Eric Seppanen
ad79ca05e9 suppress nullptr warnings on auto-generated bindgen unit tests
Hopefully, this will be addressed upstream before too long; see
rust-bindgen issue #1651.
2021-07-20 20:12:15 +03:00
Heikki Linnakangas
325dd41277 Remove unused constructor function.
This was failing to compile with rustc nightly version, because the
datatype of 'fullPageWrites' was changed. See discussion at
https://github.com/zenithdb/zenith/issues/207#issuecomment-881478570.
But since the function is actually unused, let's just remove it.
2021-07-20 16:01:37 +03:00
sharnoff
7c96c638aa Fix particular typos: s/cofig/config/g 2021-07-20 10:32:59 +03:00
Konstantin Knizhnik
9838c71a47 Explicit compact (#341)
* Do not perform compaction of RocksDB storage on each GC iteration

* Increase GC timeout to let GC tests pass

* Add comment to gc_iteration
2021-07-19 16:49:12 +03:00
Stas Kelvich
79d9314ba6 terminate socket explicitly 2021-07-19 14:52:41 +03:00
Stas Kelvich
2b33894e7b few more review fixes 2021-07-19 14:52:41 +03:00
Stas Kelvich
a118557331 review fixes 2021-07-19 14:52:41 +03:00
Stas Kelvich
8ec234ba78 fix tokio features set for proxy standalone build 2021-07-19 14:52:41 +03:00
Stas Kelvich
70926adaba allow to specify redirect_uri in proxy 2021-07-19 14:52:41 +03:00
Stas Kelvich
560f088f05 fix query_string parsing for mgmt callback 2021-07-19 14:52:41 +03:00
Stas Kelvich
aa404b60fe change mgmt json format; add cli flags 2021-07-19 14:52:41 +03:00
Stas Kelvich
1b6d99db7c unfreeze client session upon callback 2021-07-19 14:52:41 +03:00
Stas Kelvich
605b90c6c7 do an actual proxy pass 2021-07-19 14:52:41 +03:00
Stas Kelvich
6f747893be quick mgmp api 2021-07-19 14:52:41 +03:00
Stas Kelvich
dab34c3dd6 distinguish between new and old users 2021-07-19 14:52:41 +03:00
Stas Kelvich
bf45bef284 md5 auth for postgres_backend.rs 2021-07-19 14:52:41 +03:00
Konstantin Knizhnik
d55095ab21 [refer #331] Move initialization of checkpoint object into import_timeline_from_postgres_datadir 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
a048e0c7c1 Revert assert check for advancing prev_record_lsn because in case of creating a branch at an older LSN we cannot provide a proper xl_prev value 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
9436c4161f Adding comment to prev_record_lsn update 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
e74b06d999 Pass prev_record_ptr through zenith.signal file to compute node 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
f6705b7a7d Fix TimestampTz type to i64 to be compatible with Postgres 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
386403dcd1 Hold CLOG for prepared transactions 2021-07-16 18:43:07 +03:00
Heikki Linnakangas
46e613f423 Fix typos 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
56da87cadc Merge with main branch 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
dcabe694ee Fix usage of put_page_image in save_xlog_dbase_create 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
842419b91f Do not update relation metadata in get_page_at_lsn 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
3cded20662 Refactoring after Heikki's review 2021-07-16 18:43:07 +03:00
Konstantin Knizhnik
eb0a56eb22 Replay non-relational WAL records on page server 2021-07-16 18:43:07 +03:00
Dmitry Rodionov
8a541147e2 run cargo generate-lockfile
It removes remaining issues with running cargo audit. There was one
error and one warning:

Crate:         tokio
Version:       1.5.0
Title:         Task dropped in wrong thread when aborting `LocalSet` task
Date:          2021-07-07
ID:            RUSTSEC-2021-0072
URL:           https://rustsec.org/advisories/RUSTSEC-2021-0072
Solution:      Upgrade to >=1.5.1, <1.6.0 OR >=1.6.3, <1.7.0 OR >=1.7.2, <1.8.0 OR >=1.8.1

Crate:         cpuid-bool
Version:       0.1.2
Warning:       unmaintained
Title:         `cpuid-bool` has been renamed to `cpufeatures`
Date:          2021-05-06
ID:            RUSTSEC-2021-0064
URL:           https://rustsec.org/advisories/RUSTSEC-2021-0064
2021-07-16 15:04:56 +03:00
Dmitry Rodionov
ed0fcfa9b7 replace parse_duration crate because of unpatched known vulnerability
resolves #87
2021-07-16 14:30:27 +03:00
Heikki Linnakangas
c5509b05de Revert accidental change to vendor/postgres.
I accidentally changed it in befefe8d84.
2021-07-16 12:37:10 +03:00
Heikki Linnakangas
befefe8d84 Run 'cargo fmt'.
Fixes a few formatting discrepancies that had crept in recently.
2021-07-14 22:03:14 +03:00
Konstantin Knizhnik
ad92b66eed Fix TimestampTz type to i64 to be compatible with Postgres 2021-07-14 15:55:12 +03:00
Heikki Linnakangas
d119f2bcce Add unit test for branch creation.
This is pretty similar to the python 'test_branch_behind' test, but I
find it useful to have a small unit test for it too.
2021-07-13 09:54:27 +03:00
Dmitry Ivanov
038dc6c629 Symlink Pipfile (& Pipfile.lock) at the top level
When executed, pipenv shell creates a fresh Pipfile if none
is found in the current directory. This is confusing,
hence the patch to symlink it at the top level, which
is a good starting point for various commands.
2021-07-12 21:30:52 +03:00
Heikki Linnakangas
bfc27bee5e Revert the fix to allegedly inaccurate comment.
I misread the code. It does indeed only call checkpoint() every 10
segments. Revert that change, but keep the rest of the comment fixes.
2021-07-10 18:53:47 +03:00
Heikki Linnakangas
19528de03e Fix inaccurate comment on how often we checkpoint the repository. 2021-07-10 18:30:47 +03:00
Konstantin Knizhnik
3e69c41a47 Add XLOG_HEAP_OPMASK to pg_contants 2021-07-10 10:09:56 +03:00
Konstantin Knizhnik
97681acfcf Replace XLR_RMGR_INFO_MASK with XLOG_HEAP_OPMASK 2021-07-10 10:09:56 +03:00
Konstantin Knizhnik
baf8800b96 Fix incorrect mask in waldecoder 2021-07-10 10:09:56 +03:00
Konstantin Knizhnik
577af8a459 Store page image with the same LSN as replaced WAL record 2021-07-09 17:24:25 +03:00
Dmitry Rodionov
75e717fe86 allow both domains and ip addresses in connection options for
pageserver and wal keeper. Also updated PageServerNode definition in
control plane to account for that. resolves #303
2021-07-09 16:46:21 +03:00
Stas Kelvich
4987d5ee1f reduce logging in wal_acceptor 2021-07-09 16:45:48 +03:00
anastasia
462b8801d2 Fix list_rels visibility check: some relations after initdb may have lsn equal to last_valid_lsn() 2021-07-09 15:25:53 +03:00
Dmitry Ivanov
2712eaee15 [postgres] Enable seccomp bpf 2021-07-09 14:59:45 +03:00
Eric Seppanen
96f4ddd243 cargo fmt 2021-07-05 21:50:42 -07:00
Heikki Linnakangas
3386ce6f35 Introduce common enum ObjectVal for all values stored in ObjectStore.
Based on Konstantin's original patch (PR #275), but I introduced helper
functions for serializing/deserializing the different kinds of
ObjectValues, which made it more pleasant to use, as the deserialization
checks are now performed in the helper functions.
2021-06-30 12:50:35 +03:00
Heikki Linnakangas
9c856ecf43 Update TODO comment now that we handle relation drops
Commit ced338fd20 neglected this comment. In passing, also move around
the comments explaining the two kinds of things we store per page.
2021-06-30 12:50:32 +03:00
anastasia
d4329887b3 remove restore_s3.rs that uses obsolete data format 2021-06-29 18:12:32 +03:00
Heikki Linnakangas
ced338fd20 Handle relation DROPs in page server.
Add back code to parse transaction commit and abort records, and in
particular the list of dropped relations in them. Add 'put_unlink'
function to the Timeline trait and implementation. We had the code to
handle dropped relations in the GC code and elsewhere in ObjectRepository
already, but there was nothing to create the RelationSizeEntry::Unlink
tombstone entries until now. Also add a test to check that GC correctly
removes all page versions of a dropped relation.

Implements https://github.com/zenithdb/zenith/issues/232, except for the
"orphaned" rels.

Reviewed-by: Konstantin Knizhnik
2021-06-29 00:27:10 +03:00
Heikki Linnakangas
44c35722d8 Remove a bunch of dead code
Some of these were related to handling various WAL records that are not
related to any relations, like pg_multixact updates. These should have
been removed in the revert commit 6a9c036ac1, but I missed them.

Also, we didn't do anything with commit/abort records. We will start
parsing commit/abort records in the next commit, but it seems better to
add that from a clean slate.

Reviewed-by: Konstantin Knizhnik
2021-06-29 00:26:53 +03:00
Heikki Linnakangas
ec44f4b299 Add test for Garbage Collection.
This exposes a command in the page server to run GC immediately on a given
timeline. It's just for testing purposes.
2021-06-28 17:07:28 +03:00
Heikki Linnakangas
a31bba19b0 Compact RocksDB database after garbage collection.
Without this step, the page versions won't actually be removed, they're
just marked for deletion on the next RocksDB "merge" or "compact"
operation.

Author: Konstantin Knizhnik
2021-06-28 16:53:26 +03:00
Heikki Linnakangas
0dd46061a8 Improve logging and comments in GC.
- Print the number of dropped relations, and the number of relations
  encountered overall.

- If a block has only one page version, the latest one, don't count it as
  a "truncated" version history. Only count pages for which we actually
  removed some old versions.

- Change "last" to "latest" in variable names and comments. "Last" could
  be interpreted as "oldest", but here it means "newest".

- Add a comment noting that the GC code depends on get_page_at_lsn_nowait
  to store the materialized page version in the repository.
2021-06-28 16:53:13 +03:00
Heikki Linnakangas
cb2ddf06d0 Refactor code to do one iteration of GC to separate function. 2021-06-28 16:49:21 +03:00
Dmitry Ivanov
eb7388e3e8 Add debug info to release builds
This is useful for profiling and, to some extent, debug.
Besides, debug info should not affect the performance.
2021-06-28 14:21:30 +03:00
Stas Kelvich
b314311f49 fix dockerfile with new init cli keys 2021-06-27 16:14:24 +03:00
Heikki Linnakangas
183a3022a5 cargo fmt 2021-06-25 11:47:59 +03:00
Konstantin Knizhnik
226204094a Fix recall parameter handling in walkeeper 2021-06-25 09:43:55 +03:00
Heikki Linnakangas
4f1b22a2c8 Use ObjectTag enum instead of special fork number to store metadata objects.
Extracted from Konstantin's larger PR:
https://github.com/zenithdb/zenith/pull/268
2021-06-22 21:34:31 +03:00
Dmitry Ivanov
257ade0688 Extract PostgreSQL connection logic into PgProtocol
This patch aims to:

* Unify connection & querying logic of ZenithPagerserver and Postgres.
* Mitigate changes to transaction machinery introduced in `psycopg2 >= 2.9`.

Now it's possible to acquire db connection using the corresponding
method:

```python
pg = postgres.create_start('main')
conn = pg.connect()
...
conn.close()
```

This pattern can be further improved with the help of `closing`:

```python
from contextlib import closing

pg = postgres.create_start('main')

with closing(pg.connect()) as conn:
    ...
```

All connections produced by this method will have autocommit
enabled by default.
2021-06-17 20:19:04 +03:00
Dmitry Ivanov
43ece6e2a2 Fix test_runner's fixtures for python 3.6
Apparently, Literal type is only available since 3.8.
2021-06-17 20:19:04 +03:00
Arseny Sher
f923464b93 Remove pq_protocol.rs.
I forgot to do that in b2f51026aa.
2021-06-16 18:52:36 +03:00
Arseny Sher
11efafb05b Don't capture pytest output in CI.
Otherwise it is hard to see what is going on until the testing is done.
2021-06-16 14:45:43 +03:00
Arseny Sher
7c5532303e Preserve wal acceptor logs in CI.
And generally make removal of everything-but-logs a bit simpler, with files
staying in place.

Also renames postgres log from 'log' to 'pg.log'.
2021-06-16 14:45:43 +03:00
Eric Seppanen
39c1d4896c add a clippy shell script
The clippy maintainers have not provided an easy way for projects to
configure the set of lints they would like enabled/disabled. It's
particularly bad for projects using workspaces, which can easily lead to
duplicated clippy annotations for every crate, library, binary, etc.

Add a shell script that runs clippy, with a few unhelpful lints
disabled:
new_without_default
manual_range_contains
comparison_chain

If you save this in your path under the name "cargo-zclippy" (or
whatever name you like), then you can run it as "cargo zclippy" from the
shell prompt. If your text editor has rust-analyzer integration, you can
also use this new command as a replacement for "cargo check" or "cargo
clippy" and see clippy warnings and errors right in the editor.
2021-06-15 10:52:11 -07:00
Eric Seppanen
d2d5a01522 minor clippy fixes 2021-06-15 10:52:11 -07:00
Arseny Sher
36d6c401bf bump vendor/postgres 2021-06-15 15:14:27 +03:00
Arseny Sher
37b0236e9a Move wal acceptor tests to python.
Includes fixtures for wal acceptors and associated setup.

Nothing really new here, but surprisingly this caught some issues in
walproposer.

ref #182
2021-06-15 15:14:27 +03:00
Patrick Insinger
cc169a6896 pageserver - config file
To simplify cloud ops, allow configuration via file.
toml is used as the config format, and the file is stored in the working
directory.
Arguments used at initialization are saved in the config file.
Config file params may be overridden by CLI arguments.
2021-06-14 09:40:22 -07:00
Patrick Insinger
77366b7a76 pageserver - remove env variables
Use CLI args instead of environment variables to parameterize the
working directory and postgres distribution.

Before this change, there was a mixture of environment variables and CLI
arguments that needed to be set. Moving to a single input simplifies
cloud configuration management.
2021-06-14 09:40:22 -07:00
Konstantin Knizhnik
9af04b3b8d Do not cache dropped relation metadata 2021-06-14 12:38:29 +03:00
Konstantin Knizhnik
9b7b311815 New version of postgres 2021-06-14 12:38:29 +03:00
Konstantin Knizhnik
cb4f5e911c Fix problem with rel_meta.get 2021-06-14 12:38:29 +03:00
Konstantin Knizhnik
6403f1745c Replace rel_meta.range() with rel_meta.get() 2021-06-14 12:38:29 +03:00
Konstantin Knizhnik
6d7942ece4 Use RwLock instead of mutex for relation metadata cache 2021-06-14 12:38:29 +03:00
Konstantin Knizhnik
09b2c66cf6 Cache relation size 2021-06-14 12:38:29 +03:00
Stas Kelvich
5c70b52f4a remove --systemid flag from wal_acceptor 2021-06-14 11:58:53 +03:00
Stas Kelvich
19602dc88a add wal_acceptor binary in Dockerfile 2021-06-14 11:58:53 +03:00
Stas Kelvich
2b66049b21 add wal_acceptor binary to docker image 2021-06-14 11:58:53 +03:00
Stas Kelvich
c3011359ab remove --systemid from walkeeper 2021-06-14 11:58:53 +03:00
Stas Kelvich
da117f431d run & init from dockerfile 2021-06-14 11:58:53 +03:00
Stas Kelvich
78aad4fe3f Show initdb stderr for unsuccessful runs 2021-06-14 11:58:53 +03:00
Heikki Linnakangas
1c5be12515 Fix compilation with Rust versions < 1.50
The bool::then function was added in Rust 1.50. I'm still using 1.48 on
my laptop. We haven't decided what Rust version we will require
(https://github.com/zenithdb/zenith/issues/138), and I'll probably need
to upgrade sooner or later, but this will do for now.
2021-06-11 10:37:25 +03:00
Dmitry Ivanov
96c7594d29 Enable some kind of gradual typing in test_runner (#222)
It's not realistic to enable full-blown type checks
within test_runner's codebase, since the amount of
warnings revealed by mypy is overwhelming.

Tests are supposed to be easy to use, so we can't
cripple everybody's workflow for the sake of imaginary benefit.

Ultimately, the purpose of this attempt is three-fold:

* Facilitate code navigation when paired with python-language-server.
* Make method signatures apparent to a fellow programmer.
* Occasionally catch some obvious type errors.
2021-06-10 22:53:15 +03:00
Eric Seppanen
7a3794ef18 tui_event: unwrap result of stdin.keys()
Clear a clippy warning about manual flatten.

This isn't good error handling, but panicking is probably better than
spinning forever if stdin returns EOF.
2021-06-10 12:04:52 -07:00
Stas Kelvich
bf56ea8c43 Locate postgres binary and libs for 'postgres --wal-redo'
based on POSTGRES_DISTRIB_DIR.
2021-06-09 20:17:27 +03:00
Dmitry Ivanov
bb1446e33a Change behavior of ComputeControlPlane::new_node() (#235)
Previously, transaction commit could happen regardless of whether
pageserver has caught up or not. This patch aims to fix that.

There are two notable changes:

1. ComputeControlPlane::new_node() now sets the
`synchronous_standby_names = 'pageserver'` parameter to delay
transaction commit until pageserver acting as a standby has
fetched and ack'd a relevant portion of WAL.

2. pageserver now has to:
    - Specify the `application_name = pageserver` which matches the
    one in `synchronous_standby_names`.
    - Properly reply with the ack'd LSNs.

This means that some tests don't need sleeps anymore.

TODO: We should probably make this behavior configurable.

Fixes #187.
2021-06-09 11:24:55 +03:00
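To make the commit above concrete, here is a minimal sketch (not the actual ComputeControlPlane code) of the compute-side half: appending `synchronous_standby_names = 'pageserver'` to a data directory's postgresql.conf. The helper name and path are hypothetical; the pageserver side additionally connects with `application_name = pageserver` and reports the LSNs it has ack'd.

```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;

// Hypothetical helper: append the synchronous-commit setting so commits wait
// for the standby named 'pageserver' to acknowledge the WAL.
fn enable_pageserver_sync_commit(pgdata: &Path) -> std::io::Result<()> {
    let mut conf = OpenOptions::new()
        .append(true)
        .open(pgdata.join("postgresql.conf"))?;
    writeln!(conf, "synchronous_standby_names = 'pageserver'")?;
    Ok(())
}

fn main() -> std::io::Result<()> {
    enable_pageserver_sync_commit(Path::new("/tmp/compute-pgdata"))
}
```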
anastasia
0969574d48 Use bindgen for various xlog structures and checkpoint.
Implement encode/decode methods for them.

Some methods are unused now. This is a preparatory commit for nonrel_wal
2021-06-09 01:00:42 +03:00
anastasia
05a681be2c add createuser test to test shared catalog restore 2021-06-09 00:31:09 +03:00
anastasia
507c1fbbac bump vendor/postgres 2021-06-08 17:46:43 +03:00
Arseny Sher
b2f51026aa Consolidate PG proto parsing-deparsing and backend code.
Now postgres_backend communicates with the client, passing queries to the
provided handler; we have two currently, for wal_acceptor and pageserver.

Now BytesMut is again used for writing data to avoid manual message length
calculation.

ref #118
2021-06-08 17:31:40 +03:00
anastasia
2b0193e6bf implement from_bytes for XLogPageHeader structs 2021-06-08 13:08:57 +03:00
anastasia
c31a5e2c8f move XLogPageHeader structs to xlog_utils 2021-06-08 13:08:57 +03:00
anastasia
d85d67a6f1 use constants defined in xlog_utils for waldecoder 2021-06-08 13:08:57 +03:00
anastasia
9b8e82c6cf Fix makefile for out-of-tree compiling of contrib/zenith 2021-06-07 19:24:20 +03:00
Heikki Linnakangas
434374ebb4 Turn encode/decode into methods
Like in PR #208
2021-06-04 23:05:30 +03:00
Heikki Linnakangas
a7ae552851 Use rust memoffset crate to replace C offsetof().
Cherry-picked from Eric's PR #208
2021-06-04 23:05:28 +03:00
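For context, a tiny sketch of what the memoffset crate provides; the struct below is made up for illustration and is not the real ControlFileData layout.

```rust
use memoffset::offset_of;

// Illustrative #[repr(C)] struct; with C layout the offsets below are fixed.
#[repr(C)]
struct ExampleControlData {
    system_identifier: u64,  // bytes 0..8
    pg_control_version: u32, // bytes 8..12
    catalog_version_no: u32, // bytes 12..16
}

fn main() {
    // offset_of! plays the role of C's offsetof() without hand-written unsafe code.
    assert_eq!(offset_of!(ExampleControlData, pg_control_version), 8);
    assert_eq!(offset_of!(ExampleControlData, catalog_version_no), 12);
}
```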
Heikki Linnakangas
8b5a061c8e Add comments on the unsafe use of transmute in encode/decode_pg_control
Note the unsafety of the unsafe block, with a link to the ongoing
discussion. This doesn't try to solve the problem, but let's at least
document the status quo.
2021-06-04 23:05:26 +03:00
Heikki Linnakangas
8147aa7e93 Use u8 slice instead of Bytes in function argument.
Bytes is handy, but in decode_pg_control's case it's just complicating
things. Also, pass ControlFileData by ref to encode_pg_control().
2021-06-04 23:05:20 +03:00
Heikki Linnakangas
d18cc8a3a8 Update 'postgres_ffi' module's readme file and comments.
Explain the purpose of the 'postgres_ffi' module, explain what the
PostgreSQL control file is, and do some other minor cleanup.
2021-06-04 23:05:11 +03:00
Heikki Linnakangas
762e9859d6 Move functions for reading/writing control file to separate source file.
To follow the precedent of xlog_utils.rs and relfile_utils.rs.
2021-06-04 23:05:05 +03:00
Heikki Linnakangas
924261f7db Remove unused ControlFile::new() constructor.
It has never been used, AFAICS.
2021-06-04 23:05:02 +03:00
Konstantin Knizhnik
063429aade Implement GC for new object_store API (#229)
* Implement GC for new object_store API

* Add comments for GC

* Revert postgres module version reference
2021-06-04 20:11:56 +03:00
anastasia
445e88f50b Minor fix: preserve log of regression tests in circleci 2021-06-03 15:54:59 +03:00
Patrick Insinger
47694ea4f5 zenith push 2021-06-02 17:20:49 -04:00
Patrick Insinger
3364a8d442 pageserver - timeline history api 2021-06-02 16:20:26 -04:00
Dmitry Ivanov
f2243d7459 Merge pull request #219 from zenithdb/tidy-up-tests
Tidy up pytest-based tests
2021-06-01 22:06:13 +03:00
Dmitry Ivanov
244fcffc50 Fix typos found by codespell 2021-06-01 21:43:26 +03:00
Dmitry Ivanov
8c3c9c3394 Update README.md 2021-06-01 21:31:29 +03:00
Dmitry Ivanov
00ce635da9 Reformat tests using yapf 2021-06-01 21:09:09 +03:00
Dmitry Ivanov
7d5f7462c1 Tidy up pytest-based tests 2021-06-01 21:09:09 +03:00
Alexey Kondratov
bed75f800f Bump vendor/postgres to use rebased on top of REL_14_BETA1 version 2021-06-01 20:04:22 +03:00
anastasia
0e423d481e Update rustdoc comments and README for pageserver crate 2021-06-01 19:38:42 +03:00
anastasia
0c74f6fa4e Update README about source tree layout 2021-06-01 19:38:42 +03:00
Heikki Linnakangas
fc01fae9b4 Remove leftover references to safekeeper_proxy.
We don't use it anymore. The WAL proposer is now a background worker that
runs as part of the primary Postgres server.
2021-06-01 18:50:24 +03:00
Heikki Linnakangas
558a2214bc Fix comment 2021-06-01 18:28:01 +03:00
Heikki Linnakangas
31815bccc0 Remove obsolete script.
It was used to launch the page server a long time ago. Doesn't work,
and isn't needed anymore.
2021-06-01 18:27:54 +03:00
Stas Kelvich
e12cab7c17 fix dockerfile comments 2021-06-01 17:07:10 +03:00
Stas Kelvich
002cd8ed5b Dockerfile for pageserver. 2021-06-01 16:08:32 +03:00
Stas Kelvich
588a030803 Clean up control_plane's Cargo.toml 2021-06-01 16:08:32 +03:00
Stas Kelvich
0b9bbdc4ec Remove libxml2 and curl from postgres dependencies.
As lazyrestore was removed from our postgres main branch.
2021-06-01 16:08:32 +03:00
Heikki Linnakangas
922715a923 Use Timeline::get_last_record_lsn() to find end-of-WAL.
That's less hacky than scanning the WAL directory.
2021-06-01 09:37:54 +03:00
Heikki Linnakangas
ac60b68d50 Handle VM and FSM truncation WAL records in the page server.
Fixes issue #190.

Original patch by Konstantin Knizhnik.
2021-05-31 23:36:17 +03:00
Konstantin Knizhnik
1aceea1bdd Shutdown socket in ReplicationConn 2021-05-31 21:37:07 +03:00
Konstantin Knizhnik
e0cc4dee4f [refer #182] Make walkeeper periodically send callme requests to pageserver 2021-05-31 21:37:07 +03:00
Arseny Sher
fd20101e5c Configure pipenv for python tests. 2021-05-31 16:43:01 +03:00
anastasia
5a73a6fdfc add -w flag to wait till pg_ctl actually finishes what was asked 2021-05-28 20:33:16 +03:00
Stas Kelvich
4608b1ec70 Set wal_log_hints=on
That is mandatory to correctly maintain visibility map (see issue#192).
It also makes sense to check that wal_log_hints is enabled on the pageserver side,
but for now let's just check that the tests pass with this on.
2021-05-28 11:38:46 +03:00
anastasia
ccb2eea7fd Add test_isolation that runs pg_isolation_regress for zenith 2021-05-28 11:38:46 +03:00
Heikki Linnakangas
6b615cbde1 Remove Copy marker from large ServerInfo struct.
We don't want to encourage passing it by value. Doesn't matter much in
practice, but let's be tidy.

Per discussion at https://github.com/zenithdb/zenith/pull/195#issuecomment-849897327
2021-05-27 23:16:54 +03:00
Heikki Linnakangas
adc0e04205 Misc cleanup of the 'zenith_regress' tests
- Remove serial_schedule. As was alluded to in the README, it's really
  quite pointless.
- Remove unused PORT/HOST variables
- Fix typos
2021-05-27 23:12:02 +03:00
Heikki Linnakangas
78e3edf2e9 Add CONTRIBUTING.md with some ground rules for submitting PRs.
Let's try to ratchet up our code quality, but at the same time keep the
bureaucracy low.
2021-05-27 23:07:37 +03:00
Heikki Linnakangas
4b6563a55d Little cleanup on save_decoded_record function
Pass DecodedWALRecord by reference, seems nicer, and probably marginally
faster.
2021-05-27 20:50:45 +03:00
Heikki Linnakangas
34f4207501 Refactoring of the Repository/Timeline stuff
- All timelines are now stored in the same rocksdb repository. The GET
  functions have been taught to follow the ancestors.

- Change the way relation size is stored. Instead of inserting "tombstone"
  entries for blocks that are truncated away, store relation size as
  separate key-value entry for each relation

- Add an abstraction for the key-value store: ObjectStore. It allows
  swapping RocksDB with some other key-value store easily. Perhaps we
  will write our own storage implementation using that interface, or
  perhaps we'll need a different abstraction, but this is a small
  improvement over status quo in any case.

- Garbage Collection is broken and commented out. It's not clear where and
  how it should be implemented.
2021-05-27 20:07:50 +03:00
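A rough sketch of the kind of key-value abstraction described above, with an in-memory stand-in where RocksDB would normally sit. The trait and method names here are illustrative, not the actual ObjectStore interface.

```rust
use std::collections::BTreeMap;
use std::sync::Mutex;

// Minimal key-value interface the repository layer could code against.
trait ObjectStore {
    fn put(&self, key: &[u8], value: &[u8]);
    fn get(&self, key: &[u8]) -> Option<Vec<u8>>;
}

// In-memory stand-in for a RocksDB-backed implementation.
struct MemStore {
    map: Mutex<BTreeMap<Vec<u8>, Vec<u8>>>,
}

impl ObjectStore for MemStore {
    fn put(&self, key: &[u8], value: &[u8]) {
        self.map.lock().unwrap().insert(key.to_vec(), value.to_vec());
    }
    fn get(&self, key: &[u8]) -> Option<Vec<u8>> {
        self.map.lock().unwrap().get(key).cloned()
    }
}

fn main() {
    let store = MemStore { map: Mutex::new(BTreeMap::new()) };
    // Relation size stored as its own key-value entry, per the commit above.
    store.put(b"rel_size/1663/13008/16384", &8192u64.to_be_bytes());
    assert_eq!(store.get(b"rel_size/1663/13008/16384").unwrap().len(), 8);
}
```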
Heikki Linnakangas
d1d2d5ce69 Make multixact test more robust
There was no guarantee that the SELECT FOR KEY SHARE queries actually
run in parallel. With unlucky timing, one query might finish before
the next one starts, so that the server doesn't need to create a
multixact. I got a failure like that on the CI:

    batch_others/test_multixact.py:56: in test_multixact
        assert(int(next_multixact_id) > int(next_multixact_id_old))
    E   AssertionError: assert 1 > 1
    E    +  where 1 = int('1')
    E    +  and   1 = int('1')

This could be reproduced by adding a random sleep in the runQuery
function, to make each query run at different times.

To fix, keep the transactions open after running the queries, so that
they will surely be open concurrently. With that, we can run the
queries serially, and don't need the 'multiprocessing' module anymore.

Fixes https://github.com/zenithdb/zenith/issues/196
2021-05-27 20:00:52 +03:00
Heikki Linnakangas
60ea26316e Add LICENSE and COPYRIGHT files.
This declares the software as Apache 2.0 licensed, and notes that the
vendor/postgres submodule is licensed under the PostgreSQL license.
2021-05-27 15:33:08 +03:00
Heikki Linnakangas
cffc979058 Fix a couple of typos in comments. 2021-05-27 14:32:04 +03:00
Heikki Linnakangas
1af6607fc3 Add a test for restarting and recreating compute node.
This is working; let's keep it that way. This also adds test coverage for
the 'zenith pg stop --destroy' option that was added in commit 6ad6e5bd.
2021-05-27 12:59:45 +03:00
Heikki Linnakangas
cb6e2d9ddb Minor refactoring and cleanup of the Timeline interface.
Move `save_decoded_record` out of the Timeline trait. The storage
implementation shouldn't need to know how to decode records.

Also move put_create_database() out of the Timeline trait. Add a new
`list_rels` function to Timeline to support it, instead.

Rename `get_relsize` to `get_rel_size`, and `get_relsize_exists` to
`get_rel_exists`. Seems nicer.
2021-05-27 09:44:46 +03:00
Alexey Kondratov
1ccf82f932 More info about branches tree drawing routines
Debt for the #144
2021-05-26 12:05:13 +03:00
Alexey Kondratov
b1a424dfa9 Add more info about borrowed from Postgres structures (RelTag and BufferTag) 2021-05-26 12:05:13 +03:00
Eric Seppanen
263acef1cc README updates
Add build dependencies and other local packages needed (Ubuntu only).

Fix some weird formatting of psql commands due to `sh` syntax
highlighting.

Improve test directions, so pytest doesn't scan the whole tree.

Drop description of the integration_tests directory since it's on its
way out.
2021-05-25 16:41:15 -07:00
Eric Seppanen
7c73afc1af switch repository types to serde
Derive Serialize+Deserialize for RelTag, BufferTag, CacheKey. Replace
handwritten pack/unpack functions with ser, des from
zenith_utils::bin_ser (which uses the bincode crate).

There are some ugly hybrids in walredo.rs, but those functions are
already doing a lot of questionable manual byte-twiddling, so hopefully
the weirdness will go away when we get better postgres protocol
wrappers.
2021-05-25 14:56:19 -07:00
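A minimal sketch of the pattern this commit moves to: derive Serialize/Deserialize and let bincode produce the bytes. The field layout below is hypothetical, and the calls go straight to bincode with big-endian fixed-width integers, roughly what zenith_utils::bin_ser wraps.

```rust
use bincode::Options;
use serde::{Deserialize, Serialize};

// Hypothetical key type; the real RelTag/BufferTag/CacheKey have different fields.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct ExampleKey {
    spcnode: u32,
    dbnode: u32,
    relnode: u32,
    blknum: u32,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let opts = bincode::options().with_big_endian().with_fixint_encoding();
    let key = ExampleKey { spcnode: 1663, dbnode: 13008, relnode: 16384, blknum: 7 };
    let bytes = opts.serialize(&key)?;                // replaces a handwritten pack()
    let back: ExampleKey = opts.deserialize(&bytes)?; // replaces a handwritten unpack()
    assert_eq!(key, back);
    Ok(())
}
```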
Heikki Linnakangas
e8f0a9bb80 Add test for prepared transactions. 2021-05-25 11:11:32 +03:00
Eric Seppanen
6f9175ca2d cargo fmt 2021-05-24 17:28:56 -07:00
Heikki Linnakangas
69fa10ff86 Fix rocksdb get_relsize() implementation to work with historic LSNs. 2021-05-24 17:12:18 +03:00
Heikki Linnakangas
d5fe515363 Implement "checkpointing" in the page server.
- Previously, we checked on first use of a timeline, whether there is
  a snapshot and WAL for the timeline, and loaded it all into the
  (rocksdb) repository. That's a waste of effort if we had done that
  earlier already, and stopped and restarted the server. Track the
  last LSN that we have loaded into the repository, and only load the
  recent missing WAL after that.

- When you create a new zenith repository with "zenith init",
  immediately load the initial empty postgres cluster into the rocksdb
  repository. Previously, we only did that on the first connection. This
  way, we don't need any "load from filesystem" codepath during normal
  operation, we can assume that the repository for a timeline is always
  up to date. (We might still want to use the functionality to import an
  existing PostgreSQL data directory into the repository in the future,
  as a separate Import feature, but not today.)
2021-05-24 17:02:05 +03:00
Heikki Linnakangas
6a9c036ac1 Revert all changes related to storing and restoring non-rel data in page server
This includes the following commits:

35a1c3d521 Specify right LSN in test_createdb.py
d95e1da742 Fix issue with propagation of CREATE DATABASE to the branch
8465738aa5 [refer #167] Fix handling of pg_filenode.map files in page server
86056abd0e Fix merge conflict: set initial WAL position to second segment because of pg_resetwal
2bf2dd1d88 Add nonrelfile_utils.rs file
20b6279beb Fix restoring non-relational data during compute node startup
06f96f9600 Do not transfer WAL to computation nodes: use pg_resetwal for node startup

As well as some older changes related to storing CLOG and MultiXact data as
"pseudorelation" in the page server.

With this revert, we go back to the situation where, when you create a
new compute node, we ship *all* the WAL from the beginning of time to
the compute node. Obviously we need a better solution, like the code
that this reverts. But per discussion with Konstantin and Stas, this
stuff was still half-baked, and it's better for it to live in a branch
for now, until it's more complete and has gone through some review.
2021-05-24 16:05:45 +03:00
anastasia
6f9a582973 increase wait_lsn timeout to make tests more stable 2021-05-24 15:29:16 +03:00
anastasia
a0e23e6f3f Debug Timed out while waiting for WAL record problem 2021-05-24 15:29:16 +03:00
anastasia
84508d4f68 fix replay of nextMulti and nextMultiOffset fields 2021-05-24 15:17:35 +03:00
anastasia
fb230dcf32 Add test_multixact to check that we replay multixact and advance next_multixact_id correctly 2021-05-24 15:17:35 +03:00
Eric Seppanen
4aabc9a682 easy clippy cleanups
Various things that clippy complains about, and are really easy to
fix.
2021-05-23 13:17:15 -07:00
Heikki Linnakangas
0fe81b2993 Update postgres submodule 2021-05-22 00:30:44 +03:00
Eric Seppanen
eb1f1a347d rust builds: disable incremental builds
For a CI build, storing incremental build data just makes the cache
bigger, for minimal gain.

Also, for Rust < 1.52.1 there are incremental compilation bugs. CircleCI
is currently building on 1.51.

This only affects the debug build; incremental compilation isn't used on
the release build.
2021-05-21 09:17:11 -07:00
Eric Seppanen
064aa44a06 fix cargo install to honor build_type
Also, bump the cache tag to invalidate the previous (incorrect) outputs.
2021-05-21 09:17:11 -07:00
Eric Seppanen
d6ee61b5cf circleci: build & test debug + release
Add parameters to specify which kind of build; run a debug and release
variant for each job.

Eventually this will be too many jobs, but for now this is a nice start.

Also, bump the cache string to "v02" so we don't mix up our cache output
with other branches.
2021-05-21 09:17:11 -07:00
Eric Seppanen
4b78a16b82 break circleci build into two jobs
This is a prerequisite to multiple Rust build jobs, e.g. MSRV, debug vs
release, etc.
2021-05-21 09:17:11 -07:00
Konstantin Knizhnik
c093ee5e4b Fix start_node_with_wal_proposer test 2021-05-21 14:07:16 +03:00
Konstantin Knizhnik
7685372cae Fix warning in test_wal_acceptor 2021-05-21 13:04:56 +03:00
Konstantin Knizhnik
ce54133ec4 Use zenith smgr for global relations 2021-05-21 12:46:17 +03:00
Konstantin Knizhnik
610e14a7fc Use new version of postgres 2021-05-21 12:36:24 +03:00
Konstantin Knizhnik
35a1c3d521 Specify right LSN in test_createdb.py 2021-05-21 12:20:38 +03:00
Heikki Linnakangas
22b7e74c83 Add test for following relmapper files at CREATE DATABASE 2021-05-21 12:13:47 +03:00
Konstantin Knizhnik
d95e1da742 Fix issue with propagation of CREATE DATABASE to the branch 2021-05-21 12:06:46 +03:00
Stas Kelvich
40d047c146 Remove double "zenith" mention in CLI header 2021-05-21 00:26:31 +03:00
Stas Kelvich
42f3dd47d2 update README.md according to recent CLI changes 2021-05-21 00:26:31 +03:00
Stas Kelvich
c2b2ab974c Hide initdb output from "zenith init" command 2021-05-21 00:26:31 +03:00
Stas Kelvich
6ad6e5bd84 Add --destroy flag to "pg stop" CLI command 2021-05-21 00:26:31 +03:00
Stas Kelvich
d534aeb9e1 Properly propagate control plane errors to CLI.
That allows to show decent error whenever we try to start already
started postgres.
2021-05-21 00:26:31 +03:00
Stas Kelvich
d45839879c Bind to socket earlier during pageserver init.
That allows printing a reasonable error message instead of panicking if
the address is already in use.
2021-05-21 00:26:31 +03:00
Stas Kelvich
1f6ca23db6 Be slightly more vocal about pageserver start/stop 2021-05-21 00:26:31 +03:00
Heikki Linnakangas
2127a65e27 Tidy up the code to launch WAL redo process a little bit
- if removing the old datadir fails, throw an error
- obey PageServerConf.workdir
2021-05-20 19:29:00 +03:00
Heikki Linnakangas
ecf2d181c4 Tidy up the code to create PageServerConf
Parse all the command line options before calling "zenith init" and
changing current working dir. The rest of the options don't make any
difference if we're initializing a new repository, but it seems strange
and error-prone to parse some arguments at different times.
2021-05-20 19:28:57 +03:00
Alexey Kondratov
c1bfa32771 Follow up for issue #144, better graphics in branches tree 2021-05-20 19:25:25 +03:00
Konstantin Knizhnik
8465738aa5 [refer #167] Fix handling of pg_filenode.map files in page server 2021-05-20 19:16:16 +03:00
Konstantin Knizhnik
87d7ce816d Add pause in pageserver stop method to avoid address already in use errors 2021-05-20 17:42:07 +03:00
anastasia
f38c2e620e Add test_zenith_regress.py that runs pg_regress styled tests from test_runner/zenith_regress.
TODO: remove similar tests from vendor/postgres testset
2021-05-20 17:24:39 +03:00
Konstantin Knizhnik
86056abd0e Fix merge conflict: set initial WAL position to second segment because of pg_resetwal 2021-05-20 15:26:39 +03:00
Konstantin Knizhnik
2bf2dd1d88 Add nonrelfile_utils.rs file 2021-05-20 14:48:55 +03:00
Konstantin Knizhnik
874d82fd4c Fix tests in lsn.rs after changing wal_seg_size type 2021-05-20 14:45:09 +03:00
Konstantin Knizhnik
3645133700 Fix conflicts with main branch 2021-05-20 14:39:27 +03:00
Konstantin Knizhnik
20b6279beb Fix restoring non-relational data during compute node startup 2021-05-20 14:14:52 +03:00
Konstantin Knizhnik
06f96f9600 Do not transfer WAL to computation nodes: use pg_resetwal for node startup 2021-05-20 14:13:47 +03:00
Alexey Kondratov
b5f60f3874 Issue #144: Refactor errors handling during branches tree printing 2021-05-20 12:49:04 +03:00
Alexey Kondratov
0ec56cd21f Issue #144: Branching output of zenith branch
* Add ancestor_id to pg_list->branch_list output of pageserver.
* Display branching point (LSN) for each non-root branch.
* Add tests for `zenith branch`.
2021-05-20 12:49:04 +03:00
Heikki Linnakangas
600e1a0080 Pass PageServerConf as static ref.
It's created once early in server startup, after parsing the
command-line options, and never modified afterwards. To simplify
things, pass it around as static ref, instead of making copies in all
the different structs. We still pass around a reference to it, rather
than putting it in a global variable, to allow unit testing with
different configs in the same process.
2021-05-20 09:11:36 +03:00
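A minimal sketch of the `&'static` approach this commit describes: build the config once at startup, leak a single allocation, and hand the reference around. Field names are hypothetical.

```rust
use std::path::PathBuf;

// Hypothetical subset of the config; only the passing style matters here.
struct PageServerConf {
    listen_addr: String,
    workdir: PathBuf,
}

fn start_page_service(conf: &'static PageServerConf) {
    // Threads can capture the reference freely: it lives for the whole process.
    std::thread::spawn(move || println!("listening on {}", conf.listen_addr))
        .join()
        .unwrap();
}

fn main() {
    let conf: &'static PageServerConf = Box::leak(Box::new(PageServerConf {
        listen_addr: "127.0.0.1:64000".to_string(),
        workdir: PathBuf::from(".zenith"),
    }));
    start_page_service(conf);
    // Unit tests can build their own leaked config instead of touching a global.
    println!("workdir: {}", conf.workdir.display());
}
```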
Eric Seppanen
9c94a34ae7 cargo fmt 2021-05-19 15:26:21 -07:00
Stas Kelvich
9c0ac251df Describe BeMessage::ErrorResponse format in comments 2021-05-20 00:37:46 +03:00
Stas Kelvich
872ed24408 Manually handle all error throwing for branch and init commands 2021-05-20 00:37:46 +03:00
Stas Kelvich
2f25d17e11 Set more error fields to satisfy rust-postgres parser 2021-05-20 00:37:46 +03:00
Stas Kelvich
8faa6fa392 Accept semicolon right after branch_create command 2021-05-20 00:37:46 +03:00
Stas Kelvich
4d5a41301d Support returning errors from page service 2021-05-20 00:37:46 +03:00
Eric Seppanen
4c35b22626 Remove FIXME about buffer pools
If I'm not going to do anything about it soon, it's not worth keeping
this comment.
2021-05-19 14:36:41 -07:00
Eric Seppanen
9fe3b73e13 walkeeper replication: remove the lock from the send stream.
I originally thought there would be multiple threads sending here, but
that's not currently the case, so remove the lock.
2021-05-19 14:36:41 -07:00
Eric Seppanen
e0146304e6 timeline: make SharedState and some constructors private
This was pointed out in code review: no need for these to be public.
2021-05-19 14:36:41 -07:00
Eric Seppanen
fbb04c592a wal_service: change error message at thread exit
Because many errors are propagated to this point, use a better message
than "socket error".
2021-05-19 14:36:41 -07:00
Eric Seppanen
8f43d7637c wal_service: move code around some more
Move ReceiveWalConn into its own file. Shuffle constants around so they
are close to the protocol they're associated with, or move them into
postgres_ffi if they seem to be global constants.
2021-05-19 14:36:41 -07:00
Eric Seppanen
cf30303d8f extract protocol peek code; rename Connection -> ReceiveWalConn
It may be more robust to use the TcpStream::peek function, so do all
protocol peeking before creating the protocol object. This reveals the
next cleanup step: rename Connection, since it's no longer the parent of
SendWalConn. Now we peek at the first bytes and choose which kind of
connection object to create.
2021-05-19 14:36:41 -07:00
Eric Seppanen
1ec157653e bin_ser: expand serialize error type, add serialized_size 2021-05-19 14:36:41 -07:00
Eric Seppanen
858ca3a4ce bin_ser: simplify ser_into_slice
The conversion of &mut [u8] into Write is a little tricky.

Also, remove an unused generic parameter.
2021-05-19 14:36:41 -07:00
Eric Seppanen
d744ddee7c bin_ser: preserve IO errors on deserialization
We're starting to deserialize directly from the TcpStream now, which
means that a socket error gets logged as "deserialize error". That's not
very helpful; preserve the io::Error so it can be logged.
2021-05-19 14:36:41 -07:00
Eric Seppanen
3296b7d770 wal_service: permit I/O errors while reading control file
I'm not sure why ignoring this error is a good idea, but the
test_embedded_wal_proposer test fails if we propagate the error upward.
2021-05-19 14:36:41 -07:00
Eric Seppanen
2148ae78ab wal_service: remove manual output buffering
Serialize objects directly to the stream. This allows us to remove a
bunch of buffer management code, along with the NewSerializer trait that
was a temporary bridge between the old code and the new.
2021-05-19 14:36:41 -07:00
Eric Seppanen
78dcf2207e replace manual deserialization with serde + BeSer
This struct is a little awkward, because in other places it is
serialized/deserialized as little-endian, but here it's big-endian.
2021-05-19 14:36:41 -07:00
Eric Seppanen
74b78608d9 split timeline code into a separate file 2021-05-19 14:36:41 -07:00
Eric Seppanen
a11558b84f break wal_service into multiple files
+ misc cleanups
2021-05-19 14:36:41 -07:00
Eric Seppanen
513696a485 break wal_service into multiple pieces
The pieces are:
base Connection
SendWal
ReplicationHandler

There are lots of other changes here:
- Put the replication reader in a background thread; this gets rid
  of some hacks with nonblocking mode.
- Stop manually buffering input data; use BufReader instead.
- Use BytesMut a lot less; use Read/Write traits where possible.
2021-05-19 14:36:41 -07:00
Eric Seppanen
cedc2eb5c2 wal_service: add BufReader
If we try to read a few bytes at a time, we will perform a lot more
syscalls than necessary. Wrap the socket in a BufReader, which will
buffer bytes as needed.
2021-05-19 14:36:41 -07:00
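A small sketch of the change described above, with made-up protocol details: wrap the accepted TcpStream in a BufReader so repeated small reads come out of a userspace buffer instead of costing one syscall each.

```rust
use std::io::{BufReader, Read};
use std::net::TcpListener;

fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let (socket, _peer) = listener.accept()?;

    // Buffer the socket: a 4-byte length header followed by a body no longer
    // costs two syscalls per message.
    let mut reader = BufReader::new(socket);
    let mut len = [0u8; 4];
    reader.read_exact(&mut len)?;
    let mut body = vec![0u8; u32::from_be_bytes(len) as usize];
    reader.read_exact(&mut body)?;
    println!("received {} body bytes", body.len());
    Ok(())
}
```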
Heikki Linnakangas
e3e593f571 Don't send spurious ReadyForQuery messages in extended query protocol.
libpq tolerates and ignores them, but the Rust postgres client gets
confused by them in certain states. This explained the strange failure
I saw with the Copy Out protocol. I'm not sure what the condition was
exactly, but somehow the rust client got confused if it received a
ReadyForQuery message that it was not expecting.

Fixes https://github.com/zenithdb/zenith/issues/148.
2021-05-19 22:31:28 +03:00
Stas Kelvich
c12e393e74 move cargo_bin_dir() to integration_tests crate as is it used only there 2021-05-19 22:17:48 +03:00
Stas Kelvich
d59cb2ca7a clean up some leftovers after 746f66731 2021-05-19 22:17:48 +03:00
Stas Kelvich
58f34a8d76 Rework pg subcommand in CLI.
1. Create data directory on start
2. Remove distinct pg names, now pg name == branch name.
2021-05-19 22:17:48 +03:00
Stas Kelvich
31462f4b71 Reuse page_server.branches_list in CLI code 2021-05-19 22:17:48 +03:00
Heikki Linnakangas
538f903861 Optimize parse_relfilename() function.
Compiling a Regex is very expensive, so let's not do it on every
invocation. This was consuming a big fraction of the time in creating
a new base backup at "zenith pg create". This commit brings down the
time to run "zenith pg create" on a freshly created repository from
about 2 seconds to 1 second.

It's not worth spending much effort on optimizing things at this stage
in general, but might as well pick low-hanging fruit like this.
2021-05-19 14:08:37 +03:00
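For illustration, a sketch of the usual "compile the Regex once" fix, here with once_cell (lazy_static works the same way). The pattern and helper below are made up, not the actual parse_relfilename() code.

```rust
use once_cell::sync::Lazy;
use regex::Regex;

// Compiled exactly once, on first use, instead of on every invocation.
static RELFILE_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^(\d+)(?:_([a-z]+))?(?:\.(\d+))?$").unwrap());

// Hypothetical signature: (relnode, fork name, segment number).
fn parse_relfilename(name: &str) -> Option<(u32, Option<&str>, u32)> {
    let caps = RELFILE_RE.captures(name)?;
    let relnode = caps.get(1)?.as_str().parse().ok()?;
    let forkname = caps.get(2).map(|m| m.as_str());
    let segno = caps.get(3).map_or(Some(0), |m| m.as_str().parse().ok())?;
    Some((relnode, forkname, segno))
}

fn main() {
    assert_eq!(parse_relfilename("16384_vm.2"), Some((16384, Some("vm"), 2)));
}
```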
Heikki Linnakangas
e6a7241c3a Simplify construction of rocksdb keys and values.
I'm going nuts with the pattern:

    let k = iter.key().unwrap();
    buf.clear();
    buf.extend_from_slice(&k);
    let key = CacheKey::unpack(&mut buf);

Introduce helper functions to convert a CacheKey into BytesMut, and
from [u8] into CacheKey. Reduces the boilerplate code a lot.

The helper functions create a new BytesMut on each call, whereas the old
coding could reuse a single BytesMut, so this could be a bit slower. I
haven't tried measuring it, but at least it's not immediately noticeable,
and readability is much more important at this point. We can optimize
later.
2021-05-19 12:33:38 +03:00
Stas Kelvich
709b778904 Show help in CLI when no arguments provided 2021-05-19 12:32:57 +03:00
Heikki Linnakangas
aa8debf4e8 Add test for a relation that's larger than 1 GB.
This isn't very exciting with the current RocksDB implementation, because
it doesn't care about the PostgreSQL 1 GB segment boundaries at all.
But I think we will care about this in the future, and more tests are
generally better anyway.
2021-05-19 09:22:17 +03:00
Heikki Linnakangas
1912546e52 Change the meaning of PageServerConf.workdir
Commit 746f667311 added the 'workdir' field and the get_*_path()
functions, with the idea that we cd into the directory at page server
startup, so that the get_*_path() functions can always return paths
relative to '.', but 'workdir' shows the original path to it. Change it
so that 'conf.workdir' is always set to '.', too, and the get_*_path()
functions include 'workdir' in the returned paths. Why? Because that
allows writing unit tests without changing the current directory.

When I was working on commit 97992226d3, I initially wrote the test so
that it changed the current working directory, just like commit 746f667311
did. But that was problematic, when I tried to add another unit test that
*also* wants to change the current working dir, because they could then
not run concurrently. In fact, they could not even run serially, unless
the current directory was carefully reset after the test. So it is better
to avoid changing the current directory in tests.
2021-05-19 08:49:16 +03:00
Heikki Linnakangas
a6178c135f Fix starting page server in non-daemonize mode.
Commit 746f667311 moved the "chdir" earlier in the startup sequence,
before daemonizing. But it forgot to remove a corresponding chdir call
later in the sequence when not in daemonize mode. As a result, if you
tried to start the pageserver without the --daemonize option, it always
failed with "No such file or directory" error.
2021-05-19 08:49:09 +03:00
Heikki Linnakangas
2ff16da6af Remove redundant cargo tests.
We have coverage for these things in the python tests, we don't need both.

test_redo_cases() was a pretty simple case that created a couple of
table and inserted to them. We don't have another test exactly like
that, but there is enough similar stuff in the test_branch_behind and
test_pgbench tests to cover it.

test_regress() and pgbench() are redundant with the test_pg_regress and
test_pgbench python tests.

test_pageserver_two_timelines() is similar enough to the test_branch_behind
test that we don't need it. And many other tests create branches, too.
2021-05-19 08:35:12 +03:00
Stas Kelvich
21ea70c8f5 Fix test_acceptors_restarts.
In 746f667 I "optimized" wal_acceptor tests by setting the "--pageserver"
flag only on one of the wal_acceptors, which obviously hangs the system if
that wal_acceptor is down. And test_acceptors_restarts does exactly this.

Set "--pageserver" on all wal_acceptors as it was before.
2021-05-18 21:32:16 +03:00
Heikki Linnakangas
2b2d24433a Don't panic on zenith-specific "fork numbers".
These can be printed as part of log messages, if you display a RelTag
that contains one.
2021-05-18 20:49:59 +03:00
Heikki Linnakangas
66bced0f36 Fix leftover comment about async I/O 2021-05-18 20:47:35 +03:00
anastasia
9ba7bc2695 update vendor/postgres 2021-05-18 14:48:46 +03:00
Heikki Linnakangas
8624bddc79 Rearrange "use" lines, mostly by cargo fmt. 2021-05-18 09:48:42 +03:00
Stas Kelvich
45b1495f37 store remotes in main config file 2021-05-18 01:04:00 -04:00
Patrick Insinger
23be5021f8 Remote CLI command 2021-05-18 01:04:00 -04:00
Patrick Insinger
f954d5c501 pageserver - separate pagestream messages 2021-05-17 17:17:08 -04:00
Heikki Linnakangas
ab2f0ad1a8 Fix and reorganize python tests.
- The 'pageserver' fixture now sets up the repository and starts up
  the Page Server automatically. In other words, the 'pageserver'
  fixture provides a Page Server that's up and running and ready to
  use in tests.

- The 'pageserver' fixture now also creates a branch called 'empty',
  right after initializing the repository. By convention, all the
  tests start by creating a new branch off 'empty' for the test. This
  allows running all the tests against the same Page Server
  concurrently.  (I haven't tested that though. pytest doesn't
  provide an option to run tests in parallel but there are extensions
  for that.)

- Remove the 'zen_simple' fixture. Now that 'pageserver' provides
  server that's up and running, it's pretty simple to use the
  'pageserver' and 'postgres' fixtures directly.

- Don't assume host name or ports in the tests. They now use the
  fields in the fixtures for that. That allows assigning the ports
  dynamically, making it possible to run multiple page servers in
  parallel, or running the tests in parallel with another page
  server. This commit still hard codes the Page Server's port in the
  fixture, though, so more work is needed to actually make it
  possible.

- I made some changes to the 'postgres' fixture in commit 532918e13d,
  which broke the other tests. Fix them.

- Divide the tests into two "batches" of roughly equal runtime, which
  can be run in parallel

- Merge the 'test_file' and 'test_filter' options in CircleCI config
  into one 'test_selection' option, for simplicity.
2021-05-17 20:44:00 +03:00
Heikki Linnakangas
52fbcbde0a Add junit.xml file to pytest output, to show more info in CircleCI 2021-05-17 20:44:00 +03:00
Heikki Linnakangas
e602807476 Be more lenient with branch names.
Notably, the "foo@0/12345678" syntax was not allowed, because '/' is not
a word character.
2021-05-17 20:44:00 +03:00
Eric Seppanen
398d522d88 cargo fmt 2021-05-17 09:29:58 -07:00
Stas Kelvich
746f667311 Refactor CLI and CLI<->pageserver interfaces to support remote pageserver
This patch started as an effort to support CLI working against remote
pageserver, but turned into a pretty big refactoring.

* CLI now does not look into repository files directly. New commands
'branch_create' and 'identify_system' were introduced into page_service to
support that.
* Branch management that was scattered between local_env and
zenith/main.rs is moved into pageserver/branches.rs. That code could better fit
in Repository/Timeline impl, but I'll leave that for a different patch.
* All tests-related code from local_env went into integration_tests/src/lib.rs as an
extension to PostgresNode trait.
* Paths-generating functions were concentrated around corresponding config
types (LocalEnv and PageserverConf).
2021-05-17 19:17:51 +03:00
Patrick Insinger
53ea6702bd zenith - pg list relax pageserver dependency 2021-05-17 11:14:01 -04:00
Heikki Linnakangas
952424b78c Move save_decoded_record() function to Repository trait.
The function doesn't depend on the implementation of the Repository, it
only calls the public interface functions.
2021-05-17 15:16:28 +03:00
Stas Kelvich
d737c40eec copy safekeeper README from older C version to current Rust version 2021-05-17 11:43:18 +03:00
Heikki Linnakangas
532918e13d Fix branch creation at a point other than end-of-WAL
When creating a new branch, we copied all WAL from the source timeline
to the new one, and it was being picked up and digested into the
repository on first use of the timeline. Fix by copying the WAL only
up to the branch's starting point.

We should probably move the branch-creation code from the CLI to page
server itself - that's what I was starting to hack on when I noticed this
bug - but let's fix this first.

Add a regression test. To test multiple branches, enhance the python
test fixture to manage multiple running Postgres instances. Also, for
convenience, add a function to the postgres fixture to open a connection
to the server with psycopg2.
2021-05-17 10:09:34 +03:00
Heikki Linnakangas
b266c28345 Use common Lsn datatype in a few more places
This isn't just cosmetic, this also fixes one bug: the code in
parse_point_in_time() function used str::parse::<u64>() to parse the
parts of the LSN string (e.g. 0/1A2B3C4D). That's wrong, because the
LSN consists of hex digits, not base-10.
2021-05-17 10:07:42 +03:00
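To make the base-10 vs. base-16 point concrete, a tiny sketch of parsing the textual LSN form; the real parsing lives in the shared Lsn type, this just shows the idea.

```rust
// "0/1A2B3C4D" is two hex numbers: the high and low 32 bits of a 64-bit LSN.
// str::parse::<u64>() assumes base 10 and mis-reads or rejects hex digits.
fn parse_lsn(s: &str) -> Option<u64> {
    let mut parts = s.splitn(2, '/');
    let hi = u64::from_str_radix(parts.next()?, 16).ok()?;
    let lo = u64::from_str_radix(parts.next()?, 16).ok()?;
    Some((hi << 32) | lo)
}

fn main() {
    assert_eq!(parse_lsn("0/1A2B3C4D"), Some(0x1A2B3C4D));
    assert_eq!(parse_lsn("16/B374D848"), Some(0x16_B374_D848));
}
```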
Konstantin Knizhnik
04dc698d4b Add support of twophase transactions 2021-05-16 00:03:20 +03:00
Heikki Linnakangas
6b11b4250e Fix compilation with older rust version.
Commit 9ece1e863d used `slice.fill`, which isn't available until Rust
v1.50.0. I have 1.48.0 installed, so it was failing to compile for me.

We haven't really standardized on any particular Rust version, and if
there's a good feature we need in a recent version, let's bump up the
minimum requirement. But this is simple enough to work around.
2021-05-15 01:42:33 +03:00
Konstantin Knizhnik
15d1c1f8bf Update submodule version 2021-05-14 17:15:14 +03:00
Konstantin Knizhnik
9ece1e863d Compute and restore pg_xact, pg_multixact and pg_filenode.map files 2021-05-14 16:35:09 +03:00
anastasia
2870150365 bump vendor/postgres 2021-05-14 13:55:02 +03:00
anastasia
7b281900f9 Add a function to change postgresql.conf in python tests. Add test_config as an example 2021-05-14 13:55:02 +03:00
Heikki Linnakangas
97992226d3 Add some unit tests for the Repository/Timeline interface. 2021-05-14 12:44:52 +03:00
Heikki Linnakangas
270356ec38 Refactor WalRedoManager for easier testing.
Turn WalRedoManager into an abstract trait, so that it can be easily
mocked in unit tests.

One change here is that the WAL redo manager is no longer tied to a
specific zenith timeline. It didn't do anything with that information
aside from using it in the dummy datadir's name. We could use any
random string for that purpose, it's just to prevent two WAL redo
managers from stepping over each other. But this commit actually
changes things so that all timelines use the same WAL redo manager, so
that's not necessary. We will probably want to maintain a pool of WAL
redo processes in the future, but for now let's keep it simple.

In passing, fix some comments.
2021-05-14 12:44:49 +03:00
Heikki Linnakangas
c2db828481 Create RocksDB databases under correct path.
We used to create them under .zenith/.zenith/<timelineid>. The double
.zenith was clearly not intentional. Change it to
.zenith/timelines/<timelineid>.

Fixes https://github.com/zenithdb/zenith/issues/127
2021-05-14 12:44:44 +03:00
Eric Seppanen
71e93faed7 fix endian typos in BeSer
Cut/paste error: BeSer was using the little-endian config in two places.

Add better unit tests so this can't happen again.
2021-05-13 19:04:17 -07:00
Eric Seppanen
54d52e07db .gitignore integration_tests/.zenith
It's a bit annoying that the .zenith state can show up in multiple
places, but since this is how the regression tests run if you launch
them from the git root directory, ignore this one too.
2021-05-13 13:47:22 -07:00
Heikki Linnakangas
4dccdb33ab Fix comment formatting.
The module comment should use "//!" instead of "///". Otherwise, it is
considered to apply to the *next* thing, in this case the "use" statement
that follows, not the file as a whole. "cargo fmt" revealed this by insisting
on moving the "use crate::pg_constants" line to before the comment.
2021-05-13 22:06:05 +03:00
anastasia
38c4b6f02f Move postgres code related to zenith pageserver to contrib/zenith.
- vendor/postgres changes
- Respective changes in RUST code: upload shared library, use new GUC names.
- Add contrib build to Makefile.
2021-05-13 16:23:21 +03:00
Eric Seppanen
6ff3f1b9fd don't open log files multiple times
Multiple fds writing to the same file doesn't work. One fd will
overwrite the output of the other fd. We were opening log files three
times (stdout, stderr, and slog).

The symptoms can be seen when the program panics; the final file will
have truncated or lost messages. After this change, all messages are
preserved. If panicking and logging are concurrent (and they definitely
can be), some of the messages may be interleaved in slightly
inconvenient ways.

File::try_clone() is essentially `dup` underneath, meaning the two will
share the same file offset.
2021-05-13 00:32:39 -07:00
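A small sketch of the "open once, clone the handle" pattern this commit switches to; the file name and usage are illustrative. File::try_clone() duplicates the descriptor, so the handles share one underlying file instead of overwriting each other's output.

```rust
use std::fs::OpenOptions;
use std::io::Write;

fn main() -> std::io::Result<()> {
    // One open() call for the log file...
    let log = OpenOptions::new().create(true).append(true).open("pageserver.log")?;

    // ...and dup'd handles (shared offset) for stdout/stderr redirection and the logger.
    let for_stdout = log.try_clone()?;
    let for_stderr = log.try_clone()?;

    writeln!(&log, "from the logger")?;
    writeln!(&for_stdout, "redirected stdout")?;
    writeln!(&for_stderr, "redirected stderr")?;
    Ok(())
}
```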
Patrick Insinger
4c5e23d014 pageserver - fix ParameterStatus write call 2021-05-12 20:59:04 -04:00
Patrick Insinger
99d80aba52 use pageserver for pg list command 2021-05-12 12:34:03 +03:00
Konstantin Knizhnik
2f2dff4c8d Merge with main brnach 2021-05-12 10:46:01 +03:00
Konstantin Knizhnik
22e7fcbf2d Handle visbility map updates in WAL redo 2021-05-12 10:38:43 +03:00
Patrick Insinger
372617a4f5 test_runner - pgrep remove -c arg
macOS doesn't support it
2021-05-11 17:52:22 -04:00
Patrick Insinger
49d1921a28 page_server - add python api tests 2021-05-11 14:16:22 -04:00
Patrick Insinger
d8e509d29e page_service - use anyhow for error handling 2021-05-11 14:11:10 -04:00
Patrick Insinger
d5bfe84d9e cargo fmt 2021-05-11 12:35:09 -04:00
Arseny Sher
8fff26ad49 Make Repository API return abstract dyn Timeline.
+ minor cargo fmt cleanup
2021-05-11 15:27:23 +03:00
Heikki Linnakangas
5f4e32f505 Require valid WAL streaming point.
If timeline doesn't have a valid "last valid LSN", refuse WAL streaming.
The previous behavior was to start streaming from the very beginning of
time. That was needed to support bootstrapping the page server with no
data at all (see commit bd606ab37a), but we no longer do that.
2021-05-11 11:12:14 +03:00
Heikki Linnakangas
fb71c85a79 Implement std::fmt::Display for RelTag, for debug messages. 2021-05-11 10:55:51 +03:00
Heikki Linnakangas
ff76226a35 Remove obsolete mgmt-console.
It has served its purpose. A new management console is in the works. The
old code is available in git history if we need anything from it.
2021-05-11 10:54:41 +03:00
Eric Seppanen
6e748147b6 test_runner: fix relative import syntax
Somehow I never learned this part correctly: relative imports use the
syntax "import .file" for a file sitting in the same directory.

This error wasn't terribly obvious, but the Pylance linter is yelling at
me so I'll fix it now before anyone else notices.
2021-05-11 00:09:39 -07:00
Eric Seppanen
e5df42feef add workspace_hack dependency to zenith_utils
I didn't think this mattered, but it does: if you add a dependency to
zenith_utils, but forget to request a feature you need, the crate will
build from the workspace root, but not by itself.

It's probably better to pull in the whole dependency tree.

This leaves one problem unsolved: the missing feature above will now be
a latent bug. If that feature gets removed later by other crates, and
then the workspace_hack Cargo.toml is updated, this missing feature will
become a build failure.
2021-05-10 18:21:45 -07:00
Eric Seppanen
73647e5715 wal_service: fix NodeId order/endian issues
Add fixes suggested in code review.

In a previous commit, I changed the NodeId field order and types to try
to preserve the exact serialization that was happening. Unfortunately,
that serialization was incorrect and the original struct was mostly
correct.

Change uuid to be a [u8; 16] as it was intended to be a byte array; that
will clearly indicate to serde serializers that no endian swaps will
ever be needed.
2021-05-10 16:21:05 -07:00
Eric Seppanen
95db33f3f9 wal_service: comment cleanup 2021-05-10 16:21:05 -07:00
Eric Seppanen
bace19ffbe wal_service: switch to Lsn type
Replace XLogRecPtr with Lsn in wal_service.rs .

This removes the last use of XLogSegmentOffset and XLByteToSeg, so
delete them. (replaced by Lsn::segment_offset and Lsn::segment_number.)
2021-05-10 16:21:05 -07:00
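For reference, a minimal sketch of an Lsn newtype with the two helpers named above; the actual Lsn type in the repository carries more functionality than this.

```rust
// Sketch only: a u64 newtype with the two segment helpers mentioned in the commit.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct Lsn(u64);

impl Lsn {
    fn segment_number(self, seg_size: u64) -> u64 {
        self.0 / seg_size
    }
    fn segment_offset(self, seg_size: u64) -> u64 {
        self.0 % seg_size
    }
}

fn main() {
    let wal_segment_size = 16 * 1024 * 1024u64; // default 16 MB WAL segments
    let lsn = Lsn(0x16B374D848);
    println!(
        "segment {} offset {}",
        lsn.segment_number(wal_segment_size),
        lsn.segment_offset(wal_segment_size)
    );
}
```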
Eric Seppanen
60d66267a9 add serde support to Lsn type
A serialized Lsn and a serialized u64 should be identical.
2021-05-10 16:21:05 -07:00
Eric Seppanen
294320e6a8 wal_service: drop repr(C)
The C memory representation is only needed if we want to guarantee the
same memory layout as some other program. Since we're using serde to
serialize these data structures, we can let the compiler do what it
wants.
2021-05-10 16:21:05 -07:00
Eric Seppanen
28b4d9abb3 wal_service: use anyhow for error handling
We may eventually want precise error types for some of this, but
anyhow::Error is a lot easier than trying to force io::Error.
2021-05-10 16:21:05 -07:00
Eric Seppanen
8d8bc304c1 work around NodeId endian issues
Instead of playing games during serialize/deserialize, just treat
NodeId::term as an 8-byte array instead of a u64.
2021-05-10 16:21:05 -07:00
Eric Seppanen
4788248e11 wal_service: remove manual serialization code
Commit to serde for serialization of data structures.
2021-05-10 16:21:05 -07:00
Eric Seppanen
0cbb3798da try using serde to do all the serialization in wal_service
This version validates on every call that our result is exactly the same
as the previous result.

NodeId is a strange corner case: one field is serialized little-endian
and one field is serialized big-endian. Hopefully we can fix that in the
future.
2021-05-10 16:21:05 -07:00
Eric Seppanen
36c12247b9 add bin_ser module
This module adds two traits that implement bincode-based serialization.
BeSer implements methods for big-endian encoding/decoding.
LeSer implements methods for little-endian encoding/decoding.

Right now, the BeSer and LeSer methods have the same names, meaning you
can't `use` them both at the same time. This is intended to be a safety
mechanism: mixing big-endian and little-endian encoding in the same file
is error-prone. There are ways around this, but the easiest fix is to
put the big-endian code and little-endian code in different files or
submodules.
2021-05-10 16:21:05 -07:00
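For illustration, here is a minimal sketch of how such a trait pair can be layered on top of bincode. This is an assumption for readability, not the actual zenith_utils::bin_ser code; the deliberately identical method names are what makes pulling both traits into scope at once a compile-time conflict.

```rust
// Hypothetical sketch, not the real bin_ser module: two traits with identical
// method names, one fixing big-endian and one fixing little-endian encoding.
use bincode::Options;
use serde::{de::DeserializeOwned, Serialize};

pub trait BeSer: Serialize + DeserializeOwned {
    fn ser(&self) -> bincode::Result<Vec<u8>> {
        bincode::options().with_big_endian().with_fixint_encoding().serialize(self)
    }
    fn des(bytes: &[u8]) -> bincode::Result<Self> {
        bincode::options().with_big_endian().with_fixint_encoding().deserialize(bytes)
    }
}

pub trait LeSer: Serialize + DeserializeOwned {
    fn ser(&self) -> bincode::Result<Vec<u8>> {
        bincode::options().with_little_endian().with_fixint_encoding().serialize(self)
    }
    fn des(bytes: &[u8]) -> bincode::Result<Self> {
        bincode::options().with_little_endian().with_fixint_encoding().deserialize(bytes)
    }
}
```

Because `BeSer::ser` and `LeSer::ser` collide, a file that brings both traits into scope has to disambiguate every call, which is exactly the friction that keeps mixed-endian code out of a single module.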
Eric Seppanen
1767208563 remove tokio-postgres from dependencies 2021-05-10 15:24:55 -07:00
Eric Seppanen
d25656797c switch pageserver to blocking postgres interface 2021-05-10 15:24:55 -07:00
Eric Seppanen
6c825dcbaa switch walkeeper over to new postgres blocking interface
This is a big async -> sync conversion. Most of it is a pretty
straightforward conversion of removing `async` and `.await` and swapping
in the right std modules.

I didn't find a thread-blocking version of `Notify` so I wrote one, and
then realized that there was already a Mutex being used there, so I
deleted my Notify and just used Condvar instead.

There is one part that seems odd to me: in `handle_start_replication`
there is a place where the previous code was doing a non-blocking read;
there is no TcpStream::try_read() so I fell back on manually flipping
the socket to non-blocking mode and then back again. This seems pretty
gross, but I'm not sure exactly what to replace this with: a background
thread? Extract the fd and run select() on it to first test if it's
readable?
2021-05-10 15:24:55 -07:00
Eric Seppanen
4b46693c81 adapt to new upstream tokio-postgres replication interface
Switch over to a newer version of rust-postgres PR752. A few
minor changes are required:
- PgLsn::UNDEFINED -> PgLsn::from(0)
- PgTimestamp -> SystemTime
2021-05-10 15:24:55 -07:00
Eric Seppanen
8952066ecb circleci: Save the postgres logs as artifacts 2021-05-09 22:20:58 -07:00
Eric Seppanen
d26b76fe7c cargo fmt 2021-05-07 13:11:44 -07:00
Eric Seppanen
df5a55c445 add workspace_hack crate
Our builds can be a little inconsistent, because Cargo doesn't deal well
with workspaces where there are multiple crates which have different
dependencies that select different features. As a workaround, copy what
other big rust projects do: add a workspace_hack crate.

This crate just pins down a set of dependencies and features that
satisfies all of the workspace crates.

The benefits are:
- running `cargo build` from one of the workspace subdirectories now
  works without rebuilding anything.
- running `cargo install` works (without rebuilding anything).
- making small dependency changes is much less likely to trigger large
  dependency rebuilds.
2021-05-07 13:08:31 -07:00
Heikki Linnakangas
e5e5c3e067 Tidy up the parse_relfilename function.
A few things that Eric commented on at PR #96:

- Use thiserror to simplify the implementation of FilePathError
- Add unit tests
- Fix a few complaints from clippy
2021-05-07 11:01:34 +03:00
Heikki Linnakangas
b7575582b8 Add comments to the Repository/Timeline traits.
Let's try to have comments on every public function. This doesn't quite
get us there yet, but close.
2021-05-06 23:02:11 +03:00
Heikki Linnakangas
77fd24b950 Fix a few clippy warnings.
By either accepting clippy's suggestion, or by adding an 'allow'
directive to silence it.
2021-05-06 21:57:13 +03:00
Heikki Linnakangas
61af9bb889 Move a few functions that have been copy-pasted around to shared module. 2021-05-06 21:57:10 +03:00
Heikki Linnakangas
a68f60415b Change a few remaining functions to use the Lsn datatype for LSNs. 2021-05-06 21:57:07 +03:00
Heikki Linnakangas
e7ca580922 Improve comments. 2021-05-06 21:57:04 +03:00
Heikki Linnakangas
33d126ecbe Tidy up usage of a few constants from PostgreSQL headers. 2021-05-06 21:57:01 +03:00
anastasia
15db0d1d6f refactor walreceiver and restore_local_repo 2021-05-06 12:58:08 +03:00
Heikki Linnakangas
29f122009a Don't restart WAL streaming in the middle of a record.
I think this was changed inadvertently by commit 2c308da4d2. Change it
back.

Fixes https://github.com/zenithdb/zenith/issues/98
2021-05-06 11:34:28 +03:00
Heikki Linnakangas
bf0a0cb55d Remove unused struct 2021-05-05 20:14:09 +03:00
Heikki Linnakangas
0fe5abadf5 Remove dead code around tracking first valid LSN.
We should track the range of LSNs that are valid in a GetPage@LSN request
somehow, but currently this is just dead code. Remove, until we get around
to actually implement it.

https://github.com/zenithdb/zenith/issues/95 tracks that.
2021-05-05 17:29:10 +03:00
anastasia
1591f058c6 implement Debug for Lsn type 2021-05-05 16:38:32 +03:00
Heikki Linnakangas
efa4ecaa7c Reduce the duration of the pgbench test.
We'll want to run it for longer when doing benchmarking, but 5 seconds
should be enough to tease out any basic bugs.
2021-05-05 15:47:13 +03:00
Heikki Linnakangas
8e57c2e413 Provide more context to a panic.
I just bumped into this panic, but couldn't reproduce. Not sure what
happened, but let's provide more context.
2021-05-05 15:47:11 +03:00
Heikki Linnakangas
4dd63821bd Improve trace log messages in page server 2021-05-05 10:39:28 +03:00
Heikki Linnakangas
eeec1a3dcb Refactor the way truncations are handled.
Currently, truncation is implemented in the RocksDB repository by storing
a special sentinel entry for each page that was truncated away. Hide that
implementation detail better in the abstract Repository interface, so
that caller doesn't need to construct the special sentinel WAL record.

While we're at it, refactor the CacheEntryContent struct to an enum.
2021-05-05 10:39:28 +03:00
Heikki Linnakangas
b484b896b6 Refactor the functionality page_cache.rs.
This moves things around:

- The PageCache is split into two structs: Repository and Timeline. A
  Repository holds multiple Timelines. In order to get a page version,
  you must first get a reference to the Repository, then the Timeline
  in the repository, and finally call the get_page_at_lsn() function
  on the Timeline object. This sounds complicated, but because each
  connection from a compute node, and each WAL receiver, only deals
  with one timeline at a time, the callers can get the reference to
  the Timeline object once and hold onto it. The Timeline corresponds
  most closely to the old PageCache object.

- Repository and Timeline are now abstract traits, so that we can
  support multiple implementations. I don't actually expect us to have
  multiple implementations for long. We have the RocksDB
  implementation now, but as soon as we have a different
  implementation that's usable, I expect that we will retire the
  RocksDB implementation. But I think this abstraction works as good
  documentation in any case: it's now easier to see what the interface
  for storing and loading pages from the repository is, by looking at
  the Repository/Timeline traits. The abstract traits are in
  repository.rs, and the RocksDB implementation of them is in
  repository/rocksdb.rs.

- page_cache.rs is now a "switchboard" to get a handle to the
  repository. Currently, the page server can only handle one
  repository at a time, so there isn't much there, but in the future
  we might do multi-tenancy there.
2021-05-05 10:37:36 +03:00
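To make the described structure concrete, here is a rough, assumed sketch of the trait shapes (simplified; the RelTag fields, return types, and stand-in id/LSN aliases are placeholders rather than the real definitions):

```rust
// Hypothetical sketch of the Repository/Timeline split: a Repository hands out
// Timelines, and page lookups go through the Timeline handle.
use std::sync::Arc;

use anyhow::Result;

/// Stand-ins for the real id and LSN types, just to keep the sketch self-contained.
pub type ZTimelineId = [u8; 16];
pub type Lsn = u64;

#[derive(Clone, Copy, Debug)]
pub struct RelTag {
    pub spcnode: u32,
    pub dbnode: u32,
    pub relnode: u32,
    pub forknum: u8,
}

/// A Repository holds multiple Timelines; callers first look up a Timeline,
/// then ask it for page versions.
pub trait Repository {
    fn get_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
}

pub trait Timeline: Send + Sync {
    /// Return the page image of the given relation block as of the given LSN.
    fn get_page_at_lsn(&self, rel: RelTag, blknum: u32, lsn: Lsn) -> Result<Vec<u8>>;
}
```

In this shape, a connection handler would call `repo.get_timeline(id)` once, hold on to the returned handle, and then serve each GetPage request with `timeline.get_page_at_lsn(...)`.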
Heikki Linnakangas
e5413be5fa Update 'postgres' submodule to latest version. 2021-05-05 00:37:28 +03:00
Eric Seppanen
b9c0d22045 circleci: shrink python tracebacks
Mostly we're not testing python code, so verbose python tracebacks are
unhelpful. Add --tb=short to the pytest args to cut down on the noise.

To override this during testing, set the "extra_params" parameter on the
circleci job to "--tb=auto" or "--tb=long".
2021-05-04 12:43:36 -07:00
Eric Seppanen
2e0d45d092 Switch to upstream rust-s3
The local fork of rust-s3 has some code to support Google Cloud, but
that PR no longer applies upstream, and will need significant changes
before it can be re-submitted.

In the meantime, we might as well just use the most similar upstream
release. The benefit of switching is that it fixes a feature-resolution
bug that was causing us to build 24 more crates than needed (mostly
async-std and its dependencies).
2021-05-04 12:02:00 -07:00
Eric Seppanen
86932c20eb circleci: disable imperfect match on the rust cache
The cache keeps growing as stale packages accumulate; until we can
figure out a better strategy, just start over every time Cargo.lock
changes.
2021-05-03 23:53:59 -07:00
Eric Seppanen
f5b45a172c circleci: flush caches
The rust cache is growing dramatically. Change the cache key to start
over.

The weird "v98" was something I'd intended to reset before landing the
circleci config. Do the sane thing and start over at v01. The intent is
that we just increment the number each time something gets broken.
2021-05-03 23:51:10 -07:00
Eric Seppanen
e6a0987182 python fixtures: enable "zenith pageserver stop"
Replaces "killall pageserver", which doesn't work if you don't have the
psmisc package installed.
2021-05-03 23:32:06 -07:00
Eric Seppanen
aa64391265 fix clippy warning about redundant clone 2021-05-03 23:20:51 -07:00
Eric Seppanen
aac913f9dc use nix kill instead of spawning a process
Since we are now calling the syscall directly, read_pidfile can now
parse an integer.

We also verify the pid is >= 1, because calling kill on 0 or negative
values goes straight to crazytown.
2021-05-03 23:20:51 -07:00
Eric Seppanen
4e2e5bb4e6 implement "zenith pageserver stop"
Fixes #89.
2021-05-03 19:54:13 -07:00
Eric Seppanen
3e15a5c325 test_runner fixtures: feedback from review
I forgot to add these fixes before merging:
- typo in Postgres doc-comment
- add 'wal_acceptor' to the list of conflicting processes.
2021-05-03 18:46:50 -07:00
Eric Seppanen
ce646ea845 use tokio::try_join instead of futures::try_join
We don't use the `futures` crate much. Remove one of only two references
to it (tokio has the identical macro).
2021-05-03 18:46:10 -07:00
Eric Seppanen
effcabb590 circleci: do builds on a bigger container.
default(medium): 2 CPUs, 4GB RAM.
xlarge: 8 CPUs, 16GB RAM.

Some build jobs are getting killed with signal 9. I'm guessing that this
is probably an OOM condition...
2021-05-03 14:29:29 -07:00
Eric Seppanen
a08dfb1c2c gitignore .zenith only in git root
I found I had a few other .zenith directories hanging around in odd
places. I doubt we intended those directories to collect in multiple
locations, so only hide the one in the git root directory.
2021-05-03 14:07:46 -07:00
Eric Seppanen
a3818dee58 pin dependencies to versions
If there isn't any version specified for a dependency crate, Cargo may
choose a newer version. This could happen when Cargo.lock is updated
("cargo update") but can also happen unexpectedly when adding or
changing other dependencies. This can allow API-breaking changes to be
picked up, breaking the build.

To prevent this, specify versions for all dependencies. Cargo is still
allowed to pick newer versions that are (hopefully) non-breaking, by
analyzing the semver version number.

There are two special cases here:

1. serde_derive::{Serialize, Deserialize} isn't really used any more. It
was only a separate crate in the past because of compiler limitations.
Nowadays, people turn on the "derive" feature of the serde crate and
use serde::{Serialize, Deserialize}.

2. parse_duration is unmaintained and has an open security issue. (gh
iss. 87) That issue probably isn't critical for us because of where we
use that crate, but it's probably still better to pin the version so we
can't get hit with an API-breaking change at an awkward time.
2021-05-03 14:02:10 -07:00
Eric Seppanen
219cbe2d9c pytest: improve documentation and protect against wrong versions
It's quite hard to get python2 to exit gracefully when the code was
intended for python3, because the interpreter will SyntaxError before
running a single line of code. Thankfully, the pytest developers put a
version check in their .ini config, so that should gracefully handle
both wrong-pytest-version and wrong-python-version.

Also document the woes of trying to run the pytest version shipped by
e.g. Debian or Ubuntu.
2021-05-03 11:31:32 -07:00
Eric Seppanen
129f85f652 circleci: shallow clone the postgres repo
Fetching the postgres submodule is one of the more expensive steps of
the build. Doing a shallow clone ("--depth 1") should save some time and
a lot of network bandwidth.
2021-05-03 11:31:32 -07:00
Eric Seppanen
790f1b05c6 Add circleCI build & test jobs
This does the postgres & rust builds, caching the results, and preserves
its outputs in a "workspace" for downstream test jobs (which can run in
parallel).

Pytest jobs are parameterized, so adding new pytest-based tests requires
only adding a new job to the "workflows" section at the end.

This could use some optimization:
- The "apt-get install" step is quite slow.
- The rust build step will always happen, even if only unrelated changes
  are present (e.g. modified a python test file)
- Saving/restoring the rust cache (/target) is very slow (it contains
  1.3GB of data)
- Saving the workspace is very slow.
- The "install" step is ugly; postgres and rust artifacts could take a
  much better form.
2021-05-03 11:31:32 -07:00
Eric Seppanen
37cd662ab2 add pytest integration tests
Use pytest to manage background services, paths, and environment
variables.

Benefits:
- Tests are a little easier to write.
- Cleanup is more reliable. You can CTRL-C a test and it will still shut
  down gracefully. If you manually start a conflicting process, the test
  fixtures will detect this and abort at startup.
- Don't need to worry about remembering '--test-threads=1'
- Output of sub-processes can be captured to files.
- Test fixtures configure everything to operate under a single test
  output directory, making it easier to capture logs in CI.
- Detects all the necessary paths if run from the git root, but can also
  run from arbitrary paths by setting environment variables.

There is also a deliberately broken test (test_broken.py) that can be
used to test whether the test fixtures properly clean up after
themselves. It won't run by default; the comment at the top explains how
to enable it.
2021-05-03 11:31:32 -07:00
Eric Seppanen
277a4d4582 allow zenith to run using arbitrary paths
Remove the check that enforces running from the git root directory.
Discover the zenith binary path from current_exe().
Look for postgres in $POSTGRES_BIN or $CWD/tmp_install.
2021-05-03 11:31:32 -07:00
anastasia
1cdeba9db7 [issue #18] log module name and position in the file 2021-05-03 15:17:51 +03:00
Eric Seppanen
7d104e5660 update dependencies
Running 'cargo update' happens to synchronize a few transitive
dependencies, allowing us to build slightly fewer crates.
2021-05-02 16:01:18 -07:00
Eric Seppanen
49530145d8 cargo fmt 2021-05-02 11:03:58 -07:00
Arseny Sher
da96965897 Remove assert(is_ok) before unwrap.
It only hides the error.
2021-05-02 17:19:09 +03:00
Stas Kelvich
3762b53986 show branch name in "zenith pg list" 2021-05-01 03:32:48 +03:00
Konstantin Knizhnik
9ad99152b8 Merge pull request #84 from zenithdb/embedded_wal_proposer
Enable wal proposer test
2021-04-30 19:50:27 +03:00
Konstantin Knizhnik
651a8139f5 Fix bug in transaction_id_set_status_bit 2021-04-30 19:24:00 +03:00
Konstantin Knizhnik
f82c3eb5e2 Enable wal proposer test 2021-04-30 15:18:32 +03:00
Konstantin Knizhnik
eea6f0898e Restore CLOG from snapshot 2021-04-30 14:22:47 +03:00
Heikki Linnakangas
086c0ad829 Remove unused 'apply_pending' field. 2021-04-30 12:44:06 +03:00
Eric Seppanen
6c7ea82a61 Disable test_embedded_wal_proposer without compiler warning 2021-04-29 15:10:04 -07:00
Eric Seppanen
b77597bd99 remove old Cargo.lock files
When using a cargo workspace (defined by the root Cargo.toml), there is
one shared Cargo.lock file at the root.
2021-04-29 10:31:01 -07:00
Konstantin Knizhnik
68aa2febc9 Disable test_embedded_wal_proposer test 2021-04-29 19:27:17 +03:00
anastasia
1369145e83 code cleanup 2021-04-29 18:41:42 +03:00
anastasia
b49164a1d4 cargo fmt 2021-04-29 18:41:42 +03:00
anastasia
e7b112aacc Refactor pg_constants. Move them to postgres_ffi/ 2021-04-29 18:41:42 +03:00
Konstantin Knizhnik
f491a22d85 Add test for embedded WAL acceptor 2021-04-29 14:41:13 +03:00
Konstantin Knizhnik
26115818b7 Test for embedded wal acceptor 2021-04-29 10:48:56 +03:00
Heikki Linnakangas
158d1bbbb4 Don't create temp dir under current dir in 'zenith init'.
Previously, 'zenith init' would initialize a PostgreSQL cluster with
"initdb -D tmp", creating the temp cluster under current directory.
It moves the 'tmp' directory under the correct snapshot directory in
the zenith repository after that, but if something goes wrong in initdb,
or in the steps that follow, it could leave behind the 'tmp' directory
under current dir. Better to create the temporary directory under the
repository directory to begin with, as ".zenith/tmp".
2021-04-29 09:02:51 +03:00
Eric Seppanen
6a43b293ad fix a few CLI typos 2021-04-28 14:32:58 -07:00
Eric Seppanen
69df9f10ed limit github actions to main branch 2021-04-28 12:30:06 -07:00
Eric Seppanen
61aee52a90 split github jobs
- Move notifications to a separate job, run only on push.
- Build and test will execute on [pull_request, push].
- Use actions-rs/toolchain@v1 to get the rust toolchain.
- Add matrix hook to allow multiple toolchain versions in the future
  (now set to [stable]).
- Run all the cargo tests, not just test_pageserver
2021-04-28 12:14:03 -07:00
Eric Seppanen
975b2d12dc cargo fmt 2021-04-28 10:01:58 -07:00
anastasia
ab61ce2267 Fix merge conflict, add more comments to test_acceptors_unavailability 2021-04-28 17:24:31 +03:00
Konstantin Knizhnik
14168c7aa7 Increase downtime timeout to avoid address already in use error and fix checking for elapsed time 2021-04-28 17:24:31 +03:00
anastasia
7a8501d12f [issue #73] fix race in test_acceptors_unavailability test 2021-04-28 17:24:31 +03:00
anastasia
34d55b09a3 [issue #73] fix wal_acceptor merge problem caused by 3fea78d6 2021-04-28 17:24:31 +03:00
Heikki Linnakangas
41a3772e90 Replace pgbuild.sh with a Makefile
This allows building both Zenith and PostgreSQL in one command. The
command is 'make'

Reviewed-by: Arseny Sher <sher-ars@yandex.ru>
2021-04-28 16:54:45 +03:00
Konstantin Knizhnik
bbec5a13bd Extract appname from startup packet 2021-04-28 15:26:08 +03:00
anastasia
421d586953 code cleanup for XLogRecord decoding 2021-04-28 13:56:27 +03:00
anastasia
ef37eb96b9 refactor XLogRecord reading 2021-04-28 13:56:27 +03:00
anastasia
d311f708b6 handle subtrans in COMMIT/ABORT records 2021-04-28 13:56:27 +03:00
Heikki Linnakangas
c7f54af1f1 Refactor page_cache <-> walredo interface.
Make the caller of request_redo() responsible for gathering the WAL records
to redo, and for storing the reconstructed page image back in the page
cache. This leaves the WAL redo manager purely responsible for dealing with
the postgres child process, removing its dependency on the PageCache.
2021-04-27 21:43:56 +03:00
Heikki Linnakangas
44a85d9176 Put back 'pgbuild.sh', which was removed accidentally.
Oops, I deleted it accidentally in commit 96beffb3c5. Put it back.
2021-04-27 15:33:38 +03:00
Heikki Linnakangas
96beffb3c5 Add tests for the Lsn::fetch_max function. 2021-04-27 13:43:39 +03:00
Heikki Linnakangas
cff671c1bd Remove duplicated LSN fields from the page cache.
Having multiple copies of the same values is a source of confusion.
Commit da9bf5dc63 fixed one race condition caused by that, for example.
See also discussion at
https://github.com/zenithdb/zenith/issues/57#issuecomment-824393470

This changes SeqWait.advance() to return the old number, and not panic if
you try to move the value backwards. The caller should check for that and
act accordingly.
2021-04-27 10:32:39 +03:00
Eric Seppanen
4acdcbe90f clippy cleanup #3
Fix issues raised by clippy. Mostly trivial ones, though some allow
4-5 lines of code to be reduced to 1.
2021-04-26 12:35:35 -07:00
Eric Seppanen
fdf6829de5 cargo fmt 2021-04-26 09:36:22 -07:00
anastasia
b361558a8a fix typo in transaction replay code 2021-04-26 18:35:26 +03:00
Konstantin Knizhnik
c59830fd01 Do not restart wal-redo-postgres 2021-04-26 17:57:29 +03:00
Konstantin Knizhnik
636194406f Dump log files in case of regress_tests failure 2021-04-26 17:04:26 +03:00
Konstantin Knizhnik
3b09a74f58 Implement offloading of old WAL files to S3 in walkeeper 2021-04-26 16:23:00 +03:00
Heikki Linnakangas
f617115467 Remove obsolete comment on async usage in the page cache 2021-04-26 14:12:57 +03:00
Heikki Linnakangas
4f529b7d4a Remove unused function. 2021-04-26 13:54:06 +03:00
Heikki Linnakangas
bc652e965e Save old 'async' version of SeqWait, in case we need it later.
It is currently unused, and is not built as part of 'cargo build', but
seems like a shame to throw it away completely.
2021-04-26 13:30:10 +03:00
Heikki Linnakangas
3b9e7fc5e6 Use explicit threads.
Remove 'async' usage as much as feasible. Async code is harder to debug,
and mixing async and non-async code is a recipe for confusion and bugs.

There are a couple of exceptions:

- The code in walredo.rs, which needs to read and write to the child
  process simultaneously, still uses async. It's more convenient there.
  The 'async' usage is carefully limited to just the functions that
  communicate with the child process.

- Code in walreceiver.rs that uses tokio-postgres to do streaming
  replication. We have to use async there, because tokio-postgres is
  async. Most rust-postgres functionality has non-async wrappers, but
  not the new replication client code. The async usage is very limited
  here, too: we use just block_on to call the tokio-postgres functions.

The code in 'page_service.rs' now launches a dedicated thread for each
connection.

This replaces tokio::sync::channel with std::sync::mpsc in
'seqwait.rs', to make that non-async. It's not a drop-in replacement,
though: std::sync::mpsc doesn't support multiple consumers, so we cannot
share a channel between multiple waiters. So this removes the code to
check if an existing channel can be reused, and creates a new one for
each waiter. That created another problem: BTreeMap cannot hold
duplicates, so I replaced that with BinaryHeap.

Similarly, the tokio::{mpsc, oneshot} channels used between WAL redo
manager and PageCache are replaced with std::sync::mpsc. (There is no
separate 'oneshot' channel in the standard library.)

Fixes github issue #58, and coincidentally also issue #66.
2021-04-26 13:07:51 +03:00
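As a small aside on the last point, a std::sync::mpsc channel used for exactly one message behaves like a oneshot channel; an assumed, stand-alone illustration (not the project's code):

```rust
// Illustrative only: an mpsc channel carrying a single reply plays the role
// of a oneshot channel between a requestor and a worker thread.
use std::sync::mpsc;
use std::thread;

fn main() {
    let (reply_tx, reply_rx) = mpsc::channel::<u32>();
    thread::spawn(move || {
        // worker side: send exactly one response
        reply_tx.send(42).expect("receiver dropped");
    });
    // requestor side: block until the single reply arrives
    let reply = reply_rx.recv().expect("sender dropped");
    println!("got reply: {reply}");
}
```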
Konstantin Knizhnik
5292b502f3 Check regression test exit status 2021-04-26 11:06:31 +03:00
Konstantin Knizhnik
abcecc992e [refer #67] Replace File.write with File.write_all 2021-04-26 09:30:03 +03:00
Eric Seppanen
96b6f350a7 add test cases for Lsn math and AtomicLsn 2021-04-25 19:37:02 -07:00
Eric Seppanen
648755a25e add Lsn::block_offset, remaining_in_block, calc_padding
Replace open-coded math with member fns.
2021-04-25 19:37:02 -07:00
Eric Seppanen
1c775bdcac Drop LSNs from PageCacheStats
There's no clear way to sum LSNs across timelines, so just remove them
for now.
2021-04-25 19:37:02 -07:00
Eric Seppanen
07d0241076 add AtomicLsn
AtomicLsn is a wrapper around AtomicU64 that has load() and store()
members that are cheap (on x86, anyway) and can be safely used in any
context.

This commit uses AtomicLsn in the page cache, and fixes up some
downstream code that manually implemented LSN formatting.

There's also a bugfix to the logging in wait_lsn, which prints the
wrong lsn value.
2021-04-25 19:37:02 -07:00
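A minimal sketch of what such a wrapper can look like (the memory orderings and method set here are assumptions, not the exact zenith code):

```rust
// Hypothetical sketch of an AtomicLsn: a thin wrapper over AtomicU64 whose
// load()/store() are cheap and safe to call from any context.
use std::sync::atomic::{AtomicU64, Ordering};

pub struct AtomicLsn(AtomicU64);

impl AtomicLsn {
    pub fn new(lsn: u64) -> Self {
        AtomicLsn(AtomicU64::new(lsn))
    }

    pub fn load(&self) -> u64 {
        self.0.load(Ordering::Acquire)
    }

    pub fn store(&self, lsn: u64) {
        self.0.store(lsn, Ordering::Release);
    }
}
```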
Eric Seppanen
d760446053 remove Lsn::sub in favor of sub_checked
There is only one place doing subtraction, and it had a manually
implemented check.
2021-04-25 19:37:02 -07:00
Eric Seppanen
01e239afa3 apply Lsn type everywhere
Use the `Lsn` type everywhere that I can find u64 being used to
represent an LSN.
2021-04-25 19:37:02 -07:00
Eric Seppanen
f62ce4bcf7 make seqwait generic
SeqWait can use any type that is Ord + Debug + Copy. Debug is not
strictly necessary, but allows us to keep the panic message if a caller
wants the sequence number to go backwards.
2021-04-25 19:37:02 -07:00
Eric Seppanen
3d3eb0ed16 add Lsn type
This type is a zero-cost wrapper for a u64, meant to help code
communicate with precision what that value means.

It implements Display and Debug. Display "{}" will format as
"1234ABCD:5678CDEF" while Debug will format as Lsn{1234567890}.
2021-04-25 19:37:02 -07:00
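A sketch of the newtype idea (the formatting code is an assumption written to match the example strings in the message, not the exact implementation):

```rust
// Hypothetical sketch: Lsn is a zero-cost wrapper around u64 with custom
// Display (hex halves separated by ':') and Debug ("Lsn{<decimal>}") output.
use std::fmt;

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Lsn(pub u64);

impl fmt::Display for Lsn {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:X}:{:08X}", self.0 >> 32, self.0 & 0xffff_ffff)
    }
}

impl fmt::Debug for Lsn {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Lsn{{{}}}", self.0)
    }
}

fn main() {
    let lsn = Lsn(0x1234_ABCD_5678_CDEF);
    println!("{lsn}");   // 1234ABCD:5678CDEF
    println!("{lsn:?}"); // Lsn{...} with the raw u64 printed inside the braces
}
```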
Konstantin Knizhnik
da9bf5dc63 Store atomic last_valid_lsn after seqwait_lsn.advance 2021-04-25 14:11:31 +03:00
Eric Seppanen
1cb9b5523b cargo fmt 2021-04-24 16:03:44 -07:00
Konstantin Knizhnik
968cd8f20c Do not delete versions in GC 2021-04-24 23:52:50 +03:00
Konstantin Knizhnik
3e007b0eb9 Do not delete versions in GC 2021-04-24 22:32:22 +03:00
Heikki Linnakangas
5e0cc89de8 Re-group functions in page_cache.rs, and add comments. 2021-04-24 17:54:31 +03:00
Heikki Linnakangas
0fc05569e0 Improve comments in page_cache.rs.
Explain the mix of async and other functions in the page cache.
2021-04-24 17:54:28 +03:00
Heikki Linnakangas
021462da3e Refactor put_wal_record() so that it doesn't need to be marked 'async'.
It was only marked as async because it calls relsize_get(), but
relsize_get() will in fact never block when it's called with the max
LSN value, like put_wal_record() does. Refactor to avoid marking
put_wal_record() as 'async'.
2021-04-24 17:54:26 +03:00
Heikki Linnakangas
93d7d2ae2a Refactor pagecache <-> Wal redo communication
After the rocksdb patch (commit 6aa38d3f7d), the CacheEntry struct was
used only momentarily in the communication between the page_cache and
the walredo modules. It was in fact not stored in any cache anymore.
For clarity, refactor the communication.

There is now a WalRedoManager struct, with `request_redo` function,
that can be used to request WAL replay of a particular page. It sends
a request to a queue like before, but the queue has been replaced with
tokio::sync::mpsc. Previously, the resulting page image was stored
directly in the CacheEntry, and the requestor was notified using a
condition variable. Now, the requestor includes a 'oneshot' channel in
the request, and the WAL redo manager sends the response there.
2021-04-24 12:24:04 +03:00
Eric Seppanen
fe79082e29 require documentation in seqwait.rs 2021-04-23 15:01:22 -07:00
Eric Seppanen
6dfe196c40 add .zenith to .gitignore 2021-04-23 14:19:24 -07:00
Eric Seppanen
8beaf76c85 SeqWait: don't do wakeups under the lock
Clippy pointed out that `drop(waiters)` didn't do anything, because
there was a misplaced ";" causing `waiters` to be a unit type `()`.

This change makes it do what was intended: the lock should be dropped
first, then the wakeups should be processed.
2021-04-23 14:16:34 -07:00
Konstantin Knizhnik
499b4f7eba Log garbage collection statistics 2021-04-23 18:02:58 +03:00
Konstantin Knizhnik
52ee3a2bac Support CREATE DATABASE command 2021-04-23 17:03:56 +03:00
anastasia
b64bd2a8af handle XLOG_DBASE_CREATE in waldecoder 2021-04-23 14:06:09 +03:00
anastasia
573f1ada83 [issue #56] Fix race at postgres instance + walreceiver start. Uses postgres/vendor issue_56_rebased branch. 2021-04-23 13:35:30 +03:00
Konstantin Knizhnik
904ccbdb70 Merge pull request #62 from zenithdb/dump_log_files
Wait WAL receiver to start
2021-04-23 12:45:59 +03:00
Konstantin Knizhnik
59b23fef64 Wait for WAL receiver to start 2021-04-23 12:40:29 +03:00
Konstantin Knizhnik
0eaff5aa7f Fix pageserver.log path 2021-04-23 11:37:28 +03:00
Konstantin Knizhnik
db5712f28b Dump pageserver.log in case of test errors 2021-04-23 09:41:08 +03:00
Konstantin Knizhnik
5f277755b1 Sum log files in case of test failure 2021-04-22 22:27:12 +03:00
Konstantin Knizhnik
ee87e6aad3 Sum log files in case of test failure 2021-04-22 22:14:41 +03:00
Konstantin Knizhnik
ff3488fadd Fix bug in do_gc 2021-04-22 19:37:33 +03:00
Konstantin Knizhnik
4a0a9e748c Enable garbage collector 2021-04-22 17:52:15 +03:00
Konstantin Knizhnik
6aa38d3f7d Merge pull request #54 from zenithdb/rocksdb_pageserver
Rocksdb pageserver
2021-04-22 14:08:00 +03:00
Konstantin Knizhnik
28f2800275 Merge branch 'main' into rocksdb_pageserver 2021-04-22 14:00:57 +03:00
Heikki Linnakangas
8af5cbedb1 Move xlog_utils.rs to postgres_ffi module.
I had copy-pasted these functions to a few other places. Clean that up,
move them to a common module, and add some comments.
2021-04-22 13:22:34 +03:00
Konstantin Knizhnik
75baf670f5 Remove extra trace 2021-04-22 12:42:11 +03:00
Konstantin Knizhnik
2ca8fbb6ff Fix DEFAULT_GC_PERIOD_SEC type 2021-04-22 12:01:25 +03:00
Konstantin Knizhnik
546266b86d Merge with main branch 2021-04-22 11:53:22 +03:00
Konstantin Knizhnik
c5a8c31b8a Update comments 2021-04-22 11:46:20 +03:00
Stas Kelvich
bab954b87f Fix error message wording introduced in previous commit 2021-04-22 11:45:06 +03:00
Stas Kelvich
3ded550272 Use postgres version with computenode_mode defaulted to false as it
has ongoing issues. In passing, also fix tests on macOS.
2021-04-22 11:32:40 +03:00
Konstantin Knizhnik
ed30f2096c Disable GC by default 2021-04-22 11:30:27 +03:00
Konstantin Knizhnik
da9508716d Address issues from Eric's review 2021-04-22 10:37:52 +03:00
Konstantin Knizhnik
2dbbb8c59b Address issues from Eric's review 2021-04-22 10:12:22 +03:00
Konstantin Knizhnik
f3192ee415 Merge branch 'main' into rocksdb_pageserver 2021-04-22 09:45:42 +03:00
Konstantin Knizhnik
9e7c45cb72 Merge with master 2021-04-22 09:45:13 +03:00
Heikki Linnakangas
18ba16aaac Fix and improve comment on ZTimelineId.
The comment was incorrect, claiming that ZTimelineId is a 32-byte value.
It is actually 16 bytes wide. While we're at it, improve the comment,
explaining what a zenith timeline is, and why it's different from
PostgreSQL timelines.
2021-04-22 09:25:53 +03:00
Heikki Linnakangas
a4fd1e1a80 Cleanup more issues noted by 'clippy'
Mostly stuff that was introduced by commit 3600b33f1c.
2021-04-22 09:20:05 +03:00
Eric Seppanen
9b71ae7dce page_cache: add an assert on the last_valid_lsn 2021-04-21 18:02:13 -07:00
Eric Seppanen
2cd730d31f page_cache: replace long mutex sleep with SeqWait
When calling into the page cache, it was possible to wait on a blocking
mutex, which can stall the async executor.

Replace that sleep with a SeqWait::wait_for(lsn).await so that the
executor can go on with other work while we wait.

Change walreceiver_works to an AtomicBool to avoid the awkwardness of
taking the lock, then dropping it while we call wait_for and then
acquiring it again to do real work.
2021-04-21 18:02:13 -07:00
Eric Seppanen
8060e17b50 add SeqWait
SeqWait adds a way to .await the arrival of some sequence number.
It provides wait_for(num) which is an async fn, and advance(num) which
is synchronous.

This should be useful in solving the page cache deadlocks, and may be
useful in other areas too.

This implementation still uses a Mutex internally, but only for a brief
critical section. If we find this code broadly useful and start to care
more about executor stalls due to unfair thread scheduling, there might
be ways to make it lock-free.
2021-04-21 18:02:13 -07:00
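A compact sketch of the wait_for/advance interface. For brevity this version is built on tokio::sync::watch, whereas the commit describes a Mutex-based implementation, so treat it as an assumed illustration of the API shape rather than the actual code:

```rust
// Hypothetical sketch of the SeqWait interface: advance() is synchronous,
// wait_for() is async and resolves once the counter reaches the requested value.
use tokio::sync::watch;

pub struct SeqWait {
    tx: watch::Sender<u64>,
    rx: watch::Receiver<u64>,
}

impl SeqWait {
    pub fn new(start: u64) -> Self {
        let (tx, rx) = watch::channel(start);
        SeqWait { tx, rx }
    }

    /// Synchronously advance the sequence number (never moving it backwards)
    /// and wake anyone waiting for a value we have now reached.
    pub fn advance(&self, num: u64) {
        self.tx.send_modify(|cur| {
            if num > *cur {
                *cur = num;
            }
        });
    }

    /// Wait until the sequence number reaches `num`.
    pub async fn wait_for(&self, num: u64) {
        let mut rx = self.rx.clone();
        while *rx.borrow_and_update() < num {
            if rx.changed().await.is_err() {
                return; // sender dropped
            }
        }
    }
}
```

With this shape, the page-cache change above boils down to calling `seqwait.wait_for(lsn).await` before reading data at that LSN, instead of sleeping on a blocking mutex.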
Eric Seppanen
1f3f4cfaf5 clippy cleanup #2
- remove needless return
- remove needless format!
- remove a few more needless clone()
- from_str_radix(_, 10) -> .parse()
- remove needless reference
- remove needless `mut`

Also manually replaced a match statement with map_err() because after
clippy was done with it, there was almost nothing left in the match
expression.
2021-04-21 17:56:58 -07:00
Konstantin Knizhnik
a22cb7acc1 Merge with main branch 2021-04-21 20:19:34 +03:00
Konstantin Knizhnik
785502c92c New version of postgres 2021-04-21 19:52:28 +03:00
Konstantin Knizhnik
4f3f0304c2 Merge branch 'main' into rocksdb_pageserver 2021-04-21 19:05:02 +03:00
Konstantin Knizhnik
c981f4ad66 Implement garbage collection of unused versions 2021-04-21 19:04:30 +03:00
Konstantin Knizhnik
d8fa2ec367 Merge with main branch 2021-04-21 16:10:05 +03:00
Konstantin Knizhnik
07507274c0 Merge branch 'main' into rocksdb_pageserver 2021-04-21 16:06:31 +03:00
Konstantin Knizhnik
8604bb8750 Increase timeout for running github tests 2021-04-20 18:46:31 +03:00
Konstantin Knizhnik
936cad17e4 LSN-aware smgrnblock/smgrexists implementations 2021-04-20 18:28:35 +03:00
Konstantin Knizhnik
95160dee6d Merge with main branch 2021-04-19 17:00:30 +03:00
Konstantin Knizhnik
8aa3013ec2 Merge branch 'main' into rocksdb_pageserver 2021-04-19 16:28:29 +03:00
Konstantin Knizhnik
1e65848551 Add pgbench test 2021-04-18 13:13:13 +03:00
Konstantin Knizhnik
33ee5b6ba0 Skip truncate records when calculating relation size 2021-04-17 15:54:57 +03:00
Konstantin Knizhnik
8b70ea4d79 Undo debug settings for compute node 2021-04-16 22:54:11 +03:00
Konstantin Knizhnik
b67df00bff Fix bug in decoding of truncate record 2021-04-15 17:09:30 +03:00
Konstantin Knizhnik
24b925d528 Support truncate WAL record 2021-04-15 15:50:47 +03:00
Konstantin Knizhnik
d9bc2109bb Reduce size of shared buffers for wal-redo-postgres 2021-04-14 14:33:55 +03:00
Konstantin Knizhnik
a606336074 Fix bug in WALRecord serializer 2021-04-09 20:31:34 +03:00
Konstantin Knizhnik
1816c4ca0a Merge with master 2021-04-09 15:00:33 +03:00
Konstantin Knizhnik
542dffa4a6 Set LD_LIBRARY_PATH for tests 2021-04-08 20:34:24 +03:00
Konstantin Knizhnik
07fb30747a Store pageserver data in RocksDB 2021-04-08 19:39:30 +03:00
314 changed files with 54398 additions and 22331 deletions

682
.circleci/config.yml Normal file

@@ -0,0 +1,682 @@
version: 2.1
executors:
zenith-xlarge-executor:
resource_class: xlarge
docker:
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
- image: zimg/rust:1.56
zenith-executor:
docker:
- image: zimg/rust:1.56
jobs:
check-codestyle-rust:
executor: zenith-xlarge-executor
steps:
- checkout
- run:
name: rustfmt
when: always
command: cargo fmt --all -- --check
# A job to build postgres
build-postgres:
executor: zenith-xlarge-executor
parameters:
build_type:
type: enum
enum: ["debug", "release"]
environment:
BUILD_TYPE: << parameters.build_type >>
steps:
# Checkout the git repo (circleci doesn't have a flag to enable submodules here)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checked out yet.
- run:
name: Get postgres cache key
command: git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
# it only compares file timestamps.
- run:
name: build postgres
command: |
if [ ! -e tmp_install/bin/postgres ]; then
# "depth 1" saves some time by not cloning the whole repo
git submodule update --init --depth 1
# bail out on any warnings
COPT='-Werror' mold -run make postgres -j$(nproc)
fi
- save_cache:
name: Save postgres cache
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
# A job to build zenith rust code
build-zenith:
executor: zenith-xlarge-executor
parameters:
build_type:
type: enum
enum: ["debug", "release"]
environment:
BUILD_TYPE: << parameters.build_type >>
steps:
# Checkout the git repo (without submodules)
- checkout
# Grab the postgres git revision to build a cache key.
# Note this works even though the submodule hasn't been checked out yet.
- run:
name: Get postgres cache key
command: |
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
- restore_cache:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
name: Rust build << parameters.build_type >>
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
export CARGO_INCREMENTAL=0
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests
- save_cache:
name: Save rust cache
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
- target
# Run style checks
# has to run separately from cargo fmt section
# since needs to run with dependencies
- run:
name: cargo clippy
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
"${cov_prefix[@]}" ./run_clippy.sh
# Run rust unit tests
- run:
name: cargo test
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
"${cov_prefix[@]}" cargo test
# Install the rust binaries, for use by test jobs
- run:
name: Install rust binaries
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/zenith/bin
mkdir -p /tmp/zenith/test_bin
mkdir -p /tmp/zenith/etc
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/zenith/bin/$bin
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/zenith/test_bin/$(basename $bin)
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
fi
# Install the postgres binaries, for use by test jobs
- run:
name: Install postgres binaries
command: |
cp -a tmp_install /tmp/zenith/pg_install
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save the rust binaries and coverage data for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
check-codestyle-python:
executor: zenith-executor
steps:
- checkout
- restore_cache:
keys:
- v1-python-deps-{{ checksum "poetry.lock" }}
- run:
name: Install deps
command: ./scripts/pysync
- save_cache:
key: v1-python-deps-{{ checksum "poetry.lock" }}
paths:
- /home/circleci/.cache/pypoetry/virtualenvs
- run:
name: Run yapf to ensure code format
when: always
command: poetry run yapf --recursive --diff .
- run:
name: Run mypy to check types
when: always
command: poetry run mypy .
run-pytest:
executor: zenith-executor
parameters:
# Select the type of Rust build. Must be "release" or "debug".
build_type:
type: string
default: "debug"
# pytest args to specify the tests to run.
#
# This can be a test file name, e.g. 'test_pgbench.py', or a subdirectory,
# or '-k foobar' to run tests containing the string 'foobar'. See the pytest man page,
# section SPECIFYING TESTS / SELECTING TESTS, for details.
#
# This parameter is required, to prevent the mistake of running all tests in one job.
test_selection:
type: string
default: ""
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
extra_params:
type: string
default: ""
needs_postgres_source:
type: boolean
default: false
run_in_parallel:
type: boolean
default: true
save_perf_report:
type: boolean
default: false
environment:
BUILD_TYPE: << parameters.build_type >>
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- when:
condition: << parameters.needs_postgres_source >>
steps:
- run: git submodule update --init --depth 1
- restore_cache:
keys:
- v1-python-deps-{{ checksum "poetry.lock" }}
- run:
name: Install deps
command: ./scripts/pysync
- save_cache:
key: v1-python-deps-{{ checksum "poetry.lock" }}
paths:
- /home/circleci/.cache/pypoetry/virtualenvs
- run:
name: Run pytest
# pytest doesn't output test logs in real time, so the CI job may fail with
# a `Too long with no output` error if a test runs for a long time.
# In that case, tests should have internal timeouts that are less than
# no_output_timeout, specified here.
no_output_timeout: 10m
environment:
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
- PLATFORM: zenith-local-ci
command: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/<< parameters.test_selection >>"
EXTRA_PARAMS="<< parameters.extra_params >>"
if [ -z "<< parameters.test_selection >>" ]; then
echo "test_selection must be set"
exit 1
fi
if << parameters.run_in_parallel >>; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if << parameters.save_perf_report >>; then
if [[ $CIRCLE_BRANCH == "main" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
fi
export GITHUB_SHA=$CIRCLE_SHA1
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "not remote_cluster" \
-rA $TEST_SELECTION $EXTRA_PARAMS
if << parameters.save_perf_report >>; then
if [[ $CIRCLE_BRANCH == "main" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO=local
scripts/generate_and_push_perf_report.sh
fi
fi
- run:
# CircleCI artifacts are preserved one file at a time, so skipping
# this step isn't a good idea. If you want to extract the
# pageserver state, perhaps a tarball would be a better idea.
name: Delete all data but logs
when: always
command: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
du -sh /tmp/test_output/*
- store_artifacts:
path: /tmp/test_output
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save coverage data (if any)
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
coverage-report:
executor: zenith-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
- run:
name: Build coverage report
command: |
COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
scripts/coverage \
--dir=/tmp/zenith/coverage report \
--input-objects=/tmp/zenith/etc/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
- run:
name: Upload coverage report
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
REPORT_URL=https://zenithdb.github.io/zenith-coverage-data/$CIRCLE_SHA1
COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"success\",
\"context\": \"zenith-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
# Build zenithdb/zenith:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build --build-arg GIT_VERSION=$CIRCLE_SHA1 -t zenithdb/zenith:latest . && docker push zenithdb/zenith:latest
docker tag zenithdb/zenith:latest zenithdb/zenith:${DOCKER_TAG} && docker push zenithdb/zenith:${DOCKER_TAG}
# Build zenithdb/compute-node:latest image and push it to Docker hub
docker-image-compute:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
# Build zenithdb/compute-tools:latest image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
docker build -t zenithdb/compute-tools:latest -f Dockerfile.compute-tools .
docker push zenithdb/compute-tools:latest
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build -t zenithdb/compute-node:latest vendor/postgres && docker push zenithdb/compute-node:latest
docker tag zenithdb/compute-node:latest zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
deploy-staging:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Get Zenith binaries
command: |
rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz
mkdir zenith_install
DOCKER_TAG=$(git log --oneline|wc -l)
docker pull --quiet zenithdb/zenith:${DOCKER_TAG}
ID=$(docker create zenithdb/zenith:${DOCKER_TAG})
docker cp $ID:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C zenith_install && rm postgres_install.tar.gz
docker cp $ID:/usr/local/bin/pageserver zenith_install/bin/
docker cp $ID:/usr/local/bin/safekeeper zenith_install/bin/
docker cp $ID:/usr/local/bin/proxy zenith_install/bin/
docker cp $ID:/usr/local/bin/postgres zenith_install/bin/
docker rm -v $ID
echo ${DOCKER_TAG} | tee zenith_install/.zenith_current_version
tar -czf zenith_install.tar.gz -C zenith_install .
ls -la zenith_install.tar.gz
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
ansible-galaxy collection install amazon.aws
- run:
name: Apply re-deploy playbook
environment:
ANSIBLE_HOST_KEY_CHECKING: false
command: |
echo "${STAGING_SSH_KEY}" | base64 --decode | ssh-add -
export AWS_REGION=${STAGING_AWS_REGION}
export AWS_ACCESS_KEY_ID=${STAGING_AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY=${STAGING_AWS_SECRET_ACCESS_KEY}
ansible-playbook .circleci/storage-redeploy.playbook.yml
rm -f zenith_install.tar.gz
deploy-staging-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${STAGING_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add zenithdb https://zenithdb.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/proxy.staging.yaml --set image.tag=${DOCKER_TAG} --wait
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"zenith-remote-ci\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"zenith-remote-ci\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
- check-codestyle-rust
- check-codestyle-python
- build-postgres:
name: build-postgres-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
- build-zenith:
name: build-zenith-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
requires:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
context: PERF_TEST_RESULT_CONNSTR
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_pg_regress
needs_postgres_source: true
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: other-tests-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
test_selection: batch_others
requires:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: benchmarks
context: PERF_TEST_RESULT_CONNSTR
build_type: release
test_selection: performance
run_in_parallel: false
save_perf_report: true
requires:
- build-zenith-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
requires:
# TODO: consider adding more
- other-tests-debug
- docker-image:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-staging:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-staging-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "zenithdb/console"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-zenith-debug
# - pg_regress-tests-release
# - other-tests-release

27
.circleci/proxy.staging.yaml Normal file

@@ -0,0 +1,27 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
settings:
authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
uri: "https://console.stage.zenith.tech/psql_session/"
# -- Additional labels for zenith-proxy pods
podLabels:
zenith_service: proxy
zenith_env: staging
zenith_region: us-east-1
zenith_region_slug: virginia
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: start.stage.zenith.tech
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack

138
.circleci/storage-redeploy.playbook.yml Normal file

@@ -0,0 +1,138 @@
- name: discover storage nodes
hosts: localhost
connection: local
gather_facts: False
tasks:
- name: discover safekeepers
no_log: true
ec2_instance_info:
filters:
"tag:zenith_env": "staging"
"tag:zenith_service": "safekeeper"
register: ec2_safekeepers
- name: discover pageservers
no_log: true
ec2_instance_info:
filters:
"tag:zenith_env": "staging"
"tag:zenith_service": "pageserver"
register: ec2_pageservers
- name: add safekeepers to host group
no_log: true
add_host:
name: safekeeper-{{ ansible_loop.index }}
ansible_host: "{{ item.public_ip_address }}"
groups:
- storage
- safekeepers
with_items: "{{ ec2_safekeepers.instances }}"
loop_control:
extended: yes
- name: add pageservers to host group
no_log: true
add_host:
name: pageserver-{{ ansible_loop.index }}
ansible_host: "{{ item.public_ip_address }}"
groups:
- storage
- pageservers
with_items: "{{ ec2_pageservers.instances }}"
loop_control:
extended: yes
- name: Retrieve versions
hosts: storage
gather_facts: False
remote_user: admin
tasks:
- name: Get current version of binaries
set_fact:
current_version: "{{lookup('file', '../zenith_install/.zenith_current_version') }}"
- name: Check that file with version exists on host
stat:
path: /usr/local/.zenith_current_version
register: version_file
- name: Try to get current version from the host
when: version_file.stat.exists
ansible.builtin.fetch:
src: /usr/local/.zenith_current_version
dest: .remote_version.{{ inventory_hostname }}
fail_on_missing: no
flat: yes
- name: Store remote version to variable
when: version_file.stat.exists
set_fact:
remote_version: "{{ lookup('file', '.remote_version.{{ inventory_hostname }}') }}"
- name: Store default remote version in a variable when the remote version file is not found
when: not version_file.stat.exists
set_fact:
remote_version: "000"
- name: Extract Zenith binaries
hosts: storage
gather_facts: False
remote_user: admin
tasks:
- name: Inform about version conflict
when: current_version <= remote_version
debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
- name: Extract Zenith binaries to /usr/local
when: current_version > remote_version
ansible.builtin.unarchive:
src: ../zenith_install.tar.gz
dest: /usr/local
become: true
- name: Restart safekeepers
hosts: safekeepers
gather_facts: False
remote_user: admin
tasks:
- name: Inform about version conflict
when: current_version <= remote_version
debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
- name: Restart systemd service
when: current_version > remote_version
ansible.builtin.systemd:
daemon_reload: yes
name: safekeeper
enabled: yes
state: restarted
become: true
- name: Restart pageservers
hosts: pageservers
gather_facts: False
remote_user: admin
tasks:
- name: Inform about version conflict
when: current_version <= remote_version
debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
- name: Restart systemd service
when: current_version > remote_version
ansible.builtin.systemd:
daemon_reload: yes
name: pageserver
enabled: yes
state: restarted
become: true
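All three plays above are gated on the same version comparison between the version file bundled with the deploy and the one found on the host. As a rough conceptual sketch of that gate (not part of the playbook; the paths and the `000` fallback mirror the tasks above):
```sh
current=$(cat ../zenith_install/.zenith_current_version)                  # version shipped with this deploy
remote=$(cat /usr/local/.zenith_current_version 2>/dev/null || echo 000)  # version already on the host
if [ "$current" \> "$remote" ]; then
    echo "Upgrading host from $remote to $current"   # extract the archive, restart the service
else
    echo "Current version $current is not newer than remote $remote"
fi
```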

18
.dockerignore Normal file

@@ -0,0 +1,18 @@
**/.git/
**/__pycache__
**/.pytest_cache
.git
target
tmp_check
tmp_install
tmp_check_cli
test_output
.vscode
.zenith
integration_tests/.zenith
.mypy_cache
Dockerfile
.dockerignore

103
.github/workflows/benchmarking.yml vendored Normal file

@@ -0,0 +1,103 @@
name: benchmarking
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ your branch ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '36 7 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
jobs:
bench:
# this workflow runs on a self-hosted runner
# its environment is quite different from the usual github runner
# probably the most important difference is that it doesn't start from a clean workspace each time
# e.g. if you install system packages they are not cleaned up, since you install them directly on the host machine,
# not in a container
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
env:
PG_BIN: "/usr/pgsql-13/bin"
steps:
- name: Checkout zenith repo
uses: actions/checkout@v2
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
# so the simplest solution is to use the already installed system python and spin up virtualenvs for job runs.
# there is Python 3.7.10 already installed on the machine, so use it to install poetry and then use poetry's virtualenvs
- name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused, there shouldn't be any trouble with installing every time
./scripts/pysync
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
$PG_BIN/pgbench --version
# FIXME cluster setup is skipped due to various changes in the console API
# for now a pre-created cluster is used. When the API gains some stability
# after the massive changes, dynamic cluster setup will be revived.
# So use the pre-created cluster. It needs to be started manually, but it stops automatically after 5 minutes of inactivity
- name: Setup cluster
env:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
shell: bash
run: |
set -e
echo "Starting cluster"
# wake up the cluster
$PG_BIN/psql $BENCHMARK_CONNSTR -c "SELECT 1"
- name: Run benchmark
# pgbench is installed system wide from official repo
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# via
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
# [pgdg13]
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# enabled=1
# gpgcheck=0
# EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env:
TEST_PG_BENCH_TRANSACTIONS_MATRIX: "5000,10000,20000"
TEST_PG_BENCH_SCALES_MATRIX: "10,15"
PLATFORM: "zenith-staging"
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on the self-hosted runner
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
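For reference, the same benchmark run can be reproduced against any reachable cluster; a hedged sketch (the connection string and output directory are placeholders, the environment variables mirror the step above):
```sh
export BENCHMARK_CONNSTR="postgres://user:password@host:5432/postgres"   # placeholder
export TEST_PG_BENCH_TRANSACTIONS_MATRIX="5000,10000,20000"
export TEST_PG_BENCH_SCALES_MATRIX="10,15"
export PLATFORM="zenith-staging"
export REMOTE_ENV=1   # tell the test harness there are no local zenith binaries
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" \
    --skip-interfering-proc-check --out-dir perf-report-local
```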

45
.github/workflows/notifications.yml vendored Normal file

@@ -0,0 +1,45 @@
name: Send Notifications
on:
push:
branches: [ main ]
jobs:
send-notifications:
timeout-minutes: 30
name: send commit notifications
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Form variables for notification message
id: git_info_grab
run: |
git_stat=$(git show --stat=50)
git_stat="${git_stat//'%'/'%25'}"
git_stat="${git_stat//$'\n'/'%0A'}"
git_stat="${git_stat//$'\r'/'%0D'}"
git_stat="${git_stat// /}" # space -> 'Space En', as github tends to eat ordinary spaces
echo "::set-output name=git_stat::$git_stat"
echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
- name: Send notification
uses: appleboy/telegram-action@master
with:
to: ${{ secrets.TELEGRAM_TO }}
token: ${{ secrets.TELEGRAM_TOKEN }}
format: markdown
args: |
*@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
```
${{ steps.git_info_grab.outputs.git_stat }}
```


@@ -1,50 +1,41 @@
name: regression check
name: Build and Test
on: [push]
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
regression-check:
timeout-minutes: 10
strategy:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
os: [ubuntu-latest]
timeout-minutes: 30
name: run regression test suite
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Form variables for notification message
id: git_info_grab
run: |
git_stat=$(git show --stat=50)
git_stat="${git_stat//'%'/'%25'}"
git_stat="${git_stat//$'\n'/'%0A'}"
git_stat="${git_stat//$'\r'/'%0D'}"
git_stat="${git_stat// /}" # space -> 'Space En', as github tends to eat ordinary spaces
echo "::set-output name=git_stat::$git_stat"
echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
- name: Send notification
uses: appleboy/telegram-action@master
- name: install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
to: ${{ secrets.TELEGRAM_TO }}
token: ${{ secrets.TELEGRAM_TOKEN }}
format: markdown
args: |
*@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
```
${{ steps.git_info_grab.outputs.git_stat }}
```
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
override: true
- name: Install postgres dependencies
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libcurl4-openssl-dev
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
- name: Set pg revision for caching
id: pg_ver
@@ -61,11 +52,7 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
./pgbuild.sh
- name: Install rust
run: |
sudo apt install -y cargo
make postgres
- name: Cache cargo deps
id: cache_cargo
@@ -77,10 +64,11 @@ jobs:
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Build
# Use `env CARGO_INCREMENTAL=0` to mitigate https://github.com/rust-lang/rust/issues/91696 for rustc 1.57.0
- name: Run cargo build
run: |
cargo build
env CARGO_INCREMENTAL=0 cargo build --workspace --bins --examples --tests
- name: Run test
- name: Run cargo test
run: |
cargo test --test test_pageserver -- --nocapture --test-threads=1
env CARGO_INCREMENTAL=0 cargo test -- --nocapture --test-threads=1

8
.gitignore vendored

@@ -2,4 +2,12 @@
/tmp_check
/tmp_install
/tmp_check_cli
__pycache__/
test_output/
.vscode
/.zenith
/integration_tests/.zenith
# Coverage
*.profraw
*.profdata

2
.gitmodules vendored

@@ -1,4 +1,4 @@
[submodule "vendor/postgres"]
path = vendor/postgres
url = https://github.com/libzenith/postgres
url = https://github.com/zenithdb/postgres
branch = main

10
.yapfignore Normal file

@@ -0,0 +1,10 @@
# This file is only read when `yapf` is run from this directory.
# Hence we only list top-level directories here to avoid confusion.
# See source code for the exact file format: https://github.com/google/yapf/blob/c6077954245bc3add82dafd853a1c7305a6ebd20/yapf/yapflib/file_resources.py#L40-L43
vendor/
target/
tmp_install/
__pycache__/
test_output/
.zenith/
.git/

31
CONTRIBUTING.md Normal file

@@ -0,0 +1,31 @@
# How to contribute
Howdy! Usual good software engineering practices apply. Write
tests. Write comments. Follow standard Rust coding practices where
possible. Use 'cargo fmt' and 'clippy' to tidy up formatting.
There are soft spots in the code, which could use cleanup,
refactoring, additional comments, and so forth. Let's try to raise the
bar, and clean things up as we go. Try to leave code in a better shape
than it was before.
## Submitting changes
1. Make a PR for every change.
Even seemingly trivial patches can break things in surprising ways.
Use of common sense is OK. If you're only fixing a typo in a comment,
it's probably fine to just push it. But if in doubt, open a PR.
2. Get at least one +1 on your PR before you push.
For simple patches, it will only take a minute for someone to review
it.
3. Always keep the CI green.
Do not push, if the CI failed on your PR. Even if you think it's not
your patch's fault. Help to fix the root cause if something else has
broken the CI, before pushing.
*Happy Hacking!*

20
COPYRIGHT Normal file

@@ -0,0 +1,20 @@
This software is licensed under the Apache 2.0 License:
----------------------------------------------------------------------------
Copyright 2021 Zenith Labs, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------
The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.

2323
Cargo.lock generated

File diff suppressed because it is too large


@@ -1,10 +1,23 @@
[workspace]
members = [
"integration_tests",
"pageserver",
"walkeeper",
"zenith",
"compute_tools",
"control_plane",
"pageserver",
"postgres_ffi",
"proxy",
"walkeeper",
"workspace_hack",
"zenith",
"zenith_metrics",
"zenith_utils",
]
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true
# This is only needed for proxy's tests
# TODO: we should probably fork tokio-postgres-rustls instead
[patch.crates-io]
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }

64
Dockerfile Normal file

@@ -0,0 +1,64 @@
#
# Docker image for console integration testing.
#
#
# Build Postgres separately --- this layer will be rebuilt only if one of
# the mentioned paths gets any changes.
#
FROM zenithdb/build:buster AS pg-build
WORKDIR /zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
ENV BUILD_TYPE release
RUN make -j $(getconf _NPROCESSORS_ONLN) -s postgres
RUN rm -rf postgres_install/build
#
# Build zenith binaries
#
# TODO: build cargo deps as a separate layer. We used cargo-chef before but that was
# a net waste of time in a lot of cases. Copying Cargo.lock with an empty lib.rs should do the trick.
#
FROM zenithdb/build:buster AS build
ARG GIT_VERSION
RUN if [ -z "$GIT_VERSION" ]; then echo "GIT_VERSION is required, use --build-arg to pass it"; exit 1; fi
WORKDIR /zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
RUN GIT_VERSION=$GIT_VERSION cargo build --release
#
# Copy binaries to resulting image.
#
FROM debian:buster-slim
WORKDIR /data
RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl ca-certificates && \
mkdir zenith_install
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install postgres_install
COPY docker-entrypoint.sh /docker-entrypoint.sh
# Remove build artifacts (~ 500 MB)
RUN rm -rf postgres_install/build && \
# 'Install' Postgres binaries locally
cp -r postgres_install/* /usr/local/ && \
# Prepare an archive of Postgres binaries (should be around 11 MB)
# and keep it inside container for an ease of deploy pipeline.
cd postgres_install && tar -czf /data/postgres_install.tar.gz . && cd .. && \
rm -rf postgres_install
RUN useradd -d /data zenith && chown -R zenith:zenith /data
VOLUME ["/data"]
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]
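As a usage note for the image above: `GIT_VERSION` is a required build argument and the build fails early without it, so a typical invocation looks roughly like this (the image tag is a placeholder):
```sh
docker build --build-arg GIT_VERSION=$(git rev-parse HEAD) -t zenithdb/zenith:local .
```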

95
Dockerfile.alpine Normal file

@@ -0,0 +1,95 @@
#
# Docker image for console integration testing.
#
# We may also reuse it in CI to unify the installation process and as a general binary-building
# tool for production servers.
#
# Dynamic linking is used for librocksdb and libstdc++ because librocksdb-sys calls
# bindgen with the "dynamic" feature flag. This also prevents usage of the dockerhub alpine-rust
# images, which are statically linked and have guards against any dlopen. I would
# prefer all-static binaries, so we may change the way librocksdb-sys builds or wait until
# we have our own storage and drop the rocksdb dependency.
#
# Cargo-chef is used to separate dependency building from main binary building. This
# way `docker build` will download and install dependencies only if there are changes to
# our Cargo.toml files.
#
#
# build postgres separately -- this layer will be rebuilt only if one of
# the mentioned paths gets any changes
#
FROM alpine:3.13 as pg-build
RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
WORKDIR zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
# Build using clang and lld
RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
#
# Calculate cargo dependencies.
# This will always run, but it only generates recipe.json with the list of dependencies, without
# installing them.
#
FROM alpine:20210212 as cargo-deps-inspect
RUN apk add --update rust cargo
RUN cargo install cargo-chef
WORKDIR zenith
COPY . .
RUN cargo chef prepare --recipe-path recipe.json
#
# Build cargo dependencies.
# This temp container will be rebuilt only if recipe.json has changed.
#
FROM alpine:20210212 as deps-build
RUN apk add --update rust cargo openssl-dev clang build-base
# rust-rocksdb can be built against system-wide rocksdb -- that saves about
# 10 minutes during build. Rocksdb apk package is in testing now, but use it
# anyway. In case of any troubles we can download and build rocksdb here manually
# (to cache it as a docker layer).
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
#
# Build zenith binaries
#
FROM alpine:20210212 as build
RUN apk add --update rust cargo openssl-dev clang build-base
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY . .
# Copy cached dependencies
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=deps-build /zenith/target target
COPY --from=deps-build /root/.cargo /root/.cargo
RUN cargo build --release
#
# Copy binaries to resulting image.
# build-base is here to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
# out how to statically link rocksdb or avoid it altogether).
#
FROM alpine:3.13
RUN apk add --update openssl build-base libseccomp-dev
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install /usr/local
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN addgroup zenith && adduser -h /data -D -G zenith zenith
VOLUME ["/data"]
WORKDIR /data
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

16
Dockerfile.build Normal file

@@ -0,0 +1,16 @@
#
# Image with all the required dependencies to build https://github.com/zenithdb/zenith
# and Postgres from https://github.com/zenithdb/postgres
# Also includes some rust development and build tools.
# NB: keep in sync with rust image version in .circle/config.yml
#
FROM rust:1.56.1-slim-buster
WORKDIR /zenith
# Install postgres and zenith build dependencies
# clang is for rocksdb
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libseccomp-dev pkg-config libssl-dev clang
# Install rust tools
RUN rustup component add clippy && cargo install cargo-audit

14
Dockerfile.compute-tools Normal file

@@ -0,0 +1,14 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM rust:1.56.1-slim-buster AS rust-build
WORKDIR /zenith
COPY . .
RUN cargo build -p compute_tools --release
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /zenith/target/release/zenith_ctl /usr/local/bin/zenith_ctl

202
LICENSE Normal file

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

104
Makefile Normal file

@@ -0,0 +1,104 @@
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
SECCOMP = --with-libseccomp
else
SECCOMP =
endif
#
# We differentiate between release / debug build types using the BUILD_TYPE
# environment variable.
#
BUILD_TYPE ?= debug
ifeq ($(BUILD_TYPE),release)
PG_CONFIGURE_OPTS = --enable-debug
PG_CFLAGS = -O2 -g3 $(CFLAGS)
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
PG_CFLAGS = -O0 -g3 $(CFLAGS)
else
$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
endif
# Choose whether we should be silent or verbose
CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
# Fix for a corner case when make doesn't pass a jobserver
CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
# This option has a side effect of passing make jobserver to cargo.
# However, we shouldn't do this if `make -n` (--dry-run) has been asked.
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
#
# Top level Makefile to build Zenith and PostgreSQL
#
.PHONY: all
all: zenith postgres
### Zenith Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: zenith
zenith: postgres-headers
+@echo "Compiling Zenith"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
tmp_install/build/config.status:
+@echo "Configuring postgres build"
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
$(SECCOMP) \
--prefix=$(abspath tmp_install) > configure.log)
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: tmp_install/build/config.status
# Install the PostgreSQL header files into tmp_install/include
.PHONY: postgres-headers
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/zenith
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
+@echo "Compiling contrib/zenith"
$(MAKE) -C tmp_install/build/contrib/zenith install
+@echo "Compiling contrib/zenith_test_utils"
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
.PHONY: postgres-clean
postgres-clean:
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean:
cd tmp_install/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf tmp_install
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
fmt:
./pre-commit.py --fix-inplace
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f ../../pre-commit.py .git/hooks/pre-commit
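A few illustrative invocations for the `BUILD_TYPE` logic and targets defined above (the job count is arbitrary):
```sh
BUILD_TYPE=release make -j8    # optimized Postgres (-O2) plus `cargo build --release`
make -j8                       # default debug build (-O0, asserts enabled)
make postgres-headers          # only install the headers that postgres_ffi depends on
```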

169
README.md

@@ -1,92 +1,157 @@
# Zenith
Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes
Zenith is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
## Architecture overview
A Zenith installation consists of compute nodes and Zenith storage engine.
Compute nodes are stateless PostgreSQL nodes, backed by Zenith storage engine.
Zenith storage engine consists of two major components:
- Pageserver. Scalable storage backend for compute nodes.
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
Pageserver consists of:
- Repository - Zenith storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request.
## Running local installation
1. Build zenith and patched postgres
1. Install build dependencies and other useful packages
On Ubuntu or Debian this set of packages should be sufficient to build the code:
```text
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev
```
[Rust] 1.56.1 or later is also required.
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
2. Build zenith and patched postgres
```sh
git clone --recursive https://github.com/libzenith/zenith.git
git clone --recursive https://github.com/zenithdb/zenith.git
cd zenith
./pgbuild.sh # builds postgres and installs it to ./tmp_install
cargo build
make -j5
```
2. Start pageserver and postggres on top of it (should be called from repo root):
3. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create ~/.zenith with proper paths to binaries and data
# Create repository in .zenith with proper paths to binaries and data
# Later that would be responsibility of a package install script
>./target/debug/zenith init
> ./target/debug/zenith init
initializing tenantid c03ba6b7ad4c5e9cf556f059ade44229
created initial timeline 5b014a9e41b4b63ce1a1febc04503636 timeline.lsn 0/169C3C8
created main branch
pageserver init succeeded
# start pageserver
> ./target/debug/zenith pageserver start
Starting pageserver at '127.0.0.1:64000'
# start pageserver and safekeeper
> ./target/debug/zenith start
Starting pageserver at 'localhost:64000' in '.zenith'
Pageserver started
initializing for single for 7676
Starting safekeeper at 'localhost:5454' in '.zenith/safekeepers/single'
Safekeeper started
# create and configure postgres data dir
> ./target/debug/zenith pg create
Creating new postgres: path=/Users/user/code/zenith/tmp_check_cli/compute/pg1 port=55432
Database initialized
# start postgres compute node
> ./target/debug/zenith pg start main
Starting new postgres main on main...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
waiting for server to start.... done
server started
# start it
> ./target/debug/zenith pg start pg1
# look up status and connection info
> ./target/debug/zenith pg list
NODE ADDRESS STATUS
pg1 127.0.0.1:55432 running
# check list of running postgres instances
> ./target/debug/zenith pg list
BRANCH ADDRESS LSN STATUS
main 127.0.0.1:55432 0/1609610 running
```
3. Now it is possible to connect to postgres and run some queries:
```
> psql -p55432 -h 127.0.0.1 postgres
4. Now it is possible to connect to postgres and run some queries:
```text
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1,1);
INSERT 0 1
postgres=# select * from t;
key | value
key | value
-----+-------
1 | 1
(1 row)
```
5. And create branches and run postgres on them:
```sh
# create branch named migration_check
> ./target/debug/zenith branch migration_check main
Created branch 'migration_check' at 0/1609610
# check branches tree
> ./target/debug/zenith branch
main
┗━ @0/1609610: migration_check
# start postgres on that branch
> ./target/debug/zenith pg start migration_check
Starting postgres node at 'host=127.0.0.1 port=55433 user=stas'
waiting for server to start.... done
# this new postgres instance will have all the data from 'main' postgres,
# but all modifications would not affect data in original postgres
> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
postgres=# select * from t;
key | value
-----+-------
1 | 1
(1 row)
postgres=# insert into t values(2,2);
INSERT 0 1
```
6. If you want to run tests afterwards (see below), you have to stop all the running pageserver, safekeeper and postgres instances
you have just started. You can stop them all with one command:
```sh
> ./target/debug/zenith stop
```
## Running tests
```sh
git clone --recursive https://github.com/libzenith/zenith.git
./pgbuild.sh # builds postgres and installs it to ./tmp_install
cargo test -- --test-threads=1
git clone --recursive https://github.com/zenithdb/zenith.git
make # builds also postgres and installs it to ./tmp_install
./scripts/pytest
```
## Source tree layout
## Documentation
/walkeeper:
Now we use README files to cover design ideas and overall architecture for each module, plus `rustdoc`-style documentation comments. See also [/docs/](/docs/) for a top-level overview of all available markdown documentation.
WAL safekeeper. Written in Rust.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
/pageserver:
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
Page Server. Written in Rust.
### Postgres-specific terms
Depends on the modified 'postgres' binary for WAL redo.
Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
The same applies to certain spelling: we use MB to denote 1024 * 1024 bytes; while MiB would be technically more correct, it is inconsistent with what the PostgreSQL code and its documentation use.
/integration_tests:
Tests with different combinations of a Postgres compute node, WAL safekeeper and Page Server.
/mgmt-console:
Web UI to launch (modified) Postgres servers, using S3 as the backing store. Written in Python.
This is somewhat outdated, as it doesn't use the WAL safekeeper or Page Servers.
/vendor/postgres:
PostgreSQL source tree, with the modifications needed for Zenith.
/vendor/postgres/src/bin/safekeeper:
Extension (safekeeper_proxy) that runs in the compute node, and connects to the WAL safekeepers
and streams the WAL
To get more familiar with this aspect, refer to:
- [Zenith glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))
## Join the development
- Read `CONTRIBUTING.md` to learn about project code style and practices.
- To get familiar with a source tree layout, use [/docs/sourcetree.md](/docs/sourcetree.md).
- To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html


@@ -0,0 +1 @@
target

1
compute_tools/.gitignore vendored Normal file

@@ -0,0 +1 @@
target

19
compute_tools/Cargo.toml Normal file

@@ -0,0 +1,19 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition = "2021"
[dependencies]
libc = "0.2"
anyhow = "1.0"
chrono = "0.4"
clap = "3.0"
env_logger = "0.9"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tar = "0.4"
tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread"] }

81
compute_tools/README.md Normal file

@@ -0,0 +1,81 @@
# Compute node tools
Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
`ExecStart` option. It will handle all the `zenith` specifics during compute node
initialization:
- `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
- Every start is a fresh start, so the data directory is removed and
initialized again on each run.
- Next it will put configuration files into the `PGDATA` directory.
- Sync safekeepers and get commit LSN.
- Get `basebackup` from the pageserver using the LSN returned in the previous step.
- Try to start `postgres` and wait until it is ready to accept connections.
- Check and alter/drop/create roles and databases.
- Hang waiting on the `postmaster` process to exit.
Also `zenith_ctl` spawns two separate service threads:
- `compute-monitor` checks the last Postgres activity timestamp and saves it
into the shared `ComputeState`;
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
last activity requests.
Usage example:
```sh
zenith_ctl -D /var/db/postgres/compute \
-C 'postgresql://zenith_admin@localhost/postgres' \
-S /var/db/postgres/specs/current.json \
-b /usr/local/bin/postgres
```
## Tests
Cargo formatter:
```sh
cargo fmt
```
Run tests:
```sh
cargo test
```
Clippy linter:
```sh
cargo clippy --all --all-targets -- -Dwarnings -Drust-2018-idioms
```
## Cross-platform compilation
Imagine that you are on macOS (x86) and you want a Linux GNU (`x86_64-unknown-linux-gnu` platform in `rust` terminology) executable.
### Using docker
You can use a throw-away Docker container ([rustlang/rust](https://hub.docker.com/r/rustlang/rust/) image) for doing that:
```sh
docker run --rm \
-v $(pwd):/compute_tools \
-w /compute_tools \
-t rustlang/rust:nightly cargo build --release --target=x86_64-unknown-linux-gnu
```
or one-line:
```sh
docker run --rm -v $(pwd):/compute_tools -w /compute_tools -t rust:latest cargo build --release --target=x86_64-unknown-linux-gnu
```
### Using rust native cross-compilation
Another way is to add `x86_64-unknown-linux-gnu` target on your host system:
```sh
rustup target add x86_64-unknown-linux-gnu
```
Install macOS cross-compiler toolchain:
```sh
brew tap SergioBenitez/osxct
brew install x86_64-unknown-linux-gnu
```
And finally run `cargo build`:
```sh
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-unknown-linux-gnu-gcc cargo build --target=x86_64-unknown-linux-gnu --release
```


@@ -0,0 +1 @@
max_width = 100


@@ -0,0 +1,249 @@
//!
//! Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
//! `ExecStart` option. It will handle all the `zenith` specifics during compute node
//! initialization:
//! - `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
//! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run.
//! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN.
//! - Get `basebackup` from the pageserver using the LSN returned in the previous step.
//! - Try to start `postgres` and wait until it is ready to accept connections.
//! - Check and alter/drop/create roles and databases.
//! - Hang waiting on the `postmaster` process to exit.
//!
//! Also `zenith_ctl` spawns two separate service threads:
//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
//! into the shared `ComputeState`;
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
//! last activity requests.
//!
//! Usage example:
//! ```sh
//! zenith_ctl -D /var/db/postgres/compute \
//! -C 'postgresql://zenith_admin@localhost/postgres' \
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres
//! ```
//!
use std::fs::File;
use std::panic;
use std::path::Path;
use std::process::{exit, Command, ExitStatus};
use std::sync::{Arc, RwLock};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use log::info;
use postgres::{Client, NoTls};
use compute_tools::config;
use compute_tools::http_api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use compute_tools::zenith::*;
/// Do all the preparations like PGDATA directory creation, configuration,
/// safekeepers sync, basebackup, etc.
fn prepare_pgdata(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
let state = state.read().unwrap();
let spec = &state.spec;
let pgdata_path = Path::new(&state.pgdata);
let pageserver_connstr = spec
.cluster
.settings
.find("zenith.page_server_connstring")
.expect("pageserver connstr should be provided");
let tenant = spec
.cluster
.settings
.find("zenith.zenith_tenant")
.expect("tenant id should be provided");
let timeline = spec
.cluster
.settings
.find("zenith.zenith_timeline")
.expect("tenant id should be provided");
info!(
"starting cluster #{}, operation #{}",
spec.cluster.cluster_id,
spec.operation_uuid.as_ref().unwrap()
);
// Remove/create an empty pgdata directory and put configuration there.
create_pgdata(&state.pgdata)?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
info!("starting safekeepers syncing");
let lsn = sync_safekeepers(&state.pgdata, &state.pgbin)
.with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", lsn);
info!(
"getting basebackup@{} from pageserver {}",
lsn, pageserver_connstr
);
get_basebackup(&state.pgdata, &pageserver_connstr, &tenant, &timeline, &lsn).with_context(
|| {
format!(
"failed to get basebackup@{} from pageserver {}",
lsn, pageserver_connstr
)
},
)?;
// Update pg_hba.conf received with basebackup.
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Start Postgres as a child process and manage DBs/roles.
/// After that this will hang waiting on the postmaster process to exit.
fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
let read_state = state.read().unwrap();
let pgdata_path = Path::new(&read_state.pgdata);
// Run postgres as a child process.
let mut pg = Command::new(&read_state.pgbin)
.args(&["-D", &read_state.pgdata])
.spawn()
.expect("cannot start postgres process");
// Try default Postgres port if it is not provided
let port = read_state
.spec
.cluster
.settings
.find("port")
.unwrap_or_else(|| "5432".to_string());
wait_for_postgres(&port, pgdata_path)?;
let mut client = Client::connect(&read_state.connstr, NoTls)?;
handle_roles(&read_state.spec, &mut client)?;
handle_databases(&read_state.spec, &mut client)?;
// 'Close' connection
drop(client);
info!(
"finished configuration of cluster #{}",
read_state.spec.cluster.cluster_id
);
// Release the read lock.
drop(read_state);
// Get the write lock, update the state and release the lock, so the HTTP API
// is able to serve requests while we are blocked waiting on
// Postgres.
let mut state = state.write().unwrap();
state.ready = true;
drop(state);
// Wait for child postgres process basically forever. In this state Ctrl+C
// will be propagated to postgres and it will be shut down as well.
let ecode = pg.wait().expect("failed to wait on postgres");
Ok(ecode)
}
fn main() -> Result<()> {
// TODO: re-use `zenith_utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
// Env variable is set by `cargo`
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
let matches = clap::App::new("zenith_ctl")
.version(version.unwrap_or("unknown"))
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.get_matches();
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
let connstr = matches
.value_of("connstr")
.expect("Postgres connection string is required");
let spec = matches.value_of("spec");
let spec_path = matches.value_of("spec-path");
// Try to use just 'postgres' if no path is provided
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
let spec: ClusterSpec = match spec {
// First, try to get cluster spec from the cli argument
Some(json) => serde_json::from_str(json)?,
None => {
// Second, try to read it from the file if path is provided
if let Some(sp) = spec_path {
let path = Path::new(sp);
let file = File::open(path)?;
serde_json::from_reader(file)?
} else {
panic!("cluster spec should be provided via --spec or --spec-path argument");
}
}
};
let compute_state = ComputeState {
connstr: connstr.to_string(),
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,
ready: false,
last_active: Utc::now(),
};
let compute_state = Arc::new(RwLock::new(compute_state));
// Launch service threads first, so we are able to serve availability
// requests while configuration is still in progress.
let mut _threads = vec![
launch_http_server(&compute_state).expect("cannot launch compute monitor thread"),
launch_monitor(&compute_state).expect("cannot launch http endpoint thread"),
];
prepare_pgdata(&compute_state)?;
// Run compute (Postgres) and hang waiting on it. Panic if any error happens;
// that will help us trigger an unwind and kill the postmaster as well.
match run_compute(&compute_state) {
Ok(ec) => exit(ec.success() as i32),
Err(error) => panic!("cannot start compute node, error: {}", error),
}
}


@@ -0,0 +1,51 @@
use std::fs::{File, OpenOptions};
use std::io;
use std::io::prelude::*;
use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::PgOptionsSerialize;
use crate::zenith::ClusterSpec;
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
let mut file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.append(false)
.open(path)?;
let buf = io::BufReader::new(&file);
let mut count: usize = 0;
for l in buf.lines() {
if l? == line {
return Ok(false);
}
count = 1;
}
write!(file, "{}{}", "\n".repeat(count), line)?;
Ok(true)
}
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ClusterSpec) -> Result<()> {
// File::create() destroys the file content if it exists.
let mut postgres_conf = File::create(path)?;
write_zenith_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
Ok(())
}
// Write Postgres config block wrapped with generated comment section
fn write_zenith_managed_block(file: &mut File, buf: &str) -> Result<()> {
writeln!(file, "# Managed by Zenith: begin")?;
writeln!(file, "{}", buf)?;
writeln!(file, "# Managed by Zenith: end")?;
Ok(())
}
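A minimal sketch of how `line_in_file` enables idempotent config edits; the helper below and its use of `PG_HBA_ALL_MD5` from `params.rs` are illustrative, not taken from the original sources:
```rust
use std::path::Path;

use anyhow::Result;

use compute_tools::config::line_in_file;
use compute_tools::params::PG_HBA_ALL_MD5;

/// Hypothetical helper: append an md5 auth rule to pg_hba.conf only if it is
/// not there yet, so repeated compute starts do not duplicate the entry.
fn allow_md5_from_anywhere(pgdata: &Path) -> Result<()> {
    let pg_hba = pgdata.join("pg_hba.conf");
    if line_in_file(&pg_hba, PG_HBA_ALL_MD5)? {
        println!("appended md5 rule to {}", pg_hba.display());
    }
    Ok(())
}
```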


@@ -0,0 +1,73 @@
use std::convert::Infallible;
use std::net::SocketAddr;
use std::sync::{Arc, RwLock};
use std::thread;
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use log::{error, info};
use crate::zenith::*;
// Service function to handle all available routes.
fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
match (req.method(), req.uri().path()) {
// Timestamp of the last Postgres activity in the plain text.
(&Method::GET, "/last_activity") => {
info!("serving /last_active GET request");
let state = state.read().unwrap();
// Use RFC3339 format for consistency.
Response::new(Body::from(state.last_active.to_rfc3339()))
}
// Has compute setup process finished? -> true/false
(&Method::GET, "/ready") => {
info!("serving /ready GET request");
let state = state.read().unwrap();
Response::new(Body::from(format!("{}", state.ready)))
}
// Return the `404 Not Found` for any other routes.
_ => {
let mut not_found = Response::new(Body::from("404 Not Found"));
*not_found.status_mut() = StatusCode::NOT_FOUND;
not_found
}
}
}
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main]
async fn serve(state: Arc<RwLock<ComputeState>>) {
let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
let make_service = make_service_fn(move |_conn| {
let state = state.clone();
async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone();
async move { Ok::<_, Infallible>(routes(req, state)) }
}))
}
});
info!("starting HTTP server on {}", addr);
let server = Server::bind(&addr).serve(make_service);
// Run this server forever
if let Err(e) = server.await {
error!("server error: {}", e);
}
}
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("http-endpoint".into())
.spawn(move || serve(state))?)
}
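With the server above bound to port 3080, the two endpoints can be probed directly, for example:
```sh
curl http://localhost:3080/ready           # "true" once the compute setup has finished
curl http://localhost:3080/last_activity   # RFC3339 timestamp of the last Postgres activity
```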

13
compute_tools/src/lib.rs Normal file

@@ -0,0 +1,13 @@
//!
//! Various tools and helpers to handle cluster / compute node (Postgres)
//! configuration.
//!
pub mod config;
pub mod http_api;
#[macro_use]
pub mod logger;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;
pub mod zenith;


@@ -0,0 +1,43 @@
use std::io::Write;
use anyhow::Result;
use chrono::Utc;
use env_logger::{Builder, Env};
macro_rules! info_println {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
println!($($tts)*);
}
}
}
macro_rules! info_print {
($($tts:tt)*) => {
if log_enabled!(Level::Info) {
print!($($tts)*);
}
}
}
/// Initialize `env_logger` using either `default_level` or
/// `RUST_LOG` environment variable as default log level.
pub fn init_logger(default_level: &str) -> Result<()> {
let env = Env::default().filter_or("RUST_LOG", default_level);
Builder::from_env(env)
.format(|buf, record| {
let thread_handle = std::thread::current();
writeln!(
buf,
"{} [{}] {}: {}",
Utc::now().format("%Y-%m-%d %H:%M:%S%.3f %Z"),
thread_handle.name().unwrap_or("main"),
record.level(),
record.args()
)
})
.init();
Ok(())
}


@@ -0,0 +1,109 @@
use std::sync::{Arc, RwLock};
use std::{thread, time};
use anyhow::Result;
use chrono::{DateTime, Utc};
use log::{debug, info};
use postgres::{Client, NoTls};
use crate::zenith::ComputeState;
const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
// Suppose that `connstr` doesn't change
let connstr = state.read().unwrap().connstr.clone();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(&connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
loop {
// Should be outside of the write lock to allow others to read while we sleep.
thread::sleep(timeout);
match &mut client {
Ok(cli) => {
if cli.is_closed() {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(&connstr, NoTls);
continue;
}
// Get all running client backends except ourself, use RFC3339 DateTime format.
let backends = cli
.query(
"SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
&[],
);
let mut last_active = state.read().unwrap().last_active;
if let Ok(backs) = backends {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() {
let state: String = b.get("state");
let change: String = b.get("state_change");
if state == "idle" {
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
Err(e) => {
info!("cannot parse backend state_change DateTime: {}", e);
continue;
}
}
} else {
// Found non-idle backend, so the last activity is NOW.
// Save it and exit the for loop. Also clear the idle backend
// `state_change` timestamps array as it doesn't matter now.
last_active = Utc::now();
idle_backs.clear();
break;
}
}
// Sort idle backend `state_change` timestamps. The last one corresponds
// to the last activity.
idle_backs.sort();
if let Some(last) = idle_backs.last() {
last_active = *last;
}
}
// Update the last activity in the shared state if we got a more recent one.
let mut state = state.write().unwrap();
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {}", last_active);
}
}
Err(e) => {
info!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(&connstr, NoTls);
}
}
}
}
/// Launch a separate compute monitor thread and return its `JoinHandle`.
pub fn launch_monitor(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || watch_compute_activity(&state))?)
}
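Putting the HTTP endpoint and the monitor together, a rough wiring sketch (the `build_state()` helper is hypothetical; the real `zenith_ctl` startup sequence may differ):

use std::sync::{Arc, RwLock};
use compute_tools::http_api::launch_http_server;
use compute_tools::monitor::launch_monitor;
use compute_tools::zenith::ComputeState;

// Hypothetical helper that fills in ComputeState (connstr, pgdata, pgbin,
// spec, ready, last_active) from the command line and the cluster spec.
let state: Arc<RwLock<ComputeState>> = Arc::new(RwLock::new(build_state()?));
let _http = launch_http_server(&state)?;
let _monitor = launch_monitor(&state)?;
// ... apply the spec, then flip `ready` to true under the write lock.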

View File

@@ -0,0 +1,3 @@
pub const DEFAULT_LOG_LEVEL: &str = "info";
pub const DEFAULT_CONNSTRING: &str = "host=localhost user=postgres";
pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";

View File

@@ -0,0 +1,264 @@
use std::net::{SocketAddr, TcpStream};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::Command;
use std::str::FromStr;
use std::{fs, thread, time};
use anyhow::{bail, Result};
use postgres::{Client, Transaction};
use serde::Deserialize;
const POSTGRES_WAIT_TIMEOUT: u64 = 60 * 1000; // milliseconds
/// Rust representation of Postgres role info with only those fields
/// that matter for us.
#[derive(Clone, Deserialize)]
pub struct Role {
pub name: PgIdent,
pub encrypted_password: Option<String>,
pub options: GenericOptions,
}
/// Rust representation of Postgres database info with only those fields
/// that matter for us.
#[derive(Clone, Deserialize)]
pub struct Database {
pub name: PgIdent,
pub owner: PgIdent,
pub options: GenericOptions,
}
/// Common type representing both SQL statement params with or without value,
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
/// options like `wal_level = logical`.
#[derive(Clone, Deserialize)]
pub struct GenericOption {
pub name: String,
pub value: Option<String>,
pub vartype: String,
}
/// Optional collection of `GenericOption`'s. Type alias allows us to
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
impl GenericOption {
/// Represent `GenericOption` as SQL statement parameter.
pub fn to_pg_option(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} '{}'", self.name, val),
_ => format!("{} {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
/// Represent `GenericOption` as configuration option.
pub fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", self.name, val),
_ => format!("{} = {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
}
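As a quick illustration of the two serializers, using values that also appear in the example cluster spec further down (sketch):

let opt = GenericOption {
    name: "wal_level".to_string(),
    value: Some("replica".to_string()),
    vartype: "enum".to_string(),
};
assert_eq!(opt.to_pg_option(), "wal_level replica");
assert_eq!(opt.to_pg_setting(), "wal_level = replica");

// String-typed values get single quotes in both representations.
let addr = GenericOption {
    name: "listen_addresses".to_string(),
    value: Some("0.0.0.0".to_string()),
    vartype: "string".to_string(),
};
assert_eq!(addr.to_pg_setting(), "listen_addresses = '0.0.0.0'");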
pub trait PgOptionsSerialize {
fn as_pg_options(&self) -> String;
fn as_pg_settings(&self) -> String;
}
impl PgOptionsSerialize for GenericOptions {
/// Serialize an optional collection of `GenericOption`'s to
/// Postgres SQL statement arguments.
fn as_pg_options(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_option())
.collect::<Vec<String>>()
.join(" ")
} else {
"".to_string()
}
}
/// Serialize an optional collection of `GenericOption`'s to
/// `postgresql.conf` compatible format.
fn as_pg_settings(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_setting())
.collect::<Vec<String>>()
.join("\n")
} else {
"".to_string()
}
}
}
pub trait GenericOptionsSearch {
fn find(&self, name: &str) -> Option<String>;
}
impl GenericOptionsSearch for GenericOptions {
/// Lookup option by name
fn find(&self, name: &str) -> Option<String> {
match &self {
Some(ops) => {
let op = ops.iter().find(|s| s.name == name);
match op {
Some(op) => op.value.clone(),
None => None,
}
}
None => None,
}
}
}
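And a small sketch of the lookup helper on an optional option list:

let opts: GenericOptions = Some(vec![GenericOption {
    name: "port".to_string(),
    value: Some("55432".to_string()),
    vartype: "integer".to_string(),
}]);
assert_eq!(opts.find("port"), Some("55432".to_string()));
assert_eq!(opts.find("max_connections"), None);
assert_eq!(None::<Vec<GenericOption>>.find("port"), None);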
impl Role {
/// Serialize a list of role parameters into a Postgres-acceptable
/// string of arguments.
pub fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in Rails.
// For now we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string();
if let Some(pass) = &self.encrypted_password {
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
} else {
params.push_str(" PASSWORD NULL");
}
params
}
}
impl Database {
/// Serialize a list of database parameters into a Postgres-acceptable
/// string of arguments.
/// NB: `TEMPLATE` is actually also an identifier, but so far we only need
/// to use `template0` and `template1`, so it is not a problem. Yet in the future
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
params
}
}
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
pub type PgIdent = String;
/// Generic trait used to provide quoting for strings used in the
/// Postgres SQL queries. Currently used only to implement quoting
/// of identifiers, but could be used for literals in the future.
pub trait PgQuote {
fn quote(&self) -> String;
}
impl PgQuote for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes the provided string with `""` and escapes every `"`. Not idempotent,
/// i.e. if the string is already escaped it will be escaped again.
fn quote(&self) -> String {
let result = format!("\"{}\"", self.replace("\"", "\"\""));
result
}
}
/// Build a list of existing Postgres roles
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
let postgres_roles = xact
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
.iter()
.map(|row| Role {
name: row.get("rolname"),
encrypted_password: row.get("rolpassword"),
options: None,
})
.collect();
Ok(postgres_roles)
}
/// Build a list of existing Postgres databases
pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
let postgres_dbs = client
.query(
"SELECT datname, datdba::regrole::text as owner
FROM pg_catalog.pg_database;",
&[],
)?
.iter()
.map(|row| Database {
name: row.get("datname"),
owner: row.get("owner"),
options: None,
})
.collect();
Ok(postgres_dbs)
}
/// Wait for Postgres to become ready to accept connections:
/// - state should be `ready` in the `pgdata/postmaster.pid`
/// - and we should be able to connect to 127.0.0.1:5432
pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid");
let mut slept: u64 = 0; // ms
let pause = time::Duration::from_millis(100);
let timeout = time::Duration::from_millis(200);
let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
loop {
// Sleep POSTGRES_WAIT_TIMEOUT at most (a bit longer actually if we consider the TCP timeout,
// but postgres starts listening almost immediately, even if it is not really
// ready to accept connections).
if slept >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
if pid_path.exists() {
// XXX: dumb, but the simplest way to get the last line of a text file
// TODO: better use `.lines().last()` later
let stdout = Command::new("tail")
.args(&["-n1", pid_path.to_str().unwrap()])
.output()?
.stdout;
let status = String::from_utf8(stdout)?;
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Now Postgres is ready to accept connections
if status.trim() == "ready" && can_connect {
break;
}
}
thread::sleep(pause);
slept += 100;
}
Ok(())
}
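A minimal usage sketch (port and data directory are placeholders):

// Blocks until postmaster.pid reports `ready` and 127.0.0.1:<port> accepts TCP
// connections, or bails after POSTGRES_WAIT_TIMEOUT.
wait_for_postgres("55432", Path::new("/path/to/pgdata"))?;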
/// Remove `pgdata` directory and create it again with right permissions.
pub fn create_pgdata(pgdata: &str) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
// If it is something different then create_dir() will error out anyway.
let _ok = fs::remove_dir_all(pgdata);
fs::create_dir(pgdata)?;
fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
}

246
compute_tools/src/spec.rs Normal file
View File

@@ -0,0 +1,246 @@
use std::path::Path;
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::Client;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
use crate::zenith::ClusterSpec;
/// Takes the cluster specification and does the following:
/// - Serializes the cluster config and puts it into `postgresql.conf`, completely rewriting the file.
/// - Updates `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ClusterSpec, pgdata_path: &Path) -> Result<()> {
// File `postgresql.conf` is no longer included in `basebackup`, so just
// always write the full config into it, creating a new file.
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Check `pg_hba.conf` and update if needed to allow external connections.
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
// XXX: consider making it a part of spec.json
info!("checking pg_hba.conf");
let pghba_path = pgdata_path.join("pg_hba.conf");
if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
info!("updated pg_hba.conf to allow external connections");
} else {
info!("pg_hba.conf is up-to-date");
}
Ok(())
}
/// Given a cluster spec and a Postgres client, handles role creation,
/// deletion and updates inside a single transaction.
pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
// Print a list of existing Postgres roles (only when info-level logging is enabled)
info!("postgres roles:");
for r in &existing_roles {
info_println!(
"{} - {}:{}",
" ".repeat(27 + 5),
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on roles");
for op in ops {
match op.action.as_ref() {
// We do not check whether the role exists or not,
// Postgres will take care of it for us
"delete_role" => {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
// Renaming a role drops its password, since the role name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// The follow-up roles update will set the new password.
"rename_role" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.quote(),
new_name.quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
xact.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres roles info to handle possible roles renaming
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!("cluster spec roles:");
for role in &spec.cluster.roles {
let name = &role.name;
info_print!(
"{} - {}:{}",
" ".repeat(27 + 5),
name,
if role.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
);
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
if let Some(r) = pg_role {
let mut update_role = false;
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
update_role = true;
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether the password changed or not (trim the 'md5' prefix first)
update_role = pg_pwd[3..] != *role.encrypted_password.as_ref().unwrap();
}
if update_role {
let mut query: String = format!("ALTER ROLE {} ", name.quote());
info_print!(" -> update");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
} else {
info!("role name {}", &name);
let mut query: String = format!("CREATE ROLE {} ", name.quote());
info!("role create query {}", &query);
info_print!(" -> create");
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
info_print!("\n");
}
xact.commit()?;
Ok(())
}
/// Follows mostly the same logic as `handle_roles()`, except that we
/// do not use an explicit transaction block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks,
/// which together give us idempotency.
pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only when info-level logging is enabled)
info!("postgres databases:");
for r in &existing_dbs {
info_println!("{} - {}:{}", " ".repeat(27 + 5), r.name, r.owner);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on databases");
for op in ops {
match op.action.as_ref() {
// We do not check whether the DB exists or not,
// Postgres will take care of it for us
"delete_db" => {
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
warn!("deleting database '{}'", &op.name);
client.execute(query.as_str(), &[])?;
}
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
if existing_dbs.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.quote(),
new_name.quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
client.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres databases info to handle possible renames
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
info!("cluster spec databases:");
for db in &spec.cluster.databases {
let name = &db.name;
info_print!("{} - {}:{}", " ".repeat(27 + 5), db.name, db.owner);
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
let pg_db = existing_dbs.iter().find(|r| r.name == *name);
if let Some(r) = pg_db {
// XXX: the db owner name is returned from Postgres as a quoted string
// when quoting is needed.
let new_owner = if r.owner.starts_with('\"') {
db.owner.quote()
} else {
db.owner.clone()
};
if new_owner != r.owner {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.quote(),
db.owner.quote()
);
info_print!(" -> update");
client.execute(query.as_str(), &[])?;
}
} else {
let mut query: String = format!("CREATE DATABASE {} ", name.quote());
info_print!(" -> create");
query.push_str(&db.to_pg_options());
client.execute(query.as_str(), &[])?;
}
info_print!("\n");
}
Ok(())
}
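A possible call order during compute configuration, sketched under the assumption that `spec_json`, `pgdata` and `connstr` come from elsewhere in `zenith_ctl` (placeholders here):

use std::path::Path;
use postgres::{Client, NoTls};
use compute_tools::spec::{handle_configuration, handle_databases, handle_roles};
use compute_tools::zenith::ClusterSpec;

let spec: ClusterSpec = serde_json::from_str(&spec_json)?;
handle_configuration(&spec, Path::new(&pgdata))?; // rewrite postgresql.conf, update pg_hba.conf
let mut client = Client::connect(&connstr, NoTls)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;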

109
compute_tools/src/zenith.rs Normal file
View File

@@ -0,0 +1,109 @@
use std::process::{Command, Stdio};
use anyhow::Result;
use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use serde::Deserialize;
use crate::pg_helpers::*;
/// Compute node state shared across several `zenith_ctl` threads.
/// Should be used under an `RwLock` to allow the HTTP API server to serve
/// status requests while configuration is in progress.
pub struct ComputeState {
pub connstr: String,
pub pgdata: String,
pub pgbin: String,
pub spec: ClusterSpec,
/// Compute setup process has finished
pub ready: bool,
/// Timestamp of the last Postgres activity
pub last_active: DateTime<Utc>,
}
/// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description.
#[derive(Clone, Deserialize)]
pub struct ClusterSpec {
pub format_version: f32,
pub timestamp: String,
pub operation_uuid: Option<String>,
/// Expected cluster state at the end of transition process.
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
}
/// Cluster state seen from the perspective of the external tools
/// like Rails web console.
#[derive(Clone, Deserialize)]
pub struct Cluster {
pub cluster_id: String,
pub name: String,
pub state: Option<String>,
pub roles: Vec<Role>,
pub databases: Vec<Database>,
pub settings: GenericOptions,
}
/// Single cluster state changing operation that could not be represented as
/// a static `Cluster` structure. For example:
/// - DROP DATABASE
/// - DROP ROLE
/// - ALTER ROLE name RENAME TO new_name
/// - ALTER DATABASE name RENAME TO new_name
#[derive(Clone, Deserialize)]
pub struct DeltaOp {
pub action: String,
pub name: PgIdent,
pub new_name: Option<PgIdent>,
}
/// Get a basebackup from the pageserver over a libpq connection (`connstr`) and
/// unpack it into the `pgdata` directory, overriding all of its previous content.
pub fn get_basebackup(
pgdata: &str,
connstr: &str,
tenant: &str,
timeline: &str,
lsn: &str,
) -> Result<()> {
let mut client = Client::connect(connstr, NoTls)?;
let basebackup_cmd = match lsn {
"0/0" => format!("basebackup {} {}", tenant, timeline), // First start of the compute
_ => format!("basebackup {} {} {}", tenant, timeline, lsn),
};
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
let mut ar = tar::Archive::new(copyreader);
ar.unpack(&pgdata)?;
Ok(())
}
/// Run `postgres` in a special mode with `--sync-safekeepers` argument
/// and return the reported LSN back to the caller.
pub fn sync_safekeepers(pgdata: &str, pgbin: &str) -> Result<String> {
let sync_handle = Command::new(&pgbin)
.args(&["--sync-safekeepers"])
.env("PGDATA", &pgdata) // we cannot use -D in this mode
.stdout(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
// `postgres --sync-safekeepers` will print all log output to stderr and the
// final LSN to stdout. So we pipe only stdout, while stderr is automatically
// forwarded to the caller's output.
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}",
sync_output.status,
);
}
let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
Ok(lsn)
}
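Sketch of how these two helpers can be combined, mirroring the control-plane logic shown later in compute.rs (all string arguments are placeholders):

// Sync safekeepers first to learn the commit LSN, then fetch a basebackup at it.
// `get_basebackup` treats "0/0" as a first start and asks for the latest data.
let lsn = sync_safekeepers(&pgdata, &pgbin)?;
get_basebackup(&pgdata, &pageserver_connstr, &tenant, &timeline, &lsn)?;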

View File

@@ -0,0 +1,205 @@
{
"format_version": 1.0,
"timestamp": "2021-05-23T18:25:43.511Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
"cluster": {
"cluster_id": "test-cluster-42",
"name": "Zenith Test",
"state": "restarted",
"roles": [
{
"name": "postgres",
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "alexk",
"encrypted_password": null,
"options": null
},
{
"name": "zenith \"new\"",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "zen",
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "\"name\";\\n select 1;",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "MyRole",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
}
],
"databases": [
{
"name": "DB2",
"owner": "alexk",
"options": [
{
"name": "LC_COLLATE",
"value": "C",
"vartype": "string"
},
{
"name": "LC_CTYPE",
"value": "C",
"vartype": "string"
},
{
"name": "TEMPLATE",
"value": "template0",
"vartype": "enum"
}
]
},
{
"name": "zenith",
"owner": "MyRole"
},
{
"name": "zen",
"owner": "zen"
}
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "wal_acceptors",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "shared_buffers",
"value": "32768",
"vartype": "integer"
},
{
"name": "port",
"value": "55432",
"vartype": "integer"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "wal_sender_timeout",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "maintenance_work_mem",
"value": "65536",
"vartype": "integer"
},
{
"name": "max_parallel_workers",
"value": "8",
"vartype": "integer"
},
{
"name": "max_worker_processes",
"value": "8",
"vartype": "integer"
},
{
"name": "zenith.zenith_tenant",
"value": "b0554b632bd4d547a63b86c3630317e8",
"vartype": "string"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "zenith.zenith_timeline",
"value": "2414a61ffc94e428f14b5758fe308e13",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "zenith",
"vartype": "string"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "zenith.page_server_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
}
]
},
"delta_operations": [
{
"action": "delete_db",
"name": "zenith_test"
},
{
"action": "rename_db",
"name": "DB",
"new_name": "DB2"
},
{
"action": "delete_role",
"name": "zenith2"
},
{
"action": "rename_role",
"name": "zenith new",
"new_name": "zenith \"new\""
}
]
}

View File

@@ -0,0 +1,48 @@
#[cfg(test)]
mod config_tests {
use std::fs::{remove_file, File};
use std::io::{Read, Write};
use std::path::Path;
use compute_tools::config::*;
fn write_test_file(path: &Path, content: &str) {
let mut file = File::create(path).unwrap();
file.write_all(content.as_bytes()).unwrap();
}
fn check_file_content(path: &Path, expected_content: &str) {
let mut file = File::open(path).unwrap();
let mut content = String::new();
file.read_to_string(&mut content).unwrap();
assert_eq!(content, expected_content);
}
#[test]
fn test_line_in_file() {
let path = Path::new("./tests/tmp/config_test.txt");
write_test_file(path, "line1\nline2.1\t line2.2\nline3");
let line = "line2.1\t line2.2";
let result = line_in_file(path, line).unwrap();
assert!(!result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line1\nline2.1\t line2.2\nline3\nline4");
remove_file(path).unwrap();
let path = Path::new("./tests/tmp/new_config_test.txt");
let line = "line4";
let result = line_in_file(path, line).unwrap();
assert!(result);
check_file_content(path, "line4");
remove_file(path).unwrap();
}
}

View File

@@ -0,0 +1,41 @@
#[cfg(test)]
mod pg_helpers_tests {
use std::fs::File;
use compute_tools::pg_helpers::*;
use compute_tools::zenith::ClusterSpec;
#[test]
fn params_serialize() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.cluster.databases.first().unwrap().to_pg_options(),
"LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \"alexk\""
);
assert_eq!(
spec.cluster.roles.first().unwrap().to_pg_options(),
"LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
);
}
#[test]
fn settings_serialize() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.cluster.settings.as_pg_settings(),
"fsync = off\nwal_level = replica\nhot_standby = on\nwal_acceptors = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nzenith.zenith_tenant = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nzenith.zenith_timeline = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'zenith'\nsynchronous_standby_names = 'walproposer'\nzenith.page_server_connstring = 'host=127.0.0.1 port=6400'"
);
}
#[test]
fn quote_ident() {
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
}
}

1
compute_tools/tests/tmp/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/*

View File

@@ -1,27 +1,21 @@
[package]
name = "control_plane"
version = "0.1.0"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
edition = "2021"
[dependencies]
rand = "0.8.3"
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
serde = ""
serde_derive = ""
toml = ""
lazy_static = ""
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
serde = { version = "1.0", features = ["derive"] }
toml = "0.5"
lazy_static = "1.4"
regex = "1"
anyhow = "1.0"
hex = "0.4.3"
bytes = "1.0.1"
fs_extra = "1.2.0"
thiserror = "1"
nix = "0.23"
url = "2.2.2"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
postgres_ffi = { path = "../postgres_ffi" }
zenith_utils = { path = "../zenith_utils" }
workspace_hack = { path = "../workspace_hack" }

View File

@@ -0,0 +1,20 @@
# Page server and three safekeepers.
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
pg_port = 5454
http_port = 7676
[[safekeepers]]
id = 2
pg_port = 5455
http_port = 7677
[[safekeepers]]
id = 3
pg_port = 5456
http_port = 7678

11
control_plane/simple.conf Normal file
View File

@@ -0,0 +1,11 @@
# Minimal zenith environment with one safekeeper. This is equivalent to the built-in
# defaults that you get with no --config
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
pg_port = 5454
http_port = 7676

View File

@@ -1,23 +1,25 @@
use std::fs::{self, OpenOptions};
use std::io::{Read, Write};
use std::collections::BTreeMap;
use std::fs::{self, File};
use std::io::Write;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::os::unix::fs::PermissionsExt;
use std::process::Command;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::{collections::BTreeMap, path::PathBuf};
use anyhow::{Context, Result};
use lazy_static::lazy_static;
use regex::Regex;
use tar;
use postgres::{Client, NoTls};
use zenith_utils::connstring::connection_host_port;
use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
use crate::local_env::LocalEnv;
use crate::storage::{PageServerNode, WalProposerNode};
use pageserver::ZTimelineId;
use crate::postgresql_conf::PostgresConf;
use crate::storage::PageServerNode;
//
// ComputeControlPlane
@@ -25,27 +27,34 @@ use pageserver::ZTimelineId;
pub struct ComputeControlPlane {
base_port: u16,
pageserver: Arc<PageServerNode>,
pub nodes: BTreeMap<String, Arc<PostgresNode>>,
pub nodes: BTreeMap<(ZTenantId, String), Arc<PostgresNode>>,
env: LocalEnv,
}
impl ComputeControlPlane {
// Load current nodes with ports from data directories on disk
// Directory structure has the following layout:
// pgdatadirs
// |- tenants
// | |- <tenant_id>
// | | |- <branch name>
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
// TODO: since the pageserver does not have a config file yet, we assume here that
// it is running on the default port. Change that when the pageserver gets a config.
let pageserver = Arc::new(PageServerNode::from_env(&env));
let pgdatadirspath = env.repo_path.join("pgdatadirs");
let nodes: Result<BTreeMap<_, _>> = fs::read_dir(&pgdatadirspath)
let mut nodes = BTreeMap::default();
let pgdatadirspath = &env.pg_data_dirs_path();
for tenant_dir in fs::read_dir(&pgdatadirspath)
.with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
.into_iter()
.map(|f| {
PostgresNode::from_dir_entry(f?, &env, &pageserver)
.map(|node| (node.name.clone(), Arc::new(node)))
})
.collect();
let nodes = nodes?;
{
let tenant_dir = tenant_dir?;
for timeline_dir in fs::read_dir(tenant_dir.path())
.with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
{
let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
nodes.insert((node.tenantid, node.name.clone()), Arc::new(node));
}
}
Ok(ComputeControlPlane {
base_port: 55431,
@@ -64,80 +73,58 @@ impl ComputeControlPlane {
.unwrap_or(self.base_port)
}
pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
ComputeControlPlane {
base_port: 65431,
pageserver: Arc::clone(pageserver),
nodes: BTreeMap::new(),
env: local_env.clone(),
}
// FIXME: see also parse_point_in_time in branches.rs.
fn parse_point_in_time(
&self,
tenantid: ZTenantId,
s: &str,
) -> Result<(ZTimelineId, Option<Lsn>)> {
let mut strings = s.split('@');
let name = strings.next().unwrap();
let lsn = strings
.next()
.map(Lsn::from_str)
.transpose()
.context("invalid LSN in point-in-time specification")?;
// Resolve the timeline ID, given the human-readable branch name
let timeline_id = self
.pageserver
.branch_get_by_name(&tenantid, name)?
.timeline_id;
Ok((timeline_id, lsn))
}
/// Connect to a page server, get base backup, and untar it to initialize a
/// new data directory
pub fn new_from_page_server(
pub fn new_node(
&mut self,
is_test: bool,
timelineid: ZTimelineId,
tenantid: ZTenantId,
name: &str,
timeline_spec: &str,
port: Option<u16>,
) -> Result<Arc<PostgresNode>> {
let node_id = self.nodes.len() as u32 + 1;
// Resolve the human-readable timeline spec into timeline ID and LSN
let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?;
let port = port.unwrap_or_else(|| self.get_port());
let node = Arc::new(PostgresNode {
name: format!("pg{}", node_id),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()),
name: name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
is_test,
is_test: false,
timelineid,
lsn,
tenantid,
uses_wal_proposer: false,
});
node.init_from_page_server()?;
self.nodes.insert(node.name.clone(), Arc::clone(&node));
node.create_pgdata()?;
node.setup_pg_conf(self.env.pageserver.auth_type)?;
Ok(node)
}
pub fn new_test_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
let node = self.new_from_page_server(true, timelineid);
assert!(node.is_ok());
let node = node.unwrap();
// Configure the node to stream WAL directly to the pageserver
node.append_conf(
"postgresql.conf",
format!(
"callmemaybe_connstring = '{}'\n", // FIXME escaping
node.connstr()
)
.as_str(),
);
node
}
pub fn new_test_master_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
let node = self.new_from_page_server(true, timelineid).unwrap();
node.append_conf(
"postgresql.conf",
"synchronous_standby_names = 'safekeeper_proxy'\n",
);
node
}
pub fn new_node(&mut self, timelineid: ZTimelineId) -> Result<Arc<PostgresNode>> {
let node = self.new_from_page_server(false, timelineid).unwrap();
// Configure the node to stream WAL directly to the pageserver
node.append_conf(
"postgresql.conf",
format!(
"callmemaybe_connstring = '{}'\n", // FIXME escaping
node.connstr()
)
.as_str(),
);
self.nodes
.insert((tenantid, node.name.clone()), Arc::clone(&node));
Ok(node)
}
@@ -145,13 +132,17 @@ impl ComputeControlPlane {
///////////////////////////////////////////////////////////////////////////////
#[derive(Debug)]
pub struct PostgresNode {
pub address: SocketAddr,
name: String,
pub env: LocalEnv,
pageserver: Arc<PageServerNode>,
is_test: bool,
timelineid: ZTimelineId,
pub timelineid: ZTimelineId,
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
pub tenantid: ZTenantId,
uses_wal_proposer: bool,
}
impl PostgresNode {
@@ -167,43 +158,28 @@ impl PostgresNode {
);
}
lazy_static! {
static ref CONF_PORT_RE: Regex = Regex::new(r"(?m)^\s*port\s*=\s*(\d+)\s*$").unwrap();
}
// parse data directory name
let fname = entry.file_name();
let name = fname.to_str().unwrap().to_string();
// find out tcp port in config file
// Read config file into memory
let cfg_path = entry.path().join("postgresql.conf");
let config = fs::read_to_string(cfg_path.clone()).with_context(|| {
format!(
"failed to read config file in {}",
cfg_path.to_str().unwrap()
)
})?;
let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
let conf = PostgresConf::read(&mut conf_file)
.with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
let err_msg = format!(
"failed to find port definition in config file {}",
cfg_path.to_str().unwrap()
);
let port: u16 = CONF_PORT_RE
.captures(config.as_str())
.ok_or(anyhow::Error::msg(err_msg.clone() + " 1"))?
.iter()
.last()
.ok_or(anyhow::Error::msg(err_msg.clone() + " 2"))?
.ok_or(anyhow::Error::msg(err_msg.clone() + " 3"))?
.as_str()
.parse()
.with_context(|| err_msg)?;
// Read a few options from the config file
let context = format!("in config file {}", cfg_path_str);
let port: u16 = conf.parse_field("port", &context)?;
let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
// FIXME: What timeline is this server on? Would have to parse the postgresql.conf
// file for that, too. It's currently not needed for anything, but it would be
// nice to list the timeline in "zenith pg list"
let timelineid_buf = [0u8; 16];
let timelineid = ZTimelineId::from(timelineid_buf);
// parse recovery_target_lsn, if any
let recovery_target_lsn: Option<Lsn> =
conf.parse_field_optional("recovery_target_lsn", &context)?;
// ok now
Ok(PostgresNode {
@@ -213,104 +189,226 @@ impl PostgresNode {
pageserver: Arc::clone(pageserver),
is_test: false,
timelineid,
lsn: recovery_target_lsn,
tenantid,
uses_wal_proposer,
})
}
// Connect to a page server, get base backup, and untar it to initialize a
// new data directory
pub fn init_from_page_server(&self) -> Result<()> {
let pgdata = self.pgdata();
fn sync_safekeepers(&self, auth_token: &Option<String>) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir().join("postgres");
let mut cmd = Command::new(&pg_path);
cmd.arg("--sync-safekeepers")
.env_clear()
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("PGDATA", self.pgdata().to_str().unwrap())
.stdout(Stdio::piped())
// Comment this out to avoid capturing stderr (useful if the command hangs)
.stderr(Stdio::piped());
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);
}
let sync_handle = cmd
.spawn()
.expect("postgres --sync-safekeepers failed to start");
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"sync-safekeepers failed: '{}'",
String::from_utf8_lossy(&sync_output.stderr)
);
}
let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
println!("Safekeepers synced on {}", lsn);
Ok(lsn)
}
/// Get basebackup from the pageserver as a tar archive and extract it
/// to the `self.pgdata()` directory.
fn do_basebackup(&self, lsn: Option<Lsn>) -> Result<()> {
println!(
"Extracting base backup to create postgres instance: path={} port={}",
pgdata.display(),
self.pgdata().display(),
self.address.port()
);
// initialize data directory
if self.is_test {
fs::remove_dir_all(&pgdata).ok();
}
let sql = if let Some(lsn) = lsn {
format!("basebackup {} {} {}", self.tenantid, self.timelineid, lsn)
} else {
format!("basebackup {} {}", self.tenantid, self.timelineid)
};
let sql = format!("basebackup {}", self.timelineid);
let mut client = self
.pageserver
.page_server_psql_client()
.with_context(|| "connecting to page server failed")?;
.context("connecting to page server failed")?;
fs::create_dir_all(&pgdata)
.with_context(|| format!("could not create data directory {}", pgdata.display()))?;
fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
|| {
format!(
"could not set permissions in data directory {}",
pgdata.display()
)
},
)?;
// FIXME: The compute node should be able to stream the WAL it needs from the WAL safekeepers or archive.
// But that's not implemented yet. For now, 'pg_wal' is included in the base backup tarball that
// we receive from the Page Server, so we don't need to create the empty 'pg_wal' directory here.
//fs::create_dir_all(pgdata.join("pg_wal"))?;
let mut copyreader = client
let copyreader = client
.copy_out(sql.as_str())
.with_context(|| "page server 'basebackup' command failed")?;
.context("page server 'basebackup' command failed")?;
// FIXME: Currently, we slurp the whole tarball into memory, and then extract it,
// but we really should do this:
//let mut ar = tar::Archive::new(copyreader);
let mut buf = vec![];
copyreader
.read_to_end(&mut buf)
.with_context(|| "reading base backup from page server failed")?;
let mut ar = tar::Archive::new(buf.as_slice());
ar.unpack(&pgdata)
.with_context(|| "extracting page backup failed")?;
// listen for selected port
self.append_conf(
"postgresql.conf",
&format!(
"max_wal_senders = 10\n\
max_replication_slots = 10\n\
hot_standby = on\n\
shared_buffers = 1MB\n\
max_connections = 100\n\
wal_level = replica\n\
listen_addresses = '{address}'\n\
port = {port}\n",
address = self.address.ip(),
port = self.address.port()
),
);
// Never clean up old WAL. TODO: We should use a replication
// slot or something proper, to prevent the compute node
// from removing WAL that hasn't been streamed to the safekeepr or
// page server yet. But this will do for now.
self.append_conf("postgresql.conf", &format!("wal_keep_size='10TB'\n"));
// Connect it to the page server.
// Configure that node to take pages from pageserver
self.append_conf(
"postgresql.conf",
&format!(
"page_server_connstring = 'host={} port={}'\n\
zenith_timeline='{}'\n",
self.pageserver.address().ip(),
self.pageserver.address().port(),
self.timelineid
),
);
// Read the archive directly from the `CopyOutReader`
tar::Archive::new(copyreader)
.unpack(&self.pgdata())
.context("extracting base backup failed")?;
Ok(())
}
fn pgdata(&self) -> PathBuf {
self.env.repo_path.join("pgdatadirs").join(&self.name)
fn create_pgdata(&self) -> Result<()> {
fs::create_dir_all(&self.pgdata()).with_context(|| {
format!(
"could not create data directory {}",
self.pgdata().display()
)
})?;
fs::set_permissions(self.pgdata().as_path(), fs::Permissions::from_mode(0o700))
.with_context(|| {
format!(
"could not set permissions in data directory {}",
self.pgdata().display()
)
})
}
// Connect to a page server, get base backup, and untar it to initialize a
// new data directory
fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10");
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
conf.append("wal_log_hints", "on");
conf.append("max_replication_slots", "10");
conf.append("hot_standby", "on");
conf.append("shared_buffers", "1MB");
conf.append("fsync", "off");
conf.append("max_connections", "100");
conf.append("wal_level", "replica");
// wal_sender_timeout is the maximum time to wait for WAL replication.
// It also defines how often the walreceiver will send a feedback message to the wal sender.
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
// Never clean up old WAL. TODO: We should use a replication
// slot or something proper, to prevent the compute node
// from removing WAL that hasn't been streamed to the safekeeper or
// page server yet. (gh issue #349)
conf.append("wal_keep_size", "10TB");
// Configure the node to fetch pages from pageserver
let pageserver_connstr = {
let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
// Set up authentication
//
// $ZENITH_AUTH_TOKEN will be replaced with the value from the environment
// variable during compute pg startup. It is done this way because
// otherwise the user would be able to retrieve the value using the SHOW
// command or pg_settings
let password = if let AuthType::ZenithJWT = auth_type {
"$ZENITH_AUTH_TOKEN"
} else {
""
};
// NOTE: we avoid spaces in the connection string because it is less error prone if we forward it somewhere.
// Also note that not all parameters are supported here, because in compute we substitute $ZENITH_AUTH_TOKEN:
// we parse this string and build it back with the token from the env var, and for simplicity the rebuild
// uses only the needed variables, namely host, port, user and password.
format!("postgresql://no_user:{}@{}:{}", password, host, port)
};
conf.append("shared_preload_libraries", "zenith");
conf.append_line("");
conf.append("zenith.page_server_connstring", &pageserver_connstr);
conf.append("zenith.zenith_tenant", &self.tenantid.to_string());
conf.append("zenith.zenith_timeline", &self.timelineid.to_string());
if let Some(lsn) = self.lsn {
conf.append("recovery_target_lsn", &lsn.to_string());
}
conf.append_line("");
// Configure backpressure
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
// Actually the latency should be much smaller (ideally < 1 sec). But we assume that recently
// updated pages are not requested from the pageserver.
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {
// Configure the node to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
let wal_acceptors = self
.env
.safekeepers
.iter()
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("wal_acceptors", &wal_acceptors);
} else {
// We only use a setup without safekeepers for tests,
// and don't care about data durability on the pageserver,
// so set a more relaxed synchronous_commit.
conf.append("synchronous_commit", "remote_write");
// Configure the node to stream WAL directly to the pageserver
// This isn't really a supported configuration, but can be useful for
// testing.
conf.append("synchronous_standby_names", "pageserver");
conf.append("zenith.callmemaybe_connstring", &self.connstr());
}
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
file.write_all(conf.to_string().as_bytes())?;
Ok(())
}
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
let backup_lsn = if let Some(lsn) = self.lsn {
Some(lsn)
} else if self.uses_wal_proposer {
// LSN 0 means that this is a bootstrap and we just need to download the
// latest data from the pageserver. That is a bit clumsy, but the whole bootstrap
// procedure is evolving quite actively right now, so let's think about it again
// when things are more stable (TODO).
let lsn = self.sync_safekeepers(auth_token)?;
if lsn == Lsn(0) {
None
} else {
Some(lsn)
}
} else {
None
};
self.do_basebackup(backup_lsn)?;
Ok(())
}
pub fn pgdata(&self) -> PathBuf {
self.env.pg_data_dir(&self.tenantid, &self.name)
}
pub fn status(&self) -> &str {
@@ -326,60 +424,101 @@ impl PostgresNode {
}
}
pub fn append_conf(&self, config: &str, opts: &str) {
OpenOptions::new()
.append(true)
.open(self.pgdata().join(config).to_str().unwrap())
.unwrap()
.write_all(opts.as_bytes())
.unwrap();
}
fn pg_ctl(&self, args: &[&str]) -> Result<()> {
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
let pg_ctl_path = self.env.pg_bin_dir().join("pg_ctl");
let mut cmd = Command::new(pg_ctl_path);
cmd.args(
[
&[
"-D",
self.pgdata().to_str().unwrap(),
"-l",
self.pgdata().join("pg.log").to_str().unwrap(),
"-w", //wait till pg_ctl actually does what was asked
],
args,
]
.concat(),
)
.env_clear()
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);
}
let pg_ctl = cmd.status().context("pg_ctl failed")?;
let pg_ctl = Command::new(pg_ctl_path)
.args(
[
&[
"-D",
self.pgdata().to_str().unwrap(),
"-l",
self.pgdata().join("log").to_str().unwrap(),
],
args,
]
.concat(),
)
.env_clear()
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.status()
.with_context(|| "pg_ctl failed")?;
if !pg_ctl.success() {
anyhow::bail!("pg_ctl failed");
}
Ok(())
}
pub fn start(&self) -> Result<()> {
pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
// Bail if the node is already running.
if self.status() == "running" {
anyhow::bail!("The node is already running");
}
// 1. We always start the compute node from scratch, so
// if an old dir exists, preserve 'postgresql.conf' and drop the directory
let postgresql_conf_path = self.pgdata().join("postgresql.conf");
let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
format!(
"failed to read config file in {}",
postgresql_conf_path.to_str().unwrap()
)
})?;
fs::remove_dir_all(&self.pgdata())?;
self.create_pgdata()?;
// 2. Bring back config files
fs::write(&postgresql_conf_path, postgresql_conf)?;
// 3. Load basebackup
self.load_basebackup(auth_token)?;
if self.lsn.is_some() {
File::create(self.pgdata().join("standby.signal"))?;
}
// 4. Finally start the compute node postgres
println!("Starting postgres node at '{}'", self.connstr());
self.pg_ctl(&["start"])
self.pg_ctl(&["start"], auth_token)
}
pub fn restart(&self) -> Result<()> {
self.pg_ctl(&["restart"])
pub fn restart(&self, auth_token: &Option<String>) -> Result<()> {
self.pg_ctl(&["restart"], auth_token)
}
pub fn stop(&self) -> Result<()> {
self.pg_ctl(&["-m", "immediate", "stop"])
pub fn stop(&self, destroy: bool) -> Result<()> {
// If we are going to destroy the data directory,
// use immediate shutdown mode; otherwise,
// shut down gracefully to leave the data directory sane.
//
// The compute node always starts from scratch, so stop
// without destroy is only used for testing and debugging.
//
if destroy {
self.pg_ctl(&["-m", "immediate", "stop"], &None)?;
println!(
"Destroying postgres data directory '{}'",
self.pgdata().to_str().unwrap()
);
fs::remove_dir_all(&self.pgdata())?;
} else {
self.pg_ctl(&["stop"], &None)?;
}
Ok(())
}
pub fn connstr(&self) -> String {
format!(
"host={} port={} user={}",
"host={} port={} user={} dbname={}",
self.address.ip(),
self.address.port(),
self.whoami()
"zenith_admin",
"postgres"
)
}
@@ -389,62 +528,10 @@ impl PostgresNode {
.output()
.expect("failed to execute whoami");
if !output.status.success() {
panic!("whoami failed");
}
assert!(output.status.success(), "whoami failed");
String::from_utf8(output.stdout).unwrap().trim().to_string()
}
pub fn safe_psql(&self, db: &str, sql: &str) -> Vec<tokio_postgres::Row> {
let connstring = format!(
"host={} port={} dbname={} user={}",
self.address.ip(),
self.address.port(),
db,
self.whoami()
);
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
println!("Running {}", sql);
client.query(sql, &[]).unwrap()
}
pub fn open_psql(&self, db: &str) -> Client {
let connstring = format!(
"host={} port={} dbname={} user={}",
self.address.ip(),
self.address.port(),
db,
self.whoami()
);
Client::connect(connstring.as_str(), NoTls).unwrap()
}
pub fn start_proxy(&self, wal_acceptors: &str) -> WalProposerNode {
let proxy_path = self.env.pg_bin_dir().join("safekeeper_proxy");
match Command::new(proxy_path.as_path())
.args(&["--ztimelineid", &self.timelineid.to_string()])
.args(&["-s", wal_acceptors])
.args(&["-h", &self.address.ip().to_string()])
.args(&["-p", &self.address.port().to_string()])
.arg("-v")
.stderr(
OpenOptions::new()
.create(true)
.append(true)
.open(self.pgdata().join("safekeeper_proxy.log"))
.unwrap(),
)
.spawn()
{
Ok(child) => WalProposerNode { pid: child.id() },
Err(e) => panic!("Failed to launch {:?}: {}", proxy_path, e),
}
}
// TODO
pub fn pg_bench() {}
}
impl Drop for PostgresNode {
@@ -453,7 +540,7 @@ impl Drop for PostgresNode {
// and checking it here. But let's just clean datadirs on start.
fn drop(&mut self) {
if self.is_test {
let _ = self.stop();
let _ = self.stop(true);
}
}
}

View File

@@ -1,12 +1,50 @@
//
// Local control plane.
//
// Can start, cofigure and stop postgres instances running as a local processes.
// Can start, configure and stop postgres instances running as a local processes.
//
// Intended to be used in integration tests and in CLI tools for
// local installations.
//
use anyhow::{anyhow, bail, Context, Result};
use std::fs;
use std::path::Path;
use std::process::Command;
pub mod compute;
pub mod local_env;
pub mod postgresql_conf;
pub mod safekeeper;
pub mod storage;
/// Read a PID file
///
/// We expect a file that contains a single integer.
/// We return an i32 for compatibility with libc and nix.
pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
let pid_str = fs::read_to_string(pidfile)
.with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
let pid: i32 = pid_str
.parse()
.map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
if pid < 1 {
bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
}
Ok(pid)
}
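A small usage sketch (the pidfile path is a placeholder; `nix` is already a `control_plane` dependency):

use std::path::Path;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

let pid = read_pidfile(Path::new(".zenith/pageserver.pid"))?;
kill(Pid::from_raw(pid), Signal::SIGTERM)?;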
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
cmd
}
}

View File

@@ -1,389 +1,334 @@
//
// This module is responsible for locating and loading paths in a local setup.
//
// Now it also provides an init method which acts like a stub for a proper installation
// script which will use local paths.
//
use anyhow::Context;
use bytes::Bytes;
use rand::Rng;
//! This module is responsible for locating and loading paths in a local setup.
//!
//! Now it also provides an init method which acts like a stub for a proper installation
//! script which will use local paths.
use anyhow::{bail, Context};
use serde::{Deserialize, Serialize};
use std::env;
use std::fmt::Write;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{opt_display_serde, ZNodeId, ZTenantId};
use anyhow::Result;
use serde_derive::{Deserialize, Serialize};
use pageserver::ZTimelineId;
use walkeeper::xlog_utils;
use crate::safekeeper::SafekeeperNode;
//
// This data structure represents deserialized zenith config, which should be
// located in ~/.zenith
// This data structure represents the zenith CLI config
//
// TODO: should we also support ZENITH_CONF env var?
// It is deserialized from the .zenith/config file, or the config file passed
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct LocalEnv {
// Path to the Repository. Here page server and compute nodes will create and store their data.
pub repo_path: PathBuf,
// System identifier, from the PostgreSQL control file
pub systemid: u64,
// Base directory for all the nodes (the pageserver, safekeepers and
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
// '.zenith' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
// Path to postgres distribution. It's expected that "bin", "include",
// "lib", "share" from postgres distribution are there. If at some point
// in time we are able to run against vanilla postgres, we may split that
// into four separate paths and match the OS-specific installation layout.
#[serde(default)]
pub pg_distrib_dir: PathBuf,
// Path to pageserver binary.
#[serde(default)]
pub zenith_distrib_dir: PathBuf,
// Default tenant ID to use with the 'zenith' command line utility, when
// --tenantid is not explicitly specified.
#[serde(with = "opt_display_serde")]
#[serde(default)]
pub default_tenantid: Option<ZTenantId>,
// used to issue tokens, e.g. during pg start
#[serde(default)]
pub private_key_path: PathBuf,
pub pageserver: PageServerConf,
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(default)]
pub struct PageServerConf {
// node id
pub id: ZNodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
// used to determine which auth type is used
pub auth_type: AuthType,
// jwt auth token used for communication with pageserver
pub auth_token: String,
}
impl Default for PageServerConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
auth_type: AuthType::Trust,
auth_token: String::new(),
}
}
}
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
pub id: ZNodeId,
pub pg_port: u16,
pub http_port: u16,
pub sync: bool,
}
impl Default for SafekeeperConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
pg_port: 0,
http_port: 0,
sync: true,
}
}
}
impl LocalEnv {
// postgres installation
// postgres installation paths
pub fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
}
pub fn pg_lib_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("lib")
}
pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
Ok(self.zenith_distrib_dir.join("pageserver"))
}
pub fn safekeeper_bin(&self) -> anyhow::Result<PathBuf> {
Ok(self.zenith_distrib_dir.join("safekeeper"))
}
pub fn pg_data_dirs_path(&self) -> PathBuf {
self.base_data_dir.join("pgdatadirs").join("tenants")
}
pub fn pg_data_dir(&self, tenantid: &ZTenantId, branch_name: &str) -> PathBuf {
self.pg_data_dirs_path()
.join(tenantid.to_string())
.join(branch_name)
}
// TODO: move pageserver files into ./pageserver
pub fn pageserver_data_dir(&self) -> PathBuf {
self.base_data_dir.clone()
}
pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
self.base_data_dir.join("safekeepers").join(data_dir_name)
}
/// Create a LocalEnv from a config file.
///
/// Unlike 'load_config', this function fills in any defaults that are missing
/// from the config file.
pub fn create_config(toml: &str) -> anyhow::Result<Self> {
let mut env: LocalEnv = toml::from_str(toml)?;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
if env.pg_distrib_dir == Path::new("") {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
env.pg_distrib_dir = postgres_bin.into();
} else {
let cwd = env::current_dir()?;
env.pg_distrib_dir = cwd.join("tmp_install")
}
}
if !env.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
env.pg_distrib_dir.display()
);
}
// Find zenith binaries.
if env.zenith_distrib_dir == Path::new("") {
env.zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
}
for binary in ["pageserver", "safekeeper"] {
if !env.zenith_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{}' in zenith distrib dir '{}'",
binary,
env.zenith_distrib_dir.display()
);
}
}
// If no initial tenant ID was given, generate it.
if env.default_tenantid.is_none() {
env.default_tenantid = Some(ZTenantId::generate());
}
env.base_data_dir = base_path();
Ok(env)
}
/// Locate and load config
pub fn load_config() -> anyhow::Result<Self> {
let repopath = base_path();
if !repopath.exists() {
bail!(
"Zenith config is not found in {}. You need to run 'zenith init' first",
repopath.to_str().unwrap()
);
}
// TODO: check that it looks like a zenith repository
// load and parse file
let config = fs::read_to_string(repopath.join("config"))?;
let mut env: LocalEnv = toml::from_str(config.as_str())?;
env.base_data_dir = repopath;
Ok(env)
}
// This function is used only for testing purposes in the CLI, e.g. to generate tokens during init.
pub fn generate_auth_token(&self, claims: &Claims) -> anyhow::Result<String> {
let private_key_path = if self.private_key_path.is_absolute() {
self.private_key_path.to_path_buf()
} else {
self.base_data_dir.join(&self.private_key_path)
};
let key_data = fs::read(private_key_path)?;
encode_from_key_file(claims, &key_data)
}
//
// Initialize a new Zenith repository
//
pub fn init(&mut self) -> anyhow::Result<()> {
// check if config already exists
let base_path = &self.base_data_dir;
if base_path == Path::new("") {
bail!("repository base path is missing");
}
if base_path.exists() {
bail!(
"directory '{}' already exists. Perhaps already initialized?",
base_path.to_str().unwrap()
);
}
fs::create_dir(&base_path)?;
// generate keys for jwt
// openssl genrsa -out private_key.pem 2048
let private_key_path;
if self.private_key_path == PathBuf::new() {
private_key_path = base_path.join("auth_private_key.pem");
let keygen_output = Command::new("openssl")
.arg("genrsa")
.args(&["-out", private_key_path.to_str().unwrap()])
.arg("2048")
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
self.private_key_path = PathBuf::from("auth_private_key.pem");
let public_key_path = base_path.join("auth_public_key.pem");
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
let keygen_output = Command::new("openssl")
.arg("rsa")
.args(&["-in", private_key_path.to_str().unwrap()])
.arg("-pubout")
.args(&["-outform", "PEM"])
.args(&["-out", public_key_path.to_str().unwrap()])
.stdout(Stdio::null())
.output()
.context("failed to generate auth private key")?;
if !keygen_output.status.success() {
bail!(
"openssl failed: '{}'",
String::from_utf8_lossy(&keygen_output.stderr)
);
}
}
self.pageserver.auth_token =
self.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
fs::create_dir_all(self.pg_data_dirs_path())?;
for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
}
let mut conf_content = String::new();
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
write!(
&mut conf_content,
r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'zenith' command-line
# utility.
"#
)?;
// Convert the LocalEnv to a toml file.
//
// This could be as simple as this:
//
// conf_content += &toml::to_string_pretty(env)?;
//
// But it results in a "values must be emitted before tables" error. I'm not sure
// why; AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>', is last.
// Maybe rust reorders the fields to avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(&self)?)?;
fs::write(base_path.join("config"), conf_content)?;
Ok(())
}
}
// Find the repository path: follow ZENITH_REPO_DIR if set, otherwise default to ".zenith".
fn zenith_repo_dir() -> PathBuf {
base_path()
}
fn base_path() -> PathBuf {
match std::env::var_os("ZENITH_REPO_DIR") {
Some(val) => PathBuf::from(val.to_str().unwrap()),
None => ".zenith".into(),
}
}
//
// Initialize a new Zenith repository
//
pub fn init() -> Result<()> {
// check if config already exists
let repo_path = zenith_repo_dir();
if repo_path.exists() {
anyhow::bail!(
"{} already exists. Perhaps already initialized?",
repo_path.to_str().unwrap()
);
}
// Now we can run init only from crate directory, so check that current dir is our crate.
// Use 'pageserver/Cargo.toml' existence as evidence.
let cargo_path = env::current_dir()?;
if !cargo_path.join("pageserver/Cargo.toml").exists() {
anyhow::bail!(
"Current dirrectory does not look like a zenith repo. \
Please, run 'init' from zenith repo root."
);
}
// ok, now check that expected binaries are present
// check postgres
let pg_distrib_dir = cargo_path.join("tmp_install");
let pg_path = pg_distrib_dir.join("bin/postgres");
if !pg_path.exists() {
anyhow::bail!(
"Can't find postres binary at {}. \
Perhaps './pgbuild.sh' is needed to build it first.",
pg_path.to_str().unwrap()
);
}
// check pageserver
let zenith_distrib_dir = cargo_path.join("target/debug/");
let pageserver_path = zenith_distrib_dir.join("pageserver");
if !pageserver_path.exists() {
anyhow::bail!(
"Can't find pageserver binary at {}. Please build it.",
pageserver_path.to_str().unwrap()
);
}
// ok, we are good to go
let mut conf = LocalEnv {
repo_path: repo_path.clone(),
pg_distrib_dir,
zenith_distrib_dir,
systemid: 0,
};
init_repo(&mut conf)?;
Ok(())
}
pub fn init_repo(local_env: &mut LocalEnv) -> Result<()> {
let repopath = &local_env.repo_path;
fs::create_dir(&repopath)
.with_context(|| format!("could not create directory {}", repopath.display()))?;
fs::create_dir(repopath.join("pgdatadirs"))?;
fs::create_dir(repopath.join("timelines"))?;
fs::create_dir(repopath.join("refs"))?;
fs::create_dir(repopath.join("refs").join("branches"))?;
fs::create_dir(repopath.join("refs").join("tags"))?;
println!("created directory structure in {}", repopath.display());
// Create initial timeline
let tli = create_timeline(&local_env, None)?;
let timelinedir = repopath.join("timelines").join(tli.to_string());
println!("created initial timeline {}", timelinedir.display());
// Run initdb
//
// FIXME: we create it temporarily in "tmp" directory, and move it into
// the repository. Use "tempdir()" or something? Or just create it directly
// in the repo?
let initdb_path = local_env.pg_bin_dir().join("initdb");
let _initdb = Command::new(initdb_path)
.args(&["-D", "tmp"])
.arg("--no-instructions")
.env_clear()
.env("LD_LIBRARY_PATH", local_env.pg_lib_dir().to_str().unwrap())
.stdout(Stdio::null())
.status()
.with_context(|| "failed to execute initdb")?;
println!("initdb succeeded");
// Read control file to extract the LSN and system id
let controlfile =
postgres_ffi::decode_pg_control(Bytes::from(fs::read("tmp/global/pg_control")?))?;
let systemid = controlfile.system_identifier;
let lsn = controlfile.checkPoint;
let lsnstr = format!("{:016X}", lsn);
// Move the initial WAL file
fs::rename(
"tmp/pg_wal/000000010000000000000001",
timelinedir
.join("wal")
.join("000000010000000000000001.partial"),
)?;
println!("moved initial WAL file");
// Remove pg_wal
fs::remove_dir_all("tmp/pg_wal")?;
println!("removed tmp/pg_wal");
force_crash_recovery(&PathBuf::from("tmp"))?;
println!("updated pg_control");
let target = timelinedir.join("snapshots").join(&lsnstr);
fs::rename("tmp", &target)?;
println!("moved 'tmp' to {}", target.display());
// Create 'main' branch to refer to the initial timeline
let data = tli.to_string();
fs::write(repopath.join("refs").join("branches").join("main"), data)?;
println!("created main branch");
// Also update the system id in the LocalEnv
local_env.systemid = systemid;
// write config
let toml = toml::to_string(&local_env)?;
fs::write(repopath.join("config"), toml)?;
println!(
"new zenith repository was created in {}",
repopath.display()
);
Ok(())
}
// If control file says the cluster was shut down cleanly, modify it, to mark
// it as crashed. That forces crash recovery when you start the cluster.
//
// FIXME:
// We currently do this to the initial snapshot in "zenith init". It would
// be more natural to do this when the snapshot is restored instead, but we
// currently don't have any code to create new snapshots, so it doesn't matter
// Or better yet, use a less hacky way of putting the cluster into recovery.
// Perhaps create a backup label file in the data directory when it's restored.
fn force_crash_recovery(datadir: &Path) -> Result<()> {
// Read in the control file
let controlfilepath = datadir.to_path_buf().join("global").join("pg_control");
let mut controlfile =
postgres_ffi::decode_pg_control(Bytes::from(fs::read(controlfilepath.as_path())?))?;
controlfile.state = postgres_ffi::DBState_DB_IN_PRODUCTION;
fs::write(
controlfilepath.as_path(),
postgres_ffi::encode_pg_control(controlfile),
)?;
Ok(())
}
// check that config file is present
pub fn load_config(repopath: &Path) -> Result<LocalEnv> {
if !repopath.exists() {
anyhow::bail!(
"Zenith config is not found in {}. You need to run 'zenith init' first",
repopath.to_str().unwrap()
);
}
// load and parse file
let config = fs::read_to_string(repopath.join("config"))?;
toml::from_str(config.as_str()).map_err(|e| e.into())
}
// local env for tests
pub fn test_env(testname: &str) -> LocalEnv {
fs::create_dir_all("../tmp_check").expect("could not create directory ../tmp_check");
let repo_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../tmp_check/")
.join(testname);
// Remove remnants of old test repo
let _ = fs::remove_dir_all(&repo_path);
let mut local_env = LocalEnv {
repo_path,
pg_distrib_dir: Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install"),
zenith_distrib_dir: cargo_bin_dir(),
systemid: 0,
};
init_repo(&mut local_env).expect("could not initialize zenith repository");
return local_env;
}
// Find the directory where the binaries were put (i.e. target/debug/)
pub fn cargo_bin_dir() -> PathBuf {
let mut pathbuf = std::env::current_exe().unwrap();
pathbuf.pop();
if pathbuf.ends_with("deps") {
pathbuf.pop();
}
return pathbuf;
}
#[derive(Debug, Clone, Copy)]
pub struct PointInTime {
pub timelineid: ZTimelineId,
pub lsn: u64,
}
fn create_timeline(local_env: &LocalEnv, ancestor: Option<PointInTime>) -> Result<ZTimelineId> {
let repopath = &local_env.repo_path;
// Create initial timeline
let mut tli_buf = [0u8; 16];
rand::thread_rng().fill(&mut tli_buf);
let timelineid = ZTimelineId::from(tli_buf);
let timelinedir = repopath.join("timelines").join(timelineid.to_string());
fs::create_dir(&timelinedir)?;
fs::create_dir(&timelinedir.join("snapshots"))?;
fs::create_dir(&timelinedir.join("wal"))?;
if let Some(ancestor) = ancestor {
let data = format!(
"{}@{:X}/{:X}",
ancestor.timelineid,
ancestor.lsn >> 32,
ancestor.lsn & 0xffffffff
);
fs::write(timelinedir.join("ancestor"), data)?;
}
Ok(timelineid)
}
// Parse an LSN in the format used in filenames
//
// For example: 00000000015D3DD8
//
fn parse_lsn(s: &str) -> std::result::Result<u64, std::num::ParseIntError> {
u64::from_str_radix(s, 16)
}
// Create a new branch in the repository (for the "zenith branch" subcommand)
pub fn create_branch(
local_env: &LocalEnv,
branchname: &str,
startpoint: PointInTime,
) -> Result<()> {
let repopath = &local_env.repo_path;
// create a new timeline for it
let newtli = create_timeline(local_env, Some(startpoint))?;
let newtimelinedir = repopath.join("timelines").join(newtli.to_string());
let data = newtli.to_string();
fs::write(
repopath.join("refs").join("branches").join(branchname),
data,
)?;
// Copy the latest snapshot (TODO: before the startpoint) and all WAL
// TODO: be smarter and avoid the copying...
let (_maxsnapshot, oldsnapshotdir) = find_latest_snapshot(local_env, startpoint.timelineid)?;
let copy_opts = fs_extra::dir::CopyOptions::new();
fs_extra::dir::copy(oldsnapshotdir, newtimelinedir.join("snapshots"), &copy_opts)?;
let oldtimelinedir = repopath
.join("timelines")
.join(startpoint.timelineid.to_string());
let mut copy_opts = fs_extra::dir::CopyOptions::new();
copy_opts.content_only = true;
fs_extra::dir::copy(
oldtimelinedir.join("wal"),
newtimelinedir.join("wal"),
&copy_opts,
)?;
Ok(())
}
// Find the end of valid WAL in a wal directory
pub fn find_end_of_wal(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<u64> {
let repopath = &local_env.repo_path;
let waldir = repopath
.join("timelines")
.join(timeline.to_string())
.join("wal");
let (lsn, _tli) = xlog_utils::find_end_of_wal(&waldir, 16 * 1024 * 1024, true);
return Ok(lsn);
}
// Find the latest snapshot for a timeline
fn find_latest_snapshot(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<(u64, PathBuf)> {
let repopath = &local_env.repo_path;
let snapshotsdir = repopath
.join("timelines")
.join(timeline.to_string())
.join("snapshots");
let paths = fs::read_dir(&snapshotsdir)?;
let mut maxsnapshot: u64 = 0;
let mut snapshotdir: Option<PathBuf> = None;
for path in paths {
let path = path?;
let filename = path.file_name().to_str().unwrap().to_owned();
if let Ok(lsn) = parse_lsn(&filename) {
maxsnapshot = std::cmp::max(lsn, maxsnapshot);
snapshotdir = Some(path.path());
}
}
if maxsnapshot == 0 {
// TODO: check ancestor timeline
anyhow::bail!("no snapshot found in {}", snapshotsdir.display());
}
Ok((maxsnapshot, snapshotdir.unwrap()))
}


@@ -0,0 +1,228 @@
///
/// Module for parsing postgresql.conf file.
///
/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
/// enough to extract a few settings we need in Zenith, assuming you don't do
/// funny stuff like include-directives or funny escaping.
use anyhow::{bail, Context, Result};
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;
use std::io::BufRead;
use std::str::FromStr;
/// In-memory representation of a postgresql.conf file
#[derive(Default)]
pub struct PostgresConf {
lines: Vec<String>,
hash: HashMap<String, String>,
}
lazy_static! {
static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
}
impl PostgresConf {
pub fn new() -> PostgresConf {
PostgresConf::default()
}
/// Read file into memory
pub fn read(read: impl std::io::Read) -> Result<PostgresConf> {
let mut result = Self::new();
for line in std::io::BufReader::new(read).lines() {
let line = line?;
// Store each line in a vector, in original format
result.lines.push(line.clone());
// Also parse each line and insert key=value lines into a hash map.
//
// FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL.
// But it's close enough for our usage.
let line = line.trim();
if line.starts_with('#') {
// comment, ignore
continue;
} else if let Some(caps) = CONF_LINE_RE.captures(line) {
let name = caps.get(1).unwrap().as_str();
let raw_val = caps.get(2).unwrap().as_str();
if let Ok(val) = deescape_str(raw_val) {
// Note: if there's already an entry in the hash map for
// this key, this will replace it. That's the behavior
// we want; when PostgreSQL reads the file, each line
// overrides any previous value for the same setting.
result.hash.insert(name.to_string(), val.to_string());
}
}
}
Ok(result)
}
/// Return the current value of 'option'
pub fn get(&self, option: &str) -> Option<&str> {
self.hash.get(option).map(|x| x.as_ref())
}
/// Return the current value of a field, parsed to the right datatype.
///
/// This calls the FromStr::parse() function on the value of the field. If
/// the field does not exist, or parsing fails, returns an error.
///
pub fn parse_field<T>(&self, field_name: &str, context: &str) -> Result<T>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
self.get(field_name)
.with_context(|| format!("could not find '{}' option {}", field_name, context))?
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
}
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
if let Some(val) = self.get(field_name) {
let result = val
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
Ok(Some(result))
} else {
Ok(None)
}
}
/// Append a new setting to the config file.
///
/// Note: if you call this multiple times for the same option, the config
/// file will contain a line for each call. It would be nice to have a function
/// to change an existing line, but that's a TODO.
///
pub fn append(&mut self, option: &str, value: &str) {
self.lines
.push(format!("{}={}\n", option, escape_str(value)));
self.hash.insert(option.to_string(), value.to_string());
}
/// Append an arbitrary non-setting line to the config file
pub fn append_line(&mut self, line: &str) {
self.lines.push(line.to_string());
}
}
impl fmt::Display for PostgresConf {
/// Return the whole configuration file as a string
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for line in self.lines.iter() {
f.write_str(line)?;
}
Ok(())
}
}
/// Escape a value for putting in postgresql.conf.
fn escape_str(s: &str) -> String {
// If the string doesn't contain anything that needs quoting or escaping, return it
// as it is.
//
// The first part of the regex, before the '|', matches the INTEGER rule in the
// PostgreSQL flex grammar (guc-file.l). It matches plain integers like "123" and
// "-123", and also accepts units like "10MB". The second part of the regex matches
// the UNQUOTED_STRING rule, and accepts strings that contain a single word, beginning
// with a letter. That covers words like "off" or "posix". Everything else is quoted.
//
// This regex is a bit more conservative than the rules in guc-file.l, so we quote some
// strings that PostgreSQL would accept without quoting, but that's OK.
lazy_static! {
static ref UNQUOTED_RE: Regex =
Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
}
if UNQUOTED_RE.is_match(s) {
s.to_string()
} else {
// Otherwise escape and quote it
let s = s
.replace('\\', "\\\\")
.replace('\n', "\\n")
.replace('\'', "''");
"\'".to_owned() + &s + "\'"
}
}
/// De-escape a possibly-quoted value.
///
/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL
/// does this.
fn deescape_str(s: &str) -> Result<String> {
// If the string has a quote at the beginning and end, strip them out.
if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
let mut result = String::new();
let mut iter = s[1..(s.len() - 1)].chars().peekable();
while let Some(c) = iter.next() {
let newc = if c == '\\' {
match iter.next() {
Some('b') => '\x08',
Some('f') => '\x0c',
Some('n') => '\n',
Some('r') => '\r',
Some('t') => '\t',
Some('0'..='7') => {
// TODO
bail!("octal escapes not supported");
}
Some(n) => n,
None => break,
}
} else if c == '\'' && iter.peek() == Some(&'\'') {
// doubled quote becomes just one quote
iter.next().unwrap()
} else {
c
};
result.push(newc);
}
Ok(result)
} else {
Ok(s.to_string())
}
}
#[test]
fn test_postgresql_conf_escapes() -> Result<()> {
assert_eq!(escape_str("foo bar"), "'foo bar'");
// these don't need to be quoted
assert_eq!(escape_str("foo"), "foo");
assert_eq!(escape_str("123"), "123");
assert_eq!(escape_str("+123"), "+123");
assert_eq!(escape_str("-10"), "-10");
assert_eq!(escape_str("1foo"), "1foo");
assert_eq!(escape_str("foo1"), "foo1");
assert_eq!(escape_str("10MB"), "10MB");
assert_eq!(escape_str("-10kB"), "-10kB");
// these need quoting and/or escaping
assert_eq!(escape_str("foo bar"), "'foo bar'");
assert_eq!(escape_str("fo'o"), "'fo''o'");
assert_eq!(escape_str("fo\no"), "'fo\\no'");
assert_eq!(escape_str("fo\\o"), "'fo\\\\o'");
assert_eq!(escape_str("10 cats"), "'10 cats'");
// Test de-escaping
assert_eq!(deescape_str(&escape_str("foo"))?, "foo");
assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r");
assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t");
// octal-escapes are currently not supported
assert!(deescape_str("'foo\\7\\07\\007'").is_err());
Ok(())
}
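For orientation, here is a minimal usage sketch of the PostgresConf API above (hypothetical example code, not part of the module; it assumes the same crate context and the anyhow `Result` alias used above):

```rust
// Hypothetical usage example for PostgresConf; it only uses the API defined above.
fn postgresql_conf_example() -> Result<()> {
    let text = "shared_buffers = 128MB\nport = 5432\n# a comment\n";
    let mut conf = PostgresConf::read(text.as_bytes())?;

    // Parsed settings are available via get() and parse_field().
    assert_eq!(conf.get("shared_buffers"), Some("128MB"));
    let port: u16 = conf.parse_field("port", "in the example")?;
    assert_eq!(port, 5432);

    // append() adds a new line; when PostgreSQL reads the file,
    // later lines override earlier values for the same setting.
    conf.append("max_wal_senders", "10");
    println!("{}", conf);
    Ok(())
}
```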


@@ -0,0 +1,264 @@
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
use std::time::Duration;
use std::{io, result, thread};
use anyhow::bail;
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use postgres::Config;
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::zid::ZNodeId;
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
use crate::{fill_rust_env_vars, read_pidfile};
use zenith_utils::connstring::connection_address;
#[derive(Error, Debug)]
pub enum SafekeeperHttpError {
#[error("Reqwest error: {0}")]
Transport(#[from] reqwest::Error),
#[error("Error: {0}")]
Response(String),
}
type Result<T> = result::Result<T, SafekeeperHttpError>;
pub trait ResponseErrorMessageExt: Sized {
fn error_from_body(self) -> Result<Self>;
}
impl ResponseErrorMessageExt for Response {
fn error_from_body(self) -> Result<Self> {
let status = self.status();
if !(status.is_client_error() || status.is_server_error()) {
return Ok(self);
}
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = self.url().to_owned();
Err(SafekeeperHttpError::Response(
match self.json::<HttpErrorBody>() {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
},
))
}
}
//
// Control routines for safekeeper.
//
// Used in CLI and tests.
//
#[derive(Debug)]
pub struct SafekeeperNode {
pub id: ZNodeId,
pub conf: SafekeeperConf,
pub pg_connection_config: Config,
pub env: LocalEnv,
pub http_client: Client,
pub http_base_url: String,
pub pageserver: Arc<PageServerNode>,
}
impl SafekeeperNode {
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
let pageserver = Arc::new(PageServerNode::from_env(env));
println!("initializing for sk {} for {}", conf.id, conf.http_port);
SafekeeperNode {
id: conf.id,
conf: conf.clone(),
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
pageserver,
}
}
/// Construct libpq connection string for connecting to this safekeeper.
fn safekeeper_connection_config(port: u16) -> Config {
// TODO safekeeper authentication not implemented yet
format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
.parse()
.unwrap()
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
}
pub fn datadir_path(&self) -> PathBuf {
SafekeeperNode::datadir_path_by_id(&self.env, self.id)
}
pub fn pid_file(&self) -> PathBuf {
self.datadir_path().join("safekeeper.pid")
}
pub fn start(&self) -> anyhow::Result<()> {
print!(
"Starting safekeeper at '{}' in '{}'",
connection_address(&self.pg_connection_config),
self.datadir_path().display()
);
io::stdout().flush().unwrap();
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
let mut cmd = Command::new(self.env.safekeeper_bin()?);
fill_rust_env_vars(
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
.args(&["--id", self.id.to_string().as_ref()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.args(&["--recall", "1 second"])
.arg("--daemonize"),
);
if !self.conf.sync {
cmd.arg("--no-sync");
}
if !cmd.status()?.success() {
bail!(
"Safekeeper failed to start. See '{}' for details.",
self.datadir_path().join("safekeeper.log").display()
);
}
// It takes a while for the safekeeper to start up. Wait until it is
// open for business.
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(_) => {
println!("\nSafekeeper started");
return Ok(());
}
Err(err) => {
match err {
SafekeeperHttpError::Transport(err) => {
if err.is_connect() && retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!(
"Safekeeper not responding yet, err {} retrying ({})...",
err, retries
);
}
}
SafekeeperHttpError::Response(msg) => {
bail!("safekeeper failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("safekeeper failed to start in {} seconds", RETRIES);
}
///
/// Stop the server.
///
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
/// Otherwise we use SIGTERM, triggering a clean shutdown
///
/// If the server is not running, returns success
///
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
let pid_file = self.pid_file();
if !pid_file.exists() {
println!("Safekeeper {} is already stopped", self.id);
return Ok(());
}
let pid = read_pidfile(&pid_file)?;
let pid = Pid::from_raw(pid);
let sig = if immediate {
println!("Stop safekeeper immediately");
Signal::SIGQUIT
} else {
println!("Stop safekeeper gracefully");
Signal::SIGTERM
};
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!(
"Safekeeper with pid {} does not exist, but a PID file was found",
pid
);
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to safekeeper with pid {}: {}",
pid,
err.desc()
),
}
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if safekeeper flushes a lot of data
for _ in 0..100 {
if let Err(_e) = TcpStream::connect(&address) {
println!("Safekeeper stopped receiving connections");
//Now check status
match self.check_status() {
Ok(_) => {
println!("Safekeeper status is OK. Wait a bit.");
thread::sleep(Duration::from_secs(1));
}
Err(err) => {
println!("Safekeeper status is: {}", err);
return Ok(());
}
}
} else {
println!("Safekeeper still receives connections");
thread::sleep(Duration::from_secs(1));
}
}
bail!("Failed to stop safekeeper with pid {}", pid);
}
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
// TODO: authentication
//if self.env.auth_type == AuthType::ZenithJWT {
// builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
//}
self.http_client.request(method, url)
}
pub fn check_status(&self) -> Result<()> {
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
.send()?
.error_from_body()?;
Ok(())
}
}
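As a usage note, here is a hedged sketch of how these control routines might be driven from the CLI (hypothetical helper, not code from this change; it only uses the SafekeeperNode API defined above):

```rust
// Hypothetical helper: restart every safekeeper listed in the local environment.
fn restart_all_safekeepers(env: &LocalEnv) -> anyhow::Result<()> {
    for conf in &env.safekeepers {
        let node = SafekeeperNode::from_env(env, conf);
        node.stop(false)?; // graceful SIGTERM; returns Ok(()) if it is not running
        node.start()?;     // waits until the HTTP status endpoint responds
    }
    Ok(())
}
```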


@@ -1,135 +1,59 @@
use anyhow::Result;
use std::fs;
use std::io;
use std::net::SocketAddr;
use std::io::Write;
use std::net::TcpStream;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::process::Command;
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use std::{io, result, thread};
use postgres::{Client, NoTls};
use anyhow::bail;
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use pageserver::http::models::{BranchCreateRequest, TenantCreateRequest};
use postgres::{Config, NoTls};
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
use crate::compute::PostgresNode;
use crate::local_env::LocalEnv;
use pageserver::ZTimelineId;
use crate::{fill_rust_env_vars, read_pidfile};
use pageserver::branches::BranchInfo;
use pageserver::tenant_mgr::TenantInfo;
use zenith_utils::connstring::connection_address;
//
// Collection of several example deployments useful for tests.
//
// I'm intentionally modelling the storage and compute control planes as separate entities,
// as that is closer to the actual setup.
//
pub struct TestStorageControlPlane {
pub wal_acceptors: Vec<WalAcceptorNode>,
pub pageserver: Arc<PageServerNode>,
pub test_done: AtomicBool,
pub repopath: PathBuf,
#[derive(Error, Debug)]
pub enum PageserverHttpError {
#[error("Reqwest error: {0}")]
Transport(#[from] reqwest::Error),
#[error("Error: {0}")]
Response(String),
}
impl TestStorageControlPlane {
// Peek into the repository, to grab the timeline ID of given branch
pub fn get_branch_timeline(&self, branchname: &str) -> ZTimelineId {
let branchpath = self.repopath.join("refs/branches/".to_owned() + branchname);
type Result<T> = result::Result<T, PageserverHttpError>;
ZTimelineId::from_str(&(fs::read_to_string(&branchpath).unwrap())).unwrap()
}
// postgres <-> page_server
//
// Initialize a new repository and configure a page server to run in it
//
pub fn one_page_server(local_env: &LocalEnv) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();
let pserver = Arc::new(PageServerNode {
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
});
pserver.start().unwrap();
TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: pserver,
test_done: AtomicBool::new(false),
repopath: repopath,
}
}
pub fn one_page_server_no_start(local_env: &LocalEnv) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();
let pserver = Arc::new(PageServerNode {
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
});
TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: pserver,
test_done: AtomicBool::new(false),
repopath: repopath,
}
}
// postgres <-> {wal_acceptor1, wal_acceptor2, ...}
pub fn fault_tolerant(local_env: &LocalEnv, redundancy: usize) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();
let mut cplane = TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: Arc::new(PageServerNode {
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
}),
test_done: AtomicBool::new(false),
repopath: repopath,
};
cplane.pageserver.start().unwrap();
const WAL_ACCEPTOR_PORT: usize = 54321;
for i in 0..redundancy {
let wal_acceptor = WalAcceptorNode {
listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
.parse()
.unwrap(),
data_dir: local_env.repo_path.join(format!("wal_acceptor_{}", i)),
env: local_env.clone(),
};
wal_acceptor.init();
wal_acceptor.start();
cplane.wal_acceptors.push(wal_acceptor);
}
cplane
}
pub fn stop(&self) {
self.test_done.store(true, Ordering::Relaxed);
}
pub fn get_wal_acceptor_conn_info(&self) -> String {
self.wal_acceptors
.iter()
.map(|wa| wa.listen.to_string())
.collect::<Vec<String>>()
.join(",")
}
pub fn is_running(&self) -> bool {
self.test_done.load(Ordering::Relaxed)
}
pub trait ResponseErrorMessageExt: Sized {
fn error_from_body(self) -> Result<Self>;
}
impl Drop for TestStorageControlPlane {
fn drop(&mut self) {
self.stop();
impl ResponseErrorMessageExt for Response {
fn error_from_body(self) -> Result<Self> {
let status = self.status();
if !(status.is_client_error() || status.is_server_error()) {
return Ok(self);
}
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = self.url().to_owned();
Err(PageserverHttpError::Response(
match self.json::<HttpErrorBody>() {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
},
))
}
}
@@ -138,54 +62,125 @@ impl Drop for TestStorageControlPlane {
//
// Used in CLI and tests.
//
#[derive(Debug)]
pub struct PageServerNode {
kill_on_exit: bool,
listen_address: Option<SocketAddr>,
pub pg_connection_config: Config,
pub env: LocalEnv,
pub http_client: Client,
pub http_base_url: String,
}
impl PageServerNode {
pub fn from_env(env: &LocalEnv) -> PageServerNode {
PageServerNode {
kill_on_exit: false,
listen_address: None, // default
let password = if env.pageserver.auth_type == AuthType::ZenithJWT {
&env.pageserver.auth_token
} else {
""
};
Self {
pg_connection_config: Self::pageserver_connection_config(
password,
&env.pageserver.listen_pg_addr,
),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://{}/v1", env.pageserver.listen_http_addr),
}
}
pub fn address(&self) -> SocketAddr {
match self.listen_address {
Some(addr) => addr,
None => "127.0.0.1:64000".parse().unwrap(),
/// Construct libpq connection string for connecting to the pageserver.
fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
format!("postgresql://no_user:{}@{}/no_db", password, listen_addr)
.parse()
.unwrap()
}
pub fn init(
&self,
create_tenant: Option<&str>,
config_overrides: &[&str],
) -> anyhow::Result<()> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotes, etc.
let base_data_dir_param = self.env.base_data_dir.display().to_string();
let pg_distrib_dir_param =
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
let auth_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
let listen_http_addr_param = format!(
"listen_http_addr='{}'",
self.env.pageserver.listen_http_addr
);
let listen_pg_addr_param =
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
let mut args = Vec::with_capacity(20);
args.push("--init");
args.extend(["-D", &base_data_dir_param]);
args.extend(["-c", &pg_distrib_dir_param]);
args.extend(["-c", &authg_type_param]);
args.extend(["-c", &listen_http_addr_param]);
args.extend(["-c", &listen_pg_addr_param]);
args.extend(["-c", &id]);
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
if self.env.pageserver.auth_type != AuthType::Trust {
args.extend([
"-c",
"auth_validation_public_key_path='auth_public_key.pem'",
]);
}
if let Some(tenantid) = create_tenant {
args.extend(["--create-tenant", tenantid])
}
let status = fill_rust_env_vars(cmd.args(args))
.status()
.expect("pageserver init failed");
if !status.success() {
bail!("pageserver init failed");
}
Ok(())
}
pub fn repo_path(&self) -> PathBuf {
self.env.repo_path.clone()
self.env.pageserver_data_dir()
}
pub fn pid_file(&self) -> PathBuf {
self.env.repo_path.join("pageserver.pid")
self.repo_path().join("pageserver.pid")
}
pub fn start(&self) -> Result<()> {
println!(
"Starting pageserver at '{}' in {}",
self.address(),
pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
print!(
"Starting pageserver at '{}' in '{}'",
connection_address(&self.pg_connection_config),
self.repo_path().display()
);
io::stdout().flush().unwrap();
let mut cmd = Command::new(self.env.zenith_distrib_dir.join("pageserver"));
cmd.args(&["-l", self.address().to_string().as_str()])
.arg("-d")
.env_clear()
.env("RUST_BACKTRACE", "1")
.env("ZENITH_REPO_DIR", self.repo_path())
.env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postres-wal-redo binary
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
let mut cmd = Command::new(self.env.pageserver_bin()?);
let repo_path = self.repo_path();
let mut args = vec!["-D", repo_path.to_str().unwrap()];
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
if !cmd.status()?.success() {
anyhow::bail!(
bail!(
"Pageserver failed to start. See '{}' for details.",
self.repo_path().join("pageserver.log").display()
);
@@ -193,221 +188,194 @@ impl PageServerNode {
// It takes a while for the page server to start up. Wait until it is
// open for business.
for retries in 1..15 {
let client = self.page_server_psql_client();
if client.is_ok() {
break;
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(_) => {
println!("\nPageserver started");
return Ok(());
}
Err(err) => {
match err {
PageserverHttpError::Transport(err) => {
if err.is_connect() && retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!(
"Pageserver not responding yet, err {} retrying ({})...",
err, retries
);
}
}
PageserverHttpError::Response(msg) => {
bail!("pageserver failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("pageserver failed to start in {} seconds", RETRIES);
}
///
/// Stop the server.
///
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
/// Otherwise we use SIGTERM, triggering a clean shutdown
///
/// If the server is not running, returns success
///
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
let pid_file = self.pid_file();
if !pid_file.exists() {
println!("Pageserver is already stopped");
return Ok(());
}
let pid = Pid::from_raw(read_pidfile(&pid_file)?);
let sig = if immediate {
println!("Stop pageserver immediately");
Signal::SIGQUIT
} else {
println!("Stop pageserver gracefully");
Signal::SIGTERM
};
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!(
"Pageserver with pid {} does not exist, but a PID file was found",
pid
);
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
}
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if pageserver checkpoints a lot of data
for _ in 0..100 {
if let Err(_e) = TcpStream::connect(&address) {
println!("Pageserver stopped receiving connections");
//Now check status
match self.check_status() {
Ok(_) => {
println!("Pageserver status is OK. Wait a bit.");
thread::sleep(Duration::from_secs(1));
}
Err(err) => {
println!("Pageserver status is: {}", err);
return Ok(());
}
}
} else {
println!("page server not responding yet, retrying ({})...", retries);
println!("Pageserver still receives connections");
thread::sleep(Duration::from_secs(1));
}
}
Ok(())
}
pub fn stop(&self) -> Result<()> {
let pidfile = self.pid_file();
let pid = read_pidfile(&pidfile)?;
let status = Command::new("kill")
.arg(&pid)
.env_clear()
.status()
.expect("failed to execute kill");
if !status.success() {
anyhow::bail!("Failed to kill pageserver with pid {}", pid);
}
// await for pageserver stop
for _ in 0..5 {
let stream = TcpStream::connect(self.address());
if let Err(_e) = stream {
return Ok(());
}
println!("Stopping pageserver on {}", self.address());
thread::sleep(Duration::from_secs(1));
}
// ok, we failed to stop pageserver, let's panic
if !status.success() {
anyhow::bail!("Failed to stop pageserver with pid {}", pid);
} else {
return Ok(());
}
bail!("Failed to stop pageserver with pid {}", pid);
}
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
let connstring = format!(
"host={} port={} dbname={} user={}",
self.address().ip(),
self.address().port(),
"no_db",
"no_user",
);
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
println!("Pageserver query: '{}'", sql);
client.simple_query(sql).unwrap()
}
pub fn page_server_psql_client(
&self,
) -> std::result::Result<postgres::Client, postgres::Error> {
let connstring = format!(
"host={} port={} dbname={} user={}",
self.address().ip(),
self.address().port(),
"no_db",
"no_user",
);
Client::connect(connstring.as_str(), NoTls)
pub fn page_server_psql_client(&self) -> result::Result<postgres::Client, postgres::Error> {
self.pg_connection_config.connect(NoTls)
}
}
impl Drop for PageServerNode {
fn drop(&mut self) {
if self.kill_on_exit {
let _ = self.stop();
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
let mut builder = self.http_client.request(method, url);
if self.env.pageserver.auth_type == AuthType::ZenithJWT {
builder = builder.bearer_auth(&self.env.pageserver.auth_token)
}
}
}
//
// Control routines for WalAcceptor.
//
// Now used only in test setups.
//
pub struct WalAcceptorNode {
listen: SocketAddr,
data_dir: PathBuf,
env: LocalEnv,
}
impl WalAcceptorNode {
pub fn init(&self) {
if self.data_dir.exists() {
fs::remove_dir_all(self.data_dir.clone()).unwrap();
}
fs::create_dir_all(self.data_dir.clone()).unwrap();
builder
}
pub fn start(&self) {
println!(
"Starting wal_acceptor in {} listening '{}'",
self.data_dir.to_str().unwrap(),
self.listen
);
let status = Command::new(self.env.zenith_distrib_dir.join("wal_acceptor"))
.args(&["-D", self.data_dir.to_str().unwrap()])
.args(&["-l", self.listen.to_string().as_str()])
.args(&["--systemid", &self.env.systemid.to_string()])
// Tell page server it can receive WAL from this WAL safekeeper
// FIXME: If there are multiple safekeepers, they will all inform
// the page server. Only the last "notification" will stay in effect.
// So it's pretty random which safekeeper the page server will connect to
.args(&["--pageserver", "127.0.0.1:64000"])
.arg("-d")
.arg("-n")
.status()
.expect("failed to start wal_acceptor");
if !status.success() {
panic!("wal_acceptor start failed");
}
}
pub fn stop(&self) -> std::result::Result<(), io::Error> {
println!("Stopping wal acceptor on {}", self.listen);
let pidfile = self.data_dir.join("wal_acceptor.pid");
let pid = read_pidfile(&pidfile)?;
// Ignores any failures when running this command
let _status = Command::new("kill")
.arg(pid)
.env_clear()
.status()
.expect("failed to execute kill");
pub fn check_status(&self) -> Result<()> {
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
.send()?
.error_from_body()?;
Ok(())
}
}
impl Drop for WalAcceptorNode {
fn drop(&mut self) {
self.stop().unwrap();
pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
Ok(self
.http_request(Method::GET, format!("{}/{}", self.http_base_url, "tenant"))
.send()?
.error_from_body()?
.json()?)
}
}
///////////////////////////////////////////////////////////////////////////////
pub struct WalProposerNode {
pub pid: u32,
}
impl WalProposerNode {
pub fn stop(&self) {
let status = Command::new("kill")
.arg(self.pid.to_string())
.env_clear()
.status()
.expect("failed to execute kill");
if !status.success() {
panic!("kill start failed");
}
pub fn tenant_create(&self, tenantid: ZTenantId) -> Result<()> {
Ok(self
.http_request(Method::POST, format!("{}/{}", self.http_base_url, "tenant"))
.json(&TenantCreateRequest {
tenant_id: tenantid,
})
.send()?
.error_from_body()?
.json()?)
}
}
impl Drop for WalProposerNode {
fn drop(&mut self) {
self.stop();
}
}
///////////////////////////////////////////////////////////////////////////////
pub fn regress_check(pg: &PostgresNode) {
pg.safe_psql("postgres", "CREATE DATABASE regression");
let regress_run_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tmp_check/regress");
fs::create_dir_all(regress_run_path.clone()).unwrap();
std::env::set_current_dir(regress_run_path).unwrap();
let regress_build_path =
Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install/build/src/test/regress");
let regress_src_path =
Path::new(env!("CARGO_MANIFEST_DIR")).join("../vendor/postgres/src/test/regress");
let _regress_check = Command::new(regress_build_path.join("pg_regress"))
.args(&[
"--bindir=''",
"--use-existing",
format!("--bindir={}", pg.env.pg_bin_dir().to_str().unwrap()).as_str(),
format!("--dlpath={}", regress_build_path.to_str().unwrap()).as_str(),
format!(
"--schedule={}",
regress_src_path.join("parallel_schedule").to_str().unwrap()
pub fn branch_list(&self, tenantid: &ZTenantId) -> Result<Vec<BranchInfo>> {
Ok(self
.http_request(
Method::GET,
format!("{}/branch/{}", self.http_base_url, tenantid),
)
.as_str(),
format!("--inputdir={}", regress_src_path.to_str().unwrap()).as_str(),
])
.env_clear()
.env("LD_LIBRARY_PATH", pg.env.pg_lib_dir().to_str().unwrap())
.env("PGHOST", pg.address.ip().to_string())
.env("PGPORT", pg.address.port().to_string())
.env("PGUSER", pg.whoami())
.status()
.expect("pg_regress failed");
}
.send()?
.error_from_body()?
.json()?)
}
/// Read a PID file
///
/// This should contain an unsigned integer, but we return it as a String
/// because our callers only want to pass it back into a subcommand.
fn read_pidfile(pidfile: &Path) -> std::result::Result<String, io::Error> {
fs::read_to_string(pidfile).map_err(|err| {
eprintln!("failed to read pidfile {:?}: {:?}", pidfile, err);
err
})
pub fn branch_create(
&self,
branch_name: &str,
startpoint: &str,
tenantid: &ZTenantId,
) -> Result<BranchInfo> {
Ok(self
.http_request(Method::POST, format!("{}/branch", self.http_base_url))
.json(&BranchCreateRequest {
tenant_id: tenantid.to_owned(),
name: branch_name.to_owned(),
start_point: startpoint.to_owned(),
})
.send()?
.error_from_body()?
.json()?)
}
pub fn branch_get_by_name(
&self,
tenantid: &ZTenantId,
branch_name: &str,
) -> Result<BranchInfo> {
Ok(self
.http_request(
Method::GET,
format!("{}/branch/{}/{}", self.http_base_url, tenantid, branch_name),
)
.send()?
.error_for_status()?
.json()?)
}
}

13
docker-entrypoint.sh Executable file

@@ -0,0 +1,13 @@
#!/bin/sh
set -eux
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10"
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /data
else
"$@"
fi

14
docs/README.md Normal file

@@ -0,0 +1,14 @@
# Zenith documentation
## Table of contents
- [authentication.md](authentication.md) — pageserver JWT authentication.
- [docker.md](docker.md) — Docker images and building pipeline.
- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
- [pageserver/README](/pageserver/README) — pageserver overview.
- [postgres_ffi/README](/postgres_ffi/README) — Postgres FFI overview.
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
- [walkeeper/README](/walkeeper/README) — WAL service overview.
- [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core

30
docs/authentication.md Normal file

@@ -0,0 +1,30 @@
## Authentication
### Overview
Authentication currently uses JWT tokens for communication between compute and the pageserver, and between the CLI and the pageserver. The JWT token is signed with RSA keys. The CLI generates a key pair during `zenith init`, using the following openssl commands:
```bash
openssl genrsa -out private_key.pem 2048
openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
```
The CLI also generates a signed token and saves it in the config for later access to the pageserver. Authentication is currently optional. The pageserver has two config variables, `auth_validation_public_key_path` and `auth_type`; when the auth type is present and set to `ZenithJWT`, the pageserver requires authentication for connections. The actual JWT is passed in the password field of the connection string. There is a caveat for psql: it silently truncates passwords to 100 symbols, so to correctly pass a JWT via psql you have to either use the PGPASSWORD environment variable or store the password in the psql config file.
Currently there is no authentication between compute and safekeepers, because this communication layer is under heavy refactoring; support for authentication will be added there after the refactoring. For now, the safekeeper supports a "hardcoded" token passed via an environment variable, so that it can use the callmemaybe command in the pageserver.
Compute uses a token passed via an environment variable to communicate with the pageserver, and in the future with the safekeeper too.
JWT authentication now supports two scopes: tenant and pageserverapi. The tenant scope is intended for tenant-related API calls, e.g. create_branch; a compute launched for a particular tenant also uses this scope. The pageserverapi scope is intended to be used by the console to manage the pageserver. For now there is only one management operation: create tenant.
Examples of token generation in Python:
```python
# generate pageserverapi token
management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algorithm="RS256")
# generate tenant token
tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
```
Utility functions for working with JWTs in Rust are located in zenith_utils/src/auth.rs.
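For comparison, here is a hedged Rust sketch of the same token generation using the `jsonwebtoken` crate directly (the helper names and exact claim types in zenith_utils/src/auth.rs may differ):

```rust
// Hypothetical sketch: generate the same tokens in Rust with the `jsonwebtoken` crate.
use jsonwebtoken::{encode, Algorithm, EncodingKey, Header};
use serde::Serialize;

#[derive(Serialize)]
struct TokenClaims {
    scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    tenant_id: Option<String>,
}

fn generate_tokens(priv_key_pem: &[u8], tenant_id: &str) -> anyhow::Result<(String, String)> {
    let key = EncodingKey::from_rsa_pem(priv_key_pem)?;
    let header = Header::new(Algorithm::RS256);

    // pageserverapi (management) token
    let management_token = encode(
        &header,
        &TokenClaims { scope: "pageserverapi".into(), tenant_id: None },
        &key,
    )?;
    // tenant token
    let tenant_token = encode(
        &header,
        &TokenClaims { scope: "tenant".into(), tenant_id: Some(tenant_id.into()) },
        &key,
    )?;
    Ok((management_token, tenant_token))
}
```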

202
docs/core_changes.md Normal file

@@ -0,0 +1,202 @@
1. Add t_cid to XLOG record
- Why?
The cmin/cmax on a heap page is a real bummer. I don't see any other way to fix that than bite the bullet and modify the WAL-logging routine to include the cmin/cmax.
To recap, the problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row, and the same applies to deletion/update. So in the primary, a row is inserted with the current xmin + cmin, but in the replica the cmin is always set to 1. That works, because the command id is only relevant to the inserting transaction itself; after commit/abort, no one cares about it anymore.
- Alternatives?
I don't know
2. Add PD_WAL_LOGGED.
- Why?
Postgres sometimes writes data to a page before it is WAL-logged. If such a page is swapped out, we will lose this change. The problem is currently solved by setting the PD_WAL_LOGGED bit in the page header. When a page without this bit set is written to the SMGR, it is forced to be written to the WAL as an FPI using the log_newpage_copy() function.
There was a wrong assumption that this can happen only during construction of some exotic indexes (like GiST). That is not true: the same situation can happen with COPY, VACUUM, and when record hint bits are set.
- Discussion:
https://discord.com/channels/869525774699462656/882681420986851359
- Alternatives:
Do not store this flag in the page header, but associate the bit with the shared buffer. Logically this is more correct, but in practice it gives no advantage, neither in space nor in CPU overhead.
3. XLogReadBufferForRedo does not always load and pin the requested buffer, so we need to add extra checks that the buffer is really pinned. Also, do not use BufferGetBlockNumber on a buffer returned by XLogReadBufferForRedo.
- Why?
XLogReadBufferForRedo does not pin pages that were not requested by wal-redo. This is specific to the wal-redo Postgres.
- Alternatives?
No
4. Eliminate reporting of some warnings related to hint bits, for example
"page is not marked all-visible but visibility map bit is set in relation".
- Why?
Hint bits may not be WAL-logged.
- Alternative?
Always WAL-log all page changes.
5. Maintain last written LSN.
- Why?
When the compute node requests a page from the page server, we need to specify an LSN. Ideally it should be the LSN
of the WAL record that last updated this page, but we do not know it, because we do not have the page.
We can use the current WAL flush position, but in that case there is a high probability that the page server
will be blocked until this piece of WAL is delivered.
As a better approximation we can keep the max LSN of any written page (see the sketch after this list). It would be better to take into account the LSNs of evicted pages only,
but the SMGR API doesn't provide such knowledge.
- Alternatives?
Maintain a map of LSNs of evicted pages.
6. Launching Postgres without WAL.
- Why?
In the Zenith architecture the compute node is stateless, so when we launch a
compute node we need to provide some dummy PG_DATADIR. Relation pages
can be requested on demand from the page server, but Postgres still needs some non-relational data:
control and configuration files, SLRUs, ...
This is currently implemented using basebackup (not to be confused with pg_basebackup), which is created
by the pageserver. The tarball includes config/control files, SLRUs and the required directories.
Since the pageserver does not have the original (non-scattered) WAL segments, it includes in
this tarball a dummy WAL segment that contains only a SHUTDOWN_CHECKPOINT record at the beginning of the segment,
whose redo field points to the end of WAL. This allows loading the checkpoint record in a more or less
standard way with minimal changes to Postgres, but some special handling is then needed,
including restoring the previous record position from the zenith.signal file.
We also have to correctly initialize the header of the last WAL page (pointed to by checkpoint.redo)
to pass the checks performed by XLogReader.
- Alternatives?
We could avoid including the fake WAL segment in the tarball at all and modify xlog.c to load the checkpoint record
in a special way, but that would only increase the number of changes in xlog.c.
7. Add redo_read_buffer_filter callback to XLogReadBufferForRedoExtended
- Why?
We need a way for wal-redo Postgres to ignore pages that were not requested by the pageserver.
That way wal-redo Postgres reconstructs only the requested page, and for all others returns BLK_DONE,
which means that recovery is not needed for them.
- Alternatives?
No
8. Enforce WAL logging of sequence updates.
- Why?
For performance reasons Postgres doesn't log each fetch of a value from a sequence;
instead it pre-logs a few fetches in advance. In the event of a crash we can lose
(skip over) as many values as we pre-logged.
This doesn't work with Zenith, because the page with the sequence value can be evicted from the buffer cache,
so we would get a gap in sequence values even without a crash.
- Alternatives:
Do not try to preserve the sequential order, avoiding the performance penalty.
9. Treat unlogged tables as normal (permanent) tables.
- Why?
Unlogged tables are not transient, so they have to survive node restart (unlike temporary tables).
But since the compute node is stateless, we need to persist their data to the storage node,
and that can only be done through the WAL.
- Alternatives?
* Store unlogged tables locally (violates the requirement of stateless compute nodes).
* Prohibit unlogged tables altogether.
10. Support starting Postgres in wal-redo mode
- Why?
To be able to apply WAL records and reconstruct pages at the page server.
- Alternatives?
* Rewrite redo handlers in Rust
* Do not reconstruct pages at page server at all and do it at compute node.
11. WAL proposer
- Why?
The WAL proposer communicates with the safekeepers and ensures WAL durability through quorum writes.
It is currently implemented as a patch to the standard WAL sender.
- Alternatives?
It could be moved to an extension if some extra callbacks were added to the walsender code.
12. Secure Computing BPF API wrapper.
- Why?
Pageserver delegates complex WAL decoding duties to Postgres,
which means that the latter might fall victim to carefully designed
malicious WAL records and start doing harmful things to the system.
To prevent this, it has been decided to limit possible interactions
with the outside world using the Secure Computing BPF mode.
- Alternatives:
* Rewrite redo handlers in Rust.
* Add more checks to guarantee correctness of WAL records.
* Move seccomp.c to extension
* Many other discussed approaches to neutralize incorrect WAL records vulnerabilities.
13. Callbacks for replica feedback
- Why?
Allows the walproposer to interact with the walsender code.
- Alternatives
Copy the walsender code into the walproposer.
14. Support multiple SMGR implementations.
- Why?
Postgres provides an abstract API for the storage manager, but it has only one implementation
and provides no way to replace it with a custom storage manager.
- Alternatives?
None.
15. Calculate database size as sum of all database relations.
- Why?
Postgres calculates the database size by traversing the data directory,
but since the Zenith compute node is stateless we cannot do that.
- Alternatives?
Send this request directly to the pageserver and calculate the real (physical) size
of the Zenith representation of the database/timeline, rather than summing the logical sizes of all relations.
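For item 5 above, here is a conceptual sketch of the "max LSN of written pages" approximation (illustrative only and written in Rust for brevity; the real change lives in the Postgres C code):

```rust
// Illustrative only: keep a running maximum of the LSNs of written-out pages
// and use it as the request LSN when fetching pages from the page server.
use std::sync::atomic::{AtomicU64, Ordering};

static LAST_WRITTEN_LSN: AtomicU64 = AtomicU64::new(0);

// Called whenever a page is written out (ideally this would cover only evicted pages).
fn remember_written_page_lsn(page_lsn: u64) {
    LAST_WRITTEN_LSN.fetch_max(page_lsn, Ordering::Relaxed);
}

// Used as the LSN in page requests to the page server.
fn request_lsn() -> u64 {
    LAST_WRITTEN_LSN.load(Ordering::Relaxed)
}
```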
-----------------------------------------------
Not currently committed but proposed:
1. Disable ring buffer buffer manager strategies
- Why?
Postgres tries to avoid cache flushing by bulk operations (COPY, seqscan, vacuum, ...).
Even if there is free space in the buffer cache, pages may be evicted.
The negative effect of this can be somewhat compensated by the file system cache, but in Zenith
the cost of requesting a page from the page server is much higher.
- Alternatives?
Instead of just prohibiting the ring buffer we could try to implement a more flexible eviction policy,
for example copying an evicted page from the ring buffer to some other buffer if there is free space
in the buffer cache.
2. Disable marking page as dirty when hint bits are set.
- Why?
Postgres has to modify a page twice: first when some tuple is updated, and a second time when
hint bits are set. WAL-logging hint bit updates requires an FPI, which significantly increases the size of the WAL.
- Alternatives?
Add special WAL record for setting page hints.
3. Prefetching
- Why?
Since pages in Zenith are loaded on demand, we need some mechanism for bulk loading
to reduce node startup time, speed up some massive queries, and
reduce page request round-trip overhead.
Currently Postgres supports prefetching only for bitmap scans.
In Zenith we also use prefetching for sequential and index scans. For a sequential scan we prefetch
some number of following pages. For an index scan we prefetch the pages of the heap relation addressed by the TIDs.
4. Prewarming.
- Why?
Short downtime (or, in other words, fast compute node restart time) is one of the key features of Zenith.
But the request-response round-trip overhead of loading pages on demand can make warm-up of a freshly started node quite slow.
We can capture the state of the compute node's buffer cache and send a bulk request for these pages at startup (one way to capture the cached pages is sketched below).
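One way to capture the set of cached pages is via the standard pg_buffercache extension (a sketch under that assumption; Zenith's actual prewarming mechanism may differ):
```sql
-- Sketch: list the relation blocks currently cached in shared buffers.
-- This list could then be turned into a bulk page request at startup.
CREATE EXTENSION IF NOT EXISTS pg_buffercache;

SELECT reldatabase, relfilenode, relforknumber, relblocknumber
FROM pg_buffercache
WHERE relblocknumber IS NOT NULL
ORDER BY relfilenode, relforknumber, relblocknumber;
```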

docs/docker.md Normal file

@@ -0,0 +1,38 @@
# Docker images of Zenith
## Images
Currently we build two main images:
- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).
And two intermediate images used either to reduce build time or to deliver some additional binary tools from other repos:
- [zenithdb/build](https://hub.docker.com/repository/docker/zenithdb/build) — image with all the dependencies required to build Zenith and compute node images. This image is based on `rust:slim-buster`, so it also has a proper `rust` environment. Built from [/Dockerfile.build](/Dockerfile.build).
- [zenithdb/compute-tools](https://hub.docker.com/repository/docker/zenithdb/compute-tools) — compute node configuration management tools.
## Building pipeline
1. Image `zenithdb/compute-tools` is re-built automatically.
2. Image `zenithdb/build` is built manually. If you want to introduce any new compile-time dependencies to Zenith or the compute node, you have to update this image as well, build it, and push it to Docker Hub.
Build:
```sh
docker build -t zenithdb/build:buster -f Dockerfile.build .
```
Login:
```sh
docker login
```
Push to Docker Hub:
```sh
docker push zenithdb/build:buster
```
3. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.
4. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.

docs/glossary.md Normal file

@@ -0,0 +1,261 @@
# Glossary
### Authentication
### Backpressure
Backpressure is used to limit the lag between the pageserver and the compute node or WAL service.
If the compute node or WAL service runs far ahead of the pageserver,
the time to serve page requests increases. This may lead to timeout errors.
To tune the backpressure limits use the `max_replication_write_lag`, `max_replication_flush_lag` and `max_replication_apply_lag` settings.
When the lag between the current LSN (`pg_current_wal_flush_lsn()` at the compute node) and the minimal write/flush/apply position of the replica exceeds the limit,
backends performing writes are blocked until the replica catches up.
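For example, the relevant settings and the compute node's current flush position can be inspected from SQL (a minimal sketch; the values shown depend on your configuration):
```sql
-- Backpressure-related settings mentioned above.
SHOW max_replication_write_lag;
SHOW max_replication_flush_lag;
SHOW max_replication_apply_lag;

-- Current flush LSN on the compute node; backpressure compares it with the
-- minimal write/flush/apply position reported by the replica.
SELECT pg_current_wal_flush_lsn();
```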
### Base image (page image)
### Basebackup
A tarball with files needed to bootstrap a compute node[] and a corresponding command to create it.
NOTE: It has nothing to do with PostgreSQL's pg_basebackup.
### Branch
We can create a branch at a certain LSN using the `zenith branch` command.
Each Branch lives in a corresponding timeline[] and has an ancestor[].
### Checkpoint (PostgreSQL)
NOTE: This is an overloaded term.
A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
### Checkpoint (Layered repository)
NOTE: This is an overloaded term.
Whenever enough WAL has been accumulated in memory, the page server []
writes out the changes from in-memory layers into new layer files[]. This process
is called "checkpointing". The page server only creates layer files for
relations that have been modified since the last checkpoint.
Configuration parameter `checkpoint_distance` defines the distance
from the current LSN after which a checkpoint of in-memory layers is performed.
Default is `DEFAULT_CHECKPOINT_DISTANCE`.
Set this parameter to `0` to force checkpoint of every layer.
Configuration parameter `checkpoint_period` defines the interval between checkpoint iterations.
Default is `DEFAULT_CHECKPOINT_PERIOD`.
### Compute node
Stateless Postgres node that stores data in pageserver.
### Garbage collection
The process of removing old on-disk layers that are not needed by any timeline anymore.
### Fork
Each of the separate segmented file sets in which a relation is stored. The main fork is where the actual data resides. There also exist two secondary forks for metadata: the free space map and the visibility map.
Each PostgreSQL fork is considered a separate relish.
### Layer
A layer contains data needed to reconstruct any page versions within the
layer's Segment and range of LSNs.
There are two kinds of layers, in-memory and on-disk layers. In-memory
layers are used to ingest incoming WAL, and provide fast access
to the recent page versions. On-disk layers are stored as files on disk, and
are immutable. See pageserver/src/layered_repository/README.md for more.
### Layer file (on-disk layer)
Layered repository on-disk format is based on immutable files. The
files are called "layer files". Each file corresponds to one RELISH_SEG_SIZE
segment of a PostgreSQL relation fork. There are two kinds of layer
files: image files and delta files. An image file contains a
"snapshot" of the segment at a particular LSN, and a delta file
contains WAL records applicable to the segment, in a range of LSNs.
### Layer map
The layer map tracks what layers exist for all the relishes in a timeline.
### Layered repository
Zenith repository implementation that keeps data in layers.
### LSN
The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
The insert position is a byte offset into the logs, increasing monotonically with each new record.
Internally, an LSN is a 64-bit integer, representing a byte position in the write-ahead log stream.
It is printed as two hexadecimal numbers of up to 8 digits each, separated by a slash.
Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.
In Postgres and Zenith, LSNs are used to describe certain points in WAL handling.
PostgreSQL LSNs and functions to monitor them:
* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
* `pg_last_wal_replay_lsn()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
(Source: [PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html).)
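For instance, the distance in bytes between two of these positions can be computed with the standard `pg_wal_lsn_diff()` function (a small illustration, not Zenith-specific):
```sql
-- How many bytes of WAL separate the insert position from the flush position.
SELECT pg_wal_lsn_diff(pg_current_wal_insert_lsn(),
                       pg_current_wal_flush_lsn()) AS flush_lag_bytes;
```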
Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
* `VCL`: the largest LSN for which we can guarantee availability of all prior records.
Zenith pageserver LSNs:
* `last_record_lsn` - the end of last processed WAL record.
* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
TODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context.
* `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)
TODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs.
### Page (block)
The basic structure used to store relation data. All pages are of the same size.
This is the unit of data exchange between compute node and pageserver.
### Pageserver
Zenith storage engine: repositories + wal receiver + page service + wal redo.
### Page service
The Page Service listens for GetPage@LSN requests from the Compute Nodes,
and responds with pages from the repository.
### PITR (Point-in-time-recovery)
PostgreSQL's ability to restore up to a specified LSN.
### Primary node
### Proxy
Postgres protocol proxy/router.
This service listens on the psql port, can check auth via an external service,
and can create new databases and accounts (the control plane API in our case).
### Relation
The generic term in PostgreSQL for all objects in a database that have a name and a list of attributes defined in a specific order.
### Relish
We call each relation and other file that is stored in the
repository a "relish". It comes from "rel"-ish, as in "kind of a
rel", because it covers relations as well as other things that are
not relations, but are treated similarly for the purposes of the
storage layer.
### Replication slot
### Replica node
### Repository
A repository stores multiple timelines, forked off from the same initial call to 'initdb',
and has an associated WAL redo service.
One repository corresponds to one Tenant.
### Retention policy
How much history do we need to keep around for PITR and read-only nodes?
### Segment (PostgreSQL)
NOTE: This is an overloaded term.
A physical file that stores data for a given relation. File segments are
limited in size by a compile-time setting (1 gigabyte by default), so if a
relation exceeds that size, it is split into multiple segments.
### Segment (Layered Repository)
NOTE: This is an overloaded term.
Segment is a RELISH_SEG_SIZE slice of relish (identified by a SegmentTag).
### SLRU
SLRUs include pg_clog, pg_multixact/members, and
pg_multixact/offsets. There are other SLRUs in PostgreSQL, but
they don't need to be stored permanently (e.g. pg_subtrans),
or we do not support them in zenith yet (pg_commit_ts).
Each SLRU segment is considered a separate relish[].
### Tenant (Multitenancy)
A tenant represents a single customer interacting with Zenith.
Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
One pageserver[] can serve multiple tenants at once.
One safekeeper can serve multiple tenants as well.
See `docs/multitenancy.md` for more.
### Timeline
Timeline accepts page changes and serves get_page_at_lsn() and
get_rel_size() requests. The term "timeline" is used internally
in the system, but to users they are exposed as "branches", with
human-friendly names.
NOTE: this has nothing to do with PostgreSQL WAL timelines.
### XLOG
PostgreSQL alias for WAL[].
### WAL (Write-ahead log)
The journal that keeps track of the changes in the database cluster as user- and system-invoked operations take place. It comprises many individual WAL records[] written sequentially to WAL files[].
### WAL acceptor, WAL proposer
In the context of the consensus algorithm, the Postgres
compute node is also known as the WAL proposer, and the safekeeper is also known
as the acceptor. Those are the standard terms in the Paxos algorithm.
### WAL receiver (WAL decoder)
The WAL receiver connects to the external WAL safekeeping service (or
directly to the primary) using PostgreSQL physical streaming
replication, and continuously receives WAL. It decodes the WAL records,
and stores them to the repository.
We keep one WAL receiver active per timeline.
### WAL record
A low-level description of an individual data change.
### WAL redo
A service that runs PostgreSQL in a special wal_redo mode
to apply given WAL records over an old page image and return new page image.
### WAL safekeeper
One node that participates in the quorum. All the safekeepers
together form the WAL service.
### WAL segment (WAL file)
Also known as WAL segment or WAL segment file. Each of the sequentially-numbered files that provide storage space for WAL. The files are all of the same predefined size and are written in sequential order, interspersing changes as they occur in multiple simultaneous sessions.
### WAL service
The service as a whole that ensures that WAL is stored durably.
### Web console

docs/multitenancy.md Normal file

@@ -0,0 +1,59 @@
## Multitenancy
### Overview
Zenith supports multitenancy. One pageserver can serve multiple tenants at once. Tenants can be managed via the zenith CLI. During pageserver setup a tenant can be created using `zenith init --create-tenant`. Tenants can also be added to the system on the fly, without a pageserver restart, using the following CLI command: `zenith tenant create`. Tenants use random identifiers which can be represented as 32-character hexadecimal strings, so `zenith tenant create` accepts the desired tenant id as an optional argument. The concept of timelines/branches works independently per tenant.
### Tenants in other commands
By default, `zenith init` creates a new tenant on the pageserver. The newly created tenant's id is saved to the CLI config, so other commands can use it automatically if no explicit `--tenantid=<tenantid>` argument is provided. The tenantid therefore appears more frequently in the internal pageserver interface, whose commands take a tenantid argument to determine which tenant an operation should be applied to. The CLI supports creation of new tenants.
Examples for cli:
```sh
zenith tenant list
zenith tenant create // generates new id
zenith tenant create ee6016ec31116c1b7c33dfdfca38892f
zenith pg create main // default tenant from zenith init
zenith pg create main --tenantid=ee6016ec31116c1b7c33dfdfca38892f
zenith branch --tenantid=ee6016ec31116c1b7c33dfdfca38892f
```
### Data layout
On the pageserver, tenants introduce one level of indirection, so the data directory is structured the following way:
```
<pageserver working directory>
├── pageserver.log
├── pageserver.pid
├── pageserver.toml
└── tenants
├── 537cffa58a4fa557e49e19951b5a9d6b
├── de182bc61fb11a5a6b390a8aed3a804a
└── ee6016ec31116c1b7c33dfdfca38891f
```
WAL redo activity and timelines are managed for each tenant independently.
For the local environment used e.g. in tests, there is also a new level of indirection for tenants. It affects the `pgdatadirs` directory, which now contains a `tenants` subdirectory, so the structure looks the following way:
```
pgdatadirs
└── tenants
├── de182bc61fb11a5a6b390a8aed3a804a
│ └── main
└── ee6016ec31116c1b7c33dfdfca38892f
└── main
```
### Changes to postgres
The tenant id is passed to postgres via a GUC, the same way as the timeline. The tenant id is added to commands issued to the pageserver, namely pagestream and callmemaybe. The tenant id also exists in the ServerInfo structure; this is needed to pass the value to the WAL receiver so it can forward it to the pageserver.
### Safety
For now, a particular tenant can only appear on a particular pageserver. The set of safekeepers is also pinned to a particular (tenantid, timeline) pair, so there can only be one writer for a particular (tenantid, timeline).


@@ -0,0 +1,22 @@
## Pageserver tenant migration
### Overview
This feature allows migrating a timeline from one pageserver to another by utilizing the remote storage capability.
### Migration process
Pageserver implements two new http handlers: timeline attach and timeline detach.
Timeline migration is performed in the following way:
1. Timeline attach is called on the target pageserver. This asks the pageserver to download the latest checkpoint uploaded to S3.
2. For now it is necessary to manually initialize the replication stream via a callmemaybe call, so that the target pageserver initializes replication from the safekeeper. (It is desirable to avoid this and initialize replication directly in the attach handler, but this requires some refactoring, probably [#997](https://github.com/zenithdb/zenith/issues/997)/[#1049](https://github.com/zenithdb/zenith/issues/1049).)
3. Replication state can be tracked via the timeline detail pageserver call.
4. The compute node should be restarted with the new pageserver connection string. The issue of multiple compute nodes for one timeline is handled at the safekeeper consensus level, so this is not a problem here. Currently the responsibility for rescheduling the compute with the updated config lies with an external coordinator (the console).
5. The timeline is detached from the old pageserver. On-disk data is removed.
### Implementation details
Now the safekeeper needs to track which pageserver it is replicating to. This introduces complications into the replication code:
* We need to distinguish different pageservers (currently this is done by connection string, which is imperfect and is covered here: https://github.com/zenithdb/zenith/issues/1105). Callmemaybe subscription management also needs to track that (this is already implemented).
* We need to track which pageserver is the primary. This is needed to avoid reconnections to non-primary pageservers: we shouldn't reconnect to them when they decide to stop their walreceiver. E.g. this can happen when there is load on the compute and we are trying to detach the timeline from the old pageserver; in this case callmemaybe will try to reconnect to it because the replication termination condition is not met (a pageserver with an active compute may never catch up to the latest LSN, so there is always some WAL tail).

docs/settings.md Normal file

@@ -0,0 +1,180 @@
## Pageserver
Pageserver is mainly configured via a `pageserver.toml` config file.
If there's no such file during the `init` phase of the server, it creates the file itself. Without `init`, the file is read.
Arbitrary config values can also be passed to the pageserver binary as arguments: such values override
the values in the config file, if any are specified for the same key, and get into the final config during the init phase.
### Config example
```toml
# Initial configuration file created by 'pageserver --init'
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
checkpoint_distance = '268435456' # in bytes
checkpoint_period = '1 s'
gc_period = '100 s'
gc_horizon = '67108864'
max_file_descriptors = '100'
# initial superuser role name to use when creating a new tenant
initial_superuser_name = 'zenith_admin'
# [remote_storage]
```
The config above shows default values for all basic pageserver settings.
Pageserver uses default values for all settings that are missing in the config, so it's not a hard error to leave the config blank.
Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.
Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and
* either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
* or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`
### Config values
All values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. All tables should be passed in the inline form.
Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage={local_path='/some/local/path/'}"`
Note that TOML distinguishes between strings and integers; the former require single or double quotes around them.
#### checkpoint_distance
`checkpoint_distance` is the amount of incoming WAL that is held in
the open layer, before it's flushed to local disk. It puts an upper
bound on how much WAL needs to be re-processed after a pageserver
crash. It is a soft limit, the pageserver can momentarily go above it,
but it will trigger a checkpoint operation to get it back below the
limit.
`checkpoint_distance` also determines how much WAL needs to be kept
durable in the safekeeper. The safekeeper must have capacity to hold
this much WAL, with some headroom, otherwise you can get stuck in a
situation where the safekeeper is full and stops accepting new WAL,
but the pageserver is not flushing out and releasing the space in the
safekeeper because it hasn't reached checkpoint_distance yet.
`checkpoint_distance` also controls how often the WAL is uploaded to
S3.
The unit is # of bytes.
#### checkpoint_period
The pageserver checks whether `checkpoint_distance` has been reached
every `checkpoint_period` seconds. Default is 1 s, which should be
fine.
#### gc_horizon
`gc_horizon` determines how much history is retained, to allow
branching and read replicas at an older point in time. The unit is #
of bytes of WAL. Page versions older than this are garbage collected
away.
#### gc_period
Interval at which garbage collection is triggered. Default is 100 s.
#### initial_superuser_name
Name of the initial superuser role, passed to initdb when a new tenant
is initialized. It doesn't affect anything after initialization.
Note: the default is 'zenith_admin', and the console
depends on that, so if you change it, bad things will happen.
#### page_cache_size
Size of the page cache, to hold materialized page versions. Unit is
number of 8 kB blocks. The default is 8192, which means 64 MB.
#### max_file_descriptors
Max number of file descriptors to hold open concurrently for accessing
layer files. This should be kept well below the process/container/OS
limit (see `ulimit -n`), as the pageserver also needs file descriptors
for other files and for sockets for incoming connections.
#### pg_distrib_dir
A directory with Postgres installation to use during pageserver activities.
Inside that dir, a `bin/postgres` binary should be present.
The default distrib dir is `./tmp_install/`.
#### workdir (-D)
A directory in the file system, where pageserver will store its files.
The default is `./.zenith/`.
This parameter has a special CLI alias (`-D`) and cannot be overridden the regular `-c` way.
##### Remote storage
There's a way to automatically back up and restore some of the pageserver's data from working dir to the remote storage.
The backup system is disabled by default and can be enabled for either of the currently available storages:
###### Local FS storage
Pageserver can back up and restore some of its workdir contents to another directory.
For that, only a path to that directory needs to be specified as a parameter:
```toml
[remote_storage]
local_path = '/some/local/path/'
```
###### S3 storage
Pageserver can back up and restore some of its workdir contents to S3.
A full set of S3 credentials is needed for that, passed as parameters.
Configuration example:
```toml
[remote_storage]
# Name of the bucket to connect to
bucket_name = 'some-sample-bucket'
# Name of the region where the bucket is located at
bucket_region = 'eu-north-1'
# A "subfolder" in the bucket, to use the same bucket separately by multiple pageservers at once.
# Optional, pageserver uses entire bucket if the prefix is not specified.
prefix_in_bucket = '/some/prefix/'
# Access key to connect to the bucket ("login" part of the credentials)
access_key_id = 'SOMEKEYAAAAASADSAH*#'
# Secret access key to connect to the bucket ("password" part of the credentials)
secret_access_key = 'SOMEsEcReTsd292v'
```
###### General remote storage configuration
Pageserver allows only one remote storage to be configured at a time and errors if parameters from multiple different remote configurations are used.
No default values are used for the remote storage configuration parameters.
Besides, there are parameters common to all types of remote storage that can be configured; those have defaults:
```toml
[remote_storage]
# Max number of concurrent connections to open for uploading to or downloading from the remote storage.
max_concurrent_sync = 100
# Max number of errors a single task can have before it's considered failed and not attempted to run anymore.
max_sync_errors = 10
```
## Safekeeper
TODO

docs/sourcetree.md Normal file

@@ -0,0 +1,126 @@
## Source tree layout
Below you will find a brief overview of each subdir in the source tree in alphabetical order.
`/control_plane`:
Local control plane.
Functions to start, configure and stop pageserver and postgres instances running as a local processes.
Intended to be used in integration tests and in CLI tools for local installations.
`/docs`:
Documentation of Zenith features and concepts.
Now it is mostly dev documentation.
`/monitoring`:
TODO
`/pageserver`:
Zenith storage service.
The pageserver has a few different duties:
- Store and manage the data.
- Generate a tarball with files needed to bootstrap ComputeNode.
- Respond to GetPage@LSN requests from the Compute Nodes.
- Receive WAL from the WAL service and decode it.
- Replay WAL that's applicable to the chunks that the Page Server maintains
For more detailed info, see `/pageserver/README`
`/postgres_ffi`:
Utility functions for interacting with PostgreSQL file formats.
Misc constants, copied from PostgreSQL headers.
`/proxy`:
Postgres protocol proxy/router.
This service listens on the psql port, can check auth via an external service,
and can create new databases and accounts (the control plane API in our case).
`/test_runner`:
Integration tests, written in Python using the `pytest` framework.
`/vendor/postgres`:
PostgreSQL source tree, with the modifications needed for Zenith.
`/vendor/postgres/contrib/zenith`:
PostgreSQL extension that implements storage manager API and network communications with remote page server.
`/vendor/postgres/contrib/zenith_test_utils`:
PostgreSQL extension that contains functions needed for testing and debugging.
`/walkeeper`:
The Zenith WAL service that receives WAL from the primary compute node and streams it to the pageserver.
It acts as a holding area and redistribution center for recently generated WAL.
For more detailed info, see `/walkeeper/README`
`/workspace_hack`:
The workspace_hack crate exists only to pin down some dependencies.
`/zenith`
Main entry point for the 'zenith' CLI utility.
TODO: Doesn't it belong to control_plane?
`/zenith_metrics`:
Helpers for exposing Prometheus metrics from the server.
`/zenith_utils`:
Helpers that are shared between other crates in this repository.
## Using Python
Note that Debian/Ubuntu Python packages are commonly stale,
so manual installation of dependencies is not recommended.
A single virtual environment with all dependencies is described in the single `Pipfile`.
### Prerequisites
- Install Python 3.7 (the minimal supported version) or greater.
- Our setup with poetry should work with newer Python versions too, so feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
- If you have trouble with another version, you can resolve it by installing Python 3.7 separately, via pyenv or via the system package manager, e.g.:
```bash
# In Ubuntu
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt update
sudo apt install python3.7
```
- Install `poetry`
- The exact version of `poetry` is not important; see the installation instructions at poetry's [website](https://python-poetry.org/docs/#installation).
- Install dependencies via `./scripts/pysync`. Note that CI uses Python 3.7, so if you have a different version some linting tools can yield different results locally vs in CI.
Run `poetry shell` to activate the virtual environment.
Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.
### Obligatory checks
We force code formatting via `yapf` and type hints via `mypy`.
Run the following commands in the repository's root (next to `setup.cfg`):
```bash
poetry run yapf -ri . # All code is reformatted
poetry run mypy . # Ensure there are no typing errors
```
**WARNING**: do not run `mypy` from a directory other than the root of the repository.
Otherwise it will not find its configuration.
Also consider:
* Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any.
* Adding more type hints to your code to avoid `Any`.
### Changing dependencies
To add a new package or change an existing one you can use `poetry add` or `poetry update`, or edit `pyproject.toml` manually. Do not forget to run `poetry lock` in the latter case.
More details are available in poetry's [documentation](https://python-poetry.org/docs/).


@@ -1 +0,0 @@
tmp_check/


@@ -1,17 +0,0 @@
[package]
name = "integration_tests"
version = "0.1.0"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
lazy_static = "1.4.0"
rand = "0.8.3"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
control_plane = { path = "../control_plane" }


@@ -1,11 +0,0 @@
// test node resettlement to an empty datadir
// TODO
/*
#[test]
fn test_resettlement() {}
// test seq scan of everything after restart
#[test]
fn test_cold_seqscan() {}
*/


@@ -1,8 +0,0 @@
// TODO
/*
#[test]
fn test_actions() {}
#[test]
fn test_regress() {}
*/


@@ -1,136 +0,0 @@
// mod control_plane;
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env;
use control_plane::local_env::PointInTime;
use control_plane::storage::TestStorageControlPlane;
use std::{thread, time};
// XXX: force all redo at the end
// -- restart + seqscan won't read deleted stuff
// -- pageserver api endpoint to check all rels
#[test]
fn test_redo_cases() {
let local_env = local_env::test_env("test_redo_cases");
// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_node(maintli);
node.start().unwrap();
// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
// check 'create table as'
node.safe_psql("postgres", "CREATE TABLE t2 AS SELECT * FROM t");
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
}
// Runs pg_regress on a compute node
#[test]
#[ignore]
fn test_regress() {
let local_env = local_env::test_env("test_regress");
// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_node(maintli);
node.start().unwrap();
control_plane::storage::regress_check(&node);
}
// Run two postgres instances on one pageserver, on different timelines
#[test]
fn test_pageserver_two_timelines() {
let local_env = local_env::test_env("test_pageserver_two_timelines");
// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let maintli = storage_cplane.get_branch_timeline("main");
// Create new branch at the end of 'main'
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
local_env::create_branch(
&local_env,
"experimental",
PointInTime {
timelineid: maintli,
lsn: startpoint,
},
)
.unwrap();
let experimentaltli = storage_cplane.get_branch_timeline("experimental");
// Launch postgres instances on both branches
let node1 = compute_cplane.new_test_node(maintli);
let node2 = compute_cplane.new_test_node(experimentaltli);
node1.start().unwrap();
node2.start().unwrap();
//give walreceiver time to connect
thread::sleep(time::Duration::from_secs(3));
// check node1
node1.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
node1.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
let count: i64 = node1
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
// check node2
node2.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
node2.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(100000,200000), 'payload'",
);
let count: i64 = node2
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 15000150000);
}


@@ -1,308 +0,0 @@
// Restart acceptors one by one while compute is under the load.
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env;
use control_plane::local_env::PointInTime;
use control_plane::storage::TestStorageControlPlane;
use pageserver::ZTimelineId;
use rand::Rng;
use std::sync::Arc;
use std::time::SystemTime;
use std::{thread, time};
#[test]
fn test_acceptors_normal_work() {
let local_env = local_env::test_env("test_acceptors_normal_work");
const REDUNDANCY: usize = 3;
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();
// start proxy
let _proxy = node.start_proxy(&wal_acceptors);
// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
// check wal files equality
}
// Run page server and multiple safekeepers, and multiple compute nodes running
// against different timelines.
#[test]
fn test_many_timelines() {
// Initialize a new repository, and set up WAL safekeepers and page server.
const REDUNDANCY: usize = 3;
const N_TIMELINES: usize = 5;
let local_env = local_env::test_env("test_many_timelines");
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
// Create branches
let mut timelines: Vec<ZTimelineId> = Vec::new();
let maintli = storage_cplane.get_branch_timeline("main"); // main branch
timelines.push(maintli);
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
for i in 1..N_TIMELINES {
// additional branches
let branchname = format!("experimental{}", i);
local_env::create_branch(
&local_env,
&branchname,
PointInTime {
timelineid: maintli,
lsn: startpoint,
},
)
.unwrap();
let tli = storage_cplane.get_branch_timeline(&branchname);
timelines.push(tli);
}
// start postgres on each timeline
let mut nodes = Vec::new();
for tli in timelines {
let node = compute_cplane.new_test_node(tli);
nodes.push(node.clone());
node.start().unwrap();
node.start_proxy(&wal_acceptors);
}
// create schema
for node in &nodes {
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
}
// Populate data
for node in &nodes {
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
}
// Check data
for node in &nodes {
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
}
}
// Majority is always alive
#[test]
fn test_acceptors_restarts() {
let local_env = local_env::test_env("test_acceptors_restarts");
// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 3;
const FAULT_PROBABILITY: f32 = 0.01;
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
let mut rng = rand::thread_rng();
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();
// start proxy
let _proxy = node.start_proxy(&wal_acceptors);
let mut failed_node: Option<usize> = None;
// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
let mut psql = node.open_psql("postgres");
for i in 1..=1000 {
psql.execute("INSERT INTO t values ($1, 'payload')", &[&i])
.unwrap();
let prob: f32 = rng.gen();
if prob <= FAULT_PROBABILITY {
if let Some(node) = failed_node {
storage_cplane.wal_acceptors[node].start();
failed_node = None;
} else {
let node: usize = rng.gen_range(0..REDUNDANCY);
failed_node = Some(node);
storage_cplane.wal_acceptors[node].stop().unwrap();
}
}
}
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 500500);
}
fn start_acceptor(cplane: &Arc<TestStorageControlPlane>, no: usize) {
let cp = cplane.clone();
thread::spawn(move || {
thread::sleep(time::Duration::from_secs(1));
cp.wal_acceptors[no].start();
});
}
// Stop majority of acceptors while compute is under the load. Boot
// them again and check that nothing was lost. Repeat.
// N_CRASHES env var
#[test]
fn test_acceptors_unavailability() {
let local_env = local_env::test_env("test_acceptors_unavailability");
// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 2;
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();
// start proxy
let _proxy = node.start_proxy(&wal_acceptors);
// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
let mut psql = node.open_psql("postgres");
psql.execute("INSERT INTO t values (1, 'payload')", &[])
.unwrap();
storage_cplane.wal_acceptors[0].stop().unwrap();
let cp = Arc::new(storage_cplane);
start_acceptor(&cp, 0);
let now = SystemTime::now();
psql.execute("INSERT INTO t values (2, 'payload')", &[])
.unwrap();
assert!(now.elapsed().unwrap().as_secs() > 1);
psql.execute("INSERT INTO t values (3, 'payload')", &[])
.unwrap();
cp.wal_acceptors[1].stop().unwrap();
start_acceptor(&cp, 1);
psql.execute("INSERT INTO t values (4, 'payload')", &[])
.unwrap();
assert!(now.elapsed().unwrap().as_secs() > 2);
psql.execute("INSERT INTO t values (5, 'payload')", &[])
.unwrap();
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 15);
}
fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
let mut rng = rand::thread_rng();
let n_acceptors = cplane.wal_acceptors.len();
let failure_period = time::Duration::from_secs(1);
while cplane.is_running() {
thread::sleep(failure_period);
let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
for i in 0..n_acceptors {
if (mask & (1 << i)) != 0 {
cplane.wal_acceptors[i].stop().unwrap();
}
}
thread::sleep(failure_period);
for i in 0..n_acceptors {
if (mask & (1 << i)) != 0 {
cplane.wal_acceptors[i].start();
}
}
}
}
// Race condition test
#[test]
fn test_race_conditions() {
let local_env = local_env::test_env("test_race_conditions");
// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 3;
let storage_cplane = Arc::new(TestStorageControlPlane::fault_tolerant(
&local_env, REDUNDANCY,
));
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
// start postgres
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();
// start proxy
let _proxy = node.start_proxy(&wal_acceptors);
// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
let cp = storage_cplane.clone();
let failures_thread = thread::spawn(move || {
simulate_failures(cp);
});
let mut psql = node.open_psql("postgres");
for i in 1..=1000 {
psql.execute("INSERT INTO t values ($1, 'payload')", &[&i])
.unwrap();
}
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 500500);
storage_cplane.stop();
failures_thread.join().unwrap();
}


@@ -1,23 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*


@@ -1,55 +0,0 @@
Mock implementation of a management console.
See demo-howto.txt for usage.
Building and Installation
-------------------------
To compile Postgres:
sudo apt build-dep postgresql
sudo apt install bison flex libz-dev libssl-dev
sudo apt install ccache
sudo apt install libcurl4-openssl-dev libxml2-dev
For the webapp:
# NOTE: This requires at least version 1.1.0 of python3-flask. That's not
# available in Debian Buster, need at least Bullseye.
sudo apt install python3 python3-flask python3-pip npm webpack
pip3 install Flask-BasicAuth
pip3 install boto3
git clone and compile and install patched version of Postgres:
git clone https://github.com/libzenith/postgres.git
cd postgres
git checkout zenith-experiments
./configure --enable-debug --enable-cassert --with-openssl --prefix=/home/heikki/pgsql-install --with-libxml CC="ccache gcc" CFLAGS="-O0"
make -j4 -s install
Get the webapp:
cd ~
git clone https://github.com/libzenith/zenith-mgmt-console.git
cd zenith-mgmt-console
mkdir pgdatadirs
openssl req -new -x509 -days 365 -nodes -text -out server.crt \
-keyout server.key -subj "/CN=zenith-demo"
For Mock S3 server (unless you want to test against a real cloud service):
sudo apt install python3-tornado
cd ~/zenith-mgmt-console
git clone https://github.com/hlinnaka/ms3.git
Compile & run it:
npm install
webpack # compile React app
BASIC_AUTH_PASSWORD=<password> ./launch-local.sh
You can view the contents of the S3 bucket with browser:
http://<server>/list_bucket


@@ -1,340 +0,0 @@
from flask import request
from flask_basicauth import BasicAuth
from flask import render_template
from subprocess import PIPE, STDOUT, run, Popen
import html
import os
import re
import shutil
import logging
import time
import boto3
from boto3.session import Session
from botocore.client import Config
from botocore.handlers import set_list_objects_encoding_type_url
from flask import Flask
import waldump
app = Flask(__name__)
app.config['BASIC_AUTH_USERNAME'] = 'zenith'
app.config['BASIC_AUTH_PASSWORD'] = os.getenv('BASIC_AUTH_PASSWORD')
app.config['BASIC_AUTH_FORCE'] = True
basic_auth = BasicAuth(app)
# S3 configuration:
ENDPOINT = os.getenv('S3_ENDPOINT', 'https://localhost:9000')
ACCESS_KEY = os.getenv('S3_ACCESSKEY', 'minioadmin')
SECRET = os.getenv('S3_SECRET', '')
BUCKET = os.getenv('S3_BUCKET', 'foobucket')
print("Using bucket at " + ENDPOINT);
#boto3.set_stream_logger('botocore', logging.DEBUG)
session = Session(aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET,
region_name=os.getenv('S3_REGION', 'auto'))
# needed for google cloud?
session.events.unregister('before-parameter-build.s3.ListObjects',
set_list_objects_encoding_type_url)
s3resource = session.resource('s3',
endpoint_url=ENDPOINT,
verify=False,
config=Config(signature_version='s3v4'))
s3bucket = s3resource.Bucket(BUCKET)
s3_client = boto3.client('s3',
endpoint_url=ENDPOINT,
verify=False,
config=Config(signature_version='s3v4'),
aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET)
@app.route("/")
def index():
return render_template("index.html")
@app.route("/api/waldump")
def render_waldump():
return render_template("waldump.html")
@app.route('/api/fetch_wal')
def fetch_wal():
return waldump.fetch_wal(request, s3bucket);
@app.route("/api/server_status")
def server_status():
dirs = os.listdir("pgdatadirs")
dirs.sort()
primary = None
standbys = []
for dirname in dirs:
result = run("pg_ctl status -D pgdatadirs/" + dirname, stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
srv = {
'datadir': dirname,
'status': result.stdout,
'port': None
}
if dirname == 'primary':
primary = srv;
primary['port'] = 5432;
else:
standby_match = re.search('standby_([0-9]+)', dirname)
if standby_match:
srv['port'] = int(standby_match.group(1))
standbys.append(srv);
return {'primary': primary, 'standbys': standbys}
@app.route('/api/list_bucket')
def list_bucket():
response = 'cloud bucket contents:<br>\n'
for file in s3bucket.objects.all():
response = response + html.escape(file.key) + '<br>\n'
return response
def walpos_str(walpos):
return '{:X}/{:X}'.format(walpos >> 32, walpos & 0xFFFFFFFF)
@app.route('/api/bucket_summary')
def bucket_summary():
nonrelimages = []
minwal = int(0)
maxwal = int(0)
minseqwal = int(0)
maxseqwal = int(0)
for file in s3bucket.objects.all():
path = file.key
match = re.search('nonreldata/nonrel_([0-9A-F]+).tar', path)
if match:
walpos = int(match.group(1), 16)
nonrelimages.append(walpos_str(walpos))
match = re.search('nonreldata/nonrel_([0-9A-F]+)-([0-9A-F]+)', path)
if match:
endwal = int(match.group(2), 16)
if endwal > maxwal:
maxwal = endwal
match = re.search('walarchive/([0-9A-F]{8})([0-9A-F]{8})([0-9A-F]{8})', path)
if match:
tli = int(match.group(1), 16)
logno = int(match.group(2), 16)
segno = int(match.group(3), 16)
# FIXME: this assumes default 16 MB wal segment size
logsegno = logno * (0x100000000 / (16*1024*1024)) + segno
seqwal = int((logsegno + 1) * (16*1024*1024))
if seqwal > maxseqwal:
maxseqwal = seqwal;
if minseqwal == 0 or seqwal < minseqwal:
minseqwal = seqwal;
return {
'nonrelimages': nonrelimages,
'minwal': walpos_str(minwal),
'maxwal': walpos_str(maxwal),
'minseqwal': walpos_str(minseqwal),
'maxseqwal': walpos_str(maxseqwal)
}
def print_cmd_result(cmd_result):
return print_cmd_result_ex(cmd_result.args, cmd_result.returncode, cmd_result.stdout)
def print_cmd_result_ex(cmd, returncode, stdout):
res = ''
res += 'ran command:\n' + str(cmd) + '\n'
res += 'It returned code ' + str(returncode) + '\n'
res += '\n'
res += 'stdout/stderr:\n'
res += stdout
return res
@app.route('/api/init_primary', methods=['GET', 'POST'])
def init_primary():
initdb_result = run("initdb -D pgdatadirs/primary --username=zenith --pwfile=pg-password.txt", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
if initdb_result.returncode != 0:
return print_cmd_result(initdb_result)
# Append archive_mode and archive_command and port to postgresql.conf
f=open("pgdatadirs/primary/postgresql.conf", "a+")
f.write("listen_addresses='*'\n")
f.write("archive_mode=on\n")
f.write("archive_command='zenith_push --archive-wal-path=%p --archive-wal-fname=%f'\n")
f.write("ssl=on\n")
f.close()
f=open("pgdatadirs/primary/pg_hba.conf", "a+")
f.write("# allow SSL connections with password from anywhere\n")
f.write("hostssl all all 0.0.0.0/0 md5\n")
f.write("hostssl all all ::0/0 md5\n")
f.close()
shutil.copyfile("server.crt", "pgdatadirs/primary/server.crt")
shutil.copyfile("server.key", "pgdatadirs/primary/server.key")
os.chmod("pgdatadirs/primary/server.key", 0o0600)
start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
start_rc = start_proc.wait()
start_stdout, start_stderr = start_proc.communicate()
responsestr = print_cmd_result(initdb_result) + '\n'
responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
return responsestr
@app.route('/api/zenith_push', methods=['GET', 'POST'])
def zenith_push():
# Stop the primary if it's running
stop_result = run(args=["pg_ctl", "stop", "-D", "pgdatadirs/primary"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
# Call zenith_push
push_result = run("zenith_push -D pgdatadirs/primary", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
# Restart the primary
start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
start_rc = start_proc.wait()
start_stdout, start_stderr = start_proc.communicate()
responsestr = print_cmd_result(stop_result) + '\n'
responsestr += print_cmd_result(push_result) + '\n'
responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout) + '\n'
return responsestr
@app.route('/api/create_standby', methods=['GET', 'POST'])
def create_standby():
walpos = request.form.get('walpos')
if not walpos:
return 'no walpos'
dirs = os.listdir("pgdatadirs")
last_port = 5432
for dirname in dirs:
standby_match = re.search('standby_([0-9]+)', dirname)
if standby_match:
port = int(standby_match.group(1))
if port > last_port:
last_port = port
standby_port = last_port + 1
standby_dir = "pgdatadirs/standby_" + str(standby_port)
# Call zenith_restore
restore_result = run(["zenith_restore", "--end=" + walpos, "-D", standby_dir], stdout=PIPE, stderr=STDOUT, encoding='latin1')
responsestr = print_cmd_result(restore_result)
if restore_result.returncode == 0:
# Append hot_standby and port to postgresql.conf
f=open(standby_dir + "/postgresql.conf", "a+")
f.write("hot_standby=on\n")
f.write("port=" + str(standby_port) + "\n")
f.close()
start_proc = Popen(args=["pg_ctl", "start", "-D", standby_dir, "-l", standby_dir + "/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
start_rc = start_proc.wait()
start_stdout, start_stderr = start_proc.communicate()
responsestr += '\n\n' + print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
return responsestr
@app.route('/api/destroy_server', methods=['GET', 'POST'])
def destroy_primary():
datadir = request.form.get('datadir')
# Check that the datadir parameter doesn't contain anything funny.
if not re.match("^[A-Za-z0-9_-]+$", datadir):
raise Exception('invalid datadir: ' + datadir)
# Stop the server if it's running
stop_result = run(args=["pg_ctl", "stop", "-m", "immediate", "-D", "pgdatadirs/" + datadir], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
shutil.rmtree('pgdatadirs/' + datadir, ignore_errors=True)
responsestr = print_cmd_result(stop_result) + '\n'
responsestr += 'Deleted datadir ' + datadir + '.\n'
return responsestr
@app.route('/api/restore_primary', methods=['GET', 'POST'])
def restore_primary():
# Call zenith_restore
restore_result = run(["zenith_restore", "-D", "pgdatadirs/primary"], stdout=PIPE, stderr=STDOUT, encoding='latin1')
responsestr = print_cmd_result(restore_result)
# Append restore_command to postgresql.conf, so that it can find the last raw WAL segments
f=open("pgdatadirs/primary/postgresql.conf", "a+")
f.write("listen_addresses='*'\n")
f.write("restore_command='zenith_restore --archive-wal-path=%p --archive-wal-fname=%f'\n")
f.write("ssl=on\n")
f.close()
if restore_result.returncode == 0:
start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
start_rc = start_proc.wait()
start_stdout, start_stderr = start_proc.communicate()
responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
return responsestr
@app.route('/api/slicedice', methods=['GET', 'POST'])
def run_slicedice():
result = run("zenith_slicedice", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
responsestr = print_cmd_result(result)
return responsestr
@app.route('/api/reset_demo', methods=['POST'])
def reset_all():
result = run("pkill -9 postgres", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
dirs = os.listdir("pgdatadirs")
for dirname in dirs:
shutil.rmtree('pgdatadirs/' + dirname)
for file in s3bucket.objects.all():
s3_client.delete_object(Bucket = BUCKET, Key = file.key)
responsestr = print_cmd_result(result) + '\n'
responsestr += '''
Deleted all Postgres datadirs.
Deleted all files in object storage bucket.
'''
return responsestr
if __name__ == '__main__':
app.run()


@@ -1,3 +0,0 @@
module.exports = {
presets: ["@babel/preset-env", "@babel/preset-react"],
};


@@ -1,67 +0,0 @@
Mock implementation of a management console.
This isn't very different from a "normal" PostgreSQL installation with
a base backup and WAL archive. The main user-visible difference is
that when you create a standby server, we don't restore the whole data
directory, but only the "non-relation" files. Relation files are
restored on demand, when they're accessed the first time. That makes
the "create standby" operation is very fast, but with some delay when
you connect and start running queries instead. Most visible if you
have a large database. (However, see note below about large databases)
Note: lots of things are broken/unsafe. Things will fail if a table is
larger than 1 GB. Or if there are more than 1000 files in the cloud
bucket.
How to use this demo:
1. If there are any leftovers from previous runs, reset by clicking
the RESET DEMO button. This kills and deletes all Postgres servers,
and empties the cloud storage bucket
2. Create primary server by clicking on the "Init primary" button
3. Push a base image of the primary to cloud storage, by clicking the
"push base image" button. (This takes about 30 seconds, be
patient)
4. Connect to primary with psql, and create a test table with a little data.
psql postgres -p5432 -U zenith -h<host>
create table mytable (i int4);
insert into mytable values (1);
select pg_switch_wal();
The Postgres password is the same as for the management console.
3. Now that there's a new WAL segment in the archive, we can "slice &
dice" it. Click on the "Slice & dice button".
4. Perform more updates on the primary, to generate more WAL.
insert into mytable values (2); select pg_switch_wal();
insert into mytable values (3); select pg_switch_wal();
insert into mytable values (4); select pg_switch_wal();
insert into mytable values (5); select pg_switch_wal();
5. Slice & Dice the WAL again
6. Now you can create read-only standby servers at any point in the
WAL. Type a WAL position in the text box (or use the slider), and
click "Create new standby". The first standby is created at port 5433,
the second at port 5434, and so forth.
7. Connect to the standby with "psql -p 5433". Note that it takes a
few seconds until the connection is established. That's because the
standby has to restore the basic system catalogs, like pg_database and
pg_authid from the backup. After connecting, you can do "\d" to list
tables, this will also take a few seconds, as more catalog tables are
restored from backup. Subsequent commands will be faster.
Run queries in the standby:
select * from mytable;
the result depends on the LSN that you picked when you created the server.


@@ -1,463 +0,0 @@
import React, { useState, useEffect } from 'react';
import ReactDOM from 'react-dom';
import Loader from "react-loader-spinner";
import { Router, Route, Link, IndexRoute, hashHistory, browserHistory } from 'react-router';
function ServerStatus(props) {
const datadir = props.server.datadir;
const status = props.server.status;
const port = props.server.port;
return (
<div>
<h2>{ datadir == 'primary' ? 'Primary' : datadir }</h2>
status: <div className='status'>{status}</div><br/>
to connect: <span className='shellcommand'>psql -h { window.location.hostname } -p { port } -U zenith postgres</span><br/>
</div>
);
}
function StandbyList(props) {
const bucketSummary = props.bucketSummary;
const standbys = props.standbys;
const maxwalpos = bucketSummary.maxwal ? walpos_to_int(bucketSummary.maxwal) : 0;
const [walposInput, setWalposInput] = useState({ src: 'text', value: '0/0'});
// find earliest base image
const minwalpos = bucketSummary.nonrelimages ? bucketSummary.nonrelimages.reduce((minpos, imgpos_str, index, array) => {
const imgpos = walpos_to_int(imgpos_str);
return (minpos == 0 || imgpos < minpos) ? imgpos : minpos;
}, 0) : 0;
const can_create_standby = minwalpos > 0 && maxwalpos > 0 && maxwalpos >= minwalpos;
var walpos_valid = true;
function create_standby() {
const formdata = new FormData();
formdata.append("walpos", walposStr);
props.startOperation('Creating new standby at ' + walposStr + '...',
fetch("/api/create_standby", { method: 'POST', body: formdata }));
}
function destroy_standby(datadir) {
const formdata = new FormData();
formdata.append("datadir", datadir);
props.startOperation('Destroying ' + datadir + '...',
fetch("/api/destroy_server", { method: 'POST', body: formdata }));
}
const handleSliderChange = (event) => {
setWalposInput({ src: 'slider', value: event.target.value });
}
const handleWalposChange = (event) => {
setWalposInput({ src: 'text', value: event.target.value });
}
var sliderValue;
var walposStr;
if (walposInput.src == 'text')
{
const walpos = walpos_to_int(walposInput.value);
if (walpos >= minwalpos && walpos <= maxwalpos)
walpos_valid = true;
else
walpos_valid = false;
sliderValue = Math.round((walpos - minwalpos) / (maxwalpos - minwalpos) * 100);
walposStr = walposInput.value;
}
else
{
const slider = walposInput.value;
const new_walpos = minwalpos + slider / 100 * (maxwalpos - minwalpos);
console.log('minwalpos: '+ minwalpos);
console.log('maxwalpos: '+ maxwalpos);
walposStr = int_to_walpos(Math.round(new_walpos));
walpos_valid = true;
console.log(walposStr);
}
var standbystatus = ''
if (standbys)
{
standbystatus =
<div>
{
standbys.length > 0 ?
standbys.map((server) =>
<>
<ServerStatus key={ 'status_' + server.datadir} server={server}/>
<button key={ 'destroy_' + server.datadir} onClick={e => destroy_standby(server.datadir)}>Destroy standby</button>
</>
) : "no standby servers"
}
</div>
}
return (
<div>
<h2>Standbys</h2>
<button onClick={create_standby} disabled={!can_create_standby || !walpos_valid}>Create new Standby</button> at LSN
<input type="text" id="walpos_input" value={ walposStr } onChange={handleWalposChange} disabled={!can_create_standby}/>
<input type="range" id="walpos_slider" min="0" max="100" steps="1" value={sliderValue} onChange={handleSliderChange} disabled={!can_create_standby}/>
<br/>
{ standbystatus }
</div>
);
}
function ServerList(props) {
const primary = props.serverStatus ? props.serverStatus.primary : null;
const standbys = props.serverStatus ? props.serverStatus.standbys : [];
const bucketSummary = props.bucketSummary;
var primarystatus = '';
function destroy_primary() {
const formdata = new FormData();
formdata.append("datadir", 'primary');
props.startOperation('Destroying primary...',
fetch("/api/destroy_server", { method: 'POST', body: formdata }));
}
function restore_primary() {
props.startOperation('Restoring primary...',
fetch("/api/restore_primary", { method: 'POST' }));
}
if (primary)
{
primarystatus =
<div>
<ServerStatus server={primary}/>
<button onClick={destroy_primary}>Destroy primary</button>
</div>
}
else
{
primarystatus =
<div>
no primary server<br/>
<button onClick={restore_primary}>Restore primary</button>
</div>
}
return (
<>
{ primarystatus }
<StandbyList standbys={standbys} startOperation={props.startOperation} bucketSummary={props.bucketSummary}/>
<p className="todo">
Should we list the WAL safekeeper nodes here? Or are they part of the Storage? Or not visible to users at all?
</p>
</>
);
}
function BucketSummary(props) {
const bucketSummary = props.bucketSummary;
const startOperation = props.startOperation;
function slicedice() {
startOperation('Slicing sequential WAL to per-relation WAL...',
fetch("/api/slicedice", { method: 'POST' }));
}
if (!bucketSummary.nonrelimages)
{
return <>loading...</>
}
return (
<div>
<div>Base images at following WAL positions:
<ul>
{bucketSummary.nonrelimages.map((img) => (
<li key={img}>{img}</li>
))}
</ul>
</div>
Sliced WAL is available up to { bucketSummary.maxwal }<br/>
Raw WAL is available up to { bucketSummary.maxseqwal }<br/>
<br/>
<button onClick={slicedice}>Slice & Dice WAL</button>
<p className="todo">
Currently, the slicing or "sharding" of the WAL needs to be triggered manually, by clicking the above button.
<br/>
TODO: make it a continuous process that runs in the WAL safekeepers, or in the Page Servers, or as a standalone service.
</p>
</div>
);
}
function ProgressIndicator()
{
return (
<div>
<Loader
type="Puff"
color="#00BFFF"
height={100}
width={100}
/>
</div>
)
}
function walpos_to_int(walpos)
{
const [hi, lo] = walpos.split('/');
return parseInt(hi, 16) + parseInt(lo, 16);
}
function int_to_walpos(x)
{
console.log('converting ' + x);
return (Math.floor((x / 0x100000000)).toString(16) + '/' + (x % 0x100000000).toString(16)).toUpperCase();
}
function OperationStatus(props) {
const lastOperation = props.lastOperation;
const inProgress = props.inProgress;
const operationResult = props.operationResult;
if (lastOperation)
{
return (
<div><h2>Last operation:</h2>
<div>{lastOperation} { (!inProgress && lastOperation) ? 'done!' : '' }</div>
<div className='result'>
{inProgress ? <ProgressIndicator/> : <pre>{operationResult}</pre>}
</div>
</div>
);
}
else
return '';
}
function ActionButtons(props) {
const startOperation = props.startOperation;
const bucketSummary = props.bucketSummary;
function reset_demo() {
startOperation('resetting everything...',
fetch("/api/reset_demo", { method: 'POST' }));
}
function init_primary() {
startOperation('Initializing new primary...',
fetch("/api/init_primary", { method: 'POST' }));
}
function zenith_push() {
startOperation('Pushing new base image...',
fetch("/api/zenith_push", { method: 'POST' }));
}
return (
<div>
<p className="todo">
RESET DEMO deletes everything in the storage bucket, and stops and destroys all servers. This resets the whole demo environment to the initial state.
</p>
<button onClick={reset_demo}>RESET DEMO</button>
<p className="todo">
Init Primary runs initdb to create a new primary server. Click this after Resetting the demo.
</p>
<button onClick={init_primary}>Init primary</button>
<p className="todo">
Push Base Image stops the primary, copies the current state of the primary to the storage bucket as a new base backup, and restarts the primary.
<br/>
TODO: This should be handled by a continuous background process, probably running in the storage nodes. And without having to shut down the cluster, of course.
</p>
<button onClick={zenith_push}>Push base image</button>
</div>
);
}
function Sidenav(props)
{
const toPage = (page) => (event) => {
//event.preventDefault()
props.switchPage(page);
};
return (
<div>
<h3 className="sidenav-item">Menu</h3>
<a href="#servers" onClick={toPage('servers')} className="sidenav-item">Servers</a>
<a href="#storage" onClick={toPage('storage')} className="sidenav-item">Storage</a>
<a href="#snapshots" onClick={toPage('snapshots')} className="sidenav-item">Snapshots</a>
<a href="#demo" onClick={toPage('demo')} className="sidenav-item">Demo</a>
<a href="#import" onClick={toPage('import')} className="sidenav-item">Import / Export</a>
<a href="#jobs" onClick={toPage('jobs')} className="sidenav-item">Jobs</a>
</div>
);
}
function App()
{
const [page, setPage] = useState('servers');
const [serverStatus, setServerStatus] = useState({});
const [bucketSummary, setBucketSummary] = useState({});
const [lastOperation, setLastOperation] = useState('');
const [inProgress, setInProgress] = useState('');
const [operationResult, setOperationResult] = useState('');
useEffect(() => {
reloadStatus();
}, []);
function startOperation(operation, promise)
{
promise.then(result => result.text()).then(resultText => {
operationFinished(resultText);
});
setLastOperation(operation);
setInProgress(true);
setOperationResult('');
}
function operationFinished(result)
{
setInProgress(false);
setOperationResult(result);
reloadStatus();
}
function clearOperation()
{
setLastOperation('')
setInProgress('');
setOperationResult('');
console.log("cleared");
}
function reloadStatus()
{
fetch('/api/server_status').then(res => res.json()).then(data => {
setServerStatus(data);
});
fetch('/api/bucket_summary').then(res => res.json()).then(data => {
setBucketSummary(data);
});
}
const content = () => {
console.log(page);
if (page === 'servers') {
return (
<>
<h1>Server status</h1>
<ServerList startOperation={ startOperation }
serverStatus={ serverStatus }
bucketSummary={ bucketSummary }/>
</>
);
} else if (page === 'storage') {
return (
<>
<h1>Storage bucket status</h1>
<BucketSummary startOperation={ startOperation }
bucketSummary={ bucketSummary }/>
</>
);
} else if (page === 'snapshots') {
return (
<>
<h1>Snapshots</h1>
<p className="todo">
In Zenith, snapshots are just specific points (LSNs) in the WAL history, with a label. A snapshot prevents garbage collecting old data that's still needed to reconstruct the database at that LSN.
</p>
<p className="todo">
TODO:
<ul>
<li>List existing snapshots</li>
<li>Create new snapshot manually, from current state or from a given LSN</li>
<li>Drill into the WAL stream to see what has happened. Provide tools for e.g. finding the point where a table was dropped</li>
<li>Create snapshots automatically based on events in the WAL, like if you call pg_create_restore_point() in the primary</li>
<li>Launch new reader instance at a snapshot</li>
<li>Export snapshot</li>
<li>Rollback cluster to a snapshot</li>
</ul>
</p>
</>
);
} else if (page === 'demo') {
return (
<>
<h1>Misc actions</h1>
<ActionButtons startOperation={ startOperation }
bucketSummary={ bucketSummary }/>
</>
);
} else if (page === 'import') {
return (
<>
<h1>Import & Export tools</h1>
<p className="TODO">TODO:
<ul>
<li>Initialize database from existing backup (pg_basebackup, WAL-G, pgbackrest)</li>
<li>Initialize from a pg_dump or other SQL script</li>
<li>Launch batch job to import data files from S3</li>
<li>Launch batch job to export database with pg_dump to S3</li>
</ul>
These jobs can be run against reader processing nodes. We can even
spawn a new reader node dedicated to a job, and destroy it when the job is done.
</p>
</>
);
} else if (page === 'jobs') {
return (
<>
<h1>Batch jobs</h1>
<p className="TODO">TODO:
<ul>
<li>List running jobs launched from Import & Export tools</li>
<li>List other batch jobs launched by the user</li>
<li>Launch new batch jobs</li>
</ul>
</p>
</>
);
}
}
function switchPage(page)
{
console.log("topage " + page);
setPage(page)
clearOperation();
};
return (
<div className="row">
<div className="sidenav">
<Sidenav switchPage={switchPage} className="column"/>
</div>
<div className="column">
<div>
{ content() }
</div>
<OperationStatus lastOperation={ lastOperation }
inProgress = { inProgress }
operationResult = { operationResult }/>
</div>
</div>
);
}
ReactDOM.render(<App/>, document.getElementById('reactApp'));


@@ -1,105 +0,0 @@
import React, { useState, useEffect } from 'react';
import ReactDOM from 'react-dom';
import Loader from "react-loader-spinner";
function walpos_to_int(walpos)
{
const [hi, lo] = walpos.split('/');
// 'hi' is the upper 32 bits of the LSN, so shift it up before adding the low half.
return parseInt(hi, 16) * 0x100000000 + parseInt(lo, 16);
}
const palette = [
"#003f5c",
"#2f4b7c",
"#665191",
"#a05195",
"#d45087",
"#f95d6a",
"#ff7c43",
"#ffa600"];
function WalRecord(props)
{
const firstwalpos = props.firstwalpos;
const endwalpos = props.endwalpos;
const record = props.record;
const index = props.index;
const xidmap = props.xidmap;
const startpos = walpos_to_int(record.start)
const endpos = walpos_to_int(record.end)
const scale = 1000 / (16*1024*1024)
const startx = (startpos - firstwalpos) * scale;
const endx = (endpos - firstwalpos) * scale;
const xidindex = xidmap[record.xid];
const color = palette[index % palette.length];
const y = 5 + (xidindex) * 20 + (index % 2) * 2;
return (
<line x1={ startx } y1={y} x2={endx} y2={y} stroke={ color } strokeWidth="5">
<title>
start: { record.start } end: { record.end }
</title>
</line>
)
}
function WalFile(props)
{
const walContent = props.walContent;
const firstwalpos = props.firstwalpos;
const xidmap = props.xidmap;
return <svg width="1000" height="200">
{
walContent.records ?
walContent.records.map((record, index) =>
<WalRecord key={record.start} firstwalpos={firstwalpos} record={record} index={index} xidmap={xidmap}/>
) : "no records"
}
</svg>
}
function WalDumpApp()
{
const [walContent, setWalContent] = useState({});
const filename = '00000001000000000000000C';
useEffect(() => {
fetch('/fetch_wal?filename='+filename).then(res => res.json()).then(data => {
setWalContent(data);
});
}, []);
var firstwalpos = 0;
var endwalpos = 0;
var numxids = 0;
var xidmap = {};
if (walContent.records && walContent.records.length > 0)
{
firstwalpos = walpos_to_int(walContent.records[0].start);
endwalpos = firstwalpos + 16*1024*1024;
walContent.records.forEach(rec => {
if (!xidmap[rec.xid])
{
xidmap[rec.xid] = ++numxids;
}
});
}
return (
<>
<h2>{filename}</h2>
<WalFile walContent={walContent} firstwalpos={firstwalpos} endwalpos={endwalpos} xidmap={xidmap}/>
</>
);
}
console.log('hey there');
ReactDOM.render(<WalDumpApp/>, document.getElementById('waldump'));


@@ -1,9 +0,0 @@
#!/bin/bash
#
# NOTE: You must set the following environment variables before running this:
# BASIC_AUTH_PASSWORD - basic http auth password
# S3_ACCESSKEY
# S3_SECRET
S3_ENDPOINT=https://storage.googleapis.com S3_BUCKET=zenith-testbucket PATH=/home/heikki/pgsql-install/bin:$PATH flask run --host=0.0.0.0


@@ -1,8 +0,0 @@
#!/bin/bash
#
# NOTE: You should set the BASIC_AUTH_PASSWORD environment variable before calling this script.
# Launch S3 server
(cd ms3 && python3 -m ms3.app --listen-address=localhost) &
FLASK_ENV=development S3_REGION=auto S3_ENDPOINT=http://localhost:9009 S3_BUCKET=zenith-testbucket PATH=/home/heikki/pgsql.fsmfork/bin:$PATH flask run --host=0.0.0.0

File diff suppressed because it is too large


@@ -1,27 +0,0 @@
{
"name": "starter-kit",
"version": "1.1.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"build": "webpack",
"start": "python app.py"
},
"author": "",
"license": "ISC",
"dependencies": {
"react": "^17.0.1",
"react-dom": "^17.0.1",
"react-loader-spinner": "^4.0.0",
"react-router": "^5.2.0"
},
"devDependencies": {
"@babel/core": "^7.13.1",
"@babel/preset-env": "^7.13.5",
"@babel/preset-react": "^7.12.13",
"babel-loader": "^8.2.2",
"webpack": "^5.24.2",
"webpack-cli": "^4.5.0"
}
}


@@ -1,58 +0,0 @@
<head>
<style>
.status {
font-family: monospace;
background-color: lightgrey;
}
.shellcommand {
font-family: monospace;
background-color: lightgrey;
}
.result {
font-family: monospace;
background-color: lightgrey;
padding: 10px;
}
.todo {font-style: italic;}
h1 {color: blue;}
.column {
float: left;
width: 50%;
padding: 10px;
}
/* Clear floats after the columns */
.row:after {
content: "";
display: table;
clear: both;
}
.sidenav {
float: left;
width: 150px;
padding: 10px;
background-color: pink;
}
.sidenav-item {
padding:10px 0px;
border:none;
display:block;
}
</style>
</head>
<body>
<div id="reactApp"></div>
<!-- Attach React components -->
<script type="text/javascript" src="{{ url_for('static', filename='app_bundle.js') }}"></script>
</body>


@@ -1,46 +0,0 @@
<head>
<style>
.status {
font-family: monospace;
background-color: lightgrey;
}
.shellcommand {
font-family: monospace;
background-color: lightgrey;
}
.result {
font-family: monospace;
background-color: lightgrey;
padding: 10px;
}
h1 {color: blue;}
p {color: red;}
* {
box-sizing: border-box;
}
.row {
display: flex;
}
/* Create two columns that sit next to each other */
.column1 {
flex: 30%;
padding: 10px;
}
.column2 {
flex: 70%;
padding: 10px;
}
</style>
</head>
<body>
<div id="waldump"></div>
<!-- Attach React components -->
<script type="text/javascript" src="{{ url_for('static', filename='waldump_bundle.js') }}"></script>
</body>


@@ -1,25 +0,0 @@
#
# This file contains work-in-progress code to visualize WAL contents.
#
# This is the API endpoint that calls a 'zenith_wal_to_json' executable,
# which is a hacked version of pg_waldump that prints information about the
# records in JSON format. The code in js/waldump.js displays it.
#
import os
import re
from subprocess import PIPE, STDOUT, run, Popen
def fetch_wal(request, s3bucket):
filename = request.args.get('filename')
if not re.match("^[A-Za-z0-9_]+$", filename):
raise Exception('invalid WAL filename: ' + filename)
# FIXME: this downloads the WAL file to current dir. Use a temp dir? Pipe?
s3bucket.download_file('walarchive/' + filename, filename)
result = run("zenith_wal_to_json " + filename, stdout=PIPE, universal_newlines=True, shell=True)
os.unlink(filename)
return result.stdout


@@ -1,27 +0,0 @@
var webpack = require('webpack');
module.exports = {
entry: {
app: './js/app.js',
waldump: './js/waldump.js'
},
output: {
filename: "[name]_bundle.js",
path: __dirname + '/static'
},
module: {
rules: [
{
test: /\.js?$/,
exclude: /node_modules/,
use: {
loader: 'babel-loader',
options: {
presets: ['@babel/preset-env']
}
}
}
]
},
plugins: [
]
};


@@ -1,179 +0,0 @@
#zenith.py
import click
import testgres
import os
from testgres import PostgresNode
from tabulate import tabulate
zenith_base_dir = '/home/anastasia/zenith/basedir'
@click.group()
def main():
"""Run the Zenith CLI."""
@click.group()
def pg():
"""Db operations
NOTE: 'database' here means one postgresql node
"""
@click.command(name='create')
@click.option('--name', required=True)
@click.option('-s', '--storage-name', help='Name of the storage',
default='zenith-local',
show_default=True)
@click.option('--snapshot', help='init from the snapshot. Snap is a name or URL')
@click.option('--no-start', is_flag=True, help='Do not start created node',
default=False, show_default=True)
def pg_create(name, storage_name, snapshot, no_start):
"""Initialize the database"""
base_dir = os.path.join(zenith_base_dir, 'pg', name)
node = testgres.get_new_node(name, base_dir=base_dir)
# TODO skip init, instead of that link node with storage or upload it from snapshot
node.init()
if not no_start:
node.start()
@click.command(name='start')
@click.option('--name', required=True)
@click.option('--snapshot')
@click.option('--read-only', is_flag=True, help='Start read-only node', show_default=True)
def pg_start(name, snapshot, read_only):
"""Start the database"""
base_dir = os.path.join(zenith_base_dir, 'pg', name)
node = testgres.get_new_node(name, base_dir=base_dir)
# TODO pass snapshot as a parameter
node.start()
@click.command(name='stop')
@click.option('--name', required=True)
def pg_stop(name):
"""Stop the database"""
base_dir = os.path.join(zenith_base_dir, 'pg', name)
node = testgres.get_new_node(name, base_dir=base_dir)
node.stop()
@click.command(name='destroy')
@click.option('--name', required=True)
def pg_destroy(name):
"""Drop the database"""
base_dir = os.path.join(zenith_base_dir, 'pg', name)
node = testgres.get_new_node(name, base_dir=base_dir)
node.cleanup()
@click.command(name='list')
def pg_list():
"""List existing databases"""
dirs = os.listdir(os.path.join(zenith_base_dir, 'pg'))
path={}
status={}
data=[]
for dirname in dirs:
path[dirname] = os.path.join(zenith_base_dir, 'pg', dirname)
fname = os.path.join( path[dirname], 'data/postmaster.pid')
try:
with open(fname, 'r') as f:
status[dirname] = f.readlines()[-1]
except OSError:
status[dirname] = 'inactive'
data.append([dirname , status[dirname], path[dirname]])
print(tabulate(data, headers=['Name', 'Status', 'Path']))
pg.add_command(pg_create)
pg.add_command(pg_destroy)
pg.add_command(pg_start)
pg.add_command(pg_stop)
pg.add_command(pg_list)
@click.group()
def storage():
"""Storage operations"""
@click.command(name='attach')
@click.option('--name')
def storage_attach(name):
"""Attach the storage"""
@click.command(name='detach')
@click.option('--name')
@click.option('--force', is_flag=True, show_default=True)
def storage_detach(name):
"""Detach the storage"""
@click.command(name='list')
def storage_list():
"""List existing storages"""
storage.add_command(storage_attach)
storage.add_command(storage_detach)
storage.add_command(storage_list)
@click.group()
def snapshot():
"""Snapshot operations"""
@click.command(name='create')
def snapshot_create():
"""Create new snapshot"""
@click.command(name='destroy')
def snapshot_destroy():
"""Destroy the snapshot"""
@click.command(name='pull')
def snapshot_pull():
"""Pull remote snapshot"""
@click.command(name='push')
def snapshot_push():
"""Push snapshot to remote"""
@click.command(name='import')
def snapshot_import():
"""Convert given format to zenith snapshot"""
@click.command(name='export')
def snapshot_export():
"""Convert zenith snapshot to PostgreSQL compatible format"""
snapshot.add_command(snapshot_create)
snapshot.add_command(snapshot_destroy)
snapshot.add_command(snapshot_pull)
snapshot.add_command(snapshot_push)
snapshot.add_command(snapshot_import)
snapshot.add_command(snapshot_export)
@click.group()
def wal():
"""WAL operations"""
@click.command(name='list')
def wallist():
"""List WAL files"""
wal.add_command(wallist)
@click.command()
def console():
"""Open web console"""
main.add_command(pg)
main.add_command(storage)
main.add_command(snapshot)
main.add_command(wal)
main.add_command(console)
if __name__ == '__main__':
main()


@@ -0,0 +1,25 @@
version: "3"
services:
prometheus:
container_name: prometheus
image: prom/prometheus:latest
volumes:
- ./prometheus.yaml:/etc/prometheus/prometheus.yml
# ports:
# - "9090:9090"
# TODO: find a proper portable solution
network_mode: "host"
grafana:
image: grafana/grafana:latest
volumes:
- ./grafana.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
environment:
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
- GF_AUTH_DISABLE_LOGIN_FORM=true
# ports:
# - "3000:3000"
# TODO: find a proper portable solution
network_mode: "host"

12
monitoring/grafana.yaml Normal file

@@ -0,0 +1,12 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
url: http://localhost:9090
basicAuth: false
isDefault: false
version: 1
editable: false


@@ -0,0 +1,5 @@
scrape_configs:
- job_name: 'default'
scrape_interval: 10s
static_configs:
- targets: ['localhost:9898']

2373
pageserver/Cargo.lock generated

File diff suppressed because it is too large


@@ -1,42 +1,56 @@
[package]
name = "pageserver"
version = "0.1.0"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
edition = "2021"
[dependencies]
bookfile = { git = "https://github.com/zenithdb/bookfile.git", branch="generic-readext" }
chrono = "0.4.19"
crossbeam-channel = "0.5.0"
rand = "0.8.3"
regex = "1.4.5"
bytes = "1.0.1"
bytes = { version = "1.0.1", features = ['serde'] }
byteorder = "1.4.3"
futures = "0.3.13"
hyper = "0.14"
lazy_static = "1.4.0"
slog-stdlog = "4.1.0"
slog-async = "2.6.0"
slog-scope = "4.4.0"
slog-term = "2.8.0"
slog = "2.7.0"
log = "0.4.14"
clap = "2.33.0"
termion = "1.5.6"
tui = "0.14.0"
clap = "3.0"
daemonize = "0.4.1"
rust-s3 = { git = "https://github.com/hlinnaka/rust-s3", rev="7f15a24ec7daa0a5d9516da706212745f9042818", features = ["no-verify-ssl"] }
tokio = { version = "1.3.0", features = ["full"] }
tokio-stream = { version = "0.1.4" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
anyhow = "1.0"
tokio = { version = "1.11", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-stream = "0.1.8"
anyhow = { version = "1.0", features = ["backtrace"] }
crc32c = "0.6.0"
walkdir = "2"
thiserror = "1.0"
hex = "0.4.3"
hex = { version = "0.4.3", features = ["serde"] }
tar = "0.4.33"
humantime = "2.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
toml_edit = { version = "0.13", features = ["easy"] }
scopeguard = "1.1.0"
async-trait = "0.1"
const_format = "0.2.21"
tracing = "0.1.27"
tracing-futures = "0.2"
signal-hook = "0.3.10"
url = "2"
nix = "0.23"
once_cell = "1.8.0"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
async-compression = {version = "0.3", features = ["zstd", "tokio"]}
postgres_ffi = { path = "../postgres_ffi" }
zenith_metrics = { path = "../zenith_metrics" }
zenith_utils = { path = "../zenith_utils" }
workspace_hack = { path = "../workspace_hack" }
[dev-dependencies]
hex-literal = "0.3"
tempfile = "3.2"


@@ -1,179 +0,0 @@
Page Server
===========
How to test
-----------
1. Compile and install Postgres from this repository (there are
modifications, so vanilla Postgres won't do)
./configure --prefix=/home/heikki/zenith-install
2. Compile the page server
cd pageserver
cargo build
3. Create another "dummy" cluster that will be used by the page server when it applies
the WAL records. (shouldn't really need this, getting rid of it is a TODO):
/home/heikki/zenith-install/bin/initdb -D /data/zenith-dummy
4. Initialize and start a new postgres cluster
/home/heikki/zenith-install/bin/initdb -D /data/zenith-test-db --username=postgres
/home/heikki/zenith-install/bin/postgres -D /data/zenith-test-db
5. In another terminal, start the page server.
PGDATA=/data/zenith-dummy PATH=/home/heikki/zenith-install/bin:$PATH ./target/debug/pageserver
It should connect to the postgres instance using streaming replication, and print something
like this:
$ PGDATA=/data/zenith-dummy PATH=/home/heikki/zenith-install/bin:$PATH ./target/debug/pageserver
Starting WAL receiver
connecting...
Starting page server on 127.0.0.1:5430
connected!
page cache is empty
6. You can now open another terminal and issue DDL commands. Generated WAL records will
be streamed to the page server, and attached to the blocks they apply to in its
page cache
$ psql postgres -U postgres
psql (14devel)
Type "help" for help.
postgres=# create table mydata (i int4);
CREATE TABLE
postgres=# insert into mydata select g from generate_series(1,100) g;
INSERT 0 100
postgres=#
7. The GetPage@LSN interface to the compute nodes isn't working yet, but to simulate
that, the page server generates a test GetPage@LSN call every 5 seconds on a random
block that's in the page cache. In a few seconds, you should see output from that:
testing GetPage@LSN for block 0
WAL record at LSN 23584576 initializes the page
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167DF40
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167DF80
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167DFC0
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167E018
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167E058
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167E098
2021-03-19 11:03:13.791 EET [11439] LOG: applied WAL record at 0/167E0D8
2021-03-19 11:03:13.792 EET [11439] LOG: applied WAL record at 0/167E118
2021-03-19 11:03:13.792 EET [11439] LOG: applied WAL record at 0/167E158
2021-03-19 11:03:13.792 EET [11439] LOG: applied WAL record at 0/167E198
applied 10 WAL records to produce page image at LSN 18446744073709547246
Architecture
============
The Page Server is responsible for all operations on a number of
"chunks" of relation data. A chunk corresponds to a PostgreSQL
relation segment (i.e. one max. 1 GB file in the data directory), but
it holds all the different versions of every page in the segment that
are still needed by the system.
Determining which chunk each Page Server holds is handled elsewhere. (TODO:
currently, there is only one Page Server which holds all chunks)
The Page Server has a few different duties:
- Respond to GetPage@LSN requests from the Compute Nodes
- Receive WAL from WAL safekeeper
- Replay WAL that's applicable to the chunks that the Page Server maintains
- Backup to S3
The Page Server consists of multiple threads that operate on a shared
cache of page versions:
| WAL
V
+--------------+
| |
| WAL receiver |
| |
+--------------+
+----+
+---------+ .......... | |
| | . . | |
GetPage@LSN | | . backup . -------> | S3 |
-------------> | Page | page cache . . | |
| Service | .......... | |
page | | +----+
<------------- | |
+---------+
...................................
. .
. Garbage Collection / Compaction .
...................................
Legend:
+--+
| | A thread or multi-threaded service
+--+
....
. . Component that we will need, but doesn't exist at the moment. A TODO.
....
---> Data flow
<---
Page Service
------------
The Page Service listens for GetPage@LSN requests from the Compute Nodes,
and responds with pages from the page cache.
WAL Receiver
------------
The WAL receiver connects to the external WAL safekeeping service (or
directly to the primary) using PostgreSQL physical streaming
replication, and continuously receives WAL. It decodes the WAL records,
and stores them to the page cache.
Page Cache
----------
The Page Cache is a data structure, to hold all the different page versions.
It is accessed by all the other threads, to perform their duties.
Currently, the page cache is implemented fully in-memory. TODO: Store it
on disk. Define a file format.
TODO: Garbage Collection / Compaction
-------------------------------------
Periodically, the Garbage Collection / Compaction thread runs
and applies pending WAL records, and removes old page versions that
are no longer needed.
TODO: Backup service
--------------------
The backup service is responsible for periodically pushing the chunks to S3.
TODO: How/when do we restore from S3? Whenever we get a GetPage@LSN request for
a chunk we don't currently have? Or when an external Control Plane tells us?

167
pageserver/README.md Normal file

@@ -0,0 +1,167 @@
## Page server architecture
The Page Server has a few different duties:
- Respond to GetPage@LSN requests from the Compute Nodes
- Receive WAL from WAL safekeeper
- Replay WAL that's applicable to the chunks that the Page Server maintains
- Backup to S3
S3 is the main fault-tolerant storage of all data, as there are no Page Server
replicas. We use a separate fault-tolerant WAL service to reduce latency. It
keeps track of WAL records which are not synced to S3 yet.
The Page Server consists of multiple threads that operate on a shared
repository of page versions:
| WAL
V
+--------------+
| |
| WAL receiver |
| |
+--------------+
+----+
+---------+ .......... | |
| | . . | |
GetPage@LSN | | . backup . -------> | S3 |
-------------> | Page | repository . . | |
| Service | .......... | |
page | | +----+
<------------- | |
+---------+ +--------------------+
| Checkpointing / |
| Garbage collection |
+--------------------+
Legend:
+--+
| | A thread or multi-threaded service
+--+
....
. . Component in its early development phase.
....
---> Data flow
<---
Page Service
------------
The Page Service listens for GetPage@LSN requests from the Compute Nodes,
and responds with pages from the repository.
WAL Receiver
------------
The WAL receiver connects to the external WAL safekeeping service (or
directly to the primary) using PostgreSQL physical streaming
replication, and continuously receives WAL. It decodes the WAL records,
and stores them to the repository.
Repository
----------
The repository stores all the page versions, or WAL records needed to
reconstruct them. Each tenant has a separate Repository, which is
stored in the .zenith/tenants/<tenantid> directory.
Repository is an abstract trait, defined in `repository.rs`. It is
implemented by the LayeredRepository object in
`layered_repository.rs`. There is only that one implementation of the
Repository trait, but it's still a useful abstraction that keeps the
interface for the low-level storage functionality clean. The layered
storage format is described in layered_repository/README.md.
Each repository consists of multiple Timelines. Timeline is a
workhorse that accepts page changes from the WAL, and serves
get_page_at_lsn() and get_rel_size() requests. Note: this has nothing
to do with PostgreSQL WAL timeline. The term "timeline" is mostly
interchangeable with "branch", there is a one-to-one mapping from
branch to timeline. A timeline has a unique ID within the tenant,
represented as a 16-byte hex string that never changes, whereas a
branch is a user-given name for a timeline.
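
To make the split concrete, here is a rough sketch of how a GetPage@LSN request flows through this abstraction. The types and signatures below are simplified stand-ins invented for illustration (the real trait is keyed by relish tags and is looked up through the tenant manager), but the `get_page_at_lsn`/`get_rel_size` calls mirror the ones named above.

```rust
// Illustrative sketch only; the real definitions live in repository.rs.
use std::collections::HashMap;

type Lsn = u64;
type TimelineId = [u8; 16];
type Page = Vec<u8>; // a reconstructed 8 KB page image

trait Timeline {
    /// Reconstruct the page version that was current at `lsn`.
    fn get_page_at_lsn(&self, rel: u32, blknum: u32, lsn: Lsn) -> anyhow::Result<Page>;
    /// Relation size, in blocks, as of `lsn`.
    fn get_rel_size(&self, rel: u32, lsn: Lsn) -> anyhow::Result<u32>;
}

/// A repository owns all timelines of one tenant.
struct Repository {
    timelines: HashMap<TimelineId, Box<dyn Timeline>>,
}

/// Roughly how the page service answers a GetPage@LSN request.
fn handle_get_page(
    repo: &Repository,
    timeline_id: TimelineId,
    rel: u32,
    blknum: u32,
    lsn: Lsn,
) -> anyhow::Result<Page> {
    let timeline = repo
        .timelines
        .get(&timeline_id)
        .ok_or_else(|| anyhow::anyhow!("unknown timeline"))?;
    timeline.get_page_at_lsn(rel, blknum, lsn)
}
```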
Each repository also has a WAL redo manager associated with it, see
`walredo.rs`. The WAL redo manager is used to replay PostgreSQL WAL
records, whenever we need to reconstruct a page version from WAL to
satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
for a page. The WAL redo manager uses a Postgres process running in
special zenith wal-redo mode to do the actual WAL redo, and
communicates with the process using a pipe.
Checkpointing / Garbage Collection
----------------------------------
Periodically, the checkpointer thread wakes up and performs housekeeping
duties on the repository. It has two duties:
### Checkpointing
Flush WAL that has accumulated in memory to disk, so that the old WAL
can be truncated away in the WAL safekeepers, and to free up memory
for receiving new WAL. This process is called "checkpointing". It's
similar to checkpointing in PostgreSQL or other DBMSs, but in the page
server, checkpointing happens on a per-segment basis.
### Garbage collection
Remove old on-disk layer files that are no longer needed according to the
PITR retention policy
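
As a mental model only, the housekeeping thread can be pictured as the loop below. The `checkpoint` and `gc` method names, the error handling and the fixed 10 second interval are made up for illustration; the real thread uses the intervals from the pageserver configuration.

```rust
// Illustrative sketch of the periodic checkpointing / GC loop; not the real code.
use std::time::Duration;

trait HousekeptRepository {
    /// Flush WAL accumulated in memory into on-disk layer files ("checkpointing").
    fn checkpoint(&self) -> anyhow::Result<()>;
    /// Remove layer files that fall outside the PITR retention horizon.
    fn gc(&self, retention: Duration) -> anyhow::Result<()>;
}

fn checkpointer_loop(tenants: &[Box<dyn HousekeptRepository>], retention: Duration) {
    loop {
        for repo in tenants {
            if let Err(err) = repo.checkpoint() {
                eprintln!("checkpoint failed: {:#}", err);
            }
            if let Err(err) = repo.gc(retention) {
                eprintln!("gc failed: {:#}", err);
            }
        }
        std::thread::sleep(Duration::from_secs(10));
    }
}
```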
### Backup service
The backup service is responsible for storing pageserver recovery data externally.
Currently, the pageserver stores its files in a filesystem directory it is pointed to.
That working directory can be ephemeral, for example when the pageserver runs as a k8s pod with no persistent volumes attached.
Therefore, the server interacts with external, more reliable storage to back up and restore its state.
The storage support code is extensible and can handle arbitrary backends as long as they implement a certain Rust trait.
The following implementations are present:
* local filesystem, used mainly in tests
* AWS S3, used in production
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and the corresponding Rust file docs; parameter documentation can be found at [settings docs](../docs/settings.md).
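
As a sketch of what such a backend abstraction might look like, consider the trait below. The names are invented for illustration, and the actual trait (under `src/remote_storage/`) is async and differs in detail, but the idea is the same: any backend that can upload, download and list files can serve as a backup target.

```rust
// Illustrative sketch only; see src/remote_storage/ for the real trait.
use std::path::{Path, PathBuf};

trait RemoteStorage {
    fn upload(&self, local: &Path, remote: &str) -> anyhow::Result<()>;
    fn download(&self, remote: &str, local: &Path) -> anyhow::Result<()>;
    fn list(&self, prefix: &str) -> anyhow::Result<Vec<String>>;
}

/// Local-filesystem backend, the kind of thing the tests use.
struct LocalFs {
    root: PathBuf,
}

impl RemoteStorage for LocalFs {
    fn upload(&self, local: &Path, remote: &str) -> anyhow::Result<()> {
        let dest = self.root.join(remote);
        if let Some(parent) = dest.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::copy(local, &dest)?;
        Ok(())
    }

    fn download(&self, remote: &str, local: &Path) -> anyhow::Result<()> {
        std::fs::copy(self.root.join(remote), local)?;
        Ok(())
    }

    fn list(&self, prefix: &str) -> anyhow::Result<Vec<String>> {
        let mut names = Vec::new();
        for entry in std::fs::read_dir(self.root.join(prefix))? {
            names.push(entry?.file_name().to_string_lossy().into_owned());
        }
        Ok(names)
    }
}
```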
The backup service is disabled by default and can be enabled to interact with a single remote storage.
CLI examples:
* Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
* AWS S3 : `${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/',access_key_id='SOMEKEYAAAAASADSAH*#',secret_access_key='SOMEsEcReTsd292v'}"`
For Amazon AWS S3, the key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS settings page for the user. Also note that bucket names do not contain any protocol prefix when used on AWS.
For local S3 installations, refer to their documentation for the name format and credentials.
As with other pageserver settings, a TOML config file can be used to configure either of the storages as a backup target.
Required sections are:
```toml
[remote_storage]
local_path = '/Users/someonetoignore/Downloads/tmp_dir/'
```
or
```toml
[remote_storage]
bucket_name = 'some-sample-bucket'
bucket_region = 'eu-north-1'
prefix_in_bucket = '/test_prefix/'
access_key_id = 'SOMEKEYAAAAASADSAH*#'
secret_access_key = 'SOMEsEcReTsd292v'
```
Also, the `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` environment variables can be used to specify the credentials instead of any of the ways above.
TODO: Sharding
--------------------
We should be able to run multiple Page Servers that handle sharded data.


@@ -1,41 +0,0 @@
//
// Triggers postgres build if there is no postgres binary present at
// 'REPO_ROOT/tmp_install/bin/postgres'.
//
// I can see a lot of disadvantages with such automation, and the main
// advantage here is the ability to build everything and run integration tests
// in a bare repo by running 'cargo test'.
//
// We can detect whether it is a debug or release build and run
// corresponding pg build. But it seems like an overkill for now.
//
// Problem #1 -- language server in my editor likes calling 'cargo build'
// by itself. So if I delete the tmp_install directory it would magically reappear
// after some time. During this compilation 'cargo build' may whine about
// "waiting for file lock on build directory".
//
// Problem #2 -- cargo build would run this only if something is changed in
// the crate.
//
// And generally speaking postgres is not a build dependency for the pageserver,
// just for integration tests. So let's not mix that. I'll leave this file in
// place for some time just in case if anybody would start doing the same.
//
// use std::path::Path;
// use std::process::{Command};
fn main() {
// // build postgres if it is not built yet
// if !Path::new("../tmp_install/bin/postgres").exists() {
// let make_res = Command::new("make")
// .arg("postgres")
// .env_clear()
// .status()
// .expect("failed to execute 'make postgres'");
// if !make_res.success() {
// panic!("postgres build failed");
// }
// }
}


@@ -1,62 +0,0 @@
#!/bin/sh
#
# Set up a simple Compute Node + Page Server combination locally.
#
# NOTE: This doesn't clean up between invocations. You'll need to manually:
#
# - Kill any previous 'postgres' and 'pageserver' processes
# - Clear the S3 bucket
# - Remove the 'zenith-pgdata' directory
set -e
# Set up some config.
#
# CHANGE THESE ACCORDING TO YOUR S3 INSTALLATION
export S3_REGION=auto
export S3_ENDPOINT=https://localhost:9000
export S3_ACCESSKEY=minioadmin
export S3_SECRET=pikkunen
export S3_BUCKET=zenith-testbucket
COMPUTE_NODE_PGDATA=zenith-pgdata
# 1. Initialize a cluster.
initdb -D $COMPUTE_NODE_PGDATA -U zenith
echo "port=65432" >> $COMPUTE_NODE_PGDATA/postgresql.conf
echo "log_connections=on" >> $COMPUTE_NODE_PGDATA/postgresql.conf
# Use a small shared_buffers, so that we hit the Page Server more
# easily.
echo "shared_buffers = 1MB" >> $COMPUTE_NODE_PGDATA/postgresql.conf
# TODO: page server should use a replication slot, or some other mechanism
# to make sure that the primary doesn't lose data that the page server still
# needs. (The WAL safekeepers should ensure that)
echo "wal_keep_size=10GB" >> $COMPUTE_NODE_PGDATA/postgresql.conf
# Tell the Postgres server how to connect to the Page Server
echo "page_server_connstring='host=localhost port=5430'" >> $COMPUTE_NODE_PGDATA/postgresql.conf
# 2. Run zenith_push to push a base backup of the database to an S3 bucket. The
# Page Server will read it from there
zenith_push -D $COMPUTE_NODE_PGDATA
# 3. Launch page server
rm -rf /tmp/pgdata-dummy
initdb -N -D /tmp/pgdata-dummy
PGDATA=/tmp/pgdata-dummy ./target/debug/pageserver &
# 4. Start up the Postgres server
postgres -D $COMPUTE_NODE_PGDATA &
echo "ALL SET! You can now connect to Postgres with something like:"
echo ""
echo 'psql "dbname=postgres host=localhost user=zenith port=65432"'


@@ -1,202 +1,334 @@
//!
//! Generate a tarball with files needed to bootstrap ComputeNode.
//!
//! TODO: this module has nothing to do with PostgreSQL pg_basebackup.
//! It could use a better name.
//!
//! Stateless Postgres compute node is launched by sending a tarball
//! which contains non-relational data (multixacts, clog, filenodemaps, twophase files),
//! generated pg_control and dummy segment of WAL.
//! This module is responsible for creation of such tarball
//! from data stored in object storage.
//!
use anyhow::{Context, Result};
use bytes::{BufMut, BytesMut};
use log::*;
use regex::Regex;
use std::fmt;
use std::fmt::Write as FmtWrite;
use std::io;
use std::io::Write;
use tar::Builder;
use walkdir::WalkDir;
use std::sync::Arc;
use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
use crate::ZTimelineId;
use crate::relish::*;
use crate::repository::Timeline;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::*;
use zenith_utils::lsn::Lsn;
pub fn send_snapshot_tarball(
write: &mut dyn Write,
timelineid: ZTimelineId,
snapshotlsn: u64,
) -> Result<(), std::io::Error> {
let mut ar = Builder::new(write);
/// This is short-living object only for the time of tarball creation,
/// created mostly to avoid passing a lot of parameters between various functions
/// used for constructing tarball.
pub struct Basebackup<'a> {
ar: Builder<&'a mut dyn Write>,
timeline: &'a Arc<dyn Timeline>,
pub lsn: Lsn,
prev_record_lsn: Lsn,
}
let snappath = format!("timelines/{}/snapshots/{:016X}", timelineid, snapshotlsn);
let walpath = format!("timelines/{}/wal", timelineid);
// Create basebackup with non-rel data in it. Omit relational data.
//
// Currently we use empty lsn in two cases:
// * During the basebackup right after timeline creation
// * When working without safekeepers. In this situation it is important to match the lsn
// we are taking basebackup on with the lsn that is used in pageserver's walreceiver
// to start the replication.
impl<'a> Basebackup<'a> {
pub fn new(
write: &'a mut dyn Write,
timeline: &'a Arc<dyn Timeline>,
req_lsn: Option<Lsn>,
) -> Result<Basebackup<'a>> {
// Compute postgres doesn't have any previous WAL files, but the first
// record that it's going to write needs to include the LSN of the
// previous record (xl_prev). We include prev_record_lsn in the
// "zenith.signal" file, so that postgres can read it during startup.
//
// We don't keep full history of record boundaries in the page server,
// however, only the predecessor of the latest record on each
// timeline. So we can only provide prev_record_lsn when you take a
// base backup at the end of the timeline, i.e. at last_record_lsn.
// Even at the end of the timeline, we sometimes don't have a valid
// prev_lsn value; that happens if the timeline was just branched from
// an old LSN and it doesn't have any WAL of its own yet. We will set
// prev_lsn to Lsn(0) if we cannot provide the correct value.
let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
// Backup was requested at a particular LSN. Wait for it to arrive.
timeline.wait_lsn(req_lsn)?;
debug!("sending tarball of snapshot in {}", snappath);
//ar.append_dir_all("", &snappath)?;
for entry in WalkDir::new(&snappath) {
let entry = entry?;
let fullpath = entry.path();
let relpath = entry.path().strip_prefix(&snappath).unwrap();
if relpath.to_str().unwrap() == "" {
continue;
}
if entry.file_type().is_dir() {
trace!(
"sending dir {} as {}",
fullpath.display(),
relpath.display()
);
ar.append_dir(relpath, fullpath)?;
} else if entry.file_type().is_symlink() {
error!("ignoring symlink in snapshot dir");
} else if entry.file_type().is_file() {
// Shared catalogs are exempt
if relpath.starts_with("global/") {
trace!("sending shared catalog {}", relpath.display());
ar.append_path_with_name(fullpath, relpath)?;
} else if !is_rel_file_path(relpath.to_str().unwrap()) {
trace!("sending {}", relpath.display());
ar.append_path_with_name(fullpath, relpath)?;
// If the requested point is the end of the timeline, we can
// provide prev_lsn. (get_last_record_rlsn() might return it as
// zero, though, if no WAL has been generated on this timeline
// yet.)
let end_of_timeline = timeline.get_last_record_rlsn();
if req_lsn == end_of_timeline.last {
(end_of_timeline.prev, req_lsn)
} else {
trace!("not sending {}", relpath.display());
// FIXME: send all files for now
ar.append_path_with_name(fullpath, relpath)?;
(Lsn(0), req_lsn)
}
} else {
error!("unknown file type: {}", fullpath.display());
}
}
// FIXME: also send all the WAL
for entry in std::fs::read_dir(&walpath)? {
let entry = entry?;
let fullpath = &entry.path();
let relpath = fullpath.strip_prefix(&walpath).unwrap();
if !entry.path().is_file() {
continue;
}
let archive_fname = relpath.to_str().unwrap().clone();
let archive_fname = archive_fname
.strip_suffix(".partial")
.unwrap_or(&archive_fname);
let archive_path = "pg_wal/".to_owned() + archive_fname;
ar.append_path_with_name(fullpath, archive_path)?;
}
ar.finish()?;
debug!("all tarred up!");
Ok(())
}
// formats:
// <oid>
// <oid>_<fork name>
// <oid>.<segment number>
// <oid>_<fork name>.<segment number>
#[derive(Debug)]
struct FilePathError {
msg: String,
}
impl FilePathError {
fn new(msg: &str) -> FilePathError {
FilePathError {
msg: msg.to_string(),
}
}
}
impl From<core::num::ParseIntError> for FilePathError {
fn from(e: core::num::ParseIntError) -> Self {
return FilePathError {
msg: format!("invalid filename: {}", e),
};
}
}
impl fmt::Display for FilePathError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "invalid filename")
}
}
fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
match forkname {
// "main" is not in filenames, it's implicit if the fork name is not present
None => Ok(0),
Some("fsm") => Ok(1),
Some("vm") => Ok(2),
Some("init") => Ok(3),
Some(_) => Err(FilePathError::new("invalid forkname")),
}
}
fn parse_filename(fname: &str) -> Result<(u32, u32, u32), FilePathError> {
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
let caps = re
.captures(fname)
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
let relnode_str = caps.name("relnode").unwrap().as_str();
let relnode = u32::from_str_radix(relnode_str, 10)?;
let forkname_match = caps.name("forkname");
let forkname = if forkname_match.is_none() {
None
} else {
Some(forkname_match.unwrap().as_str())
};
let forknum = forkname_to_forknum(forkname)?;
let segno_match = caps.name("segno");
let segno = if segno_match.is_none() {
0
} else {
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
};
return Ok((relnode, forknum, segno));
}
fn parse_rel_file_path(path: &str) -> Result<(), FilePathError> {
/*
* Relation data files can be in one of the following directories:
*
* global/
* shared relations
*
* base/<db oid>/
* regular relations, default tablespace
*
* pg_tblspc/<tblspc oid>/<tblspc version>/
* within a non-default tablespace (the name of the directory
* depends on version)
*
* And the relation data files themselves have a filename like:
*
* <oid>.<segment number>
*/
if let Some(fname) = path.strip_prefix("global/") {
let (_relnode, _forknum, _segno) = parse_filename(fname)?;
return Ok(());
} else if let Some(dbpath) = path.strip_prefix("base/") {
let mut s = dbpath.split("/");
let dbnode_str = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
let _dbnode = u32::from_str_radix(dbnode_str, 10)?;
let fname = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
if s.next().is_some() {
return Err(FilePathError::new("invalid relation data file name"));
// Backup was requested at end of the timeline.
let end_of_timeline = timeline.get_last_record_rlsn();
(end_of_timeline.prev, end_of_timeline.last)
};
let (_relnode, _forknum, _segno) = parse_filename(fname)?;
info!(
"taking basebackup lsn={}, prev_lsn={}",
backup_lsn, backup_prev
);
return Ok(());
} else if let Some(_) = path.strip_prefix("pg_tblspc/") {
// TODO
return Err(FilePathError::new("tablespaces not supported"));
} else {
return Err(FilePathError::new("invalid relation data file name"));
Ok(Basebackup {
ar: Builder::new(write),
timeline,
lsn: backup_lsn,
prev_record_lsn: backup_prev,
})
}
pub fn send_tarball(&mut self) -> anyhow::Result<()> {
// Create pgdata subdirs structure
for dir in pg_constants::PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(*dir)?;
self.ar.append(&header, &mut io::empty())?;
}
// Send empty config files.
for filepath in pg_constants::PGDATA_SPECIAL_FILES.iter() {
if *filepath == "pg_hba.conf" {
let data = pg_constants::PG_HBA.as_bytes();
let header = new_tar_header(filepath, data.len() as u64)?;
self.ar.append(&header, data)?;
} else {
let header = new_tar_header(filepath, 0)?;
self.ar.append(&header, &mut io::empty())?;
}
}
// Gather non-relational files from object storage pages.
for obj in self.timeline.list_nonrels(self.lsn)? {
match obj {
RelishTag::Slru { slru, segno } => {
self.add_slru_segment(slru, segno)?;
}
RelishTag::FileNodeMap { spcnode, dbnode } => {
self.add_relmap_file(spcnode, dbnode)?;
}
RelishTag::TwoPhase { xid } => {
self.add_twophase_file(xid)?;
}
_ => {}
}
}
// Generate pg_control and bootstrap WAL segment.
self.add_pgcontrol_file()?;
self.ar.finish()?;
debug!("all tarred up!");
Ok(())
}
//
// Generate SLRU segment files from repository.
//
fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
let seg_size = self
.timeline
.get_relish_size(RelishTag::Slru { slru, segno }, self.lsn)?;
if seg_size == None {
trace!(
"SLRU segment {}/{:>04X} was truncated",
slru.to_str(),
segno
);
return Ok(());
}
let nblocks = seg_size.unwrap();
let mut slru_buf: Vec<u8> =
Vec::with_capacity(nblocks as usize * pg_constants::BLCKSZ as usize);
for blknum in 0..nblocks {
let img =
self.timeline
.get_page_at_lsn(RelishTag::Slru { slru, segno }, blknum, self.lsn)?;
assert!(img.len() == pg_constants::BLCKSZ as usize);
slru_buf.extend_from_slice(&img);
}
let segname = format!("{}/{:>04X}", slru.to_str(), segno);
let header = new_tar_header(&segname, slru_buf.len() as u64)?;
self.ar.append(&header, slru_buf.as_slice())?;
trace!("Added to basebackup slru {} relsize {}", segname, nblocks);
Ok(())
}
//
// Extract pg_filenode.map files from repository
// Along with them also send PG_VERSION for each database.
//
fn add_relmap_file(&mut self, spcnode: u32, dbnode: u32) -> anyhow::Result<()> {
let img = self.timeline.get_page_at_lsn(
RelishTag::FileNodeMap { spcnode, dbnode },
0,
self.lsn,
)?;
let path = if spcnode == pg_constants::GLOBALTABLESPACE_OID {
let version_bytes = pg_constants::PG_MAJORVERSION.as_bytes();
let header = new_tar_header("PG_VERSION", version_bytes.len() as u64)?;
self.ar.append(&header, version_bytes)?;
let header = new_tar_header("global/PG_VERSION", version_bytes.len() as u64)?;
self.ar.append(&header, version_bytes)?;
String::from("global/pg_filenode.map") // filenode map for global tablespace
} else {
// User defined tablespaces are not supported
assert!(spcnode == pg_constants::DEFAULTTABLESPACE_OID);
// Append dir path for each database
let path = format!("base/{}", dbnode);
let header = new_tar_header_dir(&path)?;
self.ar.append(&header, &mut io::empty())?;
let dst_path = format!("base/{}/PG_VERSION", dbnode);
let version_bytes = pg_constants::PG_MAJORVERSION.as_bytes();
let header = new_tar_header(&dst_path, version_bytes.len() as u64)?;
self.ar.append(&header, version_bytes)?;
format!("base/{}/pg_filenode.map", dbnode)
};
assert!(img.len() == 512);
let header = new_tar_header(&path, img.len() as u64)?;
self.ar.append(&header, &img[..])?;
Ok(())
}
//
// Extract twophase state files
//
fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
let img = self
.timeline
.get_page_at_lsn(RelishTag::TwoPhase { xid }, 0, self.lsn)?;
let mut buf = BytesMut::new();
buf.extend_from_slice(&img[..]);
let crc = crc32c::crc32c(&img[..]);
buf.put_u32_le(crc);
let path = format!("pg_twophase/{:>08X}", xid);
let header = new_tar_header(&path, buf.len() as u64)?;
self.ar.append(&header, &buf[..])?;
Ok(())
}
//
// Add generated pg_control file and bootstrap WAL segment.
// Also send zenith.signal file with extra bootstrap data.
//
fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
let checkpoint_bytes = self
.timeline
.get_page_at_lsn(RelishTag::Checkpoint, 0, self.lsn)
.context("failed to get checkpoint bytes")?;
let pg_control_bytes = self
.timeline
.get_page_at_lsn(RelishTag::ControlFile, 0, self.lsn)
.context("failed get control bytes")?;
let mut pg_control = ControlFileData::decode(&pg_control_bytes)?;
let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
// Generate new pg_control needed for bootstrap
checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
//reset some fields we don't want to preserve
//TODO Check this.
//We may need to determine the value from twophase data.
checkpoint.oldestActiveXid = 0;
//save new values in pg_control
pg_control.checkPoint = 0;
pg_control.checkPointCopy = checkpoint;
pg_control.state = pg_constants::DB_SHUTDOWNED;
// add zenith.signal file
let mut zenith_signal = String::new();
if self.prev_record_lsn == Lsn(0) {
if self.lsn == self.timeline.get_ancestor_lsn() {
write!(zenith_signal, "PREV LSN: none")?;
} else {
write!(zenith_signal, "PREV LSN: invalid")?;
}
} else {
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
}
self.ar.append(
&new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
zenith_signal.as_bytes(),
)?;
//send pg_control
let pg_control_bytes = pg_control.encode();
let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
self.ar.append(&header, &pg_control_bytes[..])?;
//send wal segment
let segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
let wal_file_name = XLogFileName(PG_TLI, segno, pg_constants::WAL_SEGMENT_SIZE);
let wal_file_path = format!("pg_wal/{}", wal_file_name);
let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?;
let wal_seg = generate_wal_segment(segno, pg_control.system_identifier);
assert!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE);
self.ar.append(&header, &wal_seg[..])?;
Ok(())
}
}
fn is_rel_file_path(path: &str) -> bool {
return parse_rel_file_path(path).is_ok();
//
// Create new tarball entry header
//
fn new_tar_header(path: &str, size: u64) -> anyhow::Result<Header> {
let mut header = Header::new_gnu();
header.set_size(size);
header.set_path(path)?;
header.set_mode(0b110000000); // -rw-------
header.set_mtime(
// use current time as last modified time
SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs(),
);
header.set_cksum();
Ok(header)
}
fn new_tar_header_dir(path: &str) -> anyhow::Result<Header> {
let mut header = Header::new_gnu();
header.set_size(0);
header.set_path(path)?;
header.set_mode(0o755); // drwxr-xr-x
header.set_entry_type(EntryType::dir());
header.set_mtime(
// use current time as last modified time
SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs(),
);
header.set_cksum();
Ok(header)
}


@@ -0,0 +1,31 @@
//! Main entry point for the dump_layerfile executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::layered_repository::dump_layerfile_from_path;
use pageserver::virtual_file;
use std::path::PathBuf;
use zenith_utils::GIT_VERSION;
fn main() -> Result<()> {
let arg_matches = App::new("Zenith dump_layerfile utility")
.about("Dump contents of one layer file, for debugging")
.version(GIT_VERSION)
.arg(
Arg::new("path")
.help("Path to file to dump")
.required(true)
.index(1),
)
.get_matches();
let path = PathBuf::from(arg_matches.value_of("path").unwrap());
// Basic initialization of things that don't change after startup
virtual_file::init(10);
dump_layerfile_from_path(&path)?;
Ok(())
}


@@ -1,224 +1,296 @@
//
// Main entry point for the Page Server executable
//
//! Main entry point for the Page Server executable.
use log::*;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io;
use std::path::PathBuf;
use std::process::exit;
use std::thread;
use std::{env, path::Path, str::FromStr};
use tracing::*;
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType, tcp_listener, GIT_VERSION};
use anyhow::{bail, Context, Result};
use anyhow::{Context, Result};
use clap::{App, Arg};
use daemonize::Daemonize;
use slog::Drain;
use pageserver::page_service;
use pageserver::tui;
//use pageserver::walreceiver;
use pageserver::PageServerConf;
fn zenith_repo_dir() -> String {
// Find repository path
match std::env::var_os("ZENITH_REPO_DIR") {
Some(val) => String::from(val.to_str().unwrap()),
None => ".zenith".into(),
}
}
use pageserver::{
branches,
config::{defaults::*, PageServerConf},
http, page_cache, page_service, remote_storage, tenant_mgr, thread_mgr,
thread_mgr::ThreadKind,
virtual_file, LOG_FILE_NAME,
};
use zenith_utils::http::endpoint;
use zenith_utils::postgres_backend;
use zenith_utils::shutdown::exit_now;
use zenith_utils::signals::{self, Signal};
fn main() -> Result<()> {
zenith_metrics::set_common_metrics_prefix("pageserver");
let arg_matches = App::new("Zenith page server")
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(GIT_VERSION)
.arg(
Arg::with_name("listen")
.short("l")
.long("listen")
.takes_value(true)
.help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"),
)
.arg(
Arg::with_name("interactive")
.short("i")
.long("interactive")
.takes_value(false)
.help("Interactive mode"),
)
.arg(
Arg::with_name("daemonize")
.short("d")
Arg::new("daemonize")
.short('d')
.long("daemonize")
.takes_value(false)
.help("Run in the background"),
)
.arg(
Arg::new("init")
.long("init")
.takes_value(false)
.help("Initialize pageserver repo"),
)
.arg(
Arg::new("workdir")
.short('D')
.long("workdir")
.takes_value(true)
.help("Working directory for the pageserver"),
)
.arg(
Arg::new("create-tenant")
.long("create-tenant")
.takes_value(true)
.help("Create tenant during init")
.requires("init"),
)
// See `settings.md` for more details on the extra configuration parameters the pageserver can process
.arg(
Arg::new("config-override")
.short('c')
.takes_value(true)
.number_of_values(1)
.multiple_occurrences(true)
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
)
.get_matches();
let mut conf = PageServerConf {
daemonize: false,
interactive: false,
listen_addr: "127.0.0.1:5430".parse().unwrap(),
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
let workdir = workdir
.canonicalize()
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
let cfg_file_path = workdir.join("pageserver.toml");
let init = arg_matches.is_present("init");
let create_tenant = arg_matches.value_of("create-tenant");
// Set CWD to workdir for non-daemon modes
env::set_current_dir(&workdir).with_context(|| {
format!(
"Failed to set application's current dir to '{}'",
workdir.display()
)
})?;
let daemonize = arg_matches.is_present("daemonize");
if init && daemonize {
bail!("--daemonize cannot be used with --init")
}
let mut toml = if init {
// We're initializing the repo, so there's no config file yet
DEFAULT_CONFIG_FILE
.parse::<toml_edit::Document>()
.expect("could not parse built-in config file")
} else {
// Supplement the CLI arguments with the config file
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path)
.with_context(|| format!("No pageserver config at '{}'", cfg_file_path.display()))?;
cfg_file_contents
.parse::<toml_edit::Document>()
.with_context(|| {
format!(
"Failed to read '{}' as pageserver config",
cfg_file_path.display()
)
})?
};
if arg_matches.is_present("daemonize") {
conf.daemonize = true;
}
// Process any extra options given with -c
if let Some(values) = arg_matches.values_of("config-override") {
for option_line in values {
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
format!(
"Option '{}' could not be parsed as a toml document",
option_line
)
})?;
if arg_matches.is_present("interactive") {
conf.interactive = true;
for (key, item) in doc.iter() {
if key == "id" {
anyhow::ensure!(
init,
"node id can only be set during pageserver init and cannot be overridden"
);
}
toml.insert(key, item.clone());
}
}
}
trace!("Resulting toml: {}", toml);
let conf = PageServerConf::parse_and_validate(&toml, &workdir)
.context("Failed to parse pageserver configuration")?;
if conf.daemonize && conf.interactive {
eprintln!("--daemonize is not allowed with --interactive: choose one");
exit(1);
// The configuration is all set up now. Turn it into a 'static
// that can be freely stored in structs and passed across threads
// as a ref.
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf);
// Create repo and exit if init was requested
if init {
branches::init_pageserver(conf, create_tenant).context("Failed to init pageserver")?;
// write the config file
std::fs::write(&cfg_file_path, toml.to_string()).with_context(|| {
format!(
"Failed to initialize pageserver config at '{}'",
cfg_file_path.display()
)
})?;
Ok(())
} else {
start_pageserver(conf, daemonize).context("Failed to start pageserver")
}
if let Some(addr) = arg_matches.value_of("listen") {
conf.listen_addr = addr.parse()?;
}
start_pageserver(&conf)
}
fn start_pageserver(conf: &PageServerConf) -> Result<()> {
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
// Initialize logger
let _scope_guard = init_logging(&conf)?;
let _log_guard = slog_stdlog::init()?;
let log_file = logging::init(LOG_FILE_NAME, daemonize)?;
// Note: this `info!(...)` macro comes from `log` crate
info!("standard logging redirected to slog");
let tui_thread: Option<thread::JoinHandle<()>>;
if conf.interactive {
// Initialize the UI
tui_thread = Some(
thread::Builder::new()
.name("UI thread".into())
.spawn(|| {
let _ = tui::ui_main();
})
.unwrap(),
);
//threads.push(tui_thread);
} else {
tui_thread = None;
}
if conf.daemonize {
info!("daemonizing...");
let repodir = PathBuf::from(zenith_repo_dir());
// There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
// that we will see any accidental manual fprintf's or backtraces.
let log_filename = repodir.join("pageserver.log");
let stdout = OpenOptions::new()
.create(true)
.append(true)
.open(&log_filename)
.with_context(|| format!("failed to open {:?}", &log_filename))?;
let stderr = OpenOptions::new()
.create(true)
.append(true)
.open(&log_filename)
.with_context(|| format!("failed to open {:?}", &log_filename))?;
let daemonize = Daemonize::new()
.pid_file(repodir.clone().join("pageserver.pid"))
.working_directory(repodir)
.stdout(stdout)
.stderr(stderr);
match daemonize.start() {
Ok(_) => info!("Success, daemonized"),
Err(e) => error!("Error, {}", e),
}
} else {
// change into the repository directory. In daemon mode, Daemonize
// does this for us.
let repodir = zenith_repo_dir();
std::env::set_current_dir(&repodir)?;
info!("Changed current directory to repository in {}", &repodir);
}
let mut threads = Vec::new();
info!("version: {}", GIT_VERSION);
// TODO: Check that it looks like a valid repository before going further
// Create directory for wal-redo datadirs
match fs::create_dir("wal-redo") {
Ok(_) => {}
Err(e) => match e.kind() {
io::ErrorKind::AlreadyExists => {}
_ => {
anyhow::bail!("Failed to create wal-redo data directory: {}", e);
}
},
}
// bind sockets before daemonizing so we report errors early and do not return until we are listening
info!(
"Starting pageserver http handler on {}",
conf.listen_http_addr
);
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone())?;
// GetPage@LSN requests are served by another thread. (It uses async I/O,
// but the code in page_service sets up its own thread pool for that)
let conf_copy = conf.clone();
let page_server_thread = thread::Builder::new()
.name("Page Service thread".into())
.spawn(move || {
// thread code
page_service::thread_main(&conf_copy);
})
.unwrap();
threads.push(page_server_thread);
info!(
"Starting pageserver pg protocol handler on {}",
conf.listen_pg_addr
);
let pageserver_listener = tcp_listener::bind(conf.listen_pg_addr.clone())?;
if tui_thread.is_some() {
// The TUI thread exits when the user asks to Quit.
tui_thread.unwrap().join().unwrap();
} else {
// In non-interactive mode, wait forever.
for t in threads {
t.join().unwrap()
// NB: Don't spawn any threads before daemonizing!
if daemonize {
info!("daemonizing...");
// There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
// that we will see any accidental manual fprintf's or backtraces.
let stdout = log_file.try_clone().unwrap();
let stderr = log_file;
let daemonize = Daemonize::new()
.pid_file("pageserver.pid")
.working_directory(".")
.stdout(stdout)
.stderr(stderr);
// XXX: The parent process should exit abruptly right after
// it has spawned a child to prevent coverage machinery from
// dumping stats into a `profraw` file now owned by the child.
// Otherwise, the coverage data will be damaged.
match daemonize.exit_action(|| exit_now(0)).start() {
Ok(_) => info!("Success, daemonized"),
Err(err) => error!(%err, "could not daemonize"),
}
}
Ok(())
let signals = signals::install_shutdown_handlers()?;
let sync_startup = remote_storage::start_local_timeline_sync(conf)
.context("Failed to set up local files sync with external storage")?;
// Initialize tenant manager.
tenant_mgr::set_timeline_states(conf, sync_startup.initial_timeline_states);
// initialize authentication for incoming connections
let auth = match &conf.auth_type {
AuthType::Trust | AuthType::MD5 => None,
AuthType::ZenithJWT => {
// unwrap is ok because check is performed when creating config, so path is set and file exists
let key_path = conf.auth_validation_public_key_path.as_ref().unwrap();
Some(JwtAuth::from_key_path(key_path)?.into())
}
};
info!("Using auth: {:#?}", conf.auth_type);
// Spawn a new thread for the http endpoint
// bind before launching separate thread so the error reported before startup exits
let auth_cloned = auth.clone();
thread_mgr::spawn(
ThreadKind::HttpEndpointListener,
None,
None,
"http_endpoint_thread",
move || {
let router = http::make_router(conf, auth_cloned);
endpoint::serve_thread_main(router, http_listener, thread_mgr::shutdown_watcher())
},
)?;
// Spawn a thread to listen for libpq connections. It will spawn further threads
// for each connection.
thread_mgr::spawn(
ThreadKind::LibpqEndpointListener,
None,
None,
"libpq endpoint thread",
move || page_service::thread_main(conf, auth, pageserver_listener, conf.auth_type),
)?;
signals.handle(|signal| match signal {
Signal::Quit => {
info!(
"Got {}. Terminating in immediate shutdown mode",
signal.name()
);
std::process::exit(111);
}
Signal::Interrupt | Signal::Terminate => {
info!(
"Got {}. Terminating gracefully in fast shutdown mode",
signal.name()
);
shutdown_pageserver();
unreachable!()
}
})
}
fn init_logging(conf: &PageServerConf) -> Result<slog_scope::GlobalLoggerGuard, io::Error> {
if conf.interactive {
Ok(tui::init_logging())
} else if conf.daemonize {
let log = zenith_repo_dir() + "/pageserver.log";
let log_file = File::create(&log).map_err(|err| {
// We failed to initialize logging, so we can't log this message with error!
eprintln!("Could not create log file {:?}: {}", log, err);
err
})?;
let decorator = slog_term::PlainSyncDecorator::new(log_file);
let drain = slog_term::CompactFormat::new(decorator).build();
let drain = slog::Filter::new(drain, |record: &slog::Record| {
if record.level().is_at_least(slog::Level::Debug) {
return true;
}
return false;
});
let drain = std::sync::Mutex::new(drain).fuse();
let logger = slog::Logger::root(drain, slog::o!());
Ok(slog_scope::set_global_logger(logger))
} else {
let decorator = slog_term::TermDecorator::new().build();
let drain = slog_term::FullFormat::new(decorator).build().fuse();
let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
let drain = slog::Filter::new(drain, |record: &slog::Record| {
if record.level().is_at_least(slog::Level::Info) {
return true;
}
if record.level().is_at_least(slog::Level::Debug)
&& record.module().starts_with("pageserver")
{
return true;
}
return false;
})
.fuse();
let logger = slog::Logger::root(drain, slog::o!());
Ok(slog_scope::set_global_logger(logger))
}
fn shutdown_pageserver() {
// Shut down the libpq endpoint thread. This prevents new connections from
// being accepted.
thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);
// Shut down any page service threads.
postgres_backend::set_pgbackend_shutdown_requested();
thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);
// Shut down all the tenants. This flushes everything to disk and kills
// the checkpoint and GC threads.
tenant_mgr::shutdown_all_tenants();
// Stop syncing with remote storage.
//
// FIXME: Does this wait for the sync thread to finish syncing what's queued up?
// Should it?
thread_mgr::shutdown_threads(Some(ThreadKind::StorageSync), None, None);
// Shut down the HTTP endpoint last, so that you can still check the server's
// status while it's shutting down.
thread_mgr::shutdown_threads(Some(ThreadKind::HttpEndpointListener), None, None);
// There should be nothing left, but let's be sure
thread_mgr::shutdown_threads(None, None, None);
info!("Shut down successfully completed");
std::process::exit(0);
}

View File

@@ -0,0 +1,334 @@
//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
//! See [`compression`] for more details about the archives.
use std::{collections::BTreeSet, path::Path};
use anyhow::{bail, ensure, Context};
use clap::{App, Arg};
use pageserver::{
layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
remote_storage::compression,
};
use tokio::{fs, io};
use zenith_utils::GIT_VERSION;
const LIST_SUBCOMMAND: &str = "list";
const ARCHIVE_ARG_NAME: &str = "archive";
const EXTRACT_SUBCOMMAND: &str = "extract";
const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
const CREATE_SUBCOMMAND: &str = "create";
const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
let arg_matches = App::new("pageserver zst blob [un]compressor utility")
.version(GIT_VERSION)
.subcommands(vec![
App::new(LIST_SUBCOMMAND)
.about("List the archive contents")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to list the contents of"),
),
App::new(EXTRACT_SUBCOMMAND)
.about("Extracts the archive into the directory")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to extract"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
),
App::new(CREATE_SUBCOMMAND)
.about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
.arg(
Arg::new(SOURCE_DIRECTORY_ARG_NAME)
.required(true)
.takes_value(true)
.help("A directory to use for creating the archive"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to create the archive in. Optional, will use the current directory if not specified"),
),
])
.get_matches();
let subcommand_name = match arg_matches.subcommand_name() {
Some(name) => name,
None => bail!("No subcommand specified"),
};
let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
Some(matches) => matches,
None => bail!(
"No subcommand arguments were recognized for subcommand '{}'",
subcommand_name
),
};
let target_dir = Path::new(
subcommand_matches
.value_of(TARGET_DIRECTORY_ARG_NAME)
.unwrap_or("./"),
);
match subcommand_name {
LIST_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
list_archive(archive).await
}
EXTRACT_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
extract_archive(archive, target_dir).await
}
CREATE_SUBCOMMAND => {
let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
Some(source) => Path::new(source),
None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
};
create_archive(source_dir, target_dir).await
}
unknown => bail!("Unknown subcommand {}", unknown),
}
}
async fn list_archive(archive: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
println!("Listing an archive at path '{}'", archive.display());
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
let archive_bytes = fs::read(&archive)
.await
.context("Failed to read the archive bytes")?;
let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
.await
.context("Failed to read the archive header")?;
let empty_path = Path::new("");
println!("-------------------------------");
let longest_path_in_archive = header
.files
.iter()
.filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
.max()
.unwrap_or_default()
.max(METADATA_FILE_NAME.len());
for regular_file in &header.files {
println!(
"File: {:width$} uncompressed size: {} bytes",
regular_file.subpath.as_path(empty_path).display(),
regular_file.size,
width = longest_path_in_archive,
)
}
println!(
"File: {:width$} uncompressed size: {} bytes",
METADATA_FILE_NAME,
header.metadata_file_size,
width = longest_path_in_archive,
);
println!("-------------------------------");
Ok(())
}
async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
let mut dir_contents = fs::read_dir(&target_dir)
.await
.context("Failed to list the target directory contents")?;
let dir_entry = dir_contents
.next_entry()
.await
.context("Failed to list the target directory contents")?;
ensure!(
dir_entry.is_none(),
"Target directory '{}' is not empty",
target_dir.display()
);
println!(
"Extracting an archive at path '{}' into directory '{}'",
archive.display(),
target_dir.display()
);
let mut archive_file = fs::File::open(&archive).await.with_context(|| {
format!(
"Failed to get the archive name from the path '{}'",
archive.display()
)
})?;
let header = compression::read_archive_header(archive_name, &mut archive_file)
.await
.context("Failed to read the archive header")?;
compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
.await
.context("Failed to extract the archive")
}
async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
let source_dir = source_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the source dir path '{}'",
source_dir.display()
)
})?;
ensure!(
source_dir.is_dir(),
"Path '{}' is not a directory",
source_dir.display()
);
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
println!(
"Compressing directory '{}' and creating resulting archive in directory '{}'",
source_dir.display(),
target_dir.display()
);
let mut metadata_file_contents = None;
let mut files_co_archive = Vec::new();
let mut source_dir_contents = fs::read_dir(&source_dir)
.await
.context("Failed to read the source directory contents")?;
while let Some(source_dir_entry) = source_dir_contents
.next_entry()
.await
.context("Failed to read a source dir entry")?
{
let entry_path = source_dir_entry.path();
if entry_path.is_file() {
if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
let metadata_bytes = fs::read(entry_path)
.await
.context("Failed to read metata file bytes in the source dir")?;
metadata_file_contents = Some(
TimelineMetadata::from_bytes(&metadata_bytes)
.context("Failed to parse metata file contents in the source dir")?,
);
} else {
files_co_archive.push(entry_path);
}
}
}
let metadata = match metadata_file_contents {
Some(metadata) => metadata,
None => bail!(
"No metadata file found in the source dir '{}', cannot create the archive",
source_dir.display()
),
};
let _ = compression::archive_files_as_stream(
&source_dir,
files_co_archive.iter(),
&metadata,
move |mut archive_streamer, archive_name| async move {
let archive_target = target_dir.join(&archive_name);
let mut archive_file = fs::File::create(&archive_target).await?;
io::copy(&mut archive_streamer, &mut archive_file).await?;
Ok(archive_target)
},
)
.await
.context("Failed to create an archive")?;
Ok(())
}

View File

@@ -0,0 +1,72 @@
//! Main entry point for the edit_metadata executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::layered_repository::metadata::TimelineMetadata;
use std::path::PathBuf;
use std::str::FromStr;
use zenith_utils::lsn::Lsn;
use zenith_utils::GIT_VERSION;
fn main() -> Result<()> {
let arg_matches = App::new("Zenith update metadata utility")
.about("Dump or update metadata file")
.version(GIT_VERSION)
.arg(
Arg::new("path")
.help("Path to metadata file")
.required(true),
)
.arg(
Arg::new("disk_lsn")
.short('d')
.long("disk_lsn")
.takes_value(true)
.help("Replace disk constistent lsn"),
)
.arg(
Arg::new("prev_lsn")
.short('p')
.long("prev_lsn")
.takes_value(true)
.help("Previous record LSN"),
)
.get_matches();
let path = PathBuf::from(arg_matches.value_of("path").unwrap());
let metadata_bytes = std::fs::read(&path)?;
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
println!("Current metadata:\n{:?}", &meta);
let mut update_meta = false;
if let Some(disk_lsn) = arg_matches.value_of("disk_lsn") {
meta = TimelineMetadata::new(
Lsn::from_str(disk_lsn)?,
meta.prev_record_lsn(),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
);
update_meta = true;
}
if let Some(prev_lsn) = arg_matches.value_of("prev_lsn") {
meta = TimelineMetadata::new(
meta.disk_consistent_lsn(),
Some(Lsn::from_str(prev_lsn)?),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
);
update_meta = true;
}
if update_meta {
let metadata_bytes = meta.to_bytes()?;
std::fs::write(&path, &metadata_bytes)?;
}
Ok(())
}

428
pageserver/src/branches.rs Normal file
View File

@@ -0,0 +1,428 @@
//!
//! Branch management code
//!
// TODO: move all paths construction to conf impl
//
use anyhow::{bail, Context, Result};
use postgres_ffi::ControlFileData;
use serde::{Deserialize, Serialize};
use std::{
fs,
path::Path,
process::{Command, Stdio},
str::FromStr,
sync::Arc,
};
use tracing::*;
use zenith_utils::crashsafe_dir;
use zenith_utils::logging;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use crate::walredo::WalRedoManager;
use crate::CheckpointConfig;
use crate::{config::PageServerConf, repository::Repository};
use crate::{import_datadir, LOG_FILE_NAME};
use crate::{repository::RepositoryTimeline, tenant_mgr};
#[derive(Serialize, Deserialize, Clone)]
pub struct BranchInfo {
pub name: String,
#[serde(with = "hex")]
pub timeline_id: ZTimelineId,
pub latest_valid_lsn: Lsn,
pub ancestor_id: Option<String>,
pub ancestor_lsn: Option<String>,
pub current_logical_size: usize,
pub current_logical_size_non_incremental: Option<usize>,
}
impl BranchInfo {
pub fn from_path<T: AsRef<Path>>(
path: T,
repo: &Arc<dyn Repository>,
include_non_incremental_logical_size: bool,
) -> Result<Self> {
let path = path.as_ref();
let name = path.file_name().unwrap().to_string_lossy().to_string();
let timeline_id = std::fs::read_to_string(path)
.with_context(|| {
format!(
"Failed to read branch file contents at path '{}'",
path.display()
)
})?
.parse::<ZTimelineId>()?;
let timeline = match repo.get_timeline(timeline_id)? {
RepositoryTimeline::Local(local_entry) => local_entry,
RepositoryTimeline::Remote { .. } => {
bail!("Timeline {} is remote, no branches to display", timeline_id)
}
};
// we use ancestor lsn zero if we don't have an ancestor, so turn this into an option based on timeline id
let (ancestor_id, ancestor_lsn) = match timeline.get_ancestor_timeline_id() {
Some(ancestor_id) => (
Some(ancestor_id.to_string()),
Some(timeline.get_ancestor_lsn().to_string()),
),
None => (None, None),
};
// non incremental size calculation can be heavy, so let it be optional
// needed for tests to check size calculation
let current_logical_size_non_incremental = include_non_incremental_logical_size
.then(|| {
timeline.get_current_logical_size_non_incremental(timeline.get_last_record_lsn())
})
.transpose()?;
Ok(BranchInfo {
name,
timeline_id,
latest_valid_lsn: timeline.get_last_record_lsn(),
ancestor_id,
ancestor_lsn,
current_logical_size: timeline.get_current_logical_size(),
current_logical_size_non_incremental,
})
}
}
#[derive(Debug, Clone, Copy)]
pub struct PointInTime {
pub timelineid: ZTimelineId,
pub lsn: Lsn,
}
pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
// Initialize logger
// use true as the daemonize parameter because otherwise we pollute the zenith cli output with several pages of info messages
let _log_file = logging::init(LOG_FILE_NAME, true)?;
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
// process during repository initialization.
//
// FIXME: That caused trouble, because the WAL redo manager spawned a thread that launched
// initdb in the background, and it kept running even after the "zenith init" had exited.
// In tests, we started the page server immediately after that, so that initdb was still
// running in the background, and we failed to run initdb again in the same directory. This
// has been solved for the rapid init+start case now, but the general race condition remains
// if you restart the server quickly. The WAL redo manager doesn't use a separate thread
// anymore, but I think that could still happen.
let dummy_redo_mgr = Arc::new(crate::walredo::DummyRedoManager {});
if let Some(tenantid) = create_tenant {
let tenantid = ZTenantId::from_str(tenantid)?;
println!("initializing tenantid {}", tenantid);
create_repo(conf, tenantid, dummy_redo_mgr).context("failed to create repo")?;
}
crashsafe_dir::create_dir_all(conf.tenants_path())?;
println!("pageserver init succeeded");
Ok(())
}
pub fn create_repo(
conf: &'static PageServerConf,
tenantid: ZTenantId,
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
) -> Result<Arc<dyn Repository>> {
let repo_dir = conf.tenant_path(&tenantid);
if repo_dir.exists() {
bail!("repo for {} already exists", tenantid)
}
// top-level dir may exist if we are creating it through CLI
crashsafe_dir::create_dir_all(&repo_dir)
.with_context(|| format!("could not create directory {}", repo_dir.display()))?;
crashsafe_dir::create_dir(conf.timelines_path(&tenantid))?;
crashsafe_dir::create_dir_all(conf.branches_path(&tenantid))?;
crashsafe_dir::create_dir_all(conf.tags_path(&tenantid))?;
info!("created directory structure in {}", repo_dir.display());
// create a new timeline directory
let timeline_id = ZTimelineId::generate();
let timelinedir = conf.timeline_path(&timeline_id, &tenantid);
crashsafe_dir::create_dir(&timelinedir)?;
let repo = Arc::new(crate::layered_repository::LayeredRepository::new(
conf,
wal_redo_manager,
tenantid,
conf.remote_storage_config.is_some(),
));
// Load data into pageserver
// TODO To implement zenith import we need to
// move data loading out of create_repo()
bootstrap_timeline(conf, tenantid, timeline_id, repo.as_ref())?;
Ok(repo)
}
// Returns checkpoint LSN from controlfile
fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// Read control file to extract the LSN
let controlfile_path = path.join("global").join("pg_control");
let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?;
let lsn = controlfile.checkPoint;
Ok(Lsn(lsn))
}
// Create the cluster temporarily in 'initdbpath' directory inside the repository
// to get bootstrap data for timeline initialization.
//
fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
info!("running initdb in {}... ", initdbpath.display());
let initdb_path = conf.pg_bin_dir().join("initdb");
let initdb_output = Command::new(initdb_path)
.args(&["-D", initdbpath.to_str().unwrap()])
.args(&["-U", &conf.superuser])
.args(&["-E", "utf8"])
.arg("--no-instructions")
// This is only used for a temporary installation that is deleted shortly after,
// so no need to fsync it
.arg("--no-sync")
.env_clear()
.env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
.stdout(Stdio::null())
.output()
.context("failed to execute initdb")?;
if !initdb_output.status.success() {
anyhow::bail!(
"initdb failed: '{}'",
String::from_utf8_lossy(&initdb_output.stderr)
);
}
Ok(())
}
//
// - run initdb to init temporary instance and get bootstrap data
// - after initialization complete, remove the temp dir.
//
fn bootstrap_timeline(
conf: &'static PageServerConf,
tenantid: ZTenantId,
tli: ZTimelineId,
repo: &dyn Repository,
) -> Result<()> {
let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
let initdb_path = conf.tenant_path(&tenantid).join("tmp");
// Init a temporary repo to get bootstrap data
run_initdb(conf, &initdb_path)?;
let pgdata_path = initdb_path;
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
// Import the contents of the data directory at the initial checkpoint
// LSN, and any WAL after that.
// Initdb lsn will be equal to last_record_lsn, which will be set after import.
// Because we know it upfront, avoid an option or a dummy zero value by passing it to create_empty_timeline.
let timeline = repo.create_empty_timeline(tli, lsn)?;
import_datadir::import_timeline_from_postgres_datadir(
&pgdata_path,
timeline.writer().as_ref(),
lsn,
)?;
timeline.checkpoint(CheckpointConfig::Forced)?;
println!(
"created initial timeline {} timeline.lsn {}",
tli,
timeline.get_last_record_lsn()
);
let data = tli.to_string();
fs::write(conf.branch_path("main", &tenantid), data)?;
println!("created main branch");
// Remove temp dir. We don't need it anymore
fs::remove_dir_all(pgdata_path)?;
Ok(())
}
pub(crate) fn get_branches(
conf: &PageServerConf,
tenantid: &ZTenantId,
include_non_incremental_logical_size: bool,
) -> Result<Vec<BranchInfo>> {
let repo = tenant_mgr::get_repository_for_tenant(*tenantid)?;
// Each branch has a corresponding record (text file) in the refs/branches
// with timeline_id.
let branches_dir = conf.branches_path(tenantid);
std::fs::read_dir(&branches_dir)
.with_context(|| {
format!(
"Found no branches directory '{}' for tenant {}",
branches_dir.display(),
tenantid
)
})?
.map(|dir_entry_res| {
let dir_entry = dir_entry_res.with_context(|| {
format!(
"Failed to list branches directory '{}' content for tenant {}",
branches_dir.display(),
tenantid
)
})?;
BranchInfo::from_path(
dir_entry.path(),
&repo,
include_non_incremental_logical_size,
)
})
.collect()
}
pub(crate) fn create_branch(
conf: &PageServerConf,
branchname: &str,
startpoint_str: &str,
tenantid: &ZTenantId,
) -> Result<BranchInfo> {
let repo = tenant_mgr::get_repository_for_tenant(*tenantid)?;
if conf.branch_path(branchname, tenantid).exists() {
anyhow::bail!("branch {} already exists", branchname);
}
let mut startpoint = parse_point_in_time(conf, startpoint_str, tenantid)?;
let timeline = repo
.get_timeline(startpoint.timelineid)?
.local_timeline()
.context("Cannot branch off the timeline that's not present locally")?;
if startpoint.lsn == Lsn(0) {
// Find end of WAL on the old timeline
let end_of_wal = timeline.get_last_record_lsn();
info!("branching at end of WAL: {}", end_of_wal);
startpoint.lsn = end_of_wal;
} else {
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
timeline.wait_lsn(startpoint.lsn)?;
}
startpoint.lsn = startpoint.lsn.align();
if timeline.get_ancestor_lsn() > startpoint.lsn {
// can we safely just branch from the ancestor instead?
anyhow::bail!(
"invalid startpoint {} for the branch {}: less than timeline ancestor lsn {:?}",
startpoint.lsn,
branchname,
timeline.get_ancestor_lsn()
);
}
let new_timeline_id = ZTimelineId::generate();
// Forward entire timeline creation routine to repository
// backend, so it can do all needed initialization
repo.branch_timeline(startpoint.timelineid, new_timeline_id, startpoint.lsn)?;
// Remember the human-readable branch name for the new timeline.
// FIXME: there's a race condition, if you create a branch with the same
// name concurrently.
let data = new_timeline_id.to_string();
fs::write(conf.branch_path(branchname, tenantid), data)?;
Ok(BranchInfo {
name: branchname.to_string(),
timeline_id: new_timeline_id,
latest_valid_lsn: startpoint.lsn,
ancestor_id: Some(startpoint.timelineid.to_string()),
ancestor_lsn: Some(startpoint.lsn.to_string()),
current_logical_size: 0,
current_logical_size_non_incremental: Some(0),
})
}
//
// Parse user-given string that represents a point-in-time.
//
// We support multiple variants:
//
// Raw timeline id in hex, meaning the end of that timeline:
// bc62e7d612d0e6fe8f99a6dd2f281f9d
//
// A specific LSN on a timeline:
// bc62e7d612d0e6fe8f99a6dd2f281f9d@2/15D3DD8
//
// Same, with a human-friendly branch name:
// main
// main@2/15D3DD8
//
// Human-friendly tag name:
// mytag
//
//
fn parse_point_in_time(
conf: &PageServerConf,
s: &str,
tenantid: &ZTenantId,
) -> Result<PointInTime> {
let mut strings = s.split('@');
let name = strings.next().unwrap();
let lsn = strings
.next()
.map(Lsn::from_str)
.transpose()
.context("invalid LSN in point-in-time specification")?;
// Check if it's a tag
if lsn.is_none() {
let tagpath = conf.tag_path(name, tenantid);
if tagpath.exists() {
let pointstr = fs::read_to_string(tagpath)?;
return parse_point_in_time(conf, &pointstr, tenantid);
}
}
// Check if it's a branch
// Check if it's branch @ LSN
let branchpath = conf.branch_path(name, tenantid);
if branchpath.exists() {
let pointstr = fs::read_to_string(branchpath)?;
let mut result = parse_point_in_time(conf, &pointstr, tenantid)?;
result.lsn = lsn.unwrap_or(Lsn(0));
return Ok(result);
}
// Check if it's a timelineid
// Check if it's timelineid @ LSN
if let Ok(timelineid) = ZTimelineId::from_str(name) {
let tlipath = conf.timeline_path(&timelineid, tenantid);
if tlipath.exists() {
return Ok(PointInTime {
timelineid,
lsn: lsn.unwrap_or(Lsn(0)),
});
}
}
bail!("could not parse point-in-time {}", s);
}
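// Illustrative sketch (not part of this change) of resolving the point-in-time
// strings documented above. The branch name and LSN are the examples from the
// comment, and a real repository must exist on disk for the lookups to succeed.
#[allow(dead_code)]
fn example_parse_point_in_time(conf: &PageServerConf, tenantid: &ZTenantId) -> Result<()> {
    // 'main' alone means the end of the 'main' branch (the LSN is resolved later).
    let at_tip = parse_point_in_time(conf, "main", tenantid)?;
    // 'main@2/15D3DD8' pins a specific LSN on that branch.
    let pinned = parse_point_in_time(conf, "main@2/15D3DD8", tenantid)?;
    info!(
        "tip: {}@{}, pinned: {}@{}",
        at_tip.timelineid, at_tip.lsn, pinned.timelineid, pinned.lsn
    );
    Ok(())
}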

853
pageserver/src/config.rs Normal file
View File

@@ -0,0 +1,853 @@
//! Functions for handling page server configuration options
//!
//! Configuration options can be set in the pageserver.toml configuration
//! file, or on the command line.
//! See also `settings.md` for a better description of every parameter.
use anyhow::{bail, ensure, Context, Result};
use toml_edit;
use toml_edit::{Document, Item};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use std::convert::TryInto;
use std::env;
use std::num::{NonZeroU32, NonZeroUsize};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::time::Duration;
use crate::layered_repository::TIMELINES_SEGMENT_NAME;
pub mod defaults {
use const_format::formatcp;
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
// FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
// would be more appropriate. But a low value forces the code to be exercised more,
// which is good for now to trigger bugs.
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
pub const DEFAULT_CHECKPOINT_PERIOD: &str = "1 s";
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
pub const DEFAULT_GC_PERIOD: &str = "100 s";
pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 100;
pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
///
/// Default built-in configuration file.
///
pub const DEFAULT_CONFIG_FILE: &str = formatcp!(
r###"
# Initial configuration file created by 'pageserver --init'
#listen_pg_addr = '{DEFAULT_PG_LISTEN_ADDR}'
#listen_http_addr = '{DEFAULT_HTTP_LISTEN_ADDR}'
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#checkpoint_period = '{DEFAULT_CHECKPOINT_PERIOD}'
#gc_period = '{DEFAULT_GC_PERIOD}'
#gc_horizon = {DEFAULT_GC_HORIZON}
#max_file_descriptors = {DEFAULT_MAX_FILE_DESCRIPTORS}
# initial superuser role name to use when creating a new tenant
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
# [remote_storage]
"###
);
}
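// Illustrative sketch (not part of this change), mirroring the merging done in
// the pageserver main entry point: layer a `-c`-style override on top of the
// built-in defaults before validation. The `id = 10` value and the function
// name are made-up examples; validation also expects a postgres install under
// `pg_distrib_dir` and a valid workdir.
#[allow(dead_code)]
fn example_defaults_with_override(workdir: &Path) -> Result<PageServerConf> {
    let mut toml = defaults::DEFAULT_CONFIG_FILE
        .parse::<toml_edit::Document>()
        .expect("built-in config must parse");
    // An override as it would arrive from a `-c` command line option.
    let override_doc = toml_edit::Document::from_str("id = 10")?;
    for (key, item) in override_doc.iter() {
        toml.insert(key, item.clone());
    }
    PageServerConf::parse_and_validate(&toml, workdir)
}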
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageServerConf {
// Identifier of that particular pageserver, so that, e.g., safekeepers
// can safely distinguish different pageservers
pub id: ZNodeId,
/// Example (default): 127.0.0.1:64000
pub listen_pg_addr: String,
/// Example (default): 127.0.0.1:9898
pub listen_http_addr: String,
// Flush out an inmemory layer, if it's holding WAL older than this
// This puts a backstop on how much WAL needs to be re-digested if the
// page server crashes.
pub checkpoint_distance: u64,
pub checkpoint_period: Duration,
pub gc_horizon: u64,
pub gc_period: Duration,
pub superuser: String,
pub page_cache_size: usize,
pub max_file_descriptors: usize,
// Repository directory, relative to current working directory.
// Normally, the page server changes the current working directory
// to the repository, and 'workdir' is always '.'. But we don't do
// that during unit testing, because the current directory is global
// to the process but different unit tests work on different
// repositories.
pub workdir: PathBuf,
pub pg_distrib_dir: PathBuf,
pub auth_type: AuthType,
pub auth_validation_public_key_path: Option<PathBuf>,
pub remote_storage_config: Option<RemoteStorageConfig>,
}
// use a dedicated enum for the builder to better indicate the intention
// and avoid possible confusion with nested options
pub enum BuilderValue<T> {
Set(T),
NotSet,
}
impl<T> BuilderValue<T> {
pub fn ok_or<E>(self, err: E) -> Result<T, E> {
match self {
Self::Set(v) => Ok(v),
Self::NotSet => Err(err),
}
}
}
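// Illustrative sketch (not part of this change): with a plain Option, an
// optional field like `remote_storage_config` would become
// Option<Option<RemoteStorageConfig>>, where "never set" and "explicitly set
// to None" are easy to confuse. BuilderValue keeps the two cases distinct.
#[allow(dead_code)]
fn example_builder_value_distinction() {
    let unset: BuilderValue<Option<RemoteStorageConfig>> = BuilderValue::NotSet;
    let disabled: BuilderValue<Option<RemoteStorageConfig>> = BuilderValue::Set(None);
    // Only the never-set value turns into an error when the config is built.
    assert!(unset.ok_or("missing remote_storage_config").is_err());
    assert!(disabled.ok_or("missing remote_storage_config").is_ok());
}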
// needed to simplify config construction
struct PageServerConfigBuilder {
listen_pg_addr: BuilderValue<String>,
listen_http_addr: BuilderValue<String>,
checkpoint_distance: BuilderValue<u64>,
checkpoint_period: BuilderValue<Duration>,
gc_horizon: BuilderValue<u64>,
gc_period: BuilderValue<Duration>,
superuser: BuilderValue<String>,
page_cache_size: BuilderValue<usize>,
max_file_descriptors: BuilderValue<usize>,
workdir: BuilderValue<PathBuf>,
pg_distrib_dir: BuilderValue<PathBuf>,
auth_type: BuilderValue<AuthType>,
//
auth_validation_public_key_path: BuilderValue<Option<PathBuf>>,
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
id: BuilderValue<ZNodeId>,
}
impl Default for PageServerConfigBuilder {
fn default() -> Self {
use self::BuilderValue::*;
use defaults::*;
Self {
listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
checkpoint_period: Set(humantime::parse_duration(DEFAULT_CHECKPOINT_PERIOD)
.expect("cannot parse default checkpoint period")),
gc_horizon: Set(DEFAULT_GC_HORIZON),
gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
.expect("cannot parse default gc period")),
superuser: Set(DEFAULT_SUPERUSER.to_string()),
page_cache_size: Set(DEFAULT_PAGE_CACHE_SIZE),
max_file_descriptors: Set(DEFAULT_MAX_FILE_DESCRIPTORS),
workdir: Set(PathBuf::new()),
pg_distrib_dir: Set(env::current_dir()
.expect("cannot access current directory")
.join("tmp_install")),
auth_type: Set(AuthType::Trust),
auth_validation_public_key_path: Set(None),
remote_storage_config: Set(None),
id: NotSet,
}
}
}
impl PageServerConfigBuilder {
pub fn listen_pg_addr(&mut self, listen_pg_addr: String) {
self.listen_pg_addr = BuilderValue::Set(listen_pg_addr)
}
pub fn listen_http_addr(&mut self, listen_http_addr: String) {
self.listen_http_addr = BuilderValue::Set(listen_http_addr)
}
pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
}
pub fn checkpoint_period(&mut self, checkpoint_period: Duration) {
self.checkpoint_period = BuilderValue::Set(checkpoint_period)
}
pub fn gc_horizon(&mut self, gc_horizon: u64) {
self.gc_horizon = BuilderValue::Set(gc_horizon)
}
pub fn gc_period(&mut self, gc_period: Duration) {
self.gc_period = BuilderValue::Set(gc_period)
}
pub fn superuser(&mut self, superuser: String) {
self.superuser = BuilderValue::Set(superuser)
}
pub fn page_cache_size(&mut self, page_cache_size: usize) {
self.page_cache_size = BuilderValue::Set(page_cache_size)
}
pub fn max_file_descriptors(&mut self, max_file_descriptors: usize) {
self.max_file_descriptors = BuilderValue::Set(max_file_descriptors)
}
pub fn workdir(&mut self, workdir: PathBuf) {
self.workdir = BuilderValue::Set(workdir)
}
pub fn pg_distrib_dir(&mut self, pg_distrib_dir: PathBuf) {
self.pg_distrib_dir = BuilderValue::Set(pg_distrib_dir)
}
pub fn auth_type(&mut self, auth_type: AuthType) {
self.auth_type = BuilderValue::Set(auth_type)
}
pub fn auth_validation_public_key_path(
&mut self,
auth_validation_public_key_path: Option<PathBuf>,
) {
self.auth_validation_public_key_path = BuilderValue::Set(auth_validation_public_key_path)
}
pub fn remote_storage_config(&mut self, remote_storage_config: Option<RemoteStorageConfig>) {
self.remote_storage_config = BuilderValue::Set(remote_storage_config)
}
pub fn id(&mut self, node_id: ZNodeId) {
self.id = BuilderValue::Set(node_id)
}
pub fn build(self) -> Result<PageServerConf> {
Ok(PageServerConf {
listen_pg_addr: self
.listen_pg_addr
.ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
listen_http_addr: self
.listen_http_addr
.ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
checkpoint_distance: self
.checkpoint_distance
.ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
checkpoint_period: self
.checkpoint_period
.ok_or(anyhow::anyhow!("missing checkpoint_period"))?,
gc_horizon: self
.gc_horizon
.ok_or(anyhow::anyhow!("missing gc_horizon"))?,
gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
page_cache_size: self
.page_cache_size
.ok_or(anyhow::anyhow!("missing page_cache_size"))?,
max_file_descriptors: self
.max_file_descriptors
.ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
pg_distrib_dir: self
.pg_distrib_dir
.ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
auth_validation_public_key_path: self
.auth_validation_public_key_path
.ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
remote_storage_config: self
.remote_storage_config
.ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
})
}
}
/// External backup storage configuration, enough for creating a client for that storage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteStorageConfig {
/// Max allowed number of concurrent sync operations between pageserver and the remote storage.
pub max_concurrent_sync: NonZeroUsize,
/// Max allowed errors before the sync task is considered failed and evicted.
pub max_sync_errors: NonZeroU32,
/// The storage connection configuration.
pub storage: RemoteStorageKind,
}
/// A kind of a remote storage to connect to, with its connection configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RemoteStorageKind {
/// Storage based on local file system.
/// Specify a root folder to place all stored relish data into.
LocalFs(PathBuf),
/// AWS S3 based storage, storing all relishes into the root
/// of the S3 bucket from the config.
AwsS3(S3Config),
}
/// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
#[derive(Clone, PartialEq, Eq)]
pub struct S3Config {
/// Name of the bucket to connect to.
pub bucket_name: String,
/// The region where the bucket is located at.
pub bucket_region: String,
/// A "subfolder" in the bucket, to use the same bucket separately by multiple pageservers at once.
pub prefix_in_bucket: Option<String>,
/// "Login" to use when connecting to bucket.
/// Can be empty for cases like AWS k8s IAM
/// where we can allow certain pods to connect
/// to the bucket directly without any credentials.
pub access_key_id: Option<String>,
/// "Password" to use when connecting to bucket.
pub secret_access_key: Option<String>,
/// A base URL to send S3 requests to.
/// By default, the endpoint is derived from a region name, assuming it's
/// an AWS S3 region name, erroring on wrong region name.
/// Endpoint provides a way to support other S3 flavors and their regions.
///
/// Example: `http://127.0.0.1:5000`
pub endpoint: Option<String>,
}
impl std::fmt::Debug for S3Config {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("S3Config")
.field("bucket_name", &self.bucket_name)
.field("bucket_region", &self.bucket_region)
.field("prefix_in_bucket", &self.prefix_in_bucket)
.finish()
}
}
}
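// Illustrative sketch (not part of this change): an S3Config pointing at a
// local S3-compatible endpoint. All values are made-up examples; the bucket
// name, region and endpoint mirror the ones used in the tests and doc comments.
#[allow(dead_code)]
fn example_s3_config() -> S3Config {
    S3Config {
        bucket_name: "some-sample-bucket".to_string(),
        bucket_region: "eu-north-1".to_string(),
        prefix_in_bucket: Some("pageserver-1".to_string()),
        access_key_id: Some("AKIA...".to_string()),
        secret_access_key: Some("secret".to_string()),
        endpoint: Some("http://127.0.0.1:5000".to_string()),
    }
}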
impl PageServerConf {
//
// Repository paths, relative to workdir.
//
pub fn tenants_path(&self) -> PathBuf {
self.workdir.join("tenants")
}
pub fn tenant_path(&self, tenantid: &ZTenantId) -> PathBuf {
self.tenants_path().join(tenantid.to_string())
}
pub fn tags_path(&self, tenantid: &ZTenantId) -> PathBuf {
self.tenant_path(tenantid).join("refs").join("tags")
}
pub fn tag_path(&self, tag_name: &str, tenantid: &ZTenantId) -> PathBuf {
self.tags_path(tenantid).join(tag_name)
}
pub fn branches_path(&self, tenantid: &ZTenantId) -> PathBuf {
self.tenant_path(tenantid).join("refs").join("branches")
}
pub fn branch_path(&self, branch_name: &str, tenantid: &ZTenantId) -> PathBuf {
self.branches_path(tenantid).join(branch_name)
}
pub fn timelines_path(&self, tenantid: &ZTenantId) -> PathBuf {
self.tenant_path(tenantid).join(TIMELINES_SEGMENT_NAME)
}
pub fn timeline_path(&self, timelineid: &ZTimelineId, tenantid: &ZTenantId) -> PathBuf {
self.timelines_path(tenantid).join(timelineid.to_string())
}
pub fn ancestor_path(&self, timelineid: &ZTimelineId, tenantid: &ZTenantId) -> PathBuf {
self.timeline_path(timelineid, tenantid).join("ancestor")
}
//
// Postgres distribution paths
//
pub fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
}
pub fn pg_lib_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("lib")
}
/// Parse a configuration file (pageserver.toml) into a PageServerConf struct,
/// validating the input and failing on errors.
///
/// This leaves any options not present in the file in the built-in defaults.
pub fn parse_and_validate(toml: &Document, workdir: &Path) -> Result<Self> {
let mut builder = PageServerConfigBuilder::default();
builder.workdir(workdir.to_owned());
for (key, item) in toml.iter() {
match key {
"listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
"listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
"checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
"checkpoint_period" => builder.checkpoint_period(parse_toml_duration(key, item)?),
"gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
"gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
"initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
"page_cache_size" => builder.page_cache_size(parse_toml_u64(key, item)? as usize),
"max_file_descriptors" => {
builder.max_file_descriptors(parse_toml_u64(key, item)? as usize)
}
"pg_distrib_dir" => {
builder.pg_distrib_dir(PathBuf::from(parse_toml_string(key, item)?))
}
"auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
PathBuf::from(parse_toml_string(key, item)?),
)),
"auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
"remote_storage" => {
builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
}
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
_ => bail!("unrecognized pageserver option '{}'", key),
}
}
let mut conf = builder.build().context("invalid config")?;
if conf.auth_type == AuthType::ZenithJWT {
let auth_validation_public_key_path = conf
.auth_validation_public_key_path
.get_or_insert_with(|| workdir.join("auth_public_key.pem"));
ensure!(
auth_validation_public_key_path.exists(),
format!(
"Can't find auth_validation_public_key at '{}'",
auth_validation_public_key_path.display()
)
);
}
if !conf.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
conf.pg_distrib_dir.display()
);
}
Ok(conf)
}
/// subroutine of parse_config(), to parse the `[remote_storage]` table.
fn parse_remote_storage_config(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
let local_path = toml.get("local_path");
let bucket_name = toml.get("bucket_name");
let bucket_region = toml.get("bucket_region");
let max_concurrent_sync: NonZeroUsize = if let Some(s) = toml.get("max_concurrent_sync") {
parse_toml_u64("max_concurrent_sync", s)
.and_then(|toml_u64| {
toml_u64.try_into().with_context(|| {
format!("'max_concurrent_sync' value {} is too large", toml_u64)
})
})
.ok()
.and_then(NonZeroUsize::new)
.context("'max_concurrent_sync' must be a non-zero positive integer")?
} else {
NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
};
let max_sync_errors: NonZeroU32 = if let Some(s) = toml.get("max_sync_errors") {
parse_toml_u64("max_sync_errors", s)
.and_then(|toml_u64| {
toml_u64.try_into().with_context(|| {
format!("'max_sync_errors' value {} is too large", toml_u64)
})
})
.ok()
.and_then(NonZeroU32::new)
.context("'max_sync_errors' must be a non-zero positive integer")?
} else {
NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
};
let storage = match (local_path, bucket_name, bucket_region) {
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
(_, Some(_), None) => {
bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
}
(_, None, Some(_)) => {
bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
}
(None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
bucket_name: parse_toml_string("bucket_name", bucket_name)?,
bucket_region: parse_toml_string("bucket_region", bucket_region)?,
access_key_id: toml
.get("access_key_id")
.map(|access_key_id| parse_toml_string("access_key_id", access_key_id))
.transpose()?,
secret_access_key: toml
.get("secret_access_key")
.map(|secret_access_key| {
parse_toml_string("secret_access_key", secret_access_key)
})
.transpose()?,
prefix_in_bucket: toml
.get("prefix_in_bucket")
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
.transpose()?,
endpoint: toml
.get("endpoint")
.map(|endpoint| parse_toml_string("endpoint", endpoint))
.transpose()?,
}),
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
parse_toml_string("local_path", local_path)?,
)),
(Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
};
Ok(RemoteStorageConfig {
max_concurrent_sync,
max_sync_errors,
storage,
})
}
#[cfg(test)]
pub fn test_repo_dir(test_name: &str) -> PathBuf {
PathBuf::from(format!("../tmp_check/test_{}", test_name))
}
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
PageServerConf {
id: ZNodeId(0),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_period: Duration::from_secs(10),
gc_horizon: defaults::DEFAULT_GC_HORIZON,
gc_period: Duration::from_secs(10),
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
superuser: "zenith_admin".to_string(),
workdir: repo_dir,
pg_distrib_dir: PathBuf::new(),
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
remote_storage_config: None,
}
}
}
// Helper functions to parse a toml Item
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
let s = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
Ok(s.to_string())
}
fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
// A toml integer is signed, so it cannot represent the full range of a u64. That's OK
// for our use, though.
let i: i64 = item
.as_integer()
.with_context(|| format!("configure option {} is not an integer", name))?;
if i < 0 {
bail!("configure option {} cannot be negative", name);
}
Ok(i as u64)
}
fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
let s = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
Ok(humantime::parse_duration(s)?)
}
fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
let v = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
AuthType::from_str(v)
}
#[cfg(test)]
mod tests {
use std::fs;
use tempfile::{tempdir, TempDir};
use super::*;
const ALL_BASE_VALUES_TOML: &str = r#"
# Initial configuration file created by 'pageserver --init'
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
checkpoint_distance = 111 # in bytes
checkpoint_period = '111 s'
gc_period = '222 s'
gc_horizon = 222
page_cache_size = 444
max_file_descriptors = 333
# initial superuser role name to use when creating a new tenant
initial_superuser_name = 'zzzz'
id = 10
"#;
#[test]
fn parse_defaults() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
// we have to create dummy paths to overcome the validation errors
let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
let toml = config_string.parse()?;
let parsed_config =
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
});
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_period: humantime::parse_duration(defaults::DEFAULT_CHECKPOINT_PERIOD)?,
gc_horizon: defaults::DEFAULT_GC_HORIZON,
gc_period: humantime::parse_duration(defaults::DEFAULT_GC_PERIOD)?,
superuser: defaults::DEFAULT_SUPERUSER.to_string(),
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
workdir,
pg_distrib_dir,
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
remote_storage_config: None,
},
"Correct defaults should be used when no config values are provided"
);
Ok(())
}
#[test]
fn parse_basic_config() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let config_string = format!(
"{}pg_distrib_dir='{}'",
ALL_BASE_VALUES_TOML,
pg_distrib_dir.display()
);
let toml = config_string.parse()?;
let parsed_config =
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
});
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
listen_pg_addr: "127.0.0.1:64000".to_string(),
listen_http_addr: "127.0.0.1:9898".to_string(),
checkpoint_distance: 111,
checkpoint_period: Duration::from_secs(111),
gc_horizon: 222,
gc_period: Duration::from_secs(222),
superuser: "zzzz".to_string(),
page_cache_size: 444,
max_file_descriptors: 333,
workdir,
pg_distrib_dir,
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
remote_storage_config: None,
},
"Should be able to parse all basic config values correctly"
);
Ok(())
}
#[test]
fn parse_remote_fs_storage_config() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let local_storage_path = tempdir.path().join("local_remote_storage");
let identical_toml_declarations = &[
format!(
r#"[remote_storage]
local_path = '{}'"#,
local_storage_path.display()
),
format!(
"remote_storage={{local_path='{}'}}",
local_storage_path.display()
),
];
for remote_storage_config_str in identical_toml_declarations {
let config_string = format!(
r#"{}
pg_distrib_dir='{}'
{}"#,
ALL_BASE_VALUES_TOML,
pg_distrib_dir.display(),
remote_storage_config_str,
);
let toml = config_string.parse()?;
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
})
.remote_storage_config
.expect("Should have remote storage config for the local FS");
assert_eq!(
parsed_remote_storage_config,
RemoteStorageConfig {
max_concurrent_sync: NonZeroUsize::new(
defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC
)
.unwrap(),
max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
.unwrap(),
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
},
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
);
}
Ok(())
}
#[test]
fn parse_remote_s3_storage_config() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let bucket_name = "some-sample-bucket".to_string();
let bucket_region = "eu-north-1".to_string();
let prefix_in_bucket = "test_prefix".to_string();
let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
let secret_access_key = "SOMEsEcReTsd292v".to_string();
let endpoint = "http://localhost:5000".to_string();
let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
let max_sync_errors = NonZeroU32::new(222).unwrap();
let identical_toml_declarations = &[
format!(
r#"[remote_storage]
max_concurrent_sync = {}
max_sync_errors = {}
bucket_name = '{}'
bucket_region = '{}'
prefix_in_bucket = '{}'
access_key_id = '{}'
secret_access_key = '{}'
endpoint = '{}'"#,
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
),
format!(
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
),
];
for remote_storage_config_str in identical_toml_declarations {
let config_string = format!(
r#"{}
pg_distrib_dir='{}'
{}"#,
ALL_BASE_VALUES_TOML,
pg_distrib_dir.display(),
remote_storage_config_str,
);
let toml = config_string.parse()?;
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
})
.remote_storage_config
.expect("Should have remote storage config for S3");
assert_eq!(
parsed_remote_storage_config,
RemoteStorageConfig {
max_concurrent_sync,
max_sync_errors,
storage: RemoteStorageKind::AwsS3(S3Config {
bucket_name: bucket_name.clone(),
bucket_region: bucket_region.clone(),
access_key_id: Some(access_key_id.clone()),
secret_access_key: Some(secret_access_key.clone()),
prefix_in_bucket: Some(prefix_in_bucket.clone()),
endpoint: Some(endpoint.clone())
}),
},
"Remote storage config should correctly parse the S3 config"
);
}
Ok(())
}
fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> {
let tempdir_path = tempdir.path();
let workdir = tempdir_path.join("workdir");
fs::create_dir_all(&workdir)?;
let pg_distrib_dir = tempdir_path.join("pg_distrib");
fs::create_dir_all(&pg_distrib_dir)?;
let postgres_bin_dir = pg_distrib_dir.join("bin");
fs::create_dir_all(&postgres_bin_dir)?;
fs::write(postgres_bin_dir.join("postgres"), "I'm postgres, trust me")?;
Ok((workdir, pg_distrib_dir))
}
}
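The tests above also show the intended call pattern for PageServerConf::parse_and_validate: parse the config text into a toml_edit document, then validate it against the working directory. Below is a minimal sketch of that flow, assuming the config lives in a file named pageserver.toml under the workdir; the file name and error handling are illustrative, not taken from the repository.

use std::path::Path;

fn load_pageserver_config(workdir: &Path) -> anyhow::Result<PageServerConf> {
    // Read the raw TOML text; the file name is an assumption for this sketch.
    let config_string = std::fs::read_to_string(workdir.join("pageserver.toml"))?;
    // The type is inferred as a toml_edit document from parse_and_validate's
    // signature, exactly as in the tests above.
    let toml = config_string.parse()?;
    PageServerConf::parse_and_validate(&toml, workdir)
}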

@@ -0,0 +1,3 @@
pub mod models;
pub mod routes;
pub use routes::make_router;

@@ -0,0 +1,23 @@
use serde::{Deserialize, Serialize};
use crate::ZTenantId;
use zenith_utils::zid::ZNodeId;
#[derive(Serialize, Deserialize)]
pub struct BranchCreateRequest {
#[serde(with = "hex")]
pub tenant_id: ZTenantId,
pub name: String,
pub start_point: String,
}
#[derive(Serialize, Deserialize)]
pub struct TenantCreateRequest {
#[serde(with = "hex")]
pub tenant_id: ZTenantId,
}
#[derive(Serialize)]
pub struct StatusResponse {
pub id: ZNodeId,
}
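Because of the #[serde(with = "hex")] attributes, tenant ids travel over the HTTP API as hex strings. Below is a small sketch of the JSON bodies these structs correspond to, built with serde_json; the tenant id, branch name, and start point are made-up example values.

use serde_json::json;

fn main() {
    // Body for BranchCreateRequest: tenant_id is hex-encoded via #[serde(with = "hex")].
    let branch_create = json!({
        "tenant_id": "3d1f7595b4684fbd8c9a3faa0e9f5d42",
        "name": "experiment",
        "start_point": "main",
    });

    // Body for TenantCreateRequest: just the hex tenant id.
    let tenant_create = json!({
        "tenant_id": "3d1f7595b4684fbd8c9a3faa0e9f5d42",
    });

    println!("{}\n{}", branch_create, tenant_create);
}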

@@ -0,0 +1,438 @@
openapi: "3.0.2"
info:
title: Page Server API
version: "1.0"
servers:
- url: ""
paths:
/v1/status:
description: Healthcheck endpoint
get:
description: Healthcheck
security: []
responses:
"200":
description: OK
content:
application/json:
schema:
type: object
required:
- id
properties:
id:
type: integer
/v1/timeline/{tenant_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
get:
description: List tenant timelines
responses:
"200":
description: array of brief timeline descriptions
content:
application/json:
schema:
type: array
items:
# currently, just a timeline id string; once the remote index becomes accessible,
# at least a remote/local timeline field will be added
type: string
"400":
description: Error when no tenant id found in path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/timeline/{tenant_id}/{timeline_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
schema:
type: string
format: hex
get:
description: Get timeline info for tenant's remote timeline
responses:
"200":
description: TimelineInfo
content:
application/json:
schema:
$ref: "#/components/schemas/TimelineInfo"
"400":
description: Error when no tenant id or timeline id found in path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/branch/{tenant_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
get:
description: Get branches for tenant
responses:
"200":
description: BranchInfo
content:
application/json:
schema:
type: array
items:
$ref: "#/components/schemas/BranchInfo"
"400":
description: Error when no tenant id found in path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/branch/{tenant_id}/{branch_name}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
- name: branch_name
in: path
required: true
schema:
type: string
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
get:
description: Get a branch by name for tenant
responses:
"200":
description: BranchInfo
content:
application/json:
schema:
$ref: "#/components/schemas/BranchInfo"
"400":
description: Error when no tenant id found in path or no branch name
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/branch/:
post:
description: Create branch
requestBody:
content:
application/json:
schema:
type: object
required:
- "tenant_id"
- "name"
- "start_point"
properties:
tenant_id:
type: string
format: hex
name:
type: string
start_point:
type: string
responses:
"201":
description: BranchInfo
content:
application/json:
schema:
$ref: "#/components/schemas/BranchInfo"
"400":
description: Malformed branch create request
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/:
get:
description: Get tenants list
responses:
"200":
description: TenantInfo
content:
application/json:
schema:
type: array
items:
$ref: "#/components/schemas/TenantInfo"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
post:
description: Create tenant
requestBody:
content:
application/json:
schema:
type: object
required:
- "tenant_id"
properties:
tenant_id:
type: string
format: hex
responses:
"201":
description: CREATED
content:
application/json:
schema:
type: array
items:
type: string
"400":
description: Malformed tenant create request
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
components:
securitySchemes:
JWT:
type: http
scheme: bearer
bearerFormat: JWT
schemas:
TenantInfo:
type: object
required:
- id
- state
properties:
id:
type: string
state:
type: string
BranchInfo:
type: object
required:
- name
- timeline_id
- latest_valid_lsn
- current_logical_size
properties:
name:
type: string
timeline_id:
type: string
format: hex
ancestor_id:
type: string
format: hex
ancestor_lsn:
type: string
current_logical_size:
type: integer
current_logical_size_non_incremental:
type: integer
latest_valid_lsn:
type: integer
TimelineInfo:
type: object
required:
- timeline_id
- tenant_id
- last_record_lsn
- prev_record_lsn
- start_lsn
- disk_consistent_lsn
properties:
timeline_id:
type: string
format: hex
tenant_id:
type: string
format: hex
ancestor_timeline_id:
type: string
format: hex
last_record_lsn:
type: string
prev_record_lsn:
type: string
start_lsn:
type: string
disk_consistent_lsn:
type: string
timeline_state:
type: string
Error:
type: object
required:
- msg
properties:
msg:
type: string
UnauthorizedError:
type: object
required:
- msg
properties:
msg:
type: string
ForbiddenError:
type: object
required:
- msg
properties:
msg:
type: string
security:
- JWT: []
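Below is a hedged sketch of how a client might exercise two of these endpoints with the reqwest crate; the crate choice and the token value are assumptions for illustration, and the address matches the 127.0.0.1:9898 listen_http_addr used in the config tests earlier in this diff.

// Requires reqwest with the "blocking" and "json" features, plus serde_json.
use reqwest::blocking::Client;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // /v1/status is the unauthenticated healthcheck (security: [] above).
    let status: serde_json::Value = client
        .get("http://127.0.0.1:9898/v1/status")
        .send()?
        .json()?;
    println!("pageserver node id: {}", status["id"]);

    // The other endpoints are covered by the JWT bearer scheme, e.g. listing tenants.
    let tenants: serde_json::Value = client
        .get("http://127.0.0.1:9898/v1/tenant/")
        .bearer_auth("<jwt token>")
        .send()?
        .json()?;
    println!("tenants: {}", tenants);
    Ok(())
}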

Some files were not shown because too many files have changed in this diff Show More